#! /usr/bin/env python3
#
def wave_regression_knn ( ):

#*****************************************************************************80
#
## wave_regression_knn() uses k-nearest neighbor regression on wave data.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    15 June 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import pandas as pd
  import platform
  import sklearn

  print ( '' )
  print ( 'wave_regression_knn():' )
  print ( '  Python version: ' + platform.python_version ( ) )
  print ( '  scikit-learn version: ' + sklearn.__version__ )
#
#  Generate the dataset.
#
  print ( '' )
  print ( '  Generate the wave dataset, (X, y).' )
  X, y = mglearn.datasets.make_wave ( n_samples = 40 )
  print ( '  X.shape:', X.shape )
#
#  Plot the dataset.
#
  print ( '  Plot the dataset.' )
  plt.clf ( )
  plt.plot ( X, y, 'o' )
  plt.ylim ( -3.0, +3.0 )
  plt.xlabel ( 'Feature' )
  plt.ylabel ( 'Target' )
  filename = 'wave_regression_data.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Create plots demonstrating k-nearest neighbors with k = 1 and k = 3.
#
  print ( '  Demonstrate k-nearest-neighbors with k = 1.' )
  plt.clf ( )
  mglearn.plots.plot_knn_regression ( n_neighbors = 1 )
  filename = 'wave_regression_k1.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )

  print ( '  Demonstrate k-nearest-neighbors with k = 3.' )
  plt.clf ( )
  mglearn.plots.plot_knn_regression ( n_neighbors = 3 )
  filename = 'wave_regression_k3.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Split the dataset into training and test sets.
#
  from sklearn.model_selection import train_test_split
  X_train, X_test, y_train, y_test = train_test_split ( X, y, random_state = 0 )
#
#  Create the model.  With k = 3, each prediction is the mean of the targets
#  of the 3 nearest training points.
#
  from sklearn.neighbors import KNeighborsRegressor
  reg = KNeighborsRegressor ( n_neighbors = 3 )
  reg.fit ( X_train, y_train )

  print ( '' )
  print ( 'Test set predictions:' )
  print ( reg.predict ( X_test ) )
  print ( 'Test set R^2:' )
  print ( reg.score ( X_test, y_test ) )
#
#  Show predictions for all possible feature values.
#
  print ( '' )
  print ( 'Plot predictions for all possible values of feature:' )
  fig, axes = plt.subplots ( 1, 3, figsize = ( 15, 4 ) )
  line = np.linspace ( -3.0, 3.0, 1000 ).reshape ( -1, 1 )
  for n_neighbors, ax in zip ( [ 1, 3, 9 ], axes ):
    reg = KNeighborsRegressor ( n_neighbors = n_neighbors )
    reg.fit ( X_train, y_train )
    ax.plot ( line, reg.predict ( line ) )
    ax.plot ( X_train, y_train, '^', c = mglearn.cm2(0), markersize = 8 )
    ax.plot ( X_test, y_test, 'v', c = mglearn.cm2(1), markersize = 8 )
    ax.set_title (
      "{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format (
      n_neighbors, reg.score ( X_train, y_train ), reg.score ( X_test, y_test ) ) )
    ax.set_xlabel ( 'Feature' )
    ax.set_ylabel ( 'Target' )
  axes[0].legend ( [ "Model predictions", "Training data/target",
    "Test data/target" ], loc = "best" )
  filename = 'wave_regression_all.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Produce linear regression coefficients y = w[0] * x + b.
#
  print ( '' )
  print ( '  Produce linear regression coefficients y = w[0] * x + b' )
  mglearn.plots.plot_linear_regression_wave ( )
#
#  Terminate.
#
  print ( '' )
  print ( 'wave_regression_knn():' )
  print ( '  Normal end of execution.' )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  wave_regression_knn ( )
  timestamp ( )