#! /usr/bin/env python3
#
def wave_regression_knn ( ):

#*****************************************************************************80
#
## wave_regression_knn() uses k-nearest neighbor regression on wave data.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    15 June 2023
#
#  Author:
#
#    Andreas Mueller, Sarah Guido.
#    Modifications by John Burkardt.
#
#  Reference:
#
#    Andreas Mueller, Sarah Guido,
#    Introduction to Machine Learning with Python,
#    O'Reilly, 2017,
#    ISBN: 978-1-449-36941-5
#
  import matplotlib.pyplot as plt
  import mglearn
  import numpy as np
  import pandas as pd
  import platform
  import sklearn

  print ( '' )
  print ( 'wave_regression_knn():' )
  print ( '  Python version: ' + platform.python_version ( ) )
  print ( '  scikit-learn version: ' + sklearn.__version__ )
#
#  Generate the dataset.
#
  print ( '' )
  print ( '  Generate the wave dataset, (X, y).' )
  X, y = mglearn.datasets.make_wave ( n_samples = 40 )
  print ( '  X.shape:', X.shape )
#
#  Plot the dataset.
#
  print ( '  Plot the dataset.' )
  plt.clf ( )
  plt.plot ( X, y, 'o' )
  plt.ylim ( -3.0, +3.0 )
  plt.xlabel ( 'Feature' )
  plt.ylabel ( 'Target' )
  filename = 'wave_regression_data.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Create plots demonstrating k-nearest neighbors with k = 1 and k = 3.
#
  print ( '  Demonstrate k-nearest-neighbors with k = 1.' )
  plt.clf ( )
  mglearn.plots.plot_knn_regression ( n_neighbors = 1 )
  filename = 'wave_regression_k1.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )

  print ( '  Demonstrate k-nearest-neighbors with k = 3.' )
  plt.clf ( )
  mglearn.plots.plot_knn_regression ( n_neighbors = 3 )
  filename = 'wave_regression_k3.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Split the dataset into training and test sets.
#
  from sklearn.model_selection import train_test_split
  X_train, X_test, y_train, y_test = train_test_split ( X, y, random_state = 0 )
#
#  Create the model.  With k = 3, each prediction is the mean of the targets
#  of the 3 nearest training points.
#
  from sklearn.neighbors import KNeighborsRegressor
  reg = KNeighborsRegressor ( n_neighbors = 3 )
  reg.fit ( X_train, y_train )

  print ( '' )
  print ( 'Test set predictions:' )
  print ( reg.predict ( X_test ) )
  print ( 'Test set R^2:' )
  print ( reg.score ( X_test, y_test ) )
#
#  Show predictions for all possible feature values.
#
  print ( '' )
  print ( 'Plot predictions for all possible values of feature:' )
  fig, axes = plt.subplots ( 1, 3, figsize = ( 15, 4 ) )
  line = np.linspace ( -3.0, 3.0, 1000 ).reshape ( -1, 1 )
  for n_neighbors, ax in zip ( [ 1, 3, 9 ], axes ):
    reg = KNeighborsRegressor ( n_neighbors = n_neighbors )
    reg.fit ( X_train, y_train )
    ax.plot ( line, reg.predict ( line ) )
    ax.plot ( X_train, y_train, '^', c = mglearn.cm2(0), markersize = 8 )
    ax.plot ( X_test, y_test, 'v', c = mglearn.cm2(1), markersize = 8 )
    ax.set_title (
      "{} neighbor(s)\n train score: {:.2f} test score: {:.2f}".format (
      n_neighbors, reg.score ( X_train, y_train ), reg.score ( X_test, y_test ) ) )
    ax.set_xlabel ( 'Feature' )
    ax.set_ylabel ( 'Target' )
  axes[0].legend ( [ "Model predictions", "Training data/target",
    "Test data/target" ], loc = "best" )
  filename = 'wave_regression_all.png'
  plt.savefig ( filename )
  print ( "  Graphics saved as '" + filename + "'" )
#
#  Produce linear regression coefficients y = w[0] * x + b.
#
  print ( '' )
  print ( '  Produce linear regression coefficients y = w[0] * x + b' )
  mglearn.plots.plot_linear_regression_wave ( )
#
#  Terminate.
#
  print ( '' )
  print ( 'wave_regression_knn():' )
  print ( '  Normal end of execution.' )

  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return

if ( __name__ == '__main__' ):
  timestamp ( )
  wave_regression_knn ( )
  timestamp ( )