Saturday, July 27, 2019

KNN SCI Kit Learn Tutorial

KNN SCI Kit Learn Tutorial

KNN SCI Kit Learn Tutorial

Soumil Nitin Shah

Bachelor in Electronic Engineering | Masters in Electrical Engineering | Master in Computer Engineering |

Hello! I’m Soumil Nitin Shah, a software and hardware developer based in New York City. I have completed my Bachelor’s in Electronic Engineering and a double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, web pages, software, REST APIs, databases, and much more. I have more than 2 years of experience in Python.

In [21]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

import scikitplot as skplt
In [26]:
# Load the dataset from the local file 'KNN_Project_Data' (read as CSV).
df = pd.read_csv('KNN_Project_Data')
# Bare expression: in a notebook cell this displays the column labels.
df.columns
Out[26]:
Index(['XVPM', 'GWYH', 'TRAT', 'TLLZ', 'IGGA', 'HYKR', 'EDFS', 'GUUB', 'MGJM',
       'JHZC', 'TARGET CLASS'],
      dtype='object')
In [74]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt
import matplotlib.pyplot as plt
%matplotlib inline

class NN():
    """K-nearest-neighbours classifier pipeline for the 'KNN_Project_Data' CSV.

    Constructing an instance reads the CSV, splits it into train/test sets
    and creates (but does not fit) a ``KNeighborsClassifier``.

    Most steps are exposed as properties, so they run on attribute access
    without parentheses (``obj.train``, ``obj.test``, ``obj.download_report``).

    Attributes:
        max_iter: Stored as given; not used by any code in this class.
        knn_value: Number of neighbours passed to ``KNeighborsClassifier``.
        X_Train, X_Test, Y_Train, Y_Test: Result of the train/test split.
        model: The ``KNeighborsClassifier`` built from ``knn_value``.
        errorrate: Error rates from the most recent ``choose_kvalue`` sweep.
    """

    def __init__(self, max_iter=150, knn_value=1):
        self.max_iter = max_iter
        self.knn_value = knn_value
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model
        self.errorrate = []

    @property
    def preprocess(self):
        """
        Read 'KNN_Project_Data' and split it into train and test sets.

        The ten feature columns form X and 'TARGET CLASS' forms Y; 40% of
        the rows are held out for testing with a fixed random_state of 101.

        return : X_Train, X_Test, Y_Train, Y_Test
        """
        df = pd.read_csv('KNN_Project_Data')
        X_Data = df[['XVPM', 'GWYH', 'TRAT', 'TLLZ', 'IGGA', 'HYKR', 'EDFS', 'GUUB', 'MGJM', 'JHZC']]
        Y_Data = df['TARGET CLASS']
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def create_model(self):
        """
        Build a new, unfitted classifier using ``self.knn_value`` neighbours.

        return : Model Object
        """
        model = KNeighborsClassifier(n_neighbors=self.knn_value)
        return model

    @property
    def train(self):
        """
        Fit ``self.model`` on the training split.

        return : None
        """
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """
        Predict labels for the test split with ``self.model``.

        return : pred [array of predicted labels for X_Test]
        """
        pred = self.model.predict(self.X_Test)
        return pred

    @property
    def download_report(self):
        """
        Evaluate ``self.model`` on the test split.

        Runs a fresh prediction, then plots the confusion matrix with
        scikit-plot as a side effect.

        return : (classification report, confusion matrix)
        """
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)
        skplt.metrics.plot_confusion_matrix(self.Y_Test,
                                            pred,
                                            figsize=(6, 6),
                                            title="Confusion Matrix")
        return report, matrix

    def choose_kvalue(self, value=40):
        """
        Plot the test-set error rate for every K in ``range(1, value)``.

        A separate classifier is fitted for each K; ``self.model`` is not
        touched. The rates of this sweep are stored in ``self.errorrate``.

        value : exclusive upper bound for K (default 40, i.e. K = 1..39)
        return : None
        """
        k_values = range(1, value)

        # Collect into a fresh list: appending to the list left by an earlier
        # call would make it longer than k_values and break the plot below.
        errors = []
        for k in k_values:
            candidate = KNeighborsClassifier(n_neighbors=k)
            candidate.fit(self.X_Train, self.Y_Train)
            pred = candidate.predict(self.X_Test)
            # Fraction of test rows whose prediction differs from the label.
            errors.append(np.mean(pred != self.Y_Test))
        self.errorrate = errors

        plt.figure(figsize=(10, 6))
        plt.plot(k_values, self.errorrate, color='blue', linestyle='dashed', marker='o', markerfacecolor='red', markersize=10)
        plt.title('Error Rate vs. K Value')
        plt.xlabel('K')
        plt.ylabel('Error Rate')

    @property
    def plot(self):
        """Placeholder; currently does nothing and returns None."""
        pass
In [76]:
# Build the pipeline with K=36; the constructor reads and splits the data.
neural = NN(knn_value=36)
# Plot error rate against K for K = 1..39 (default value=40) to judge the choice of K.
neural.choose_kvalue()
# `train`, `test` and `download_report` are properties: accessing them runs the step.
neural.train  # fits the model on the training split
pred = neural.test  # predictions for the test split
report, matrix = neural.download_report  # also draws the confusion-matrix plot
print(report)
              precision    recall  f1-score   support

           0       0.81      0.79      0.80       199
           1       0.80      0.81      0.80       201

   micro avg       0.80      0.80      0.80       400
   macro avg       0.80      0.80      0.80       400
weighted avg       0.80      0.80      0.80       400

In [ ]:
 

No comments:

Post a Comment

Learn How to configure your Spark Session to Join Managed (S3 Table Buckets) and Unmanaged Iceberg Tables | Hands on Labs

test-tble-bucket-joins Learn How to configure your Spark Session to Join Managed (S...