Saturday, July 27, 2019

KNN scikit-learn Tutorial

KNN scikit-learn Tutorial

KNN scikit-learn Tutorial

Soumil Nitin Shah

Bachelor's in Electronic Engineering | Master's in Electrical Engineering | Master's in Computer Engineering

Hello! I’m Soumil Nitin Shah, a Software and Hardware Developer based in New York City. I have completed my Bachelor’s in Electronic Engineering and my double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, webpages, software, REST APIs, databases, and much more. I have more than 2 years of experience in Python.

In [21]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

import scikitplot as skplt
In [26]:
# Load the data set from the local 'KNN_Project_Data' file into a DataFrame.
df = pd.read_csv('KNN_Project_Data')
# Trailing expression: the notebook echoes the column labels as the cell output.
df.columns
Out[26]:
Index(['XVPM', 'GWYH', 'TRAT', 'TLLZ', 'IGGA', 'HYKR', 'EDFS', 'GUUB', 'MGJM',
       'JHZC', 'TARGET CLASS'],
      dtype='object')
In [74]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt
import matplotlib.pyplot as plt
%matplotlib inline

class NN():
    """
    K-nearest-neighbours pipeline for the 'KNN_Project_Data' CSV file.

    Creating an instance reads the CSV, splits it into train/test partitions
    and builds an unfitted KNeighborsClassifier. Most steps are exposed as
    properties, so they run on plain attribute access (``obj.train``,
    ``obj.test``, ``obj.download_report``) instead of a call.
    """

    def __init__(self, max_iter=150, knn_value=1):
        """
        max_iter  : stored on the instance; no method of this class reads it
        knn_value : n_neighbors passed to KNeighborsClassifier
        """
        self.max_iter = max_iter
        self.knn_value = knn_value
        # Error rates recorded by the most recent choose_kvalue() sweep.
        self.errorrate = []
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model

    @property
    def preprocess(self):
        """
        Read 'KNN_Project_Data' and split it into train and test partitions.

        return : X_Train, X_Test, Y_Train, Y_Test
        """
        df = pd.read_csv('KNN_Project_Data')
        X_Data = df[['XVPM', 'GWYH', 'TRAT', 'TLLZ', 'IGGA', 'HYKR', 'EDFS', 'GUUB', 'MGJM', 'JHZC']]
        Y_Data = df['TARGET CLASS']
        # test_size=0.4 holds out 40% of the rows; the fixed random_state
        # makes the split reproducible between runs.
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def create_model(self):
        """
        return : a new, unfitted KNeighborsClassifier using self.knn_value
        """
        model = KNeighborsClassifier(n_neighbors=self.knn_value)
        return model

    @property
    def train(self):
        """
        Fit self.model on the training partition.

        return : None
        """
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """
        Predict labels for the test partition with self.model.

        return : pred [array of predicted labels]
        """
        pred = self.model.predict(self.X_Test)
        return pred

    @property
    def download_report(self):
        """
        Evaluate self.model on the test partition and plot the confusion matrix.

        return : report (classification report), matrix (confusion matrix)
        """
        # Accessing self.test runs a fresh prediction on the test partition.
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)
        skplt.metrics.plot_confusion_matrix(self.Y_Test,
                                            pred,
                                            figsize=(6, 6),
                                            title="Confusion Matrix")
        return report, matrix

    def choose_kvalue(self, value=40):
        """
        Fit one classifier for every K in range(1, value), store the test-set
        error rates in self.errorrate and plot error rate against K.

        self.errorrate is rebuilt on every call. Appending to the previous
        list would leave more error values than K values on a second call, so
        the plot would fail and the stored rates would mix different sweeps.

        value  : exclusive upper bound for K
        return : None
        """
        k_values = range(1, value)
        errors = []
        for k in k_values:
            candidate = KNeighborsClassifier(n_neighbors=k)
            candidate.fit(self.X_Train, self.Y_Train)
            pred = candidate.predict(self.X_Test)
            # Fraction of test rows whose predicted label differs from the true one.
            errors.append(np.mean(pred != self.Y_Test))
        self.errorrate = errors

        plt.figure(figsize=(10, 6))
        plt.plot(k_values, errors, color='blue', linestyle='dashed', marker='o', markerfacecolor='red', markersize=10)
        plt.title('Error Rate vs. K Value')
        plt.xlabel('K')
        plt.ylabel('Error Rate')

    @property
    def plot(self):
        """
        Placeholder; does nothing and returns None.
        """
        pass
In [76]:
# Build the pipeline with 36 neighbours; the constructor loads and splits the data.
neural = NN(knn_value=36)
# Sweep K over the default range(1, 40) and plot the error rate for each K.
neural.choose_kvalue()
# `train`, `test` and `download_report` are properties, so attribute access alone runs them.
neural.train
pred = neural.test
report, matrix = neural.download_report
print(report)
              precision    recall  f1-score   support

           0       0.81      0.79      0.80       199
           1       0.80      0.81      0.80       201

   micro avg       0.80      0.80      0.80       400
   macro avg       0.80      0.80      0.80       400
weighted avg       0.80      0.80      0.80       400

In [ ]:
 

No comments:

Post a Comment

Learn How to Connect to the Glue Data Catalog using AWS Glue Iceberg REST endpoint

gluecat Learn How to Connect to the Glue Data Catalog using AWS Glue Iceberg REST e...