Saturday, July 27, 2019

Random Forest and decision Tree the Pythonic way

Decision Tree and Random Forest

Random Forest and decision Tree the Pythonic way

Soumil Nitin Shah

Bachelor in Electronic Engineering | Masters in Electrical Engineering | Master in Computer Engineering |

Hello! I’m Soumil Nitin Shah, a Software and Hardware Developer based in New York City. I have completed my Bachelor in Electronic Engineering and my double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, webpages, software, REST APIs, databases, and much more. I have more than 2 years of experience in Python.

Decision Tree

In [65]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a ``DecisionTreeClassifier`` on the loan dataset.

    Constructing an instance reads the CSV, one-hot encodes the ``purpose``
    column, splits the rows into train/test sets and creates an unfitted
    model.  ``train``, ``test`` and ``download_report`` are properties, so
    they execute on attribute access (``obj.train``), not on call.

    NOTE: a second class that is also named ``NN`` (a random-forest variant)
    is defined later in this file; when both definitions are executed in the
    same namespace, the later one replaces this one.
    """

    # Columns of the one-hot encoded frame that are used as model inputs.
    FEATURE_COLUMNS = (
        'int.rate', 'installment', 'log.annual.inc', 'dti',
        'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
        'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
        'purpose_credit_card', 'purpose_debt_consolidation',
        'purpose_educational', 'purpose_home_improvement',
        'purpose_major_purchase', 'purpose_small_business',
    )

    # Column the model is trained to predict.
    TARGET_COLUMN = 'credit.policy'

    def __init__(self, max_iter=150, *, data_path='loan_data.csv'):
        """Load the data, split it and create the (unfitted) model.

        Args:
            max_iter: Stored on the instance as ``self.max_iter``; no method
                of this class reads it. Kept so existing callers still work.
            data_path: Path of the CSV file handed to ``pandas.read_csv``.
                Defaults to ``'loan_data.csv'``, the previously hard-coded
                location.
        """
        self.max_iter = max_iter
        self.data_path = data_path
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model

    @property
    def preprocess(self):
        """Read the CSV and return ``(X_Train, X_Test, Y_Train, Y_Test)``.

        The ``purpose`` column is one-hot encoded with ``drop_first=True``.
        The split keeps 40% of the rows for testing and uses a fixed
        ``random_state`` of 101, so the partition is repeatable.

        Note: every access re-reads the file and recomputes the split.
        """
        df = pd.read_csv(self.data_path)

        # Convert the categorical "purpose" column into indicator columns.
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)

        X_Data = final_data[list(self.FEATURE_COLUMNS)]
        Y_Data = final_data[self.TARGET_COLUMN]

        X_Train, X_Test, Y_Train, Y_Test = train_test_split(
            X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def create_model(self):
        """Return a new ``DecisionTreeClassifier`` with default settings."""
        return DecisionTreeClassifier()

    @property
    def train(self):
        """Fit ``self.model`` on the training split; evaluates to ``None``."""
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """Return the model's predictions for ``self.X_Test``.

        Only the predictions are returned. Each access calls ``predict``
        again.
        """
        return self.model.predict(self.X_Test)

    @property
    def download_report(self):
        """Return ``(report, matrix)`` for the test split and plot the matrix.

        ``report`` is the output of ``classification_report`` and ``matrix``
        the output of ``confusion_matrix``, both computed from ``self.Y_Test``
        and a fresh prediction. As a side effect the confusion matrix is
        drawn with ``skplt.metrics.plot_confusion_matrix``.
        """
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)

        skplt.metrics.plot_confusion_matrix(self.Y_Test,
                                            pred,
                                            figsize=(6, 6),
                                            title="Confusion Matrix")
        return report, matrix

    @property
    def plot(self):
        """Placeholder; currently does nothing and evaluates to ``None``."""
        pass
In [69]:
# Build the pipeline; the constructor loads the CSV and splits the data.
# NOTE(review): two classes named NN are defined in this file; this call uses
# whichever definition was executed most recently - confirm it is the
# intended one.
neural = NN()

# `train` is a property: merely accessing it fits the model.
neural.train

# Predictions for the held-out split, then the text report and matrix.
pred = neural.test
report, matrix = neural.download_report

print(report, matrix, sep="\n")
/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
              precision    recall  f1-score   support

           0       0.97      0.93      0.95       724
           1       0.98      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.96      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 674   50]
 [  18 3090]]

Random Forest

In [67]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a ``RandomForestClassifier`` on the loan dataset.

    Constructing an instance reads the CSV, one-hot encodes the ``purpose``
    column, splits the rows into train/test sets and creates an unfitted
    model.  ``train``, ``test`` and ``download_report`` are properties, so
    they execute on attribute access (``obj.train``), not on call.

    NOTE: an earlier class that is also named ``NN`` (a decision-tree
    variant) is defined above in this file; when both definitions are
    executed in the same namespace, this one replaces the earlier one.
    """

    # Columns of the one-hot encoded frame that are used as model inputs.
    FEATURE_COLUMNS = (
        'int.rate', 'installment', 'log.annual.inc', 'dti',
        'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
        'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
        'purpose_credit_card', 'purpose_debt_consolidation',
        'purpose_educational', 'purpose_home_improvement',
        'purpose_major_purchase', 'purpose_small_business',
    )

    # Column the model is trained to predict.
    TARGET_COLUMN = 'credit.policy'

    def __init__(self, max_iter=150, *, data_path='loan_data.csv',
                 n_estimators=None):
        """Load the data, split it and create the (unfitted) model.

        Args:
            max_iter: Stored on the instance as ``self.max_iter``; no method
                of this class reads it. Kept so existing callers still work.
            data_path: Path of the CSV file handed to ``pandas.read_csv``.
                Defaults to ``'loan_data.csv'``, the previously hard-coded
                location.
            n_estimators: Number of trees passed to
                ``RandomForestClassifier``. ``None`` (the default) keeps the
                previous behaviour of using the library's own default, which
                differs between scikit-learn versions; pass an explicit value
                for version-independent results.
        """
        self.max_iter = max_iter
        self.data_path = data_path
        self.n_estimators = n_estimators
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model

    @property
    def preprocess(self):
        """Read the CSV and return ``(X_Train, X_Test, Y_Train, Y_Test)``.

        The ``purpose`` column is one-hot encoded with ``drop_first=True``.
        The split keeps 40% of the rows for testing and uses a fixed
        ``random_state`` of 101, so the partition is repeatable.

        Note: every access re-reads the file and recomputes the split.
        """
        df = pd.read_csv(self.data_path)

        # Convert the categorical "purpose" column into indicator columns.
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)

        X_Data = final_data[list(self.FEATURE_COLUMNS)]
        Y_Data = final_data[self.TARGET_COLUMN]

        X_Train, X_Test, Y_Train, Y_Test = train_test_split(
            X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def create_model(self):
        """Return a new, unfitted ``RandomForestClassifier``.

        ``n_estimators`` is forwarded only when it was set explicitly, so the
        default construction is the same call as before.
        """
        if self.n_estimators is None:
            return RandomForestClassifier()
        return RandomForestClassifier(n_estimators=self.n_estimators)

    @property
    def train(self):
        """Fit ``self.model`` on the training split; evaluates to ``None``."""
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """Return the model's predictions for ``self.X_Test``.

        Only the predictions are returned. Each access calls ``predict``
        again.
        """
        return self.model.predict(self.X_Test)

    @property
    def download_report(self):
        """Return ``(report, matrix)`` for the test split and plot the matrix.

        ``report`` is the output of ``classification_report`` and ``matrix``
        the output of ``confusion_matrix``, both computed from ``self.Y_Test``
        and a fresh prediction. As a side effect the confusion matrix is
        drawn with ``skplt.metrics.plot_confusion_matrix``.
        """
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)

        skplt.metrics.plot_confusion_matrix(self.Y_Test,
                                            pred,
                                            figsize=(6, 6),
                                            title="Confusion Matrix")
        return report, matrix

    @property
    def plot(self):
        """Placeholder; currently does nothing and evaluates to ``None``."""
        pass

# Build the pipeline; the constructor loads the CSV and splits the data.
neural = NN()

# `train` is a property: merely accessing it fits the model.
neural.train

# Predictions for the held-out split, then the text report and matrix.
pred = neural.test
report, matrix = neural.download_report

print(report, matrix, sep="\n")
/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
              precision    recall  f1-score   support

           0       0.97      0.94      0.96       724
           1       0.99      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.97      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 683   41]
 [  19 3089]]

No comments:

Post a Comment

Learn How to configure your Spark Session to Join Managed (S3 Table Buckets) and Unmanaged Iceberg Tables | Hands on Labs

test-tble-bucket-joins Learn How to configure your Spark Session to Join Managed (S...