Random Forest and decision Tree the Pythonic way¶

Soumil Nitin Shah¶

Bachelor's in Electronic Engineering | Master's in Electrical Engineering | Master's in Computer Engineering

Website : https://soumilshah.herokuapp.com
Github: https://github.com/soumilshah1995
Linkedin: https://www.linkedin.com/in/shah-soumil/
Blog: https://soumilshah1995.blogspot.com/
Youtube : https://www.youtube.com/channel/UC_eOodxvwS_H7x2uLQa-svw?view_as=subscriber
Facebook Page : https://www.facebook.com/soumilshah1995/
Email : shahsoumil519@gmail.com
projects : https://soumilshah.herokuapp.com/project

Hello! I’m Soumil Nitin Shah, a software and hardware developer based in New York City. I have completed my Bachelor’s in Electronic Engineering and a double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, web pages, software, REST APIs, databases, and much more. I have more than 2 years of experience in Python.

Decision Tree¶

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a decision-tree classifier on the loan dataset.

    Construction loads the CSV, one-hot encodes the ``purpose`` column,
    splits the rows into train and test partitions and builds an unfitted
    ``DecisionTreeClassifier``.  The public members are properties because
    existing callers use attribute access (``obj.train``, ``obj.test``,
    ``obj.download_report``).
    """

    # Predictor columns selected after ``purpose`` has been one-hot encoded.
    FEATURE_COLUMNS = (
        'int.rate', 'installment', 'log.annual.inc', 'dti',
        'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
        'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
        'purpose_credit_card', 'purpose_debt_consolidation',
        'purpose_educational', 'purpose_home_improvement',
        'purpose_major_purchase', 'purpose_small_business',
    )

    # Column the classifier is trained to predict.
    TARGET_COLUMN = 'credit.policy'

    def __init__(self, max_iter=150, *, data_path='loan_data.csv',
                 test_size=0.4, random_state=101):
        """Load the data, split it and create the (unfitted) model.

        Args:
            max_iter: Stored on the instance but not read by anything in
                this class; kept so existing ``NN(max_iter=...)`` calls
                continue to work.
            data_path: Location of the loan CSV passed to ``pd.read_csv``.
            test_size: Fraction of rows held out for testing.
            random_state: Seed used for the train/test split and for the
                classifier, so repeated runs give the same result.
        """
        self.max_iter = max_iter
        self.data_path = data_path
        self.test_size = test_size
        self.random_state = random_state
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model

    @property
    def preprocess(self):
        """Read the CSV and return the train/test partitions.

        Note that every access re-reads the file and re-splits the data.

        Returns:
            tuple: ``(X_Train, X_Test, Y_Train, Y_Test)`` in the order
            produced by ``train_test_split``.
        """
        df = pd.read_csv(self.data_path)

        # Convert the categorical ``purpose`` column into indicator columns.
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)

        X_Data = final_data[list(self.FEATURE_COLUMNS)]
        Y_Data = final_data[self.TARGET_COLUMN]

        return train_test_split(
            X_Data,
            Y_Data,
            test_size=self.test_size,
            random_state=self.random_state,
        )

    @property
    def create_model(self):
        """Return a new, unfitted ``DecisionTreeClassifier``.

        The classifier is seeded with the same ``random_state`` as the
        split so that fitting is repeatable.
        """
        return DecisionTreeClassifier(random_state=self.random_state)

    @property
    def train(self):
        """Fit ``self.model`` on the training partition; returns ``None``."""
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """Return the model's predictions for ``self.X_Test``.

        Predictions are recomputed on every access.
        """
        return self.model.predict(self.X_Test)

    @property
    def download_report(self):
        """Evaluate the model on the test partition.

        Plots the confusion matrix with scikit-plot as a side effect.

        Returns:
            tuple: ``(report, matrix)`` where ``report`` is the output of
            ``classification_report`` and ``matrix`` the output of
            ``confusion_matrix``.
        """
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)

        skplt.metrics.plot_confusion_matrix(
            self.Y_Test,
            pred,
            figsize=(6, 6),
            title="Confusion Matrix",
        )
        return report, matrix

    @property
    def plot(self):
        """Placeholder; currently does nothing and returns ``None``."""
        pass

# Build the pipeline: loads the data, splits it and creates the model.
neural = NN()

# ``train`` is a property, so plain attribute access fits the model.
neural.train

# Predictions for the held-out test partition.
pred = neural.test

# Classification report and confusion matrix (also draws the matrix plot).
report, matrix = neural.download_report
print(report, matrix, sep="\n")

/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)

              precision    recall  f1-score   support

           0       0.97      0.93      0.95       724
           1       0.98      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.96      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 674   50]
 [  18 3090]]

Random Forest¶

import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a random-forest classifier on the loan dataset.

    Construction loads the CSV, one-hot encodes the ``purpose`` column,
    splits the rows into train and test partitions and builds an unfitted
    ``RandomForestClassifier``.  The public members are properties because
    existing callers use attribute access (``obj.train``, ``obj.test``,
    ``obj.download_report``).
    """

    # Predictor columns selected after ``purpose`` has been one-hot encoded.
    FEATURE_COLUMNS = (
        'int.rate', 'installment', 'log.annual.inc', 'dti',
        'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
        'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
        'purpose_credit_card', 'purpose_debt_consolidation',
        'purpose_educational', 'purpose_home_improvement',
        'purpose_major_purchase', 'purpose_small_business',
    )

    # Column the classifier is trained to predict.
    TARGET_COLUMN = 'credit.policy'

    def __init__(self, max_iter=150, *, data_path='loan_data.csv',
                 test_size=0.4, random_state=101, n_estimators=100):
        """Load the data, split it and create the (unfitted) model.

        Args:
            max_iter: Stored on the instance but not read by anything in
                this class; kept so existing ``NN(max_iter=...)`` calls
                continue to work.
            data_path: Location of the loan CSV passed to ``pd.read_csv``.
            test_size: Fraction of rows held out for testing.
            random_state: Seed used for the train/test split and for the
                forest, so repeated runs give the same result.
            n_estimators: Number of trees in the forest.  Passed
                explicitly so the model does not depend on the installed
                scikit-learn's default (10 before 0.22, 100 from 0.22),
                which also avoids the FutureWarning older releases emit.
        """
        self.max_iter = max_iter
        self.data_path = data_path
        self.test_size = test_size
        self.random_state = random_state
        self.n_estimators = n_estimators
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess
        self.model = self.create_model

    @property
    def preprocess(self):
        """Read the CSV and return the train/test partitions.

        Note that every access re-reads the file and re-splits the data.

        Returns:
            tuple: ``(X_Train, X_Test, Y_Train, Y_Test)`` in the order
            produced by ``train_test_split``.
        """
        df = pd.read_csv(self.data_path)

        # Convert the categorical ``purpose`` column into indicator columns.
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)

        X_Data = final_data[list(self.FEATURE_COLUMNS)]
        Y_Data = final_data[self.TARGET_COLUMN]

        return train_test_split(
            X_Data,
            Y_Data,
            test_size=self.test_size,
            random_state=self.random_state,
        )

    @property
    def create_model(self):
        """Return a new, unfitted ``RandomForestClassifier``.

        The forest is built with the configured number of trees and the
        same ``random_state`` as the split so that fitting is repeatable.
        """
        return RandomForestClassifier(
            n_estimators=self.n_estimators,
            random_state=self.random_state,
        )

    @property
    def train(self):
        """Fit ``self.model`` on the training partition; returns ``None``."""
        self.model.fit(self.X_Train, self.Y_Train)

    @property
    def test(self):
        """Return the model's predictions for ``self.X_Test``.

        Predictions are recomputed on every access.
        """
        return self.model.predict(self.X_Test)

    @property
    def download_report(self):
        """Evaluate the model on the test partition.

        Plots the confusion matrix with scikit-plot as a side effect.

        Returns:
            tuple: ``(report, matrix)`` where ``report`` is the output of
            ``classification_report`` and ``matrix`` the output of
            ``confusion_matrix``.
        """
        pred = self.test
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)

        skplt.metrics.plot_confusion_matrix(
            self.Y_Test,
            pred,
            figsize=(6, 6),
            title="Confusion Matrix",
        )
        return report, matrix

    @property
    def plot(self):
        """Placeholder; currently does nothing and returns ``None``."""
        pass

# Build the pipeline: loads the data, splits it and creates the model.
neural = NN()

# ``train`` is a property, so plain attribute access fits the model.
neural.train

# Predictions for the held-out test partition.
pred = neural.test

# Classification report and confusion matrix (also draws the matrix plot).
report, matrix = neural.download_report
print(report, matrix, sep="\n")

/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)

              precision    recall  f1-score   support

           0       0.97      0.94      0.96       724
           1       0.99      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.97      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 683   41]
 [  19 3089]]

Pythonist

Saturday, July 27, 2019

Random Forest and decision Tree the Pythonic way

Random Forest and decision Tree the Pythonic way¶

Soumil Nitin Shah¶

Decision Tree¶

Random Forest¶

No comments:

Post a Comment

Getting started with LakeFS and Apache Iceberg Running Locally