Saturday, July 27, 2019

Random Forest and decision Tree the Pythonic way

Decision Tree and Random Forest

Random Forest and decision Tree the Pythonic way

Soumil Nitin Shah

Bachelor in Electronic Engineering | Masters in Electrical Engineering | Master in Computer Engineering |

Hello! I’m Soumil Nitin Shah, a software and hardware developer based in New York City. I have completed my Bachelor’s in Electronic Engineering and a double Master’s in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, web pages, software, REST APIs, databases, and much more. I have more than 2 years of experience in Python.

Decision Tree

In [65]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a decision-tree classifier on ``loan_data.csv``.

    Despite the class name, the estimator is a scikit-learn
    ``DecisionTreeClassifier``. The target column is ``credit.policy``;
    every other numeric column plus the one-hot encoded ``purpose``
    column is used as a feature.

    Typical use::

        clf = NN()
        clf.train()
        report, matrix = clf.download_report()
    """

    def __init__(self, max_iter=150):
        """Load and split the data, then build an (unfitted) model.

        :param max_iter: Accepted for backward compatibility only; it is
            not used by the decision tree.
        """
        # Both helpers must be *called*; storing the bound methods would
        # break the tuple unpacking and leave ``self.model`` unusable.
        (self.X_Train, self.X_Test,
         self.Y_Train, self.Y_Test) = self.preprocess()
        self.model = self.create_model()

    def preprocess(self):
        """Read the CSV, encode ``purpose`` and split into train/test sets.

        :return: ``(X_Train, X_Test, Y_Train, Y_Test)`` from a 60/40 split
            with a fixed ``random_state`` so results are reproducible.
        """
        # Read the dataset
        df = pd.read_csv('loan_data.csv')
        # Convert the categorical column "purpose" into indicator columns
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)
        # NOTE(review): 'log.annual.inc' and 'days.with.cr.line' replace two
        # empty column names ('') that would raise KeyError; they are
        # inferred from the column order of the loan dataset -- confirm
        # against the header of loan_data.csv.
        feature_columns = [
            'int.rate', 'installment', 'log.annual.inc', 'dti',
            'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
            'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
            'purpose_credit_card', 'purpose_debt_consolidation',
            'purpose_educational', 'purpose_home_improvement',
            'purpose_major_purchase', 'purpose_small_business',
        ]
        X_Data = final_data[feature_columns]
        Y_Data = final_data['credit.policy']
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(
            X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    def create_model(self):
        """
        :return: Model object (an unfitted ``DecisionTreeClassifier``)
        """
        model = DecisionTreeClassifier()
        return model

    def train(self):
        """
        Train the model on the training split.

        :return: None
        """
        self.model.fit(self.X_Train, self.Y_Train)

    def test(self):
        """
        Predict labels for the held-out test split.

        ``train()`` must have been called first.

        :return: pred [array of predicted ``credit.policy`` labels]
        """
        pred = self.model.predict(self.X_Test)
        return pred

    def download_report(self):
        """
        Evaluate the model on the test split and plot the confusion matrix.

        :return: ``(report, matrix)`` -- the classification report (str)
            and the confusion matrix (array)
        """
        pred = self.test()
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)
        skplt.metrics.plot_confusion_matrix(self.Y_Test, pred,
                                           title="Confusion Matrix")
        return report, matrix

    def plot(self):
        """Placeholder: the body of this method is missing from the source.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("NN.plot() has no implementation")
In [69]:
# Build the classifier, fit it, then evaluate on the held-out split.
neural = NN()
neural.train()  # the model must be fitted before it can predict
pred = neural.test()
report, matrix = neural.download_report()
# Show the metrics reproduced in the output below.
print(report)
print(matrix)
/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/ FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
              precision    recall  f1-score   support

           0       0.97      0.93      0.95       724
           1       0.98      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.96      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 674   50]
 [  18 3090]]

Random Forest

In [67]:
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import scikitplot as skplt

class NN():
    """Train and evaluate a random-forest classifier on ``loan_data.csv``.

    Despite the class name, the estimator is a scikit-learn
    ``RandomForestClassifier``. The target column is ``credit.policy``;
    every other numeric column plus the one-hot encoded ``purpose``
    column is used as a feature.

    Typical use::

        clf = NN()
        clf.train()
        report, matrix = clf.download_report()
    """

    def __init__(self, max_iter=150):
        """Load and split the data, then build an (unfitted) model.

        :param max_iter: Accepted for backward compatibility only; it is
            not used by the random forest.
        """
        # Both helpers must be *called*; storing the bound methods would
        # break the tuple unpacking and leave ``self.model`` unusable.
        (self.X_Train, self.X_Test,
         self.Y_Train, self.Y_Test) = self.preprocess()
        self.model = self.create_model()

    def preprocess(self):
        """Read the CSV, encode ``purpose`` and split into train/test sets.

        :return: ``(X_Train, X_Test, Y_Train, Y_Test)`` from a 60/40 split
            with a fixed ``random_state`` so results are reproducible.
        """
        # Read the dataset
        df = pd.read_csv('loan_data.csv')
        # Convert the categorical column "purpose" into indicator columns
        final_data = pd.get_dummies(df, columns=["purpose"], drop_first=True)
        # NOTE(review): 'log.annual.inc' and 'days.with.cr.line' replace two
        # empty column names ('') that would raise KeyError; they are
        # inferred from the column order of the loan dataset -- confirm
        # against the header of loan_data.csv.
        feature_columns = [
            'int.rate', 'installment', 'log.annual.inc', 'dti',
            'fico', 'days.with.cr.line', 'revol.bal', 'revol.util',
            'inq.last.6mths', 'delinq.2yrs', 'pub.rec', 'not.fully.paid',
            'purpose_credit_card', 'purpose_debt_consolidation',
            'purpose_educational', 'purpose_home_improvement',
            'purpose_major_purchase', 'purpose_small_business',
        ]
        X_Data = final_data[feature_columns]
        Y_Data = final_data['credit.policy']
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(
            X_Data, Y_Data, test_size=0.4, random_state=101)
        return X_Train, X_Test, Y_Train, Y_Test

    def create_model(self):
        """
        :return: Model object (an unfitted ``RandomForestClassifier``)
        """
        model = RandomForestClassifier()
        return model

    def train(self):
        """
        Train the model on the training split.

        :return: None
        """
        self.model.fit(self.X_Train, self.Y_Train)

    def test(self):
        """
        Predict labels for the held-out test split.

        ``train()`` must have been called first.

        :return: pred [array of predicted ``credit.policy`` labels]
        """
        pred = self.model.predict(self.X_Test)
        return pred

    def download_report(self):
        """
        Evaluate the model on the test split and plot the confusion matrix.

        :return: ``(report, matrix)`` -- the classification report (str)
            and the confusion matrix (array)
        """
        pred = self.test()
        report = classification_report(self.Y_Test, pred)
        matrix = confusion_matrix(self.Y_Test, pred)
        skplt.metrics.plot_confusion_matrix(self.Y_Test, pred,
                                           title="Confusion Matrix")
        return report, matrix

    def plot(self):
        """Placeholder: the body of this method is missing from the source.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("NN.plot() has no implementation")

# Build the classifier, fit it, then evaluate on the held-out split.
neural = NN()
neural.train()  # the model must be fitted before it can predict
pred = neural.test()
report, matrix = neural.download_report()
# Show the metrics reproduced in the output below.
print(report)
print(matrix)
/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/ FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
              precision    recall  f1-score   support

           0       0.97      0.94      0.96       724
           1       0.99      0.99      0.99      3108

   micro avg       0.98      0.98      0.98      3832
   macro avg       0.98      0.97      0.97      3832
weighted avg       0.98      0.98      0.98      3832

[[ 683   41]
 [  19 3089]]

No comments:

Post a Comment

Learn How to Connect to the Glue Data Catalog using AWS Glue Iceberg REST endpoint

gluecat Learn How to Connect to the Glue Data Catalog using AWS Glue Iceberg REST e...