Saturday, July 27, 2019

Run Multiple Scikit-Learn Models in One Go


Soumil Nitin Shah

Bachelor's in Electronic Engineering | Master's in Electrical Engineering | Master's in Computer Engineering

Hello! I’m Soumil Nitin Shah, a Software and Hardware Developer based in New York City. I have completed my Bachelor's in Electronic Engineering and my double Master's in Computer and Electrical Engineering. I develop Python-based cross-platform desktop applications, web pages, software, REST APIs, databases, and much more, and I have more than 2 years of experience in Python.

In [5]:
__Name__ = ["Shah Soumil Nitin"]
__Email__ = ["soushah@my.bridgeport.edu","shahsoumil519@gmail.com"]
__Version__ = "1.0.0"
__Github__ = "https://github.com/soumilshah1995"
__Website__ = "https://soumilshah.herokuapp.com/"
__Blog__ = "https://soumilshah1995.blogspot.com/"
__Youtube__ = "https://www.youtube.com/channel/UC_eOodxvwS_H7x2uLQa-svw?view_as=subscriber"
__FaceBook__ = "https://www.facebook.com/soumilshah1995/"
__Project__ = "https://soumilshah.herokuapp.com/project"

Description = """
    Hello! I’m Soumil Nitin Shah, a Software and Hardware Developer based in New York City.
    I have completed by Bachelor in Electronic Engineering and my Double master’s in Computer and Electrical Engineering.
    I Develop Python Based Cross Platform Desktop Application , Webpages , Software, REST API, Database and much more
    I have more than 2 Years of Experience in Python
"""

try:

    import pandas as pd
    import seaborn as sns

    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_breast_cancer

    # Reporting the results
    from sklearn.metrics import classification_report, confusion_matrix
    import scikitplot as skplt


    # Classification models
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC
    from sklearn.model_selection import learning_curve, GridSearchCV
    from sklearn.linear_model import SGDClassifier
except ImportError:
    print("Some modules are missing ...")


class SciKitMaster(object):

    def __init__(self):
        self.X_Train, self.X_Test, self.Y_Train, self.Y_Test = self.preprocess_data

        # Hyper-parameter grid for the SVC grid search below
        self.param_grid = {'C': [0.1, 1, 10, 100, 1000],
                           'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}
        self.Models = {
            "model_logistic_rg": LogisticRegression(),
            "model_multibinomial": MultinomialNB(),
            "model_svc": GridSearchCV(SVC(), param_grid=self.param_grid),
            'model_decision_tree': DecisionTreeClassifier(),
            'model_random_forest': RandomForestClassifier(),
            'model_SGD': SGDClassifier(loss="hinge", penalty="l2", max_iter=100)}

    @property
    def preprocess_data(self):

        # Load the dataset once and build a DataFrame from it
        data = load_breast_cancer()
        df = pd.DataFrame(data=data["data"], columns=data["feature_names"])
        df["target"] = data["target"]

        # All 30 mean/error/worst feature columns form X; "target" is the label
        X_Data = df.drop("target", axis=1)

        Y_Data = df["target"]
        X_Train, X_Test, Y_Train, Y_Test = train_test_split(X_Data, Y_Data, test_size=0.4, random_state=101)

        return X_Train, X_Test, Y_Train, Y_Test

    @property
    def Train_Test_Architecture(self):

        for counter, name in enumerate(self.Models):
            print("=" * 65)
            print("\n")
            print("\t{}\t{}".format(counter, name))
            print("\n")

            # Look up the estimator, fit it on the training split,
            # and evaluate on the held-out test split
            model = self.Models.get(name)
            print(model)
            print("\n")
            model.fit(self.X_Train, self.Y_Train)
            pred = model.predict(self.X_Test)

            print(classification_report(self.Y_Test, pred))
            print("\n")
            print(confusion_matrix(self.Y_Test, pred))
            print("\n")
            skplt.metrics.plot_confusion_matrix(self.Y_Test, pred, figsize=(6, 6),
                                                title="Confusion Matrix {}".format(counter))
            


if __name__ == "__main__":
    neural = SciKitMaster()
    neural.Train_Test_Architecture
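
Note that KNeighborsClassifier and StandardScaler are imported above but never make it into the Models dict. As a minimal sketch of how you could add them (my own addition, not part of the run below), wrapping both in a scikit-learn Pipeline keeps the fit/predict loop unchanged and ensures the scaler is fit only on the training split:

from sklearn.pipeline import Pipeline

neural = SciKitMaster()
# Hypothetical extra entry: scaling + KNN act as one estimator, so the
# existing fit / predict / report loop works on it unchanged
neural.Models["model_knn_scaled"] = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier(n_neighbors=5)),
])
neural.Train_Test_Architecture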
=================================================================


 0 model_logistic_rg


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)


              precision    recall  f1-score   support

           0       0.96      0.89      0.92        83
           1       0.94      0.98      0.96       145

   micro avg       0.95      0.95      0.95       228
   macro avg       0.95      0.94      0.94       228
weighted avg       0.95      0.95      0.95       228



[[ 74   9]
 [  3 142]]


=================================================================


 1 model_multibinomial


MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)


              precision    recall  f1-score   support

           0       0.90      0.83      0.86        83
           1       0.91      0.94      0.93       145

   micro avg       0.90      0.90      0.90       228
   macro avg       0.90      0.89      0.89       228
weighted avg       0.90      0.90      0.90       228



[[ 69  14]
 [  8 137]]


=================================================================


 2 model_svc


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)


/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:2053: FutureWarning: You should specify a value for 'cv' instead of relying on the default value. The default value will change from 3 to 5 in version 0.22.
  warnings.warn(CV_WARNING, FutureWarning)
              precision    recall  f1-score   support

           0       0.90      0.89      0.90        83
           1       0.94      0.94      0.94       145

   micro avg       0.93      0.93      0.93       228
   macro avg       0.92      0.92      0.92       228
weighted avg       0.93      0.93      0.93       228



[[ 74   9]
 [  8 137]]
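
The FutureWarning about 'cv' above is just GridSearchCV falling back to its old default of 3 folds. Passing cv explicitly silences it, and best_params_ shows which C/gamma combination won. A minimal sketch (X_Train and Y_Train stand in for the split produced by preprocess_data; this is not part of the run above):

grid = GridSearchCV(SVC(),
                    param_grid={'C': [0.1, 1, 10, 100, 1000],
                                'gamma': [1, 0.1, 0.01, 0.001, 0.0001]},
                    cv=5)
grid.fit(X_Train, Y_Train)
print(grid.best_params_)   # best C / gamma combination found
print(grid.best_score_)    # its mean cross-validated accuracy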


=================================================================


 3 model_decision_tree


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')


              precision    recall  f1-score   support

           0       0.90      0.86      0.88        83
           1       0.92      0.94      0.93       145

   micro avg       0.91      0.91      0.91       228
   macro avg       0.91      0.90      0.90       228
weighted avg       0.91      0.91      0.91       228



[[ 71  12]
 [  8 137]]


=================================================================


 4 model_random_forest


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


              precision    recall  f1-score   support

           0       0.89      0.94      0.91        83
           1       0.96      0.93      0.95       145

   micro avg       0.93      0.93      0.93       228
   macro avg       0.93      0.94      0.93       228
weighted avg       0.94      0.93      0.93       228



[[ 78   5]
 [ 10 135]]


/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.
  "10 in version 0.20 to 100 in 0.22.", FutureWarning)
=================================================================


 5 model_SGD


SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=100,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False)


              precision    recall  f1-score   support

           0       0.91      0.86      0.88        83
           1       0.92      0.95      0.94       145

   micro avg       0.92      0.92      0.92       228
   macro avg       0.92      0.90      0.91       228
weighted avg       0.92      0.92      0.92       228



[[ 71  12]
 [  7 138]]


/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/stochastic_gradient.py:183: FutureWarning: max_iter and tol parameters have been added in SGDClassifier in 0.19. If max_iter is set but tol is left unset, the default value for tol in 0.19 and 0.20 will be None (which is equivalent to -infinity, so it has no effect) but will change in 0.21 to 1e-3. Specify tol to silence this warning.
  FutureWarning)
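
As a sanity check, each report's per-class numbers can be recovered by hand from its confusion matrix. For the logistic regression matrix above, [[74, 9], [3, 142]], rows are true labels and columns are predictions, so for class 0:

tp0, fn0, fp0 = 74, 9, 3          # 74 correct, 9 missed (predicted as 1), 3 false alarms
precision_0 = tp0 / (tp0 + fp0)   # 74 / 77 = 0.96, matching the report
recall_0    = tp0 / (tp0 + fn0)   # 74 / 83 = 0.89, matching the report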
