import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RepeatedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Working directory on the author's machine; adjust the path as needed.
path = os.getcwd()
os.chdir('/Users/marin/TEACHING/2324/M2-GLM-HAX912X/TP')

# Load the voice data set and inspect its structure.
data = pd.read_csv("voice.csv")
data.info()

# Features: every column except the target "label".
x = data.drop(["label"], axis=1)
x.info()

# Standardize the features (the scaler is fitted on the full data set).
scaler = StandardScaler().fit(x)
x = scaler.transform(x)
y = data.label

# Hold out 7% of the observations as a test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.07, random_state=42)
print(x_test.shape)

# Unpenalized logistic regression (penalty=None requires scikit-learn >= 1.2).
logit = LogisticRegression(penalty=None)
logit.fit(x_train, y_train)
y_chap = logit.predict(x_test)
table = pd.crosstab(y_test, y_chap)
print(table)

# Repeated 5-fold cross-validation (5 repeats) to choose the regularization
# strength C on a grid of 11 values between 0.1 and 2.1.
Crossvalid = RepeatedKFold(n_splits=5, n_repeats=5)
parameters = {'C': np.linspace(0.1, 2.1, num=11)}

# L1-penalized (lasso) logistic regression, tuned by cross-validated log-loss.
logitl1 = GridSearchCV(LogisticRegression(penalty='l1', solver='saga', max_iter=1000),
                       parameters, cv=Crossvalid, scoring='neg_log_loss')
logitl1.fit(x_train, y_train)
print(logitl1.best_params_)
print(logitl1.best_score_)

# Cross-validated score as a function of C.
results = pd.DataFrame(logitl1.cv_results_)
print(results.mean_test_score)
plt.plot(parameters['C'], results.mean_test_score)
plt.show()

# Confusion table of the tuned L1 model on the test set.
y_chap = logitl1.predict(x_test)
table = pd.crosstab(y_test, y_chap)
print(table)

# L2-penalized (ridge) logistic regression, tuned on the same grid.
logitl2 = GridSearchCV(LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000),
                       parameters, cv=Crossvalid, scoring='neg_log_loss')
logitl2.fit(x_train, y_train)
print(logitl2.best_params_)
print(logitl2.best_score_)

# Cross-validated score as a function of C.
results = pd.DataFrame(logitl2.cv_results_)
print(results.mean_test_score)
plt.plot(parameters['C'], results.mean_test_score)
plt.show()

# Confusion table of the tuned L2 model on the test set.
y_chap = logitl2.predict(x_test)
table = pd.crosstab(y_test, y_chap)
print(table)
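
# Optional follow-up (not part of the original script): a minimal sketch that
# compares the three fitted models by test-set accuracy, assuming the objects
# logit, logitl1, logitl2, x_test and y_test defined above are still in scope.
from sklearn.metrics import accuracy_score

for name, model in [("no penalty", logit), ("L1", logitl1), ("L2", logitl2)]:
    acc = accuracy_score(y_test, model.predict(x_test))
    print(f"{name}: test accuracy = {acc:.3f}")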