# %reset
# NOTE: `%reset` above is an IPython magic left over from the notebook this
# script came from; it is kept as a comment because it is not valid syntax in
# a plain Python file.
"""Random-forest experiments.

Part 1 fits a random forest on the iris data and evaluates it with the
out-of-bag (OOB) predictions. Part 2 fits a random forest on
``creditcard.csv`` and evaluates it on a held-out test split.
"""

from math import sqrt

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Part 1: iris, evaluated with out-of-bag predictions
# ---------------------------------------------------------------------------
iris = load_iris()
X, y = iris.data, iris.target
classes = iris.target_names
X.shape  # notebook-style inspection; only displayed in an interactive session

rf = RandomForestClassifier(
    n_estimators=500,
    oob_score=True,  # required for oob_decision_function_ to be populated
    bootstrap=True,
    random_state=34,
    # Derived from the data instead of the hard-coded int(sqrt(4)); the value
    # is identical whenever X has 4 columns.
    max_features=int(sqrt(X.shape[1])),
)
rf.fit(X, y)

rf.oob_decision_function_[:30]  # notebook-style peek at the first 30 rows

# Turn the per-class OOB scores into a predicted class index per sample.
oob_pred = np.argmax(rf.oob_decision_function_, axis=1)
print(pd.DataFrame(confusion_matrix(y, oob_pred), index=classes, columns=classes))

# ---------------------------------------------------------------------------
# Part 2: creditcard.csv, evaluated on a held-out test split
# ---------------------------------------------------------------------------
data = pd.read_csv("creditcard.csv")
y = data['Class']
X = data.drop(['Class'], axis=1)
X.shape  # notebook-style inspection
X.head()
pd.Series(y).value_counts()

# Split BEFORE scaling so that the scaler's mean/variance are estimated from
# the training rows only. Fitting the scaler on the full data set (as the
# original did) leaks test-set statistics into the preprocessing step.
# The split itself is unchanged: same test_size and random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2023
)

scaler = preprocessing.StandardScaler().fit(X_train)
# transform() returns NumPy arrays, matching the array types the original
# script passed to the model. Note that X itself is left unscaled here.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

pd.Series(y_train).value_counts()  # notebook-style inspection
pd.Series(y_test).value_counts()

rf = RandomForestClassifier(
    n_estimators=100,
    # Derived from the data instead of the hard-coded int(sqrt(30)); the value
    # is identical whenever the CSV has 30 feature columns besides 'Class'.
    max_features=int(sqrt(X_train.shape[1])),
    oob_score=True,
    random_state=34,
    n_jobs=-1,  # use all available cores
)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
print(pd.DataFrame(confusion_matrix(y_test, y_pred)))