# Random forest classifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
# Hold out 30% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the train and test partitions; the fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Build and fit the forest in one expression (fit() returns the estimator).
rf = RandomForestClassifier(
    n_estimators=100,         # number of trees in the ensemble
    max_depth=None,           # no depth cap; a candidate for tuning
    class_weight="balanced",  # reweight classes to offset churn imbalance
    random_state=42,          # reproducible bootstrapping / feature sampling
).fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# rf.classes_ -- the churn class, assuming churn is the larger label
# (TODO confirm encoding of y).
y_proba_rf = rf.predict_proba(X_test)[:, 1]

# Hard class predictions for the held-out rows.
y_pred_rf = rf.predict(X_test)
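#################
# TODO: X needs to be scaled.
# NOTE(review): the random forest above does not depend on feature scaling;
# presumably this applies to a scale-sensitive model fitted later -- confirm.
#################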