First run:
from sklearn.model_selection import GridSearchCV
# Coarse first pass: a wide grid over forest size, tree depth, split
# threshold and per-split feature budget (3 * 3 * 2 * 3 = 54 candidates,
# each cross-validated on 3 folds).
param_grid = {
    'n_estimators': [5, 50, 300],
    'max_depth': [2, 10, None],
    'min_samples_split': [2, 8],
    'max_features': ['sqrt', 'log2', None],
    # Single-valued entry: nothing is tuned here, the value is simply
    # set on every candidate estimator.
    'n_jobs': [-1],
}

# Candidates are ranked by negated MSE; train-fold scores are also kept
# in cv_results_ because return_train_score=True.
grid_search = GridSearchCV(
    estimator=forest,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    return_train_score=True,
)
grid_search.fit(X_train, y_train)

# NOTE(review): the search is fitted on `y_train` but the winner is scored
# against `y_test_log` -- confirm both targets are on the same (log) scale.
# NOTE(review): `.score()` is the estimator's own default metric (presumably
# R^2 if `forest` is a regressor), not the neg-MSE used for selection above.
print(f'Best estimator parameters: {grid_search.best_estimator_}')
print(f'Best estimator score: {grid_search.best_estimator_.score(X_test, y_test_log)}')
Second run:
# Narrower second pass: only the forest size and the split threshold vary
# (2 * 3 = 6 candidates, 3 folds each); max_depth and max_features are
# held at None.
param_grid = {
    'n_estimators': [300, 600],
    'max_depth': [None],
    'min_samples_split': [8, 16, 24],
    'max_features': [None],
    # Single-valued entry: nothing is tuned here, the value is simply
    # set on every candidate estimator.
    'n_jobs': [-1],
}

# Candidates are ranked by negated MSE; train-fold scores are also kept
# in cv_results_ because return_train_score=True.
grid_search = GridSearchCV(
    estimator=forest,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    return_train_score=True,
)
grid_search.fit(X_train, y_train)

# NOTE(review): the search is fitted on `y_train` but the winner is scored
# against `y_test_log` -- confirm both targets are on the same (log) scale.
# NOTE(review): `.score()` is the estimator's own default metric (presumably
# R^2 if `forest` is a regressor), not the neg-MSE used for selection above.
print(f'Best estimator parameters: {grid_search.best_estimator_}')
print(f'Best estimator score: {grid_search.best_estimator_.score(X_test, y_test_log)}')
Best estimator parameters: RandomForestRegressor(max_features=None, min_samples_split=8, n_estimators=600, n_jobs=-1)
Best estimator score: 0.8385917192458379