## 2  人工调参

(此外其他参数不展开说明) 对于n_estimators：

## 3 网格/随机搜索

``````python
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
# Model whose hyper-parameters are being tuned
model = RandomForestClassifier()
# Hyper-parameter search space (19 x 5 x 10 = 950 grid points)
param_grid = dict(
    max_depth=np.arange(1, 20, 1),
    n_estimators=np.arange(1, 50, 10),
    max_leaf_nodes=np.arange(2, 100, 10),
)
# Grid search: evaluates every combination in param_grid with 5-fold CV.
# NOTE(review): x and y are not defined in this snippet; they are assumed
# to be the feature matrix and labels prepared earlier.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='f1_micro',
)
grid_search.fit(x, y)
print(
    grid_search.best_params_,
    grid_search.best_score_,
    grid_search.best_estimator_,
    sep='\n',
)
# Randomized search: samples 200 combinations from the same space
rd_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=200,
    cv=5,
    scoring='f1_micro',
)
rd_search.fit(x, y)
print(
    rd_search.best_params_,
    rd_search.best_score_,
    rd_search.best_estimator_,
    sep='\n',
)
``````

## 4.2 算法流程

``````text
for循环n次迭代：
采集函数依据学习的目标函数(或初始化)给出下个开采极值点 Xn+1;
评估超参数Xn+1得到表现Yn+1;
加入新的Xn+1、Yn+1数据样本，并更新高斯过程模型；
``````

``````python
"""

"""
import numpy as np
from hyperopt import hp, tpe, Trials, STATUS_OK, Trials, anneal
from functools import partial
from hyperopt.fmin import fmin
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
def model_metrics(model, x, y):
    """Return the micro-averaged F1 score of ``model``'s predictions on ``x``.

    ``model`` must expose ``predict``; ``y`` holds the reference labels
    the predictions are compared against.
    """
    return f1_score(y, model.predict(x), average='micro')
def bayes_fmin(train_x, test_x, train_y, test_y, eval_iters=50):
    """Tune random-forest hyper-parameters with hyperopt's TPE search.

    Each trial fits a ``RandomForestClassifier`` on the training data and
    is scored by the micro-averaged F1 on the test data; the search
    minimises the negated score.

    Args:
        train_x: features the candidate models are fitted on.
        test_x: features the candidate models are scored on.
        train_y: labels matching ``train_x``.
        test_y: labels matching ``test_x``.
        eval_iters: number of objective evaluations (``max_evals``).

    Returns:
        dict with the best ``max_depth``, ``n_estimators`` and
        ``max_leaf_nodes`` found, all converted to ``int``.
    """
    param_names = ('max_depth', 'n_estimators', 'max_leaf_nodes')

    def factory(params):
        """Objective: fit one candidate and return its loss for hyperopt."""
        # hp.quniform yields floats, but the forest requires integers.
        fit_params = {name: int(params[name]) for name in param_names}
        model = RandomForestClassifier(**fit_params)
        model.fit(train_x, train_y)
        # Minimise the negated test-set F1 (i.e. maximise F1).
        # NOTE(review): the parameters are selected on the test set, so the
        # reported score is optimistic; a separate validation split would
        # give an unbiased estimate.
        loss = -model_metrics(model, test_x, test_y)
        return {"loss": loss, "status": STATUS_OK}

    # Search space: integer-valued ranges sampled as quantised uniforms.
    space = {
        'max_depth': hp.quniform('max_depth', 1, 20, 1),
        'n_estimators': hp.quniform('n_estimators', 2, 50, 1),
        'max_leaf_nodes': hp.quniform('max_leaf_nodes', 2, 100, 1),
    }
    # TPE is hyperopt's Bayesian (sequential model-based) optimiser; the
    # previous anneal.suggest performed simulated annealing instead, which
    # contradicted this function's documented purpose.
    best_params = fmin(
        factory,
        space,
        algo=tpe.suggest,
        max_evals=eval_iters,
        trials=Trials(),
        return_argmin=True,
    )
    # fmin reports the sampled floats; convert them back to integers.
    for name in param_names:
        best_params[name] = int(best_params[name])
    return best_params
# Search for the best hyper-parameters using 100 objective evaluations.
# NOTE(review): train_x, test_x, train_y and test_y are not defined in this
# snippet; they are assumed to come from an earlier train/test split.
best_params = bayes_fmin(train_x, test_x, train_y, test_y, eval_iters=100)
print(best_params)
``````