@浙大疏锦行
CWRU Bearings SVM_Fault_Classification
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from skopt import BayesSearchCV
from skopt.space import Real, Integer
# Generate sample credit data
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=0, random_state=42)
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define the base models
estimators = [
('lr', LogisticRegression(random_state=42)),
('rf', RandomForestClassifier(max_depth=10, max_features='sqrt', min_samples_leaf=1,
min_samples_split=10, n_estimators=200, random_state=42)),
('xgb', xgb.XGBClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=3,
n_estimators=100, subsample=0.8, random_state=42)),
('lgb', lgb.LGBMClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=5,
n_estimators=100, num_leaves=31, subsample=0.8, random_state=42))
]
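# (Optional) A minimal sanity-check sketch, not part of the original flow: before
# stacking, each base model can be scored on its own with cross-validation on the
# training split, to see what the ensemble has to improve on.
from sklearn.model_selection import cross_val_score
for name, model in estimators:
    cv_f1 = cross_val_score(model, X_train, y_train, cv=5, scoring='f1')
    print(f"{name}: mean CV F1 = {cv_f1.mean():.4f}")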
# Define the meta-model (final estimator)
final_estimator = LogisticRegression(random_state=42)
# Create the Stacking classifier
stacking_clf = StackingClassifier(
estimators=estimators,
final_estimator=final_estimator
)
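# How the stacking step works: StackingClassifier fits every base model and feeds
# their out-of-fold (cross-validated) predictions to the meta-model. The lines below
# are only an illustrative sketch of that idea, roughly matching the default
# behaviour for binary classification; they are not required for the pipeline.
from sklearn.model_selection import cross_val_predict
meta_features = np.column_stack([
    cross_val_predict(model, X_train, y_train, cv=5, method='predict_proba')[:, 1]
    for _, model in estimators
])
illustrative_meta_model = LogisticRegression(random_state=42).fit(meta_features, y_train)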
# Define the search space for Bayesian optimization
search_spaces = {
'final_estimator__C': Real(1e-6, 1e+6, prior='log-uniform'),
'rf__n_estimators': Integer(50, 300),
'xgb__n_estimators': Integer(50, 300),
'lgb__n_estimators': Integer(50, 300)
}
# Create the Bayesian optimization searcher
bayes_search = BayesSearchCV(
estimator=stacking_clf,
search_spaces=search_spaces,
n_iter=30,
cv=5,
scoring='f1',
n_jobs=-1
)
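# Note: BayesSearchCV reaches into the nested models with sklearn's double-underscore
# naming, e.g. 'rf__n_estimators' tunes n_estimators of the base model registered as
# 'rf' above. The full set of tunable names can be listed from the stacking estimator
# itself (illustrative check only):
print([p for p in stacking_clf.get_params() if p.endswith('__n_estimators')])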
# Train the model with Bayesian optimization
bayes_search.fit(X_train, y_train)
# Print the best parameters
print("Best parameters:", bayes_search.best_params_)
# Predict on the test set
y_pred = bayes_search.predict(X_test)
# Compute the F1 score
#f1 = f1_score(y_test, y_pred)
#print(f"F1 score of the Stacking model (after Bayesian optimization): {f1}")
# Compute and print the test-set AUC (binary task, so use the positive-class probability)
y_pred_proba = bayes_search.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_pred_proba)
print("Test-set AUC:", auc)
[LightGBM] [Info] Number of positive: 403, number of negative: 397
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503750 -> initscore=0.015000
[LightGBM] [Info] Start training from score 0.015000
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
... (the warning above repeats many times during the search; remaining log output truncated)
Best parameters: OrderedDict([('final_estimator__C', 0.00403118538093137), ('lgb__n_estimators', 300), ('rf__n_estimators', 188), ('xgb__n_estimators', 284)])
Test-set AUC: 0.94921875