Python Learning Diary, Day 15

@浙大疏锦行

CWRU Bearings SVM_Fault_Classification

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from skopt import BayesSearchCV
from skopt.space import Real, Integer

# Generate sample credit data
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=0, random_state=42)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base models
estimators = [
    ('lr', LogisticRegression(random_state=42)),
    ('rf', RandomForestClassifier(max_depth=10, max_features='sqrt', min_samples_leaf=1,
                                  min_samples_split=10, n_estimators=200, random_state=42)),
    ('xgb', xgb.XGBClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=3,
                              n_estimators=100, subsample=0.8, random_state=42)),
    ('lgb', lgb.LGBMClassifier(colsample_bytree=0.8, learning_rate=0.1, max_depth=5,
                               n_estimators=100, num_leaves=31, subsample=0.8, random_state=42))
]
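
# Each (name, estimator) pair is a first-level model in the stack; the names
# ('lr', 'rf', 'xgb', 'lgb') double as the parameter prefixes used in the
# Bayesian search space below, e.g. 'rf__n_estimators'.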

# Define the meta-model
final_estimator = LogisticRegression(random_state=42)

# Create the stacking classifier
stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=final_estimator
)
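
# By default, StackingClassifier trains the final_estimator on 5-fold
# cross-validated predictions of the base models (predict_proba where
# available), which limits leakage from the base models into the meta-model.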

# Define the search space for Bayesian optimization
search_spaces = {
    'final_estimator__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'rf__n_estimators': Integer(50, 300),
    'xgb__n_estimators': Integer(50, 300),
    'lgb__n_estimators': Integer(50, 300)
}
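
# The 'name__param' keys must match entries of stacking_clf.get_params();
# if unsure, they can be listed with a quick check (not part of the run below):
# print(sorted(stacking_clf.get_params().keys()))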

# Create the Bayesian optimization searcher
bayes_search = BayesSearchCV(
    estimator=stacking_clf,
    search_spaces=search_spaces,
    n_iter=30,
    cv=5,
    scoring='f1',
    n_jobs=-1
)
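
# BayesSearchCV fits a surrogate model (a Gaussian process by default) to the
# scores observed so far and proposes promising configurations; with n_iter=30
# and cv=5 this amounts to 150 cross-validation fits of the full stack.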

# Fit the model via Bayesian optimization
bayes_search.fit(X_train, y_train)

# Print the best parameters
print("Best parameters:", bayes_search.best_params_)

# Predict on the test set
y_pred = bayes_search.predict(X_test)

# Compute the F1 score
# f1 = f1_score(y_test, y_pred)
# print(f"F1 score of the stacking model (after Bayesian optimization): {f1}")

# Compute the test-set AUC from the predicted probability of the positive class
# (the task is binary, so multi_class handling is not needed)
y_pred_proba = bayes_search.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_pred_proba)
print("Test-set AUC:", auc)

[LightGBM] [Info] Number of positive: 403, number of negative: 397
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2550
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.503750 -> initscore=0.015000
[LightGBM] [Info] Start training from score 0.015000
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
...
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
Best parameters: OrderedDict([('final_estimator__C', 0.00403118538093137), ('lgb__n_estimators', 300), ('rf__n_estimators', 188), ('xgb__n_estimators', 284)])
Test-set AUC: 0.94921875
