Python学习Day10

学习来源:@浙大疏锦行

知识点:

  1. 数据集的划分
  2. 机器学习模型建模的三行代码
  3. 机器学习模型分类问题的评估
  • 对心脏病数据集采用机器学习模型建模和评估
    import pandas as pd

    # Load the heart-disease dataset from a CSV file in the working directory.
    file_path = "heart.csv"
    data = pd.read_csv(file_path)

    # info() prints its column summary itself.
    data.info()
    # isnull().sum() only returns the per-column missing-value counts; as a bare
    # expression in a script the result is discarded, so print it explicitly.
    print(data.isnull().sum())
    
    
    # Split the data into a training set and a test set.
    from sklearn.model_selection import train_test_split

    # Features: every column except the label column 'target'.
    X = data.drop(columns=['target'])
    # Labels.
    y = data['target']

    # Hold out 20% of the rows for testing; the fixed seed keeps the split
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Report the shape of both partitions.
    print(f"训练集形状: {X_train.shape}, 测试集形状: {X_test.shape}")
    
    
    from sklearn.svm import SVC  # Support vector machine classifier
    from sklearn.neighbors import KNeighborsClassifier  # K-nearest-neighbours classifier
    from sklearn.linear_model import LogisticRegression  # Logistic regression classifier
    import xgboost as xgb  # XGBoost classifier
    import lightgbm as lgb  # LightGBM classifier
    from sklearn.ensemble import RandomForestClassifier  # Random forest classifier
    from catboost import CatBoostClassifier  # CatBoost classifier
    from sklearn.tree import DecisionTreeClassifier  # Decision tree classifier
    from sklearn.naive_bayes import GaussianNB  # Gaussian naive Bayes classifier
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score  # Scalar metrics for classifier performance
    from sklearn.metrics import classification_report, confusion_matrix  # Classification report and confusion matrix
    import warnings  # Used to silence warning messages
    warnings.filterwarnings("ignore")  # Ignore ALL warnings; note this also hides warnings raised by the libraries above
    
    # SVM
    # SVC is not scale-invariant: features with larger numeric ranges dominate
    # the kernel. The raw columns are presumably on different scales (confirm
    # against heart.csv), so standardize them first. Doing it inside a pipeline
    # fits the scaler on the training data only and re-applies it in predict().
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    svm_model = make_pipeline(StandardScaler(), SVC(random_state=42))
    svm_model.fit(X_train, y_train)
    svm_pred = svm_model.predict(X_test)

    print("\nSVM 分类报告:")
    print(classification_report(y_test, svm_pred))  # per-class precision/recall/F1
    print("SVM 混淆矩阵:")
    print(confusion_matrix(y_test, svm_pred))  # rows: true labels, columns: predictions

    # Scalar SVM metrics; precision/recall/F1 use scikit-learn's default
    # settings, which score the positive class.
    svm_accuracy = accuracy_score(y_test, svm_pred)
    svm_precision = precision_score(y_test, svm_pred)
    svm_recall = recall_score(y_test, svm_pred)
    svm_f1 = f1_score(y_test, svm_pred)
    print("SVM 模型评估指标:")
    print(f"准确率: {svm_accuracy:.4f}")
    print(f"精确率: {svm_precision:.4f}")
    print(f"召回率: {svm_recall:.4f}")
    print(f"F1 值: {svm_f1:.4f}")
    # KNN
    # K-nearest-neighbours classifies by distance, so a feature with a large
    # numeric range outweighs the others. The raw columns are presumably on
    # different scales (confirm against heart.csv), so standardize them first.
    # The pipeline fits the scaler on the training data only and re-applies it
    # in predict().
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier())
    knn_model.fit(X_train, y_train)
    knn_pred = knn_model.predict(X_test)

    print("\nKNN 分类报告:")
    print(classification_report(y_test, knn_pred))  # per-class precision/recall/F1
    print("KNN 混淆矩阵:")
    print(confusion_matrix(y_test, knn_pred))  # rows: true labels, columns: predictions

    # Scalar KNN metrics; precision/recall/F1 use scikit-learn's default
    # settings, which score the positive class.
    knn_accuracy = accuracy_score(y_test, knn_pred)
    knn_precision = precision_score(y_test, knn_pred)
    knn_recall = recall_score(y_test, knn_pred)
    knn_f1 = f1_score(y_test, knn_pred)
    print("KNN 模型评估指标:")
    print(f"准确率: {knn_accuracy:.4f}")
    print(f"精确率: {knn_precision:.4f}")
    print(f"召回率: {knn_recall:.4f}")
    print(f"F1 值: {knn_f1:.4f}")
    # Logistic regression
    # The solver stops after max_iter iterations (100 by default). On unscaled
    # features it may hit that cap before converging, and the blanket
    # warnings.filterwarnings("ignore") earlier in this script would hide the
    # resulting ConvergenceWarning. Raise the cap so the reported metrics come
    # from a converged model.
    logreg_model = LogisticRegression(random_state=42, max_iter=1000)
    logreg_model.fit(X_train, y_train)
    logreg_pred = logreg_model.predict(X_test)

    print("\n逻辑回归 分类报告:")
    print(classification_report(y_test, logreg_pred))  # per-class precision/recall/F1
    print("逻辑回归 混淆矩阵:")
    print(confusion_matrix(y_test, logreg_pred))  # rows: true labels, columns: predictions

    # Scalar metrics; precision/recall/F1 use scikit-learn's default settings,
    # which score the positive class.
    logreg_accuracy = accuracy_score(y_test, logreg_pred)
    logreg_precision = precision_score(y_test, logreg_pred)
    logreg_recall = recall_score(y_test, logreg_pred)
    logreg_f1 = f1_score(y_test, logreg_pred)
    print("逻辑回归 模型评估指标:")
    print(f"准确率: {logreg_accuracy:.4f}")
    print(f"精确率: {logreg_precision:.4f}")
    print(f"召回率: {logreg_recall:.4f}")
    print(f"F1 值: {logreg_f1:.4f}")
    
    # Gaussian naive Bayes: train on the training split, predict the test split.
    nb_model = GaussianNB().fit(X_train, y_train)  # fit() returns the estimator itself
    nb_pred = nb_model.predict(X_test)

    # Per-class report, then the confusion matrix.
    print("\n朴素贝叶斯 分类报告:", classification_report(y_test, nb_pred), sep="\n")
    print("朴素贝叶斯 混淆矩阵:", confusion_matrix(y_test, nb_pred), sep="\n")

    # Scalar metrics for the positive class (scikit-learn defaults).
    nb_accuracy, nb_precision, nb_recall, nb_f1 = (
        accuracy_score(y_test, nb_pred),
        precision_score(y_test, nb_pred),
        recall_score(y_test, nb_pred),
        f1_score(y_test, nb_pred),
    )
    print(
        "朴素贝叶斯 模型评估指标:",
        f"准确率: {nb_accuracy:.4f}",
        f"精确率: {nb_precision:.4f}",
        f"召回率: {nb_recall:.4f}",
        f"F1 值: {nb_f1:.4f}",
        sep="\n",
    )
    
    # Decision tree: the fixed seed keeps the fitted tree reproducible.
    dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
    dt_pred = dt_model.predict(X_test)

    # Compute the scalar metrics up front; they only depend on the predictions.
    dt_accuracy = accuracy_score(y_test, dt_pred)
    dt_precision = precision_score(y_test, dt_pred)
    dt_recall = recall_score(y_test, dt_pred)
    dt_f1 = f1_score(y_test, dt_pred)

    # Per-class report, then the confusion matrix.
    print("\n决策树 分类报告:")
    print(classification_report(y_test, dt_pred))
    print("决策树 混淆矩阵:")
    print(confusion_matrix(y_test, dt_pred))

    # Summary block, one metric per line.
    print("\n".join([
        "决策树 模型评估指标:",
        f"准确率: {dt_accuracy:.4f}",
        f"精确率: {dt_precision:.4f}",
        f"召回率: {dt_recall:.4f}",
        f"F1 值: {dt_f1:.4f}",
    ]))
    
    
    # Random forest: fit on the training split and predict the held-out split.
    rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
    rf_pred = rf_model.predict(X_test)

    # Per-class report, then the confusion matrix.
    print("\n随机森林 分类报告:", classification_report(y_test, rf_pred), sep="\n")
    print("随机森林 混淆矩阵:", confusion_matrix(y_test, rf_pred), sep="\n")

    # Scalar metrics for the positive class (scikit-learn defaults).
    rf_accuracy, rf_precision = accuracy_score(y_test, rf_pred), precision_score(y_test, rf_pred)
    rf_recall, rf_f1 = recall_score(y_test, rf_pred), f1_score(y_test, rf_pred)

    print("随机森林 模型评估指标:")
    print(f"准确率: {rf_accuracy:.4f}", f"精确率: {rf_precision:.4f}", sep="\n")
    print(f"召回率: {rf_recall:.4f}", f"F1 值: {rf_f1:.4f}", sep="\n")
    
    # XGBoost: gradient-boosted trees via the scikit-learn style wrapper.
    xgb_model = xgb.XGBClassifier(random_state=42)
    xgb_model.fit(X_train, y_train)
    xgb_pred = xgb_model.predict(X_test)

    # Per-class report, then the confusion matrix.
    print("\nXGBoost 分类报告:", classification_report(y_test, xgb_pred), sep="\n")
    print("XGBoost 混淆矩阵:", confusion_matrix(y_test, xgb_pred), sep="\n")

    # Scalar metrics for the positive class (scikit-learn defaults).
    xgb_accuracy, xgb_precision, xgb_recall, xgb_f1 = (
        accuracy_score(y_test, xgb_pred),
        precision_score(y_test, xgb_pred),
        recall_score(y_test, xgb_pred),
        f1_score(y_test, xgb_pred),
    )
    print("XGBoost 模型评估指标:")
    print(
        f"准确率: {xgb_accuracy:.4f}",
        f"精确率: {xgb_precision:.4f}",
        f"召回率: {xgb_recall:.4f}",
        f"F1 值: {xgb_f1:.4f}",
        sep="\n",
    )
    
    # LightGBM: gradient-boosted trees via the scikit-learn style wrapper.
    lgb_model = lgb.LGBMClassifier(random_state=42)
    lgb_model.fit(X_train, y_train)
    lgb_pred = lgb_model.predict(X_test)

    # Compute the scalar metrics up front; they only depend on the predictions.
    lgb_accuracy = accuracy_score(y_test, lgb_pred)
    lgb_precision = precision_score(y_test, lgb_pred)
    lgb_recall = recall_score(y_test, lgb_pred)
    lgb_f1 = f1_score(y_test, lgb_pred)

    # Per-class report, then the confusion matrix.
    print("\nLightGBM 分类报告:")
    print(classification_report(y_test, lgb_pred))
    print("LightGBM 混淆矩阵:")
    print(confusion_matrix(y_test, lgb_pred))

    # Summary block, one metric per line.
    print("\n".join([
        "LightGBM 模型评估指标:",
        f"准确率: {lgb_accuracy:.4f}",
        f"精确率: {lgb_precision:.4f}",
        f"召回率: {lgb_recall:.4f}",
        f"F1 值: {lgb_f1:.4f}",
    ]))
    
    
    
    

你可能感兴趣的:(python打卡学习,python)