随机森林预测、重要性分析(Python实现)

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from functools import reduce
import numpy as np
import pandas as pd


# Data import and basic definitions.
# Read the workbook and discard the 'ID' column in a single chained step.
data = pd.read_excel('data2(Topsis评分评级).xlsx').drop(columns=['ID'])

# Rows without any missing value form the training set; rows whose 'MM'
# value is missing are kept separately as the prediction set.
training_set = data.dropna()
prediction_set = data[data['MM'].isna()]

# Placeholder list: put the spreadsheet's actual column names here.
features = ['XXX', 'XXX']
X, y = training_set[features], training_set['MM']

# Initial values for the score / RMSE trackers.
# NOTE(review): presumably updated by the random_state search that follows
# in the file -- confirm against the rest of the script.
maxscore = 0
minrmse = 1000

# 选取最优的random_state
'''
for randomstate in range(50, -1, -1):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=randomstate)

    model = RandomForestClassifier(random_state=randomstate)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    score = accuracy_score(y_tes

你可能感兴趣的:(python,随机森林,机器学习)