以下是使用极限学习机(Extreme Learning Machine, ELM)进行电厂相关数据预测的详细步骤和MATLAB代码示例。假设任务是预测电厂发电量或设备状态(如温度、压力),代码包含数据预处理、ELM模型构建、训练与预测的全过程。
假设数据集为数值矩阵(示例):每一列对应一个输入特征(如温度、压力),最后一列为目标变量发电量(MW)。
% Load the dataset (the sample file must be replaced with real plant data).
% The CSV is assumed to be a numeric matrix with one feature per column and
% the generated power in the last column.
data = csvread('power_plant.csv', 1, 0); % skip the header row
X = data(:, 1:end-1); % input features
Y = data(:, end);     % target variable

% Split into training (70%) and test (30%) sets BEFORE computing the feature
% statistics, so that test rows do not influence the scaling.
rng(1); % fixed seed for a reproducible split
n_samples = size(X, 1);
n_train = round(0.7 * n_samples);
idx = randperm(n_samples);
train_idx = idx(1:n_train);
test_idx = idx(n_train+1:end);

% Min-max scaling of the features using training-set statistics only.
% Training rows land in [0,1]; test rows may fall slightly outside.
% A constant column has zero range: divide by 1 instead so it maps to 0
% rather than NaN (NaN would later make pinv fail).
x_min = min(X(train_idx, :), [], 1);
x_range = max(X(train_idx, :), [], 1) - x_min;
x_range(x_range == 0) = 1;
X_normalized = (X - x_min) ./ x_range;

% The target keeps the global min/max of Y so that predictions can be mapped
% back with pred * (max(Y) - min(Y)) + min(Y).
y_range = max(Y) - min(Y);
if y_range == 0
    y_range = 1; % constant target: avoid 0/0
end
Y_normalized = (Y - min(Y)) / y_range;

X_train = X_normalized(train_idx, :);
Y_train = Y_normalized(train_idx, :);
X_test = X_normalized(test_idx, :);
Y_test = Y_normalized(test_idx, :);
预测公式:
\[
\mathbf{Y} = \mathbf{H} \boldsymbol{\beta}
\]
其中,\(\mathbf{H} = g(\mathbf{X} \mathbf{W}_{\text{hidden}} + \mathbf{b})\) 为隐层输出矩阵,\(\boldsymbol{\beta}\) 为输出层权重,\(g\) 为激活函数(如Sigmoid、ReLU)。
function [beta, train_pred, test_pred] = ELM(X_train, Y_train, X_test, num_hidden, activation, lambda)
% ELM  Train a single-hidden-layer extreme learning machine and predict.
%
%   [beta, train_pred, test_pred] = ELM(X_train, Y_train, X_test, num_hidden, activation)
%   draws random hidden-layer weights and biases, computes the output
%   weights with the Moore-Penrose pseudo-inverse, and returns the
%   predictions for the training and test inputs.
%
%   [...] = ELM(..., lambda) with lambda > 0 instead solves the
%   ridge-regularized system (H'*H + lambda*I) \ (H'*Y).
%
%   Inputs:
%     X_train    - n_train-by-d matrix of training inputs
%     Y_train    - training targets, one row per training sample
%     X_test     - n_test-by-d matrix of test inputs
%     num_hidden - number of hidden nodes
%     activation - 'sigmoid', 'relu' or 'sin'
%     lambda     - (optional) non-negative L2 coefficient; default 0 (pinv)
%
%   Outputs:
%     beta       - output-layer weights
%     train_pred - predictions for X_train
%     test_pred  - predictions for X_test
%
%   NOTE: the function calls rng('default'), which resets MATLAB's global
%   random generator so that repeated calls draw the same hidden weights.

    if nargin < 6 || isempty(lambda)
        lambda = 0;
    end
    if ~isscalar(lambda) || lambda < 0
        error('ELM:invalidLambda', 'lambda must be a non-negative scalar.');
    end
    if size(X_test, 2) ~= size(X_train, 2)
        error('ELM:dimensionMismatch', ...
            'X_train and X_test must have the same number of columns.');
    end
    if size(Y_train, 1) ~= size(X_train, 1)
        error('ELM:dimensionMismatch', ...
            'X_train and Y_train must have the same number of rows.');
    end

    % Random hidden layer (never trained).
    d = size(X_train, 2);
    rng('default'); % reproducible hidden weights
    W_hidden = randn(d, num_hidden); % input-to-hidden weights
    b_hidden = randn(1, num_hidden); % hidden biases

    % Hidden-layer output for the training inputs.
    H_train = elm_hidden_output(X_train, W_hidden, b_hidden, activation);

    % Output weights: pseudo-inverse by default, ridge solution on request.
    if lambda > 0
        beta = (H_train' * H_train + lambda * eye(num_hidden)) \ (H_train' * Y_train);
    else
        beta = pinv(H_train) * Y_train;
    end

    % Predictions on both sets share the same hidden layer and beta.
    train_pred = H_train * beta;
    H_test = elm_hidden_output(X_test, W_hidden, b_hidden, activation);
    test_pred = H_test * beta;
end

function H = elm_hidden_output(X, W, b, activation)
% Apply the affine hidden layer followed by the named activation function.
    Z = X * W + b; % the bias row is expanded across the rows of X*W
    switch activation
        case 'sigmoid'
            H = 1 ./ (1 + exp(-Z));
        case 'relu'
            H = max(0, Z);
        case 'sin'
            H = sin(Z);
        otherwise
            error('不支持的激活函数');
    end
end
% ELM hyper-parameters
activation = 'sigmoid'; % hidden-layer activation function
num_hidden = 100;       % number of hidden nodes (should be tuned)

% Fit the model and predict on both sets
[beta, train_pred, test_pred] = ELM(X_train, Y_train, X_test, num_hidden, activation);

% Map normalized values back to the original scale of Y
to_actual = @(v) v * (max(Y) - min(Y)) + min(Y);
train_pred_actual = to_actual(train_pred);
test_pred_actual = to_actual(test_pred);
Y_train_actual = to_actual(Y_train);
Y_test_actual = to_actual(Y_test);
计算均方根误差(RMSE)和平均绝对百分比误差(MAPE):
% Training-set errors.
% MAPE is undefined where the actual value is 0 (e.g. the plant is offline),
% so those samples are excluded from the percentage error instead of
% producing Inf/NaN. RMSE still uses every sample.
train_err = train_pred_actual - Y_train_actual;
train_rmse = sqrt(mean(train_err.^2));
train_nz = (Y_train_actual ~= 0);
train_mape = mean(abs(train_err(train_nz) ./ Y_train_actual(train_nz))) * 100;

% Test-set errors (same treatment of zero actual values).
test_err = test_pred_actual - Y_test_actual;
test_rmse = sqrt(mean(test_err.^2));
test_nz = (Y_test_actual ~= 0);
test_mape = mean(abs(test_err(test_nz) ./ Y_test_actual(test_nz))) * 100;

fprintf('训练集 RMSE: %.2f MW, MAPE: %.2f%%\n', train_rmse, train_mape);
fprintf('测试集 RMSE: %.2f MW, MAPE: %.2f%%\n', test_rmse, test_mape);
% Figure 1: actual vs. predicted output on the test set
figure();
plot(Y_test_actual, 'b', 'LineWidth', 1.5);
hold('on');
plot(test_pred_actual, 'r--', 'LineWidth', 1.5);
grid('on');
title('ELM发电量预测结果对比');
legend('实际值', '预测值');
xlabel('样本序号');
ylabel('发电量 (MW)');

% Figure 2: histogram of the test-set prediction errors (20 bins)
errors = test_pred_actual - Y_test_actual;
figure();
histogram(errors, 20);
title('测试集预测误差分布');
xlabel('预测误差 (MW)');
ylabel('频数');
使用极限学习机进行预测,含有实际电厂数据
通过交叉验证选择最优隐层节点数:
% Choose the number of hidden nodes by K-fold cross-validation on the
% TRAINING set only. The test set is not touched, so the final test error
% stays an unbiased estimate and the earlier test_pred / test_pred_actual
% results in the workspace are not overwritten.
hidden_list = [50, 100, 150, 200]; % candidate hidden-layer sizes
n_cv = size(X_train, 1);
if n_cv < 2
    error('Cross-validation needs at least 2 training samples.');
end
k_folds = min(5, n_cv);

% Assign every training row to one of k_folds folds. This is done before the
% first ELM call because ELM resets the global random generator.
fold_id = mod(randperm(n_cv), k_folds) + 1;

y_span = max(Y) - min(Y); % converts normalized errors back to MW
cv_rmse_list = zeros(length(hidden_list), 1);
for i = 1:length(hidden_list)
    fold_rmse = zeros(k_folds, 1);
    for k = 1:k_folds
        val_mask = (fold_id == k);
        [~, ~, val_pred] = ELM(X_train(~val_mask, :), Y_train(~val_mask, :), ...
            X_train(val_mask, :), hidden_list(i), 'sigmoid');
        % The min(Y) offset cancels in the difference, so only the span is needed.
        fold_rmse(k) = sqrt(mean(((val_pred - Y_train(val_mask, :)) * y_span).^2));
    end
    cv_rmse_list(i) = mean(fold_rmse);
end

% Candidate with the lowest mean validation RMSE.
[~, best_i] = min(cv_rmse_list);
best_hidden = hidden_list(best_i);

% Plot the validation RMSE against the number of hidden nodes.
figure;
plot(hidden_list, cv_rmse_list, 'o-');
xlabel('隐层节点数');
ylabel('交叉验证 RMSE (MW)');
title('隐层节点数对性能的影响');
在计算 \(\boldsymbol{\beta}\) 时加入L2正则化项:
\[
\boldsymbol{\beta} = \left( \mathbf{H}^T \mathbf{H} + \lambda \mathbf{I} \right)^{-1} \mathbf{H}^T \mathbf{Y}
\]
修改ELM代码中的beta计算部分:
lambda = 1e-3; % L2 regularization coefficient
% H'*Y is parenthesized so that one right-hand side is solved. Without the
% parentheses MATLAB evaluates (A \ H') * Y from left to right, which first
% solves for a num_hidden-by-n_train matrix.
beta = (H_train' * H_train + lambda * eye(num_hidden)) \ (H_train' * Y_train);
% Step 1: load and preprocess the data
data = csvread('power_plant.csv', 1, 0); % replace with the real data path; skips the header row
X = data(:, 1:end-1); % input features
Y = data(:, end);     % target variable (last column)

% Split into training (70%) and test (30%) sets BEFORE computing the feature
% statistics, so that test rows do not influence the scaling.
rng(1); % fixed seed so that the split is reproducible
n_samples = size(X, 1);
n_train = round(0.7 * n_samples);
idx = randperm(n_samples);
train_idx = idx(1:n_train);
test_idx = idx(n_train+1:end);

% Min-max scaling of the features using training-set statistics only.
% A constant column has zero range: divide by 1 instead so it maps to 0
% rather than NaN.
x_min = min(X(train_idx, :), [], 1);
x_range = max(X(train_idx, :), [], 1) - x_min;
x_range(x_range == 0) = 1;
X_normalized = (X - x_min) ./ x_range;

% The target keeps the global min/max of Y so that predictions can be mapped
% back with pred * (max(Y) - min(Y)) + min(Y).
y_range = max(Y) - min(Y);
if y_range == 0
    y_range = 1; % constant target: avoid 0/0
end
Y_normalized = (Y - min(Y)) / y_range;

X_train = X_normalized(train_idx, :);
Y_train = Y_normalized(train_idx, :);
X_test = X_normalized(test_idx, :);
Y_test = Y_normalized(test_idx, :);
% Step 2: train the ELM model
activation = 'sigmoid';
num_hidden = 150; % chosen via cross-validation
[beta, train_pred, test_pred] = ELM(X_train, Y_train, X_test, num_hidden, activation);

% Undo the target normalization to recover values on the original scale of Y
to_actual = @(v) v * (max(Y) - min(Y)) + min(Y);
train_pred_actual = to_actual(train_pred);
test_pred_actual = to_actual(test_pred);
Y_train_actual = to_actual(Y_train);
Y_test_actual = to_actual(Y_test);
% Step 3: evaluation and visualization
% MAPE is undefined where the actual value is 0 (e.g. the plant is offline),
% so those samples are excluded from the percentage error instead of
% producing Inf/NaN. RMSE still uses every sample.
test_err = test_pred_actual - Y_test_actual;
test_rmse = sqrt(mean(test_err.^2));
test_nz = (Y_test_actual ~= 0);
test_mape = mean(abs(test_err(test_nz) ./ Y_test_actual(test_nz))) * 100;
fprintf('测试集 RMSE: %.2f MW, MAPE: %.2f%%\n', test_rmse, test_mape);

% Actual vs. predicted output on the test set
figure;
plot(Y_test_actual, 'b', 'LineWidth', 1.5);
hold on;
plot(test_pred_actual, 'r--', 'LineWidth', 1.5);
xlabel('样本序号');
ylabel('发电量 (MW)');
legend('实际值', '预测值');
title('ELM发电量预测结果对比');
grid on;
通过调整隐层节点数、激活函数和正则化参数,可优化模型性能。对于电厂设备故障预测等分类任务,可将目标变量改为独热(one-hot)编码的类别标签,并对输出层结果取最大值(或先经过Softmax)作为预测类别。