引言
最小均方(Least Mean Square,LMS)算法是一种自适应均衡与滤波算法,在通信与信号处理中应用广泛,可以有效地滤除信道噪声,从而获得较好的通信质量。本文对该算法进行了理论分析、Matlab仿真与FPGA实现。
1、理论分析
在前面的博文中我们对常见的几种MIMO均衡算法(CMA,LMS,RLS)理论进行了详细介绍,这里不再赘述,见链接:MIMO均衡算法(CMA,LMS,RLS)原理介绍
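这里简要给出LMS算法的主要流程(记 $\mathbf{x}(n)=[x(n),x(n-1),\dots,x(n-M+1)]^T$ 为输入向量,$M$ 为滤波器长度,$\mu$ 为步长因子,$d(n)$ 为目标信号):
(1)初始化:$\mathbf{w}(0)=\mathbf{0}$;
(2)计算输出:$y(n)=\mathbf{w}^T(n)\,\mathbf{x}(n)$;
(3)计算误差:$e(n)=d(n)-y(n)$;
(4)更新权值:$\mathbf{w}(n+1)=\mathbf{w}(n)+\mu\,e(n)\,\mathbf{x}(n)$。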
2、Matlab仿真
根据如前所述的算法原理,我们利用Matlab算法仿真工具对该算法进行了仿真,相应代码及注释如下:
% Least-mean-squares (LMS) adaptive FIR filter
function [e,w,y]=LMS(mu,M,x,d)
%LMS  Adapt an M-tap FIR filter so that its output tracks the target d.
%
% Outputs:
%   e : error sequence, 1-by-N row vector, e(n) = d(n) - y(n).
%       Entries 1..M-1 are zero (no full input window is available yet).
%   w : filter coefficients after the last update, M-by-1 column vector.
%   y : filter output, 1-by-N row vector; entries 1..M-1 are zero.
% Inputs:
%   mu : step size of the coefficient update.
%   M  : filter length (number of taps).
%   x  : input signal with N samples (row or column vector).
%   d  : target signal; samples d(M)..d(N) are read.
%
% If N < M no iteration is executed: e and y are all-zero and w stays zero.

% Force a column so that w'*filter_in is always a (1-by-M)*(M-by-1) product,
% whatever orientation the caller passed.
x = x(:);
% Number of input samples
N = length(x);

% Step 1: initialisation
% Filter coefficients start at zero
w = zeros(M,1);
% Preallocate the outputs: avoids growing the arrays inside the loop and
% guarantees they are defined even when the loop body never runs (N < M).
y = zeros(1,N);
e = zeros(1,N);

% Step 2: run the LMS recursion
for n = M:N
    % Most recent M samples, newest first
    filter_in = x(n:-1:n-M+1);
    % Filter output (w' is the conjugate transpose; for real-valued
    % signals it is the plain transpose)
    y(n) = w'*filter_in;
    % Estimation error
    e(n) = d(n) - y(n);
    % Coefficient update
    w = w + mu*filter_in*e(n);
end
% Demo script: build a three-tone test signal, add Gaussian noise, run the
% LMS filter with the clean signal as target, and plot the results.
clc;
clear;
%% Generate the signal to be filtered
Fs = 20000;        % sampling frequency
N = 2^12;          % number of samples
% Time axis built from integer sample indices: always exactly N points.
% (A colon range with a fractional step, 0:1/Fs:N/Fs-1/Fs, relies on
% floating-point rounding of the end point for its element count.)
t = (0:N-1)/Fs;
s = sin(2000*2*pi*t) + sin(6000*2*pi*t) + sin(9000*2*pi*t); % test waveform
% Normalise to the range [0, 1]
s = (s - min(s))/(max(s) - min(s));
%% Pass through a Gaussian channel
noise=0.05*randn(1,length(s));
s_addnoise = s + noise;
% %% RLS
% lambda = 1;
% M = 15;
% delta = 1e-7;
% [e,w,y]=RLS(lambda,M,s_addnoise',s',delta);
%% LMS
M = 15;            % filter length
mu = 0.0234;       % step size
[e,w,y]=LMS(mu,M,s_addnoise',s');
% %% NLMS
% M = 15;
% mu = 0.05;
% a = 1e-4;
% [e,w,y]=NLMS(mu,M,s_addnoise',s',a);
%% Time-domain waveforms (samples 1500..1600 only)
figure(1);
subplot(3,1,1);
plot(t,s,'r','LineWidth',1.2);
axis([1500/Fs,1600/Fs,0,1]);
title('滤波前时域波形');
subplot(3,1,2);
plot(t,s_addnoise,'r','LineWidth',1.2);
axis([1500/Fs,1600/Fs,0,1]);
title('加噪声后时域波形');
subplot(3,1,3);
plot(t,y,'r','LineWidth',1.2);
axis([1500/Fs,1600/Fs,0,1]);
title('自适应滤波后时域波形');
%% Error curve (magnitude of the error per iteration)
figure(2);
plot(abs(e),'r','LineWidth',1.2);
title('误差曲线');
如下所示为LMS算法对应的误差曲线,当迭代次数达到约500次时,误差已基本收敛。
3、FPGA实现
一种算法要想在实际应用中产生实际效果,就必须研究其具体的实现方式。FPGA作为一种并行处理器件,在通信及信号处理领域应用比较广泛,因此,本文还研究了该算法的FPGA实现。该算法应用流水线的思想进行实现:流水线提高了数据吞吐量和可达到的时钟频率(代价是引入若干个时钟周期的固定延时),可以使得该算法能应用于高速度的通信场合。
`timescale 1ns/1ps
// 15-tap FIR convolution stage of the LMS filter.
//
// Pipeline (three register stages from data_in to data_out):
//   1. tap[]  : input delay line, tap[k] holds data_in delayed by k+1 clocks
//   2. prod[] : per-tap signed product tap[k] * w<k>
//   3. acc    : sum of the 15 products
// data_out is acc shifted right by FRAC_BITS (the original design notes this
// as a Q28 -> Q14 conversion) and saturated to the signed 16-bit range.
//
// Changes versus the earlier version of this module:
//   * the accumulator is 36 bits wide: fifteen 32-bit products need
//     32 + ceil(log2(15)) = 36 bits to be summed without overflow;
//   * the scaling uses an arithmetic shift (>>>) so the sign is preserved;
//   * the output saturates instead of silently wrapping when the scaled sum
//     does not fit in 16 bits. In-range results are unchanged.
module conv(
    // system signals
    input                clk      ,
    input                rst_n    ,   // asynchronous reset, active low
    // sample to be filtered
    input  signed [15:0] data_in  ,
    // filter coefficients
    input  signed [15:0] w0       ,
    input  signed [15:0] w1       ,
    input  signed [15:0] w2       ,
    input  signed [15:0] w3       ,
    input  signed [15:0] w4       ,
    input  signed [15:0] w5       ,
    input  signed [15:0] w6       ,
    input  signed [15:0] w7       ,
    input  signed [15:0] w8       ,
    input  signed [15:0] w9       ,
    input  signed [15:0] w10      ,
    input  signed [15:0] w11      ,
    input  signed [15:0] w12      ,
    input  signed [15:0] w13      ,
    input  signed [15:0] w14      ,
    // filtered sample
    output signed [15:0] data_out
);
    localparam TAPS      = 15;
    localparam FRAC_BITS = 14;

    reg signed [15:0] tap  [0:TAPS-1];   // input delay line
    reg signed [31:0] prod [0:TAPS-1];   // registered products
    reg signed [35:0] acc;               // registered sum of products

    // Stage 1: shift the new sample into the delay line.
    always @(posedge clk or negedge rst_n) begin : delay_line
        integer k;
        if (!rst_n) begin
            for (k = 0; k < TAPS; k = k + 1)
                tap[k] <= 16'sd0;
        end
        else begin
            tap[0] <= data_in;
            for (k = 1; k < TAPS; k = k + 1)
                tap[k] <= tap[k-1];
        end
    end

    // Stage 2: one signed 16x16 -> 32-bit multiplication per tap.
    // tap[0] is the newest sample, so coefficient w0 weights the newest input.
    always @(posedge clk or negedge rst_n) begin : multiply
        integer k;
        if (!rst_n) begin
            for (k = 0; k < TAPS; k = k + 1)
                prod[k] <= 32'sd0;
        end
        else begin
            prod[0]  <= tap[0]  * w0;
            prod[1]  <= tap[1]  * w1;
            prod[2]  <= tap[2]  * w2;
            prod[3]  <= tap[3]  * w3;
            prod[4]  <= tap[4]  * w4;
            prod[5]  <= tap[5]  * w5;
            prod[6]  <= tap[6]  * w6;
            prod[7]  <= tap[7]  * w7;
            prod[8]  <= tap[8]  * w8;
            prod[9]  <= tap[9]  * w9;
            prod[10] <= tap[10] * w10;
            prod[11] <= tap[11] * w11;
            prod[12] <= tap[12] * w12;
            prod[13] <= tap[13] * w13;
            prod[14] <= tap[14] * w14;
        end
    end

    // Stage 3: accumulate. `total` is a blocking temporary used only to
    // build the sum inside this clocked block; each 32-bit product is
    // sign-extended to 36 bits by the signed addition.
    always @(posedge clk or negedge rst_n) begin : accumulate
        integer k;
        reg signed [35:0] total;
        if (!rst_n) begin
            acc <= 36'sd0;
        end
        else begin
            total = 36'sd0;
            for (k = 0; k < TAPS; k = k + 1)
                total = total + prod[k];
            acc <= total;
        end
    end

    // Drop the extra fractional bits (truncation toward minus infinity),
    // then clamp to the signed 16-bit output range.
    wire signed [35:0] scaled = acc >>> FRAC_BITS;

    assign data_out = (scaled >  36'sd32767) ? 16'sh7FFF :
                      (scaled < -36'sd32768) ? 16'sh8000 :
                                               scaled[15:0];
endmodule
`timescale 1ns/1ps
// Error computation for the LMS loop.
//
// The reference sample is pushed through a 17-stage shift register and the
// filtered sample is subtracted from the oldest stage:
//     error = data_ref delayed by 17 clocks - data_in
// The subtraction is a plain 16-bit signed difference (no saturation).
//
// NOTE(review): the original comment said the reference is delayed by two
// extra cycles because "conv takes 2 clock cycles"; the code itself simply
// delays data_ref by 17 clocks. Confirm the intended alignment against the
// conv pipeline before changing DEPTH.
module error(
    input                clk      ,
    input                rst_n    ,   // asynchronous reset, active low
    // filtered sample
    input  signed [15:0] data_in  ,
    // reference sample
    input  signed [15:0] data_ref ,
    // error output
    output signed [15:0] error
);
    // Number of register stages applied to the reference.
    localparam DEPTH = 17;

    // Reference delay line; ref_dly[0] is the newest sample.
    reg signed [15:0] ref_dly [0:DEPTH-1];

    always @(posedge clk or negedge rst_n) begin : ref_delay_line
        integer k;
        if (!rst_n) begin
            // clear every stage
            for (k = 0; k < DEPTH; k = k + 1)
                ref_dly[k] <= 16'd0;
        end
        else begin
            // shift by one position and take in the new reference sample
            ref_dly[0] <= data_ref;
            for (k = 1; k < DEPTH; k = k + 1)
                ref_dly[k] <= ref_dly[k-1];
        end
    end

    // Oldest reference sample minus the filter output.
    assign error = ref_dly[DEPTH-1] - data_in;
endmodule
`timescale 1ns/1ps
// LMS coefficient update:  w<k> <= w<k> + (MU * x * error) >>> 28
//
// The input sample is delayed in a 17-stage shift register (dly[0] newest);
// coefficient w<k> is updated with stage dly[k+2], i.e. w0 uses dly[2] and
// w14 uses dly[16].
//
// Changes versus the earlier version of this module:
//   * The weights are now real registers, updated once per rising clock edge
//     and cleared by the asynchronous reset. Previously they were
//     accumulated inside an always @(*) block (w = w + ...), which is a
//     combinational feedback loop: it cannot be synthesised reliably and in
//     simulation the number of updates depended on how many input events
//     occurred.
//   * The step size is a module parameter with the same default value (383)
//     instead of a register that was only loaded during reset.
//   * The scaling uses an arithmetic shift (>>>) so negative update terms
//     keep their sign. The low 16 bits that reach the weight are unchanged.
//
// NOTE(review): 383 / 2^14 = 0.0234, which matches the step size used in the
// Matlab model in this article, so MU is presumably a Q14 value - confirm.
module w_update #(
    // LMS step size (fixed point)
    parameter signed [15:0] MU = 16'sd383
)(
    // system signals
    input                     clk     ,
    input                     rst_n   ,   // asynchronous reset, active low
    // error sample
    input  signed [15:0]      error   ,
    // sample to be filtered
    input  signed [15:0]      data_in ,
    // coefficient outputs
    output reg signed [15:0]  w0 ,
    output reg signed [15:0]  w1 ,
    output reg signed [15:0]  w2 ,
    output reg signed [15:0]  w3 ,
    output reg signed [15:0]  w4 ,
    output reg signed [15:0]  w5 ,
    output reg signed [15:0]  w6 ,
    output reg signed [15:0]  w7 ,
    output reg signed [15:0]  w8 ,
    output reg signed [15:0]  w9 ,
    output reg signed [15:0]  w10,
    output reg signed [15:0]  w11,
    output reg signed [15:0]  w12,
    output reg signed [15:0]  w13,
    output reg signed [15:0]  w14
);
    localparam DEPTH = 17;

    // Input delay line; dly[0] is the newest sample.
    reg signed [15:0] dly [0:DEPTH-1];

    always @(posedge clk or negedge rst_n) begin : input_delay_line
        integer k;
        if (!rst_n) begin
            for (k = 0; k < DEPTH; k = k + 1)
                dly[k] <= 16'sd0;
        end
        else begin
            dly[0] <= data_in;
            for (k = 1; k < DEPTH; k = k + 1)
                dly[k] <= dly[k-1];
        end
    end

    // Update term MU * x * e. The product is formed in 48 bits and shifted
    // right by 28; the function result keeps the low 16 bits of the shifted
    // value. With the default MU the shifted value always fits in 16 bits.
    function signed [15:0] step;
        input signed [15:0] x;
        input signed [15:0] e;
        reg   signed [47:0] p;
        begin
            p    = MU * x * e;
            step = p >>> 28;
        end
    endfunction

    // Registered weight update. The accumulation is plain 16-bit signed
    // arithmetic and wraps on overflow.
    // Note: the multiply-add still sits in a single clock period; if timing
    // is tight at high clock rates, this is the path to pipeline further.
    always @(posedge clk or negedge rst_n) begin : weight_regs
        if (!rst_n) begin
            w0  <= 16'sd0;
            w1  <= 16'sd0;
            w2  <= 16'sd0;
            w3  <= 16'sd0;
            w4  <= 16'sd0;
            w5  <= 16'sd0;
            w6  <= 16'sd0;
            w7  <= 16'sd0;
            w8  <= 16'sd0;
            w9  <= 16'sd0;
            w10 <= 16'sd0;
            w11 <= 16'sd0;
            w12 <= 16'sd0;
            w13 <= 16'sd0;
            w14 <= 16'sd0;
        end
        else begin
            w0  <= w0  + step(dly[2],  error);
            w1  <= w1  + step(dly[3],  error);
            w2  <= w2  + step(dly[4],  error);
            w3  <= w3  + step(dly[5],  error);
            w4  <= w4  + step(dly[6],  error);
            w5  <= w5  + step(dly[7],  error);
            w6  <= w6  + step(dly[8],  error);
            w7  <= w7  + step(dly[9],  error);
            w8  <= w8  + step(dly[10], error);
            w9  <= w9  + step(dly[11], error);
            w10 <= w10 + step(dly[12], error);
            w11 <= w11 + step(dly[13], error);
            w12 <= w12 + step(dly[14], error);
            w13 <= w13 + step(dly[15], error);
            w14 <= w14 + step(dly[16], error);
        end
    end
endmodule
`timescale 1ns/1ps
// Top level of the LMS adaptive filter.
// Wires together the three sub-blocks:
//   conv     - FIR convolution of data_in with the current coefficients
//   error    - difference between the (delayed) reference and the FIR output
//   w_update - coefficient adaptation driven by the error and the input
module Top(
    input                clk      ,
    input                rst_n    ,
    input  signed [15:0] data_in  ,   // sample to be filtered
    input  signed [15:0] data_ref ,   // reference sample
    output signed [15:0] error    ,   // error output
    output signed [15:0] data_out     // filtered output
);

    // Coefficient bus: driven by w_update, consumed by conv.
    wire signed [15:0] w0,  w1,  w2,  w3,  w4;
    wire signed [15:0] w5,  w6,  w7,  w8,  w9;
    wire signed [15:0] w10, w11, w12, w13, w14;

    // FIR convolution stage
    conv conv_demo(
        .clk      (clk),
        .rst_n    (rst_n),
        .data_in  (data_in),
        .w0 (w0),   .w1 (w1),   .w2 (w2),   .w3 (w3),   .w4 (w4),
        .w5 (w5),   .w6 (w6),   .w7 (w7),   .w8 (w8),   .w9 (w9),
        .w10(w10),  .w11(w11),  .w12(w12),  .w13(w13),  .w14(w14),
        .data_out (data_out)
    );

    // Error stage: compares the FIR output with the reference
    error error_demo(
        .clk      (clk),
        .rst_n    (rst_n),
        .data_in  (data_out),
        .data_ref (data_ref),
        .error    (error)
    );

    // Coefficient update stage
    w_update w_update_demo(
        .clk      (clk),
        .rst_n    (rst_n),
        .error    (error),
        .data_in  (data_in),
        .w0 (w0),   .w1 (w1),   .w2 (w2),   .w3 (w3),   .w4 (w4),
        .w5 (w5),   .w6 (w6),   .w7 (w7),   .w8 (w8),   .w9 (w9),
        .w10(w10),  .w11(w11),  .w12(w12),  .w13(w13),  .w14(w14)
    );

endmodule
`timescale 1ns/1ps
// Testbench for the LMS filter top level.
// Reads the noisy input and the reference signal from two text files (one
// decimal integer per sample), feeds one sample pair per 10 ns clock period
// into Top, then lets the pipeline drain and ends the simulation.
//
// Changes versus the earlier version of this testbench:
//   * both $fopen results are checked, so a missing file is reported
//     instead of the filter being driven with unknown values;
//   * stimulus stops at the first failed read (end of file) instead of
//     always looping 32768 times;
//   * the files are closed and $finish is called, so the free-running clock
//     no longer keeps the simulation alive indefinitely;
//   * data_in / data_ref start at 0 instead of X.
module tb ();
    // Upper bound on the number of sample pairs applied.
    localparam MAX_SAMPLES  = 32768;
    // Clock cycles to keep running after the last sample so the pipeline
    // can drain before the simulation ends.
    localparam FLUSH_CYCLES = 32;

    reg clk;
    reg rst_n;
    // sample to be filtered
    reg signed [15:0] data_in;
    // reference sample
    reg signed [15:0] data_ref;
    // error output
    wire signed [15:0] error;
    // filtered output
    wire signed [15:0] data_out;

    integer fpr_s_addnoise;
    integer fpr_s;
    integer count1;
    integer count2;
    integer i;

    // Staging registers: a sample pair is applied only when both reads succeed.
    reg signed [15:0] sample_in;
    reg signed [15:0] sample_ref;
    reg               more;

    initial
    begin
        $display("step1:Load Data");
        // clock, reset and stimulus initialisation
        clk      = 1'b0;
        rst_n    = 1'b1;
        data_in  = 16'sd0;
        data_ref = 16'sd0;
        // noisy input samples
        fpr_s_addnoise = $fopen("F:/FPGA_DSP/LMS/s_addnoise.txt","r");
        // reference samples
        fpr_s = $fopen("F:/FPGA_DSP/LMS/s.txt","r");
        if (fpr_s_addnoise == 0 || fpr_s == 0) begin
            $display("ERROR: cannot open input file(s)");
            $finish;
        end
        // reset pulse
        #5 rst_n = 1'b0;
        #5 rst_n = 1'b1;
        // drive the filter, one sample pair per clock period
        $display("step2:Write Data to LMS_Filter");
        more = 1'b1;
        for (i = 0; (i < MAX_SAMPLES) && more; i = i + 1)
        begin
            count1 = $fscanf(fpr_s_addnoise,"%d",sample_in);
            count2 = $fscanf(fpr_s,"%d",sample_ref);
            if (count1 == 1 && count2 == 1) begin
                data_in  = sample_in;
                data_ref = sample_ref;
                #10;
            end
            else begin
                // end of file (or malformed line) in either file
                more = 1'b0;
            end
        end
        // let the last samples propagate, then clean up
        repeat (FLUSH_CYCLES) @(posedge clk);
        $fclose(fpr_s_addnoise);
        $fclose(fpr_s);
        $display("step3:Simulation finished");
        $finish;
    end

    // 100 MHz clock (10 ns period)
    always #5 clk = ~clk;

    // device under test
    Top Top_demo(
        .clk(clk)          ,
        .rst_n(rst_n)      ,
        // sample to be filtered
        .data_in(data_in)  ,
        // reference sample
        .data_ref(data_ref),
        // error output
        .error(error)      ,
        // filtered output
        .data_out(data_out)
    );
endmodule
4、总结
除了LMS之外,常用的自适应均衡滤波算法还有RLS算法。RLS算法相比LMS而言收敛速度更快,性能更优,后续我也会对RLS算法的仿真和实现展开介绍。另外,FPGA实现采用的是定点运算,存在量化误差,会造成计算精度的损失;如想进一步提高计算精度,可以选择更高的定点量化位数。