LMS自适应均衡器---从理论仿真到FPGA实现

引言

       最小均方(LMS,Least Mean Square)算法作为一种自适应均衡和滤波算法,在通信与信号处理中具有广泛的应用,可以有效地滤除信道噪声,获得很好的通信质量。本文针对该算法进行了理论分析、Matlab仿真与FPGA实现。

1、理论分析

       在前面的博文中我们对常见的几种MIMO均衡算法(CMA,LMS,RLS)理论进行了详细介绍,这里不再赘述,见链接:MIMO均衡算法(CMA,LMS,RLS)原理介绍

       这里主要介绍一下LMS算法的主要流程:

  • 初始化滤波器长度,权值,步长因子(mu)
  • 执行卷积,计算输出值
  • 计算输出值与参考值之间的误差
  • 进行权值更新

2、Matlab仿真

       根据如前所述的算法原理,我们利用Matlab算法仿真工具对该算法进行了仿真,相应代码及注释如下:

% Least-mean-square (LMS) adaptive filter
function [e,w,y]=LMS(mu,M,x,d)
%LMS  Adapt an M-tap FIR filter so that its output tracks d.
%
% Inputs:
%   mu - step size of the weight update
%   M  - filter length (number of taps)
%   x  - input signal with N samples (row or column vector)
%   d  - desired (reference) signal, read as d(n) for n = M..N
%
% Outputs:
%   e  - error d(n) - y(n), 1-by-N row vector; entries 1..M-1 stay 0
%   w  - final filter coefficients, M-by-1
%   y  - filter output, 1-by-N row vector; entries 1..M-1 stay 0
%
% If N < M the loop never runs: e and y are returned as zeros and w as
% the all-zero initial weights.

%% Step 1: initialisation
% Filter coefficients start at zero
w=zeros(M,1);
% Force a column so that w'*filter_in is a scalar for row input as well
x=x(:);
% Number of input samples
N=length(x);
% Preallocate the outputs as rows (the shape the loop used to produce by
% growing them) so they are defined even when the loop body never executes
y=zeros(1,N);
e=zeros(1,N);
%% Step 2: run the LMS recursion
for n=M:N
    % Most recent M samples, newest first
    filter_in = x(n:-1:n-M+1);
    % Filter output for the current weights
    y(n) = w'*filter_in;
    % Error against the desired sample
    e(n) = d(n)- y(n);
    % Weight update
    w = w + mu*filter_in*e(n);
end
% Demo script: denoise a three-tone test signal with the LMS adaptive filter
% and plot the waveforms and the error curve.
clc;
clear;
%% Build the clean test signal
Fs = 20000;               % sampling frequency
N = 2^12;                 % number of samples
t = 0:1/Fs:N/Fs-1/Fs;     % time axis
% Three sinusoids (2 kHz, 6 kHz, 9 kHz) summed into one waveform
tone_a = sin(2000*2*pi*t);
tone_b = sin(6000*2*pi*t);
tone_c = sin(9000*2*pi*t);
s = tone_a + tone_b + tone_c;
% Rescale to the range [0, 1]
s_lo = min(s);
s_hi = max(s);
s = (s - s_lo)/(s_hi - s_lo);
%% Add Gaussian noise
noise = 0.05*randn(1,numel(s));
s_addnoise = s + noise;
% %% RLS (alternative, disabled)
% lambda = 1;
% M = 15;
% delta = 1e-7;
% [e,w,y]=RLS(lambda,M,s_addnoise',s',delta);
%% LMS
M = 15;          % filter length
mu = 0.0234;     % step size
[e,w,y] = LMS(mu,M,s_addnoise.',s.');
% %% NLMS (alternative, disabled)
% M = 15;
% mu = 0.05; 
% a = 1e-4;
% [e,w,y]=NLMS(mu,M,s_addnoise',s',a);
%% Time-domain waveforms: clean, noisy, filtered
figure(1);
waveforms = {s, s_addnoise, y};
captions = {'滤波前时域波形', '加噪声后时域波形', '自适应滤波后时域波形'};
for k = 1:3
    subplot(3,1,k);
    plot(t,waveforms{k},'r','LineWidth',1.2);
    % Zoom on samples 1500..1600
    axis([1500/Fs,1600/Fs,0,1]);
    title(captions{k});
end
%% Error magnitude per iteration
figure(2);
plot(abs(e),'r','LineWidth',1.2);
title('误差曲线');

       如下所示为LMS算法对应的误差曲线,迭代次数为 500次的时候误差基本已经收敛。

LMS自适应均衡器---从理论仿真到FPGA实现_第1张图片

3、FPGA实现

     一种算法要想在实际应用中产生实际效果,就必须研究其具体的实现方式,FPGA作为一种并行处理器,在通信及信号处理领域应用比较广泛,因此,本文还研究了该算法的FPGA实现。该算法应用流水线的思想进行实现,流水线处理思想增大了数据吞吐量,降低了数据的处理延时,可以使得该算法能应用于高速度的通信场合。

  • 卷积模块(conv.v):该模块主要执行输入数据与滤波器权值数据的卷积操作,实现滤波数据输出
`timescale 1ns/1ps
// conv: 15-tap transversal (FIR) stage of the LMS filter.
//
// Three register stages:
//   1. tap[]  - shift register holding the last 15 input samples
//   2. prod[] - per-tap products tap[k] * w<k>
//   3. acc    - sum of the 15 products
// data_out is acc shifted right by 14 bits (Q28 -> Q14 per the original
// author's note) and saturated to the signed 16-bit range, so an
// out-of-range result clamps instead of wrapping around and flipping sign.
module conv(
	// system signals
	input			clk					, 
	input			rst_n				,
	// sample to be filtered
	input       signed [15:0]data_in     ,
	// filter weights, one per tap
	input       signed [15:0]w0         ,
	input       signed [15:0]w1         ,
	input       signed [15:0]w2         ,
	input       signed [15:0]w3         ,
	input       signed [15:0]w4         ,
	input       signed [15:0]w5         ,
	input       signed [15:0]w6         ,
	input       signed [15:0]w7         ,
	input       signed [15:0]w8         ,
	input       signed [15:0]w9         ,
	input       signed [15:0]w10        ,
	input       signed [15:0]w11        ,
	input       signed [15:0]w12        ,
	input       signed [15:0]w13        ,
	input       signed [15:0]w14        ,
    // filtered output
    output      signed[15:0]data_out    
);
localparam TAPS = 15;

// Collect the individual weight ports into an indexable net array
wire signed [15:0] weight [0:TAPS-1];
assign weight[0]  = w0;
assign weight[1]  = w1;
assign weight[2]  = w2;
assign weight[3]  = w3;
assign weight[4]  = w4;
assign weight[5]  = w5;
assign weight[6]  = w6;
assign weight[7]  = w7;
assign weight[8]  = w8;
assign weight[9]  = w9;
assign weight[10] = w10;
assign weight[11] = w11;
assign weight[12] = w12;
assign weight[13] = w13;
assign weight[14] = w14;

// Stage 1: input delay line, tap[0] is the newest sample
reg signed [15:0] tap [0:TAPS-1];
integer i;
always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		for (i = 0; i < TAPS; i = i + 1)
			tap[i] <= 16'd0;
	end
	else begin
		tap[0] <= data_in;
		for (i = 1; i < TAPS; i = i + 1)
			tap[i] <= tap[i-1];
	end
end

// Stage 2: registered products, tap k times weight k
reg signed [31:0] prod [0:TAPS-1];
integer j;
always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		for (j = 0; j < TAPS; j = j + 1)
			prod[j] <= 32'd0;
	end
	else begin
		for (j = 0; j < TAPS; j = j + 1)
			prod[j] <= tap[j] * weight[j];
	end
end

// Stage 3: accumulate. 15 signed 32-bit products need 32 + 4 = 36 bits to
// be summed without overflow; the 36-bit left-hand side makes the whole
// expression evaluate at 36 bits with sign extension.
reg signed [35:0] acc;
always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		acc <= 36'd0;
	end
	else begin
		acc <= prod[0] + prod[1] + prod[2]  + prod[3]  + prod[4]
		     + prod[5] + prod[6] + prod[7]  + prod[8]  + prod[9]
		     + prod[10] + prod[11] + prod[12] + prod[13] + prod[14];
	end
end

// Q28 --> Q14: arithmetic shift keeps the sign, then clamp to 16 bits
wire signed [21:0] acc_q14;
assign acc_q14 = acc >>> 14;
assign data_out = (acc_q14 >  22'sd32767) ? 16'sh7FFF :
                  (acc_q14 < -22'sd32768) ? 16'sh8000 :
                                            acc_q14[15:0];
endmodule
  • 误差计算模块(error.v):该模块主要执行滤波输出数据与参考数据之间的误差计算,该误差用于滤波器权值数据的更新
`timescale 1ns/1ps
// error: difference between the delayed reference sample and the filter
// output.
//
// data_ref passes through a 17-stage register delay line before the
// subtraction. Per the original author's note, the two stages beyond the
// 15-sample window compensate for pipeline latency in the conv module.
// The subtraction itself is combinational and wraps on 16-bit overflow.
module error(
	input           clk ,
	input           rst_n,
	// filter output
	input			signed [15:0]data_in, 
	// reference sample
	input			signed [15:0]data_ref,
	// error = delayed reference - filter output
	output          signed [15:0]error
);
localparam DEPTH = 17;

// Reference delay line; ref_pipe[0] is the newest sample
reg signed [15:0] ref_pipe [0:DEPTH-1];
integer k;

always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		for (k = 0; k < DEPTH; k = k + 1)
			ref_pipe[k] <= 16'd0;
	end
	else begin
		ref_pipe[0] <= data_ref;
		for (k = 1; k < DEPTH; k = k + 1)
			ref_pipe[k] <= ref_pipe[k-1];
	end
end

// Oldest reference sample minus the current filter output
assign error = ref_pipe[DEPTH-1] - data_in;

endmodule
  • 滤波器系数更新模块(w_update.v):该模块主要根据输出误差进行滤波器权值数据的更新,更新后的权值数据输送给卷积模块执行卷积滤波
`timescale 1ns/1ps
// w_update: LMS weight update, w_k <= w_k + (MU * x * error) >> 28.
//
// The weights are held in clocked registers with asynchronous reset.
// Accumulating them inside a combinational always block (w = w + ...)
// would create a combinational feedback loop, which cannot be synthesized
// as an accumulator and updates a simulator-dependent number of times per
// clock. With the registered update every weight changes exactly once per
// rising clock edge, using the error and delayed samples present before
// that edge.
//
// MU is the step size. The default 383 is the value the design previously
// loaded on reset (383 / 2^14 is about 0.0234). The >> 28 scaling brings
// the product of three Q14 values back to Q14 - NOTE(review): Q14 is
// inferred from the shift amounts and the step-size value; confirm against
// the fixed-point data files.
module w_update #(
	// step size of the weight update
	parameter signed [15:0] MU = 16'sd383
)(
	//system signals
	input	 clk				    , 
	input	 rst_n				    ,
	// error from the error module
	input    signed [15:0]error     ,
	// sample to be filtered
	input    signed [15:0]data_in   ,
	// updated weights
	output  reg signed [15:0]w0,
	output  reg signed [15:0]w1,
	output  reg signed [15:0]w2,
	output  reg signed [15:0]w3,
	output  reg signed [15:0]w4,
	output  reg signed [15:0]w5,
	output  reg signed [15:0]w6,
	output  reg signed [15:0]w7,
	output  reg signed [15:0]w8,
	output  reg signed [15:0]w9,
	output  reg signed [15:0]w10,
	output  reg signed [15:0]w11,
	output  reg signed [15:0]w12,
	output  reg signed [15:0]w13,
	output  reg signed [15:0]w14       
);
localparam DEPTH = 17;

// Input delay line; x_dly[0] is the newest sample. Weight k is paired
// with x_dly[k+2]: per the original author's note, the two extra stages
// compensate for pipeline latency in the conv module.
reg signed [15:0] x_dly [0:DEPTH-1];
integer k;
always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		for (k = 0; k < DEPTH; k = k + 1)
			x_dly[k] <= 16'd0;
	end
	else begin
		x_dly[0] <= data_in;
		for (k = 1; k < DEPTH; k = k + 1)
			x_dly[k] <= x_dly[k-1];
	end
end

// One weight increment: MU * sample * err scaled down by 28 bits.
// The product is formed in a 48-bit signed variable on purpose: assigning
// the multiplication straight to a 16-bit target would make Verilog
// evaluate it at 16 bits and discard the upper bits before the shift.
function signed [15:0] step;
	input signed [15:0] sample;
	input signed [15:0] err;
	reg   signed [47:0] full;
	begin
		full = MU * sample * err;
		step = full >>> 28;
	end
endfunction

// Registered weight update (wraps on 16-bit overflow)
always @(posedge clk or negedge rst_n) begin
	if (!rst_n) begin
		w0  <= 16'd0;
		w1  <= 16'd0;
		w2  <= 16'd0;
		w3  <= 16'd0;
		w4  <= 16'd0;
		w5  <= 16'd0;
		w6  <= 16'd0;
		w7  <= 16'd0;
		w8  <= 16'd0;
		w9  <= 16'd0;
		w10 <= 16'd0;
		w11 <= 16'd0;
		w12 <= 16'd0;
		w13 <= 16'd0;
		w14 <= 16'd0;
	end
	else begin
		w0  <= w0  + step(x_dly[2],  error);
		w1  <= w1  + step(x_dly[3],  error);
		w2  <= w2  + step(x_dly[4],  error);
		w3  <= w3  + step(x_dly[5],  error);
		w4  <= w4  + step(x_dly[6],  error);
		w5  <= w5  + step(x_dly[7],  error);
		w6  <= w6  + step(x_dly[8],  error);
		w7  <= w7  + step(x_dly[9],  error);
		w8  <= w8  + step(x_dly[10], error);
		w9  <= w9  + step(x_dly[11], error);
		w10 <= w10 + step(x_dly[12], error);
		w11 <= w11 + step(x_dly[13], error);
		w12 <= w12 + step(x_dly[14], error);
		w13 <= w13 + step(x_dly[15], error);
		w14 <= w14 + step(x_dly[16], error);
	end
end
endmodule
  • 顶层模块(Top.v):主要进行对如上所述三个模块的例化,进行模块的封装
`timescale 1ns/1ps
// Top: structural wrapper of the LMS filter.
// Wires together the filter (conv), the error calculation (error) and the
// weight update (w_update); the 15 weights travel from w_update to conv.
module Top(
	input                clk,
	input                rst_n,
	// sample to be filtered
	input  signed [15:0] data_in,
	// reference sample
	input  signed [15:0] data_ref,
	// error between delayed reference and filter output
	output signed [15:0] error,
	// filtered output
	output signed [15:0] data_out
);

// Weights produced by w_update and consumed by conv
wire signed [15:0] w0,  w1,  w2,  w3,  w4;
wire signed [15:0] w5,  w6,  w7,  w8,  w9;
wire signed [15:0] w10, w11, w12, w13, w14;

// Filter: applies the current weights to the input stream
conv conv_demo(
	.clk      (clk),
	.rst_n    (rst_n),
	.data_in  (data_in),
	.w0 (w0),   .w1 (w1),   .w2 (w2),   .w3 (w3),   .w4 (w4),
	.w5 (w5),   .w6 (w6),   .w7 (w7),   .w8 (w8),   .w9 (w9),
	.w10(w10),  .w11(w11),  .w12(w12),  .w13(w13),  .w14(w14),
	.data_out (data_out)
);

// Error: compares the filter output with the reference stream
error error_demo(
	.clk      (clk),
	.rst_n    (rst_n),
	.data_in  (data_out),
	.data_ref (data_ref),
	.error    (error)
);

// Weight update: adapts the weights from the error and the input stream
w_update w_update_demo(
	.clk      (clk),
	.rst_n    (rst_n),
	.error    (error),
	.data_in  (data_in),
	.w0 (w0),   .w1 (w1),   .w2 (w2),   .w3 (w3),   .w4 (w4),
	.w5 (w5),   .w6 (w6),   .w7 (w7),   .w8 (w8),   .w9 (w9),
	.w10(w10),  .w11(w11),  .w12(w12),  .w13(w13),  .w14(w14)
);

endmodule
  • Testbench(tb.v):在Matlab中取得定点化后的数据读入Modelsim中进行测试
`timescale 1ns/1ps
// tb: file-driven testbench for Top.
//
// Reads one decimal sample per clock period from each of two text files
// (noisy input and reference) and drives them into the filter. The run
// stops when either file is exhausted or MAX_SAMPLES samples were sent,
// then waits FLUSH_CYCLES clocks so the pipeline drains, closes the files
// and ends the simulation.
// The stimulus paths are absolute and machine specific; adjust them to
// the local location of the fixed-point data exported from Matlab.
module tb ();
reg clk;
reg rst_n;
// sample to be filtered
reg signed [15:0] data_in;
// reference sample
reg signed [15:0] data_ref;
// error output
wire signed [15:0] error;
// filtered output
wire signed [15:0] data_out;

// Upper bound on the number of samples driven into the filter
localparam MAX_SAMPLES  = 32768;
// Clock cycles to keep running after the last sample
localparam FLUSH_CYCLES = 32;

integer fpr_s_addnoise;
integer fpr_s;
integer count1;
integer count2;
integer samples_sent;
reg     eof_seen;

initial
begin
	// clock, reset and data initialisation
	clk = 1'b0;
	rst_n = 1'b1;
	data_in = 16'd0;
	data_ref = 16'd0;
	$display("step1:Load  Data");
	// noisy input samples
	fpr_s_addnoise = $fopen("F:/FPGA_DSP/LMS/s_addnoise.txt","r");
	// reference samples
	fpr_s = $fopen("F:/FPGA_DSP/LMS/s.txt","r");
	// $fopen returns 0 on failure; without this check every $fscanf would
	// fail silently and the filter would only ever see the initial zeros
	if (fpr_s_addnoise == 0 || fpr_s == 0) begin
		$display("ERROR: cannot open stimulus file(s)");
		$finish;
	end
	// active-low reset pulse
	#5 rst_n = 1'b0;
	#5 rst_n = 1'b1;
	// drive the samples, one pair per 10 ns clock period
	$display("step2:Write Data to LMS_Filter");
	samples_sent = 0;
	eof_seen = 1'b0;
	while (samples_sent < MAX_SAMPLES && !eof_seen)
		begin
			count1 = $fscanf(fpr_s_addnoise,"%d",data_in);
			count2 = $fscanf(fpr_s,"%d",data_ref);
			// anything other than one converted value means end of file
			// or malformed data: stop instead of replaying stale samples
			if (count1 != 1 || count2 != 1)
				eof_seen = 1'b1;
			else
				begin
					samples_sent = samples_sent + 1;
					#10;
				end
		end
	// let the last samples propagate through the pipeline
	repeat (FLUSH_CYCLES) @(posedge clk);
	$fclose(fpr_s_addnoise);
	$fclose(fpr_s);
	$display("done: %0d samples sent", samples_sent);
	$finish;
end
// 100 MHz clock (10 ns period)
always #5 clk = ~clk;
// device under test
Top Top_demo(
	.clk(clk)	  , 
	.rst_n(rst_n) ,
	// sample to be filtered
	.data_in(data_in)  ,
	// reference sample
	.data_ref(data_ref),
	// error
	.error(error)      ,
	// filtered output
    .data_out(data_out)   
);
endmodule
  • Modelsim仿真结果:利用如上所述几个模块,我们对LMS算法进行了一个简单的仿真,结果波形图如下。

LMS自适应均衡器---从理论仿真到FPGA实现_第2张图片

4、总结

     除了LMS这一种自适应均衡滤波算法外,常用的还有RLS算法。RLS算法相比LMS而言收敛速度更快,性能更优,后续我也会对RLS算法的仿真和实现展开介绍。另外,FPGA实现的算法是一种定点算法,存在量化误差,会造成计算精度的损失,如想进一步提高计算精度,可以选择更高的定点量化位数。

你可能感兴趣的:(通信,Verilog,信号处理,算法)