PyTorch Study Notes 2 - Gradient Descent

1 Implementing Gradient Descent in NumPy and PyTorch (Linear Regression)

The general steps of gradient descent are as follows (a minimal one-dimensional sketch is given after the list):

(1) Set initial parameter values

(2) Compute the gradient

(3) Update the parameters along the negative gradient direction
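
Before the full example, here is a toy illustration of these three steps (my own sketch, not from the original post), minimizing f(w) = (w - 3)**2 in one dimension:

w = 0.0   # (1) set an initial value
lr = 0.1  # learning rate
for i in range(100):
    grad = 2 * (w - 3)  # (2) compute the gradient of f at w
    w -= lr * grad      # (3) update along the negative gradient
print(w)  # converges toward the minimizer w = 3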

1.1 NumPy Version

Suppose the target function we want to fit is y = 2*x1 - 4*x2. This is a linear function of two variables, where the input x is a 2-dimensional vector. The code below solves for the optimal parameters via gradient descent:
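
For reference (this derivation is mine, not spelled out in the original post), with the mean-squared-error loss

L(w) = \frac{1}{N}\sum_{i=1}^{N}(x_i^\top w - y_i)^2

the gradient has the closed form \nabla_w L = \frac{2}{N} X^\top (Xw - y), which is exactly what the line dw = np.mean(2*(y_predict-y) * x.T, axis=1) computes below.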

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from matplotlib import style

# Create the data
N = 100
x1 = np.linspace(-10, 10, N)
x2 = np.linspace(-15, 5, N)
x = np.concatenate(([x1], [x2]), axis=0).T
w = np.array([2, -4])
y = np.dot(x, w)

# Plot the target function
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')
ax1.plot_wireframe(np.array([x1]), np.array([x2]), np.array([y]), rstride=5, cstride=5)
ax1.set_xlabel("x1")
ax1.set_ylabel("x2")
ax1.set_zlabel("y")

# Gradient descent
EPOCHS = 50         # total number of iterations
LOSS_MIN = 0.0001   # target minimum loss (an early-stopping threshold; not actually checked in the loop below)
lr = 0.01           # learning rate
# w_GD = np.random.rand(2)
w_GD = np.zeros(2)  # parameter estimate updated during gradient descent (GD)
cost = []           # loss value at each GD step
w_all = []          # w value at each GD step
for i in range(EPOCHS):
    w_all.append(w_GD.copy())
    y_predict = np.dot(x, w_GD)                    # prediction with the current w_GD
    loss = np.mean((y_predict - y)**2)             # mean squared error
    cost.append(loss)
    dw = np.mean(2*(y_predict - y) * x.T, axis=1)  # gradient of the loss w.r.t. w
    w_GD -= lr*dw                                  # gradient descent update
print("loss:", loss)
print("w1:", w_GD[0], "w2:", w_GD[1])

# Plot the gradient descent trajectory
w_all = np.array(w_all)
fig = plt.figure()
ax2 = fig.add_subplot(111, projection='3d')
ax2.plot_wireframe(np.array([w_all[:, 0]]), np.array([w_all[:, 1]]), np.array([cost]))
ax2.set_xlabel("w1")
ax2.set_ylabel("w2")
ax2.set_zlabel("loss")

# Plot the loss-iteration curve
fig = plt.figure()
plt.plot(range(len(cost)), cost)
plt.title('loss')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()

Output:

loss: 2.565443781623136e-08

w1: 1.9999674457769208 w2: -3.999977280651687

[Figure 1: plot of y = 2*x1 - 4*x2]

Figure 1 shows the graph of the target function, a straight line in 3D space.

[Figure 2: gradient descent trajectory of the loss function]

[Figure 3: loss-iteration curve]

As Figures 2 and 3 show, gradient descent finds the optimal w1, w2 and achieves the best fit to the target function.
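
As an extra sanity check (my addition, not in the original post), the same optimum can be recovered in closed form with NumPy's least-squares solver:

w_ls, residuals, rank, sv = np.linalg.lstsq(x, y, rcond=None)
print(w_ls)  # approximately [ 2. -4.]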

1.2 PyTorch Version

import torch

# Create the data
N = 100
x = torch.randn(N, 2)
w = torch.FloatTensor([2, -4])
y = x.mv(w)  # y = 2*x1 - 4*x2 (matrix-vector product, as in the NumPy version)

# Gradient descent
EPOCHS = 5000
lr = 0.01
w_GD = torch.zeros(2, requires_grad=True)
cost = []   # loss value at each GD step
w_all = []  # w value at each GD step
for i in range(EPOCHS):
    w_all.append(w_GD.data.clone())  # clone, otherwise every entry aliases the same tensor
    y_predict = x.mv(w_GD)
    loss = torch.mean((y_predict - y)**2)
    cost.append(loss.item())
    loss.backward()
    # Parameter update (on .data, outside the autograd graph)
    w_GD.data -= lr * w_GD.grad.data
    w_GD.grad.data.zero_()
print("loss:", loss)
print("w_GD:", w_GD)

Output:

loss: tensor(8.8394e-11, grad_fn=<MeanBackward0>)

w_GD: tensor([ 2.0000, -4.0000], requires_grad=True)
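
The manual update w_GD.data -= lr * w_GD.grad.data can also be delegated to an optimizer. A minimal sketch, assuming x, y, EPOCHS and lr are defined as above:

import torch

w_GD = torch.zeros(2, requires_grad=True)
optimizer = torch.optim.SGD([w_GD], lr=lr)
for i in range(EPOCHS):
    optimizer.zero_grad()                   # clear gradients from the previous step
    loss = torch.mean((x.mv(w_GD) - y)**2)  # same MSE loss as before
    loss.backward()                         # populate w_GD.grad
    optimizer.step()                        # w_GD -= lr * w_GD.grad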

2 Building a Simple Neural Network with PyTorch

Taking the LeNet-5 network from the official tutorial as an example, we build a simple convolutional neural network for recognizing handwritten digits.

import torch as t
import torchvision as tv
import torch.nn as nn
import torch.nn.functional as F

# Build the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)

# Forward pass
x = t.randn(1, 1, 32, 32)
out = net(x)
print("out:", out)

# Loss function
target = t.randn(10)         # a dummy target, just for demonstration
target = target.view(1, -1)  # reshape to the same shape as out
criterion = nn.MSELoss()
loss = criterion(out, target)
print("loss:", loss)

# Backward pass
net.zero_grad()
loss.backward()

# Update the parameters manually
lr = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * lr)

# Or use an optimizer
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
output = net(x)
loss = criterion(output, target)
loss.backward()
optimizer.step()
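
The snippet above runs a single training step on one random input and a random target. As a sketch of how these pieces fit together on real data (my addition, not from the original post), here is a minimal MNIST training loop; it swaps the demonstration MSELoss for nn.CrossEntropyLoss, the usual choice for classification, and pads the 28x28 MNIST digits to the 32x32 input that this LeNet-5 expects:

import torch as t
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Pad(2),       # pad 28x28 MNIST digits to LeNet-5's 32x32 input
    transforms.ToTensor(),
])
train_set = tv.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = t.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

net = Net()  # the Net class defined above
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

for epoch in range(2):
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(net(images), labels)
        loss.backward()
        optimizer.step()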

Output:

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
out: tensor([[ 0.0253, -0.0078,  0.0713,  0.1756,  0.0836,  0.1335, -0.1235,  0.0425,
          0.0714,  0.1090]], grad_fn=<AddmmBackward>)
loss: tensor(1.0700, grad_fn=<MseLossBackward>)
