import numpy as np
import torch
# create a tensor in a similar way to a numpy array
x_numpy = np.array([0.1,0.2,0.3])
x_torch = torch.tensor([0.1,0.2,0.3])
print('x_numpy','x_torch')
print(x_numpy,x_torch)
print()
# converting to and from numpy
print('to and from numpy, pytorch:')
print(torch.from_numpy(x_numpy), x_torch.numpy())  # note: numpy() is a method call, the () is required
print()
#basic operations
y_numpy = np.array([3,4,5])
y_torch = torch.tensor([3,4,5])
print('add:')
print(x_numpy+y_numpy,x_torch+y_torch)
print()
#many functions that are in numpy are also in pytorch
print('norm (the L2 norm):')
print(np.linalg.norm(x_numpy),torch.norm(x_torch))
print()
# to apply an operation along a dimension
print('mean along the 0th dimension:')
z_numpy = np.array([[1,2],[3,4.]])
z_torch = torch.tensor([[1,2],[3,4.]])
print(np.mean(z_numpy, axis=0), torch.mean(z_torch, dim=0))  # note: numpy says axis=, pytorch says dim=
'''result:
x_numpy x_torch
[0.1 0.2 0.3] tensor([0.1000, 0.2000, 0.3000])
to and from numpy, pytorch:
tensor([0.1000, 0.2000, 0.3000], dtype=torch.float64) [0.1 0.2 0.3]
add:
[3.1 4.2 5.3] tensor([3.1000, 4.2000, 5.3000])
norm (the L2 norm):
0.37416573867739417 tensor(0.3742)
mean along the 0th dimension:
[2. 3.] tensor([2., 3.])
'''
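Note: torch.from_numpy shares memory with the source array, so changing one changes the other (a minimal sketch):
a = np.array([1.0, 2.0, 3.0])
t = torch.from_numpy(a)  # no copy: tensor and array share one buffer
a[0] = 100.0
print(t)  # tensor([100., 2., 3.], dtype=torch.float64)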
#view
N,C,W,H = 10000,3,28,28
X = torch.randn(N,C,W,H)
print(X.shape)
print(X.view(N,C,784).shape)
print(X.view(-1,C,784).shape)  # -1 lets pytorch infer the size of dimension 0 automatically
'''result
torch.Size([10000, 3, 28, 28])
torch.Size([10000, 3, 784])
torch.Size([10000, 3, 784])
'''
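view only works when the new shape is compatible with the tensor's memory layout; reshape falls back to copying when it isn't (a minimal sketch):
Y = torch.randn(2, 3)
print(Y.t().reshape(6).shape)  # torch.Size([6]); reshape copies because the transpose is non-contiguous
# Y.t().view(6) would raise a RuntimeError for the same reason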
#broadcasting
x = torch.ones(5,1,4,2)
y = torch.ones(3,1,1)
print((x+y).size())
'''
torch.Size([5, 3, 4, 2])
'''
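Broadcasting aligns shapes from the trailing dimension; each pair of sizes must be equal, or one of them must be 1 (a minimal sketch):
# x: (5, 1, 4, 2) and y: (3, 1, 1) align from the right, and size-1 dims expand
a = torch.ones(4, 1)
b = torch.ones(1, 3)
print((a + b).shape)  # torch.Size([4, 3])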
#cuda
cpu = torch.device('cpu')
gpu = torch.device('cuda')
x = torch.rand(5)
print(x)
x = x.to(gpu)
print(x)
x = x.to(cpu)
print(x)
'''
tensor([0.9609, 0.1270, 0.7621, 0.4180, 0.0792])
tensor([0.9609, 0.1270, 0.7621, 0.4180, 0.0792], device='cuda:0')
tensor([0.9609, 0.1270, 0.7621, 0.4180, 0.0792])
'''
The older style (not recommended) was:
x.cuda()
x.cpu()
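The .to(gpu) call above fails on a machine without CUDA; the usual pattern guards it with torch.cuda.is_available() (a minimal sketch):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.rand(5).to(device)  # lands on the GPU when present, otherwise stays on the CPU
print(x.device)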
import torch
def f(x):
    return (x - 2) ** 2
def fp(x):
    return (x - 2) * 2
x = torch.tensor([1.0],requires_grad=True)
# gradient from the hand-derived formula
print('Analytical f\'(x):',fp(x))
# pytorch computes the gradient automatically
y = f(x)
y.backward()  # compute all the gradients
print('PyTorch\'s f\'(x):',x.grad)
'''
Analytical f'(x): tensor([-2.], grad_fn=<MulBackward0>)
PyTorch's f'(x): tensor([-2.])
'''
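torch.autograd.grad is a functional alternative to backward(): it returns the gradient directly instead of accumulating it into .grad (a minimal sketch, reusing f from above):
x2 = torch.tensor([1.0], requires_grad=True)
(g,) = torch.autograd.grad(f(x2), x2)
print(g)  # tensor([-2.]), the same value as x.grad above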
import torch
def f(x):
    return (x - 2) ** 2
def fp(x):
    return (x - 2) * 2
x = torch.tensor([5.0],requires_grad=True)
lr = 0.25
print('iter,\tx,\tf(x),\tf\'(x),\tf\'(x) pytorch')
for i in range(12):
    y = f(x)
    y.backward()
    print('{},\t{:.3f},\t{:.3f},\t{:.3f},\t{:.3f}'.format(i, x.item(), f(x).item(), fp(x).item(), x.grad.item()))
    x.data = x.data - lr * x.grad
    x.grad.detach_()  # done for efficiency; safe to ignore
    x.grad.zero_()    # zero the gradient so each backward() computes a fresh gradient instead of accumulating
'''
iter, x, f(x), f'(x), f'(x) pytorch
0, 5.000, 9.000, 6.000, 6.000
1, 3.500, 2.250, 3.000, 3.000
2, 2.750, 0.562, 1.500, 1.500
3, 2.375, 0.141, 0.750, 0.750
4, 2.188, 0.035, 0.375, 0.375
5, 2.094, 0.009, 0.188, 0.188
6, 2.047, 0.002, 0.094, 0.094
7, 2.023, 0.001, 0.047, 0.047
8, 2.012, 0.000, 0.023, 0.023
9, 2.006, 0.000, 0.012, 0.012
10, 2.003, 0.000, 0.006, 0.006
11, 2.001, 0.000, 0.003, 0.003
'''
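The x.data update above works, but the idiomatic way is to step inside torch.no_grad() so autograd doesn't track the update (a minimal sketch of one iteration, reusing f and lr):
x = torch.tensor([5.0], requires_grad=True)
y = f(x)
y.backward()
with torch.no_grad():
    x -= lr * x.grad  # in-place update, excluded from the graph
x.grad.zero_()        # reset before the next backward()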
import torch
# ----------- prepare the data -----------
# y = 2*x + 1
# the commented-out data below has the wrong shape
# x = torch.Tensor([1, 2, 3])
# y = torch.Tensor([3, 5, 7])
# data must be arranged one sample per row: shape (3, 1), i.e. 3 samples with 1 feature each
x = torch.Tensor([[1], [2], [3]])
y = torch.Tensor([[3], [5], [7]])
# ----------- choose the model -----------
# the model is defined as a class, not a function
# the model inherits from torch.nn.Module
class LinearModel(torch.nn.Module):
    # constructor: does the initialization; must be implemented
    def __init__(self):
        # call the parent constructor; the arguments are this class and self
        super(LinearModel, self).__init__()
        # instantiate a linear layer; the arguments are the input feature
        # dimension, the output feature dimension, and optionally whether to use a bias
        self.linear = torch.nn.Linear(in_features=1, out_features=1)
    # forward must be implemented; backward is generated automatically.
    # calling the instance invokes __call__(), which is hard-wired to
    # call forward(), so forward must be overridden
    def forward(self, x):
        y_pred = self.linear(x)  # y = wx + b
        return y_pred
# instantiate LinearModel(); the instance is callable: model(x)
model = LinearModel()
# ----------- loss function -----------
# instantiate the mean-squared-error loss (also a Module); called as criterion(y_pred, y)
criterion = torch.nn.MSELoss()  # size_average is deprecated; the default already averages
# ----------- optimizer -----------
# stochastic gradient descent; pass in the model's parameters and the learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# ----------- train the model -----------
for epoch in range(1000):
    # feed the data to the model; this invokes __call__, which calls forward()
    y_pred = model(x)
    # compute the loss; loss is an object whose value is a scalar
    loss = criterion(y_pred, y)
    print(epoch, loss)
    # zero the gradients
    optimizer.zero_grad()
    # backpropagate the loss (to compute the gradients)
    loss.backward()
    # let the optimizer update the parameters
    optimizer.step()
# with or without item() both work; item() extracts the python number
print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())
x_test = torch.Tensor([[4]])
y_test = model(x_test)
print('y_test=', y_test.data)
'''
0 tensor(30.8965, grad_fn=<MseLossBackward0>)
1 tensor(24.4480, grad_fn=<MseLossBackward0>)
2 tensor(19.3507, grad_fn=<MseLossBackward0>)
3 tensor(15.3214, grad_fn=<MseLossBackward0>)
......
998 tensor(0.0010, grad_fn=<MseLossBackward0>)
999 tensor(0.0010, grad_fn=<MseLossBackward0>)
w= 2.037266492843628
b= 0.9152839779853821
y_test= tensor([[9.0644]])
'''
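After training, the learned parameters can be inspected or saved through state_dict (a minimal sketch; 'linear.pt' is just an illustrative file name):
print(model.state_dict())  # OrderedDict with 'linear.weight' and 'linear.bias'
torch.save(model.state_dict(), 'linear.pt')  # the standard way to persist the weights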
torch.nn contains many ready-made model components: linear layers, CNNs, and so on.
import torch
import torch.nn as nn
d_in = 3
d_out = 4
linear_module = nn.Linear(d_in, d_out)  # the model
example_tensor = torch.tensor([[1.,2,3],[4,5,6]])
transformed = linear_module(example_tensor)
print('example_tensor:',example_tensor.shape)
print('transformed:',transformed)
print('weight:',linear_module.weight)
print('bias:',linear_module.bias)
'''
example_tensor: torch.Size([2, 3])
transformed: tensor([[ 0.4415, -0.0349, 0.0568, 0.3891],
[ 0.7448, 1.3412, 0.2978, 1.6756]], grad_fn=<AddmmBackward0>)
weight: Parameter containing:
tensor([[ 0.0110, -0.3112, 0.4013],
[ 0.5606, -0.2427, 0.1409],
[ 0.3827, -0.2703, -0.0321],
[ 0.5040, 0.4548, -0.5300]], requires_grad=True)
bias: Parameter containing:
tensor([-0.1509, -0.5327, 0.3110, 0.5654], requires_grad=True)
'''
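nn.Linear computes x @ W.T + b, so the transformed values can be verified by hand (a minimal sketch):
manual = example_tensor @ linear_module.weight.t() + linear_module.bias
print(torch.allclose(manual, transformed))  # True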
import torch
import torch.nn as nn
activation_function = nn.ReLU()
example_tensor = torch.tensor([-1.0, 1.0, 0.0])
activated = activation_function(example_tensor)
print('example_tensor', example_tensor)
print('activated', activated)
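The same activations also exist as plain functions (a minimal sketch):
print(torch.relu(example_tensor))  # tensor([0., 1., 0.]), same result as nn.ReLU()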
import torch
import torch.nn as nn
d_in = 3
d_hidden = 4
d_out = 1
model = torch.nn.Sequential(
    nn.Linear(d_in, d_hidden),
    nn.Tanh(),
    nn.Linear(d_hidden, d_out),
    nn.Sigmoid()
)
example_tensor = torch.tensor([[1.,2,3],[4,5,6]])
transformed = model(example_tensor)
print('transformed:',transformed.shape)
import torch
import torch.nn as nn
mse_loss = nn.MSELoss()
input = torch.tensor([[0.,0,0]])
target = torch.tensor([[1.,0,-1]])
loss = mse_loss(input,target)
print(loss)
'''
tensor(0.6667)
'''
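The 0.6667 is just the mean of the squared differences, ((0-1)^2 + 0^2 + (0+1)^2) / 3 = 2/3 (a minimal sketch):
manual = ((input - target) ** 2).mean()
print(manual)  # tensor(0.6667)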
import torch
import torch.nn as nn
# build a simple model
model = nn.Linear(1, 1)
# build a simple dataset
x_simple = torch.tensor([[1.]])
y_simple = torch.tensor([[2.]])
# build the optimizer
optim = torch.optim.SGD(model.parameters(), lr=1e-2)
mse_loss_fn = nn.MSELoss()
y_hat = model(x_simple)
print('model params before:',model.weight)
loss = mse_loss_fn(y_hat,y_simple)
optim.zero_grad()
loss.backward()
optim.step()  # performs the gradient descent update automatically
print('model params after:',model.weight)
'''
model params before: Parameter containing:
tensor([[0.8382]], requires_grad=True)
model params after: Parameter containing:
tensor([[0.8710]], requires_grad=True)
'''
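A single SGD step applies w_new = w_old - lr * grad, and the gradient survives optim.step(), so the pre-step weight can be reconstructed (a minimal sketch):
print(model.weight.item() + 1e-2 * model.weight.grad.item())  # matches 'model params before'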
import torch
import torch.nn as nn
x = torch.Tensor([[1],[2],[3]])
y = torch.Tensor([[3], [5], [7]])
step_size = 0.1
linear_module = nn.Linear(1,1)
mse_loss_fn = nn.MSELoss()
optim = torch.optim.SGD(linear_module.parameters(),lr = step_size)
for i in range(20):
    y_hat = linear_module(x)
    loss = mse_loss_fn(y_hat, y)
    optim.zero_grad()
    loss.backward()
    optim.step()
    print('{},\t{:.2f}'.format(i, loss.item()))
print('w=', linear_module.weight.item())
print('b=', linear_module.bias.item())
'''
0, 27.05
1, 0.35
2, 0.03
3, 0.03
4, 0.03
5, 0.02
........
17, 0.01
18, 0.01
19, 0.01
w= 1.874588131904602
b= 1.2850905656814575
'''
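With w and b close to the true line y = 2x + 1, predictions on new inputs are best made under torch.no_grad() (a minimal sketch):
with torch.no_grad():
    print(linear_module(torch.Tensor([[4.]])))  # ≈ tensor([[8.78]]) here, approaching 9 with more steps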
For image recognition we don't need full connectivity; a CNN works instead.
A CNN is essentially a fully connected layer that keeps only some of the weights, so it has far fewer parameters.
Given a 4×4 grid of values, taking the max over each 2×2 block produces a 2×2 grid (max pooling).
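A minimal sketch of both ideas, assuming a single-channel 4×4 input (the layer sizes here are illustrative):
import torch
import torch.nn as nn
img = torch.arange(16.).view(1, 1, 4, 4)  # (batch, channels, H, W)
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)  # weights shared across the image
pool = nn.MaxPool2d(kernel_size=2)  # max over each 2x2 block
print(conv(img).shape)  # torch.Size([1, 1, 4, 4]); padding keeps the size
print(pool(img))  # tensor([[[[ 5.,  7.], [13., 15.]]]]): each entry is the max of a 2x2 block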