The dataset contains six classes of garbage: cardboard, glass, metal, paper, plastic, and trash (other waste). The amount of data is small, so it is intended for learning purposes only.
First comes the dataset preparation: splitting the data into a training set and a validation set and creating the label files. Code: utils.py
import os
import shutil
import json
path = "e://dataset//Garbage_classification"  # directory that holds the six class folders; change it to your own dataset path
classes = [garbage for garbage in os.listdir(path)]
if not os.path.exists(os.path.join(os.getcwd(), 'train')):
    os.makedirs(os.path.join(os.getcwd(), 'train'))
if not os.path.exists(os.path.join(os.getcwd(), 'val')):
    os.makedirs(os.path.join(os.getcwd(), 'val'))
train_list = []
val_list = []
for garbage in classes:
    s = 0
    for imgname in os.listdir(os.path.join(path, garbage)):
        data = {'name': imgname, 'label': classes.index(garbage)}
        if s % 7 != 0:  # roughly 6/7 of every class goes to the training set
            train_list.append(data)
            shutil.copy(os.path.join(path, garbage, imgname), os.path.join(os.getcwd(), 'train'))
        else:           # every 7th image goes to the validation set
            val_list.append(data)
            shutil.copy(os.path.join(path, garbage, imgname), os.path.join(os.getcwd(), 'val'))
        s += 1
# dump each split as one JSON list so that json.load() in data.py can read it back
with open("garbage_train.json", 'w') as f:
    json.dump(train_list, f)
with open("garbage_val.json", 'w') as g:
    json.dump(val_list, g)
Running the code above creates the train and val image folders in the working directory, together with the two JSON label files.
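For reference, a rough sketch of the resulting layout (assuming the default paths in utils.py):

train/                  # training images copied from all six class folders
val/                    # validation images (every 7th image of each class)
garbage_train.json      # a JSON list of {"name": ..., "label": ...} entries for train/
garbage_val.json        # the same format for val/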
Next, we write a dataset class that handles the preprocessing, data.py. root is the root directory of the images produced above (train or val), and datajson is the path to the corresponding JSON label file.
from PIL import Image
import torch
import os
import json
class MyDataset(torch.utils.data.Dataset):  # custom dataset class, inheriting from torch.utils.data.Dataset
    def __init__(self, root, datajson, transform=None, target_transform=None):
        super(MyDataset, self).__init__()
        # read the JSON label file written by utils.py: a list of {'name': ..., 'label': ...} entries
        with open(datajson, 'r') as fh:
            load_dict = json.load(fh)
        imgs = []
        for item in load_dict:
            imgs.append((item['name'], int(item['label'])))  # (image file name, class index)
        self.root = root
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        fn, label = self.imgs[index]  # fn is the image file name, label is the class index
        img = Image.open(os.path.join(self.root, fn)).convert('RGB')  # load the image from root/fn
        if self.transform is not None:
            img = self.transform(img)
        return img, label  # whatever is returned here is what each batch yields during training

    def __len__(self):  # number of samples in the dataset (not the number of batches in the loader)
        return len(self.imgs)
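As a quick sanity check (a minimal sketch; the root and datajson paths are whatever utils.py produced on your machine, and the crop assumes every image is at least 112 pixels on each side), the class can be exercised on its own:

from torchvision import transforms
from data import MyDataset

ds = MyDataset(root='train', datajson='garbage_train.json',
               transform=transforms.Compose([transforms.RandomCrop(112), transforms.ToTensor()]))
print(len(ds))           # number of training images
img, label = ds[0]
print(img.shape, label)  # torch.Size([3, 112, 112]) and an integer class index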
Next, define the ResNet network, resnet.py (train.py later imports it as models.resnet, so place it inside a models package). Since the dataset is not large and many images do not exceed 224, I set the input size to 112. Several ResNet variants are provided here; I use the simplest one, resnet18.
import torch
import torch.nn as nn
class BasicBlock(nn.Module):
"""Basic Block for resnet 18 and resnet 34
"""
#BasicBlock and BottleNeck block
#have different output size
#we use class attribute expansion
#to distinct
expansion = 1
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
#residual function
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
#shortcut
self.shortcut = nn.Sequential()
#the shortcut output dimension is not the same with residual function
#use 1*1 convolution to match the dimension
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class BottleNeck(nn.Module):
"""Residual block for resnet over 50 layers
"""
expansion = 4
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class ResNet(nn.Module):
def __init__(self, block, num_block, num_classes=6):
super().__init__()
self.in_channels = 64
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=1,padding=1, bias=False),
nn.MaxPool2d(kernel_size=2,stride=2),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True))
#we use a different inputsize than the original paper
#so conv2_x's stride is 1
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
    def _make_layer(self, block, out_channels, num_blocks, stride):
        """make resnet layers (here 'layer' does not mean a single
        network layer such as a conv layer); one layer may
        contain more than one residual block
Args:
block: block type, basic block or bottle neck block
out_channels: output depth channel number of this layer
num_blocks: how many blocks per layer
stride: the stride of the first block of this layer
Return:
return a resnet layer
"""
        # the first block of a layer may have stride 1 or 2,
        # all remaining blocks in the layer use stride 1
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
output = self.conv1(x)
output = self.conv2_x(output)
output = self.conv3_x(output)
output = self.conv4_x(output)
output = self.conv5_x(output)
output = self.avg_pool(output)
output = output.view(output.size(0), -1)
output = self.fc(output)
return output
def resnet18():
""" return a ResNet 18 object
"""
return ResNet(BasicBlock, [2, 2, 2, 2])
def resnet34():
""" return a ResNet 34 object
"""
return ResNet(BasicBlock, [3, 4, 6, 3])
def resnet50():
""" return a ResNet 50 object
"""
return ResNet(BottleNeck, [3, 4, 6, 3])
def resnet101():
""" return a ResNet 101 object
"""
return ResNet(BottleNeck, [3, 4, 23, 3])
def resnet152():
""" return a ResNet 152 object
"""
return ResNet(BottleNeck, [3, 8, 36, 3])
If you want to train on a different number of classes, change num_classes=6 accordingly. If you want to train on 224-sized images, change the stride parameter in self.conv1 to 2; as mentioned above, my input images are 112, so stride is set to 1.
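For example, a hypothetical 10-class variant of the 18-layer network (same constructor, only the keyword changed; this assumes resnet.py sits in the models package as train.py expects) would look like:

from models.resnet import ResNet, BasicBlock

net10 = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10)  # resnet18 backbone with 10 output classes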
Before the actual training, set the hyperparameters in global_settings.py.
import os
from datetime import datetime
#directory to save weights file
CHECKPOINT_PATH = 'checkpoint'
#total training epochs
EPOCH = 200
MILESTONES = [60, 120, 160]
#initial learning rate
#INIT_LR = 0.1
#time when we run the script
TIME_NOW = datetime.now().isoformat()
#tensorboard log dir
LOG_DIR = 'runs'
#save weights file per SAVE_EPOCH epoch
SAVE_EPOCH = 10
Next comes the training itself, train.py.
import os
import sys
import argparse
from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import global_settings as settings
def get_network(args, use_gpu=True):
    if args.net == 'resnet18':
        from models.resnet import resnet18
        net = resnet18()
    elif args.net == 'resnet34':
        from models.resnet import resnet34
        net = resnet34()
    elif args.net == 'resnet50':
        from models.resnet import resnet50
        net = resnet50()
    elif args.net == 'resnet101':
        from models.resnet import resnet101
        net = resnet101()
    elif args.net == 'resnet152':
        from models.resnet import resnet152
        net = resnet152()
    else:
        print('the network name you entered is not supported yet')
        sys.exit()
    if use_gpu:
        net = net.cuda()
    return net
from data import *
path=os.getcwd()
class WarmUpLR(_LRScheduler):
    """warmup learning rate scheduler
    Args:
        optimizer: optimizer (e.g. SGD)
        total_iters: total iterations of the warmup phase
    """
def __init__(self, optimizer, total_iters, last_epoch=-1):
self.total_iters = total_iters
super().__init__(optimizer, last_epoch)
def get_lr(self):
"""we will use the first m batches, and set the learning
rate to base_lr * m / total_iters
"""
return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]
def train(epoch):
net.train()
for batch_index, (images, labels) in enumerate(train_set):
if epoch <= args.warm:
warmup_scheduler.step()
print('label:',labels.shape)
images = Variable(images)
labels = Variable(labels)
labels = labels.cuda()
images = images.cuda()
optimizer.zero_grad()
outputs = net(images)
loss = loss_function(outputs, labels)
loss.backward()
optimizer.step()
n_iter = (epoch - 1) * len(train_set) + batch_index + 1
last_layer = list(net.children())[-1]
for name, para in last_layer.named_parameters():
if 'weight' in name:
writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
if 'bias' in name:
writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)
print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
loss.item(),
optimizer.param_groups[0]['lr'],
epoch=epoch,
trained_samples=batch_index * args.b + len(images),
total_samples=len(train_set.dataset)
))
#update training loss for each iteration
writer.add_scalar('Train/loss', loss.item(), n_iter)
for name, param in net.named_parameters():
layer, attr = os.path.splitext(name)
attr = attr[1:]
writer.add_histogram("{}/{}".format(layer, attr), param, epoch)
def eval_training(epoch):
net.eval()
test_loss = 0.0 # cost function error
correct = 0.0
for (images, labels) in test_set:
images = Variable(images)
labels = Variable(labels)
images = images.cuda()
labels = labels.cuda()
outputs = net(images)
loss = loss_function(outputs, labels)
test_loss += loss.item()
_, preds = outputs.max(1)
correct += preds.eq(labels).sum()
    print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
test_loss / len(test_set.dataset),
correct.float() / len(test_set.dataset)
))
print()
#add informations to tensorboard
writer.add_scalar('Test/Average loss', test_loss / len(test_set.dataset), epoch)
    writer.add_scalar('Test/Accuracy', correct.float() / len(test_set.dataset), epoch)
return correct.float() / len(test_set.dataset)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-net', type=str, default="resnet18", help='net type')
parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader')
parser.add_argument('-b', type=int, default=8, help='batch size for dataloader')
parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset')
parser.add_argument('-warm', type=int, default=1, help='warm up training phase')
parser.add_argument('-lr', type=float, default=0.001, help='initial learning rate')
args = parser.parse_args()
net = get_network(args, use_gpu=args.gpu)
#data preprocessing:
mean = [0.5071, 0.4867, 0.4408]
stdv = [0.2675, 0.2565, 0.2761]
train_transforms = transforms.Compose([
transforms.RandomCrop(112),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=mean, std=stdv),
])
test_transforms = transforms.Compose([
transforms.RandomCrop(112),
transforms.ToTensor(),
transforms.Normalize(mean=mean, std=stdv),
])
# Datasets
train_set = MyDataset(root='****/train_img',
datajson='**/train.json', transform=train_transforms)
test_set = MyDataset(root='**/val_img',
datajson='**/val.json', transform=test_transforms)
    train_set = DataLoader(
        train_set, shuffle=args.s, num_workers=args.w, batch_size=args.b)
    test_set = DataLoader(
        test_set, shuffle=args.s, num_workers=args.w, batch_size=args.b)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay
iter_per_epoch = len(train_set)
warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, 'lj')
#use tensorboard
if not os.path.exists(settings.LOG_DIR):
os.mkdir(settings.LOG_DIR)
log_dir = os.path.join(
settings.LOG_DIR, args.net,'lj')
print(log_dir)
writer = SummaryWriter(log_dir)
input_tensor = torch.Tensor(8, 3, 112, 112).cuda()
print("done")
#writer.add_graph(net, Variable(input_tensor, requires_grad=True))
#create checkpoint folder to save model
if not os.path.exists(checkpoint_path):
os.makedirs(checkpoint_path)
checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')
    best_acc = 0.0
    for epoch in range(1, settings.EPOCH + 1):
        if epoch > args.warm:
            train_scheduler.step(epoch)
        train(epoch)
        acc = eval_training(epoch)
        #start to save the best-performing model after the learning rate has decayed
        if epoch > settings.MILESTONES[1] and best_acc < acc:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best'))
            best_acc = acc
            continue
        if not epoch % settings.SAVE_EPOCH:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular'))
writer.close()
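A typical launch from the command line (a sketch using the flags defined in the argument parser; adjust batch size and learning rate to your hardware) looks like:

python train.py -net resnet18 -b 8 -lr 0.001 -warm 1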
After training, the model weights are saved under the checkpoint directory (checkpoint/resnet18/lj/), e.g. resnet18.pth.
Now let's test the effect on a single image from the test set, demo.py.
import argparse
#from dataset import *
#from skimage import io
from matplotlib import pyplot as plt
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import global_settings as settings
from PIL import Image
def get_network(args, use_gpu=True):
    if args.net == 'resnet18':
        from models.resnet import resnet18
        net = resnet18()
    elif args.net == 'resnet34':
        from models.resnet import resnet34
        net = resnet34()
    elif args.net == 'resnet50':
        from models.resnet import resnet50
        net = resnet50()
    elif args.net == 'resnet101':
        from models.resnet import resnet101
        net = resnet101()
    elif args.net == 'resnet152':
        from models.resnet import resnet152
        net = resnet152()
    if use_gpu:
        net = net.cuda()
    return net
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-net', type=str, default="resnet18", help='net type')
parser.add_argument('-weights', type=str, default="checkpoint/resnet18/lj/resnet18.pth", help='the weights file you want to test')
parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
#parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader')
#parser.add_argument('-b', type=int, default=16, help='batch size for dataloader')
#parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset')
args = parser.parse_args()
    net = get_network(args, use_gpu=args.gpu)
    net.load_state_dict(torch.load(args.weights))
print(net)
net.eval()
correct_1 = 0.0
correct_5 = 0.0
total = 0
    # use the same normalization statistics as in train.py
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    transform_test = transforms.Compose([
        transforms.RandomCrop(112),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv)
    ])
imgs=Image.open("test.jpg")
img=transform_test(imgs)
img=img.unsqueeze(0)
image = Variable(img).cuda()
print(image.shape)
import time
#label = Variable(img).cuda()
start=time.time()
output = net(image)
print(time.time()-start)
print(output)
    _, pred = output.topk(1, 1, largest=True, sorted=True)
    # the class names must match the label order produced by utils.py (i.e. the order of os.listdir on the dataset folder)
    classes = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
    print(classes[pred[0].item()])
imgs.show()
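Assuming a test.jpg sits in the working directory, the demo can be run with the weights produced above (a sketch; substitute the actual .pth file name you saved):

python demo.py -net resnet18 -weights checkpoint/resnet18/lj/resnet18.pth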
Let me test an image!
It is recognized as cardboard. If you are interested, leave a comment and I will upload the dataset and the trained .pth model later.