上一个帖子记录了基于普通CNN的人脸识别系统。但是,测试准确率实在太低,只有30%。这次使用孪生网络(Siamese Net)进行实现。
代码实现使用了 VGG19 预训练模型作为特征提取器,通过对比学习来判断两张人脸图像是否属于同一人。整个代码分为数据准备、模型构建、训练和测试四个主要部分。
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import random
import torchvision.transforms as transforms
import torchvision
import time
from torchvision import models
# Select the compute device, preferring GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Fix random seeds so runs are reproducible.
torch.manual_seed(123)
random.seed(123)
np.random.seed(10)
# ===============================================================
# Root directory of the training data (one subdirectory per class).
dir_path = r'D:\数据集\faces\training'
# 训练数据集获取函数
# 功能:获取指定目录下所有图片文件及其对应的类别标签
# 参数:tpath - 数据集根目录
# 返回:[(文件路径, 类别标签), ...] 列表
def getFn_Dir(tpath):
    """Collect (file path, class label) pairs from a dataset root.

    Each immediate subdirectory of ``tpath`` is treated as one class;
    the label is the subdirectory's index in the ``os.listdir``
    enumeration order.

    Args:
        tpath: dataset root directory, one subdirectory per class.

    Returns:
        A shuffled list of ``(file path, int label)`` tuples.
    """
    file_labels = []
    for label, class_dir in enumerate(os.listdir(tpath)):
        class_path = os.path.join(tpath, class_dir)
        for file in os.listdir(class_path):
            file_labels.append((os.path.join(class_path, file), label))
    # A single Fisher-Yates shuffle already yields a uniformly random
    # permutation; the original shuffled three times, which adds nothing.
    random.shuffle(file_labels)
    return file_labels
# Image preprocessing pipeline applied to every sample.
transform = transforms.Compose(
    [transforms.ToPILImage(),  # tensor/ndarray -> PIL image
     transforms.Resize((100, 100)),  # resize to 100x100
     transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip (augmentation)
     transforms.ToTensor(),  # PIL image -> [0, 1] tensor
     transforms.Normalize((0.5,), (0.5,))])  # normalize to roughly [-1, 1]
# 自定义人脸数据集类
class FaceDataset(Dataset):
    """Siamese-pair dataset.

    Each item is ``(img1, img2, label)`` where label is 0.0 for a
    same-class pair and 1.0 for a different-class pair.
    """

    def __init__(self, fn_labels2):
        self.fn_labels = fn_labels2  # list of (file path, class label)

    def _find_partner(self, idx, label1, want_same):
        """Scan forward (wrapping) from idx+1 for the first entry whose
        label matches (want_same=True) or differs from label1.

        The original duplicated this loop verbatim in both branches of
        __getitem__; only the comparison differed.
        """
        k = (idx + 1) % len(self.fn_labels)
        while True:
            fn2, label2 = self.fn_labels[k]
            k = (k + 1) % len(self.fn_labels)
            if (int(label1) == int(label2)) == want_same:
                return fn2, label2

    @staticmethod
    def _load(fn):
        """Load an image file and run it through the module-level transform.

        Routed through torch.Tensor because `transform` starts with
        ToPILImage and the original fed it tensors.
        """
        img = torch.Tensor(np.array(Image.open(fn)))
        return transform(img)

    def __getitem__(self, idx):
        fn1, label1 = self.fn_labels[idx]
        # Coin flip: build a same-class pair (1) or different-class pair (0).
        want_same = random.randint(0, 1) == 1
        fn2, label2 = self._find_partner(idx, label1, want_same)
        img1 = self._load(fn1)
        img2 = self._load(fn2)
        # 0.0 -> same person, 1.0 -> different people.
        label = torch.Tensor(np.array([int(label1 != label2)], dtype=np.float32))
        return img1, img2, label

    def __len__(self):
        return len(self.fn_labels)
# Prepare training data: gather (path, label) pairs, wrap them in the
# pair-generating dataset, and batch with a DataLoader.
fn_labels = getFn_Dir(dir_path)
faceDataset = FaceDataset(fn_labels)
train_loader = DataLoader(faceDataset, batch_size=8, shuffle=True)
# =====================================================
# Load pretrained VGG19 to use its convolutional part as a fixed
# feature extractor (`weights=` replaces the deprecated `pretrained=`).
vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
vgg19_cnn = vgg19.features
# Freeze the VGG19 parameters so they are not updated during training.
for param in vgg19_cnn.parameters():
    param.requires_grad = False
# 定义孪生网络模型
class SiameseNet(nn.Module):
    """Siamese network: two inputs share one CNN+FC tower; outputs are
    512-d embeddings compared downstream by a contrastive loss."""

    def __init__(self):
        super(SiameseNet, self).__init__()
        # Shared convolutional tower. The 1->3 conv adapts single-channel
        # input to the 3-channel format the pretrained VGG19 expects.
        self.cnn = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(1, 3, 3, padding=1),
            vgg19_cnn,  # frozen pretrained feature extractor (module-level)
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        # Projection head mapping the flattened VGG feature map
        # (512 channels x 3 x 3) down to a 512-d embedding.
        self.fc1 = nn.Sequential(
            nn.Linear(512 * 3 * 3, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
        )

    def forward_once(self, x):
        """Embed a single batch of images, e.g. shape (N, 1, 100, 100)."""
        features = self.cnn(x)
        flat = features.reshape(x.size(0), -1)
        return self.fc1(flat)

    def forward(self, i1, i2):
        """Embed both inputs with the same (weight-shared) tower."""
        return self.forward_once(i1), self.forward_once(i2)
# 定义对比损失函数
class LossFunction(torch.nn.Module):
    """Contrastive loss: pull same-class embeddings together, push
    different-class embeddings at least `margin` apart."""

    def __init__(self, margin=2.0):
        super(LossFunction, self).__init__()
        self.margin = margin  # minimum desired distance for dissimilar pairs

    def forward(self, i1, i2, y):
        """i1, i2: embedding batches; y: 0 for same-class, 1 for different."""
        dist = torch.pairwise_distance(i1, i2, keepdim=True)
        # Same-class term: squared distance. Different-class term:
        # squared shortfall below the margin (zero once far enough apart).
        same_term = (1 - y) * dist.pow(2)
        diff_term = y * torch.clamp(self.margin - dist, min=0.0).pow(2)
        return (same_term + diff_term).mean()
# ------------------------------------
# Instantiate model, optimizer and loss function.
siameseNet = SiameseNet().to(device)  # move model to the selected device
optimizer = optim.Adam(siameseNet.parameters(), lr=0.001)  # Adam optimizer
lossFunction = LossFunction()  # contrastive loss instance
# Time the training run.
start = time.time()
# Main training loop.
for ep in range(100):  # 100 epochs
    for i, (b_img1, b_img2, b_label) in enumerate(train_loader):
        # Move the batch to the selected device.
        b_img1, b_img2, b_label = b_img1.to(device), b_img2.to(device), b_label.to(device)
        # Forward pass: embed both images of each pair.
        pre_o1, pre_o2 = siameseNet(b_img1, b_img2)
        # Contrastive loss on the embedding pair.
        loss = lossFunction(pre_o1, pre_o2, b_label)
        # Log the loss every 50 batches.
        if i % 50 == 0:
            print(ep, loss.item())
        # Backpropagation and parameter update.
        optimizer.zero_grad()  # clear accumulated gradients
        loss.backward()  # compute gradients
        optimizer.step()  # update parameters
# Report elapsed training time.
end = time.time()
print('训练耗时:', round((end - start) / 60.0, 1), '分钟')
# Training finished.
# Persist the trained weights.
torch.save(siameseNet.state_dict(), 'siameseNet_state_dict.pth')
# ============= 以下开始测试 ===========================
# 图像加载和预处理函数
def getImg(fn):
    """Load an image file and apply the module-level preprocessing
    transform, returning a tensor ready for the network.

    The original also computed an unused RGB copy (``img2``); it has
    been removed.
    """
    img = np.array(Image.open(fn))
    # Route through torch.Tensor because `transform` begins with
    # ToPILImage and the rest of the script feeds it tensors.
    return transform(torch.Tensor(img))
# 图像加载函数(仅用于显示)
def getImg_show(fn):
img = Image.open(fn)
img = img.convert('RGB') # 用于显示
img = np.array(img)
return img
# Load the test data.
path = r'D:\数据集\faces\testing'
fn_labels = getFn_Dir(path)
# Rebuild the model and load the trained weights for evaluation.
siameseNet = SiameseNet().to(device)  # fresh model instance
siameseNet.load_state_dict(torch.load('siameseNet_state_dict.pth'))  # load trained weights
siameseNet.eval()  # evaluation mode (disables dropout and similar layers)
# Nearest-neighbour evaluation: a query image counts as correct when the
# closest other test image (by embedding distance) shares its label.
correct = 0
with torch.no_grad():  # no gradients needed for inference
    for fn, label in fn_labels:
        img = getImg(fn).unsqueeze(0).to(device)  # load the query image
        img_min, dist_min, label_min, fn_min = -1, 1000, -1, -1  # running best match
        # Compare the query against every other test image.
        for fn2, label2 in fn_labels:
            if fn == fn2:  # skip the query image itself
                continue
            img2 = getImg(fn2).unsqueeze(0).to(device)
            pre_o1, pre_o2 = siameseNet(img, img2)  # embed both images
            dist = torch.pairwise_distance(pre_o1, pre_o2, keepdim=True)  # embedding distance
            if dist_min > dist.item():  # new closest match found
                dist_min = dist.item()
                img_min = img2
                label_min = label2
                fn_min = fn2
        # Correct when the nearest neighbour has the same label.
        correct += int(label == label_min)
        # Visualisation helpers (the display call itself is commented out).
        img_show = getImg_show(fn)
        img_show2 = getImg_show(fn_min)
        images = dict()
        images[fn] = img_show
        images[fn_min] = img_show2
        def showTwoImages(images, stitle='', rows=1, cols=1):
            # Show the query image and its nearest match side by side.
            figure, ax = plt.subplots(nrows=rows, ncols=cols)
            for idx, title in enumerate(images):
                ax.ravel()[idx].imshow(images[title])
                ax.ravel()[idx].set_title(title)
                ax.ravel()[idx].set_axis_off()
            plt.tight_layout()
            plt.suptitle(stitle, fontsize=18, color='red')
            plt.show()
        stitle = 'Similarity: %.2f' % (dist_min)
        # showTwoImages(images, stitle, 1, 2)
# Report overall accuracy.
print('一共测试了{:.0f}张图片,准确率为{:.1f}%'.format(len(fn_labels), 100. * correct / len(fn_labels)))
import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import random
import torchvision.transforms as transforms
import torchvision
import time
from torchvision import models
# Fix random seeds for reproducibility.
torch.manual_seed(123)
random.seed(123)
np.random.seed(10)
# Dataset roots (make sure these paths exist).
dir_path = r'D:\数据集\faces\training'  # training root: one subdirectory per class
test_path = r'D:\数据集\faces\testing'  # testing root, same layout
# 1. 数据加载函数(带路径检查)
def getFn_Dir(tpath):
    """Collect ``(image path, class index)`` pairs from ``tpath``.

    Every subdirectory is treated as one class (labelled by its position
    in the ``os.listdir`` order); only files with a recognised image
    extension are kept. The result is shuffled once before returning.

    Raises:
        FileNotFoundError: if ``tpath`` does not exist.
        ValueError: if no image file is found in any subdirectory.
    """
    if not os.path.exists(tpath):
        raise FileNotFoundError(f"数据集根目录不存在: {tpath}")
    img_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.pgm', '.ppm')
    file_labels = []
    for i, dir_name in enumerate(os.listdir(tpath)):
        dir_path = os.path.join(tpath, dir_name)
        if not os.path.isdir(dir_path):
            print(f"跳过非目录文件: {dir_path}")
            continue
        # Keep only files whose extension marks them as images.
        images = [name for name in os.listdir(dir_path)
                  if name.lower().endswith(img_extensions)]
        if not images:
            print(f"警告:类别目录 {dir_name} 中未找到图像文件")
            continue
        file_labels.extend((os.path.join(dir_path, name), i) for name in images)
    if not file_labels:
        raise ValueError(f"在 {tpath} 中未找到任何图像文件,请检查路径或文件格式")
    random.shuffle(file_labels)
    print(f"成功加载 {len(file_labels)} 个图像样本")
    return file_labels
# 2. Preprocessing: resize, light augmentation, then scale to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ToTensor(),  # PIL image -> [0, 1] tensor
    transforms.Normalize(mean=[0.5], std=[0.5])  # single-channel grayscale -> [-1, 1]
])
# 3. 数据集类
class FaceDataset(Dataset):
    """Dataset of image pairs for contrastive (Siamese) training.

    Each item is ``(img1, img2, label)`` with label 0.0 for a same-class
    pair and 1.0 for a different-class pair. Preprocessed images are
    cached in memory to avoid repeated decoding.
    """

    def __init__(self, fn_labels):
        self.fn_labels = fn_labels  # list of (image path, int label)
        self.cache = {}  # path -> preprocessed tensor

    def __getitem__(self, idx):
        # First image of the pair.
        img1_path, label1 = self.fn_labels[idx]
        img1 = self._load_image(img1_path)
        # Partner image (same or different class with equal probability).
        img2, label2 = self._get_pair(idx, label1)
        # 0 = same class, 1 = different class.
        label = torch.tensor([int(label1 != label2)], dtype=torch.float32)
        return img1, img2, label

    def _load_image(self, path):
        """Load, force to single-channel grayscale, preprocess, and cache."""
        if path in self.cache:
            return self.cache[path]
        img = Image.open(path).convert('L')
        img = transform(img)
        self.cache[path] = img
        return img

    def _get_pair(self, idx, label1):
        """Sample a partner image for index ``idx``.

        Bug fix: the original drew TWO independent ``random.random()``
        values inside one condition, so every candidate was accepted with
        probability 0.5 regardless of its class — pairs simply mirrored
        the (mostly different-class) candidate distribution instead of
        being balanced. Now the same/different decision is made once per
        call and the sampled candidates are scanned for one satisfying it.
        """
        want_same = random.random() < 0.5
        # Sample up to 100 candidate indices instead of scanning everything.
        candidates = random.sample(range(len(self.fn_labels)), min(100, len(self.fn_labels) - 1))
        for k in candidates:
            if k == idx:
                continue
            img2_path, label2 = self.fn_labels[k]
            if (label2 == label1) == want_same:
                return self._load_image(img2_path), label2
        # No sampled candidate matched: fall back to the next sample.
        k = (idx + 1) % len(self.fn_labels)
        img2_path, label2 = self.fn_labels[k]
        return self._load_image(img2_path), label2

    def __len__(self):
        return len(self.fn_labels)
# 4. 模型定义
class SiameseNet(nn.Module):
    """Siamese embedding network built on a frozen pretrained VGG19.

    Both inputs pass through the same (weight-shared) tower; each image
    is mapped to a 128-d embedding for a contrastive loss downstream.
    """

    def __init__(self):
        super(SiameseNet, self).__init__()
        # Load pretrained VGG19 and freeze ONLY the pretrained backbone.
        # Bug fix: the original froze all of self.cnn, which also froze
        # the randomly-initialised 1->3 channel adapter conv below, so
        # that layer could never learn a useful grayscale->RGB mapping.
        vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
        for param in vgg19.features.parameters():
            param.requires_grad = False
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 3, kernel_size=3, padding=1),  # trainable 1->3 channel adapter for VGG
            vgg19.features,                             # frozen pretrained feature extractor
            nn.AdaptiveAvgPool2d((3, 3))                # fixed 3x3 spatial output size
        )
        # Projection head: 512 x 3 x 3 VGG features -> 128-d embedding.
        self.fc = nn.Sequential(
            nn.Linear(512 * 3 * 3, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 128)
        )
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise the head's linear layers (keeps weights small)."""
        for m in self.fc.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward_once(self, x):
        """Embed one batch of (N, 1, H, W) grayscale images."""
        x = self.cnn(x)
        x = x.view(x.size(0), -1)  # flatten spatial features
        return self.fc(x)

    def forward(self, x1, x2):
        """Embed both inputs through the shared tower."""
        return self.forward_once(x1), self.forward_once(x2)
# 5. 损失函数
class ContrastiveLoss(nn.Module):
    """Contrastive loss: same-class pairs are pulled together; different
    pairs are pushed until at least `margin` apart."""

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin  # target minimum distance for dissimilar pairs

    def forward(self, out1, out2, label):
        """out1, out2: embedding batches; label: 0 same-class, 1 different."""
        dist = torch.pairwise_distance(out1, out2, keepdim=True)
        # Same-class term: squared distance; different-class term:
        # squared shortfall below the margin (clamped at zero).
        pull = (1 - label) * dist.pow(2)
        push = label * torch.clamp(self.margin - dist, min=0).pow(2)
        return (pull + push).mean()
# 6. 训练函数
def train_model():
    """Train the Siamese network on the training set.

    Returns the trained model, or None if the data could not be loaded.
    Side effect: saves the final weights to 'siamese_best.pth'.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")
    # Load training data (the path is validated inside getFn_Dir).
    try:
        train_labels = getFn_Dir(dir_path)
    except Exception as e:
        print(f"数据加载失败:{e}")
        return None  # abort on load failure
    # Dataset and loader.
    train_dataset = FaceDataset(train_labels)
    train_loader = DataLoader(
        train_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=0  # single-process loading (Windows-friendly)
    )
    # Model, loss function, optimizer.
    model = SiameseNet().to(device)
    criterion = ContrastiveLoss(margin=1.0)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)  # small LR for stability
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)  # halve LR every 20 epochs
    # Main training loop.
    model.train()
    for epoch in range(50):
        running_loss = 0.0
        for i, (img1, img2, label) in enumerate(train_loader):
            # Move the batch to the device.
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            # Forward pass.
            out1, out2 = model(img1, img2)
            loss = criterion(out1, out2, label)
            # Backward pass with gradient clipping.
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # guard against exploding gradients
            optimizer.step()
            running_loss += loss.item()
            # Periodic progress log.
            if i % 50 == 0:
                print(f"Epoch {epoch}, Batch {i}, Loss: {loss.item():.4f}")
        # Per-epoch summary and learning-rate step.
        avg_loss = running_loss / len(train_loader)
        scheduler.step()
        print(f"Epoch {epoch}/49, 平均损失: {avg_loss:.4f}")
    # Save the trained weights.
    torch.save(model.state_dict(), "siamese_best.pth")
    print("模型保存成功: siamese_best.pth")
    return model
# 7. 测试函数
def test_model(model):
    """Evaluate the model by nearest-neighbour retrieval on the test set.

    Every test image is embedded once; a prediction is correct when the
    closest OTHER test embedding shares the query's label. Prints the
    resulting accuracy; returns None.
    """
    if model is None:
        print("模型未初始化,无法测试")
        return
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()  # evaluation mode
    # Load test data.
    try:
        test_labels = getFn_Dir(test_path)
    except Exception as e:
        print(f"测试数据加载失败:{e}")
        return
    # Precompute an embedding for every test image.
    all_features = []
    all_labels = []
    with torch.no_grad():  # inference only
        for img_path, label in test_labels:
            # Load and preprocess as single-channel grayscale.
            img = Image.open(img_path).convert('L')
            img = transform(img).unsqueeze(0).to(device)  # add batch dimension
            # Embed the image through one tower.
            feat = model.forward_once(img)
            all_features.append(feat)
            all_labels.append(label)
    # Stack into a single (num_samples, feature_dim) tensor.
    all_features = torch.cat(all_features, dim=0)
    # Nearest-neighbour accuracy.
    correct = 0
    total = len(test_labels)
    with torch.no_grad():
        for i in range(total):
            # Current query embedding.
            query_feat = all_features[i].unsqueeze(0)
            # Euclidean distances to every embedding (including itself).
            distances = torch.cdist(query_feat, all_features, p=2).squeeze(0)
            # Exclude self by setting its distance to infinity.
            distances[i] = float('inf')
            # Nearest neighbour index.
            nearest_idx = torch.argmin(distances).item()
            # Count as correct when the labels match.
            if all_labels[i] == all_labels[nearest_idx]:
                correct += 1
    accuracy = 100.0 * correct / total
    print(f"测试结果:共 {total} 张图片,准确率:{accuracy:.1f}%")
# 主程序入口
# Script entry point: train first, then evaluate the trained model.
if __name__ == '__main__':
    # Train the model.
    print("===== 开始训练 =====")
    model = train_model()
    # Evaluate it on the test set.
    print("\n===== 开始测试 =====")
    test_model(model)