In recent years, pretrained language models (PLMs) such as BERT, GPT, and T5 have achieved remarkable success on natural language processing (NLP) tasks. However, fine-tuning these large models typically requires substantial compute, and every new task needs its own full set of fine-tuned weights, which makes storage expensive.
Adapter-Tuning is a parameter-efficient alternative: it inserts lightweight "Adapter" layers into the pretrained model and performs task-specific learning only in those layers. The adapters hold just a small number of parameters, while the frozen backbone is shared across tasks, which sharply reduces both compute and storage costs.
This article introduces the Adapter-Tuning technique and demonstrates it with a code example for an intent recognition task.
Adapter-Tuning inserts small trainable "Adapter" modules into the Transformer layers while leaving the original pretrained parameters unchanged. Each adapter is typically a small feed-forward bottleneck network (a down-projection, a non-linearity, and an up-projection with a residual connection) that can be trained independently of the base model; a minimal sketch of such a block follows the list below.
Adapter-Tuning has the following characteristics:
- Only a small fraction of parameters is trained; the pretrained weights stay frozen.
- Each task stores only its own adapter rather than a full copy of the model, so storage overhead is low.
- The same frozen backbone can serve many tasks by swapping adapters, which suits multi-task and transfer scenarios.
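To make the mechanism concrete, here is a minimal, simplified sketch of a bottleneck adapter block in plain PyTorch. It is illustrative only: the class name, the bottleneck size, and the choice of ReLU are assumptions made for this example, and real implementations such as the `adapters` library used below add configuration and placement details.

```python
import torch
import torch.nn as nn

class BottleneckAdapter(nn.Module):
    """Illustrative bottleneck adapter: down-project, non-linearity, up-project, residual."""
    def __init__(self, hidden_size: int, bottleneck_size: int = 64):
        super().__init__()
        self.down = nn.Linear(hidden_size, bottleneck_size)  # shrink to a small bottleneck
        self.act = nn.ReLU()
        self.up = nn.Linear(bottleneck_size, hidden_size)    # project back to the hidden size

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # The residual connection preserves the frozen layer's output;
        # the adapter only learns a small correction on top of it.
        return hidden_states + self.up(self.act(self.down(hidden_states)))
```

During training, only parameters like these (plus the task head) receive gradients; the surrounding Transformer weights stay frozen.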
The following example shows how to use the `adapters` library (the successor to Adapter-Transformers) to fine-tune DistilBERT on an intent classification task.
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
# A simple PyTorch Dataset that tokenizes each utterance and pairs it with its intent label
class IntentDataset(Dataset):
def __init__(self, texts, labels, tokenizer, max_length=128):
self.texts = texts
self.labels = labels
self.tokenizer = tokenizer
self.max_length = max_length
def __len__(self):
return len(self.texts)
def __getitem__(self, idx):
text = self.texts[idx]
label = self.labels[idx]
encoding = self.tokenizer(
text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors="pt"
)
return {
'input_ids': encoding['input_ids'].squeeze(0),
'attention_mask': encoding['attention_mask'].squeeze(0),
'labels': torch.tensor(label, dtype=torch.long)
}
from adapters import AutoAdapterModel, AdapterConfig
from transformers import AutoTokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the model with AutoAdapterModel
model = AutoAdapterModel.from_pretrained(model_name)
# Add a classification head for the task (3 intent classes)
model.add_classification_head("intent_recognition", num_labels=3)
# Configure the adapter
adapter_config = AdapterConfig.load("pfeiffer")  # use the Pfeiffer (bottleneck) adapter architecture
model.add_adapter("intent_adapter", config=adapter_config)
# Activate the adapter and put it into training mode
model.set_active_adapters("intent_adapter")
model.train_adapter("intent_adapter")  # train only the adapter (and head); freeze the rest of the model
from torch.optim import AdamW
from torch.utils.data import DataLoader
texts = [
"What's the weather like today?",
"Set a reminder for 3 PM.",
"Tell me a joke.",
"How's the weather tomorrow?",
"Remind me to call mom at 6 PM."
]
labels = [0, 1, 2, 0, 1]  # 0 = ask about weather, 1 = set reminder, 2 = tell a joke
dataset = IntentDataset(texts, labels, tokenizer)
train_dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
optimizer = AdamW(model.parameters(), lr=1e-4)  # adapters usually work well with a slightly higher learning rate
model.train()
for epoch in range(3):
    total_loss = 0
    for batch in train_dataloader:
        outputs = model(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            labels=batch['labels']
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    print(f"Epoch {epoch + 1}, Average Loss: {total_loss / len(train_dataloader):.4f}")
model.eval()
test_texts = [
"What's the forecast for this weekend?",
"Set an alarm for 7 AM."
]
intent_map = {0: "Ask about weather", 1: "Set reminder", 2: "Tell joke"}
with torch.no_grad():
for test_text in test_texts:
encoded_input = tokenizer(test_text, return_tensors="pt")
outputs = model(**encoded_input)
predicted_label = torch.argmax(outputs.logits, dim=1).item()
print(f"Text: '{test_text}' -> Predicted Intent: {intent_map[predicted_label]}")
Adapter-Tuning is a good fit for NLP tasks like the text classification shown here (intent recognition), and more generally for multi-task and transfer settings in which a single frozen backbone serves many tasks, each with its own small adapter.
Putting all of the pieces together, the complete end-to-end script looks like this:
import torch
from adapters import AutoAdapterModel, AdapterConfig
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
# Define the dataset class
class IntentDataset(Dataset):
def __init__(self, texts, labels, tokenizer, max_length=128):
self.texts = texts
self.labels = labels
self.tokenizer = tokenizer
self.max_length = max_length
def __len__(self):
return len(self.texts)
def __getitem__(self, idx):
text = self.texts[idx]
label = self.labels[idx]
encoding = self.tokenizer(
text,
truncation=True,
padding='max_length',
max_length=self.max_length,
return_tensors="pt"
)
return {
'input_ids': encoding['input_ids'].squeeze(0),
'attention_mask': encoding['attention_mask'].squeeze(0),
'labels': torch.tensor(label, dtype=torch.long)
}
# Load the pretrained model and tokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load the model with AutoAdapterModel
model = AutoAdapterModel.from_pretrained(model_name)
# Add a classification head for the task (3 intent classes)
model.add_classification_head("intent_recognition", num_labels=3)
# Configure the adapter
adapter_config = AdapterConfig.load("pfeiffer")  # use the Pfeiffer (bottleneck) adapter architecture
model.add_adapter("intent_adapter", config=adapter_config)
# Activate the adapter and put it into training mode
model.set_active_adapters("intent_adapter")
model.train_adapter("intent_adapter")  # train only the adapter (and head); freeze the rest of the model
# Prepare the training data
texts = [
"What's the weather like today?",
"Set a reminder for 3 PM.",
"Tell me a joke.",
"How's the weather tomorrow?",
"Remind me to call mom at 6 PM."
]
labels = [0, 1, 2, 0, 1]  # 0 = ask about weather, 1 = set reminder, 2 = tell a joke
# Create the dataset and dataloader
dataset = IntentDataset(texts, labels, tokenizer)
train_dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
# Define the optimizer (train_adapter froze the backbone, so only adapter and head parameters receive gradients)
optimizer = AdamW(model.parameters(), lr=1e-4)  # adapters usually work well with a slightly higher learning rate
# Training loop
model.train()
for epoch in range(3):
total_loss = 0
for batch in train_dataloader:
input_ids = batch['input_ids']
attention_mask = batch['attention_mask']
labels = batch['labels']
        # Forward pass
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask,
labels=labels
)
loss = outputs.loss
        # Backward pass and parameter update
loss.backward()
optimizer.step()
optimizer.zero_grad()
total_loss += loss.item()
avg_loss = total_loss / len(train_dataloader)
print(f"Epoch {epoch + 1}, Average Loss: {avg_loss:.4f}")
# Save the adapter (optional)
model.save_adapter("intent_adapter_output", "intent_adapter")
# Test (inference)
model.eval()
test_texts = [
"What's the forecast for this weekend?",
"Set an alarm for 7 AM."
]
intent_map = {0: "Ask about weather", 1: "Set reminder", 2: "Tell joke"}
with torch.no_grad():
for test_text in test_texts:
encoded_input = tokenizer(test_text, return_tensors="pt")
outputs = model(**encoded_input)
logits = outputs.logits
predicted_label = torch.argmax(logits, dim=1).item()
predicted_intent = intent_map[predicted_label]
print(f"Text: '{test_text}' -> Predicted Intent: {predicted_intent}")
Adapter-Tuning is an efficient, low-cost way to adapt pretrained models to NLP tasks, and it is well suited to multi-task learning and cross-task transfer. Compared with full fine-tuning, it consumes far less compute and storage, making it an attractive option for tuning pretrained models. Hopefully the examples in this article help you understand Adapter-Tuning and apply it flexibly in practice!