Part 1: Introduction
With the rise of large language models, fine-tuning them on consumer-grade GPUs has become a hot topic, and parameter-efficient fine-tuning is now an essential skill for NLP engineers. Conveniently, Hugging Face has open-sourced the PEFT library, which makes it easy to try these techniques yourself. This post walks through parameter-efficient fine-tuning on a Chinese sentiment analysis (binary classification) task.
The methods used come from the following papers (a minimal configuration sketch for each follows the list):
- LoRA: LoRA: Low-Rank Adaptation of Large Language Models
- Prefix Tuning: Prefix-Tuning: Optimizing Continuous Prompts for Generation; P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks
- P-Tuning: GPT Understands, Too
- Prompt Tuning: The Power of Scale for Parameter-Efficient Prompt Tuning
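All four methods plug into the same PEFT workflow: you build a method-specific config and wrap the base model with `get_peft_model`, which freezes the base model weights and trains only a small set of added parameters. As a quick orientation, here is a minimal sketch of the mapping, using the same config values as the full script in Part 3:

```python
# Minimal sketch: the four methods differ only in their PEFT config.
# All hyperparameters below are the ones used in the full script in Part 3.
from peft import (
    get_peft_model,
    LoraConfig,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
)
from transformers import AutoModelForSequenceClassification

peft_configs = {
    "prefix-tuning": PrefixTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20),
    "prompt-tuning": PromptTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20),
    "p-tuning": PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=20, encoder_hidden_size=128),
    "lora": LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1),
}

model = AutoModelForSequenceClassification.from_pretrained("hfl/chinese-roberta-wwm-ext", num_labels=2)
model = get_peft_model(model, peft_configs["lora"])  # swap the key to try another method
model.print_trainable_parameters()
```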
Part 2: Results
The basic experimental settings:
- Data: ChnSentiCorp_htl_all
- Model: hfl/chinese-roberta-wwm-ext
- GPU: Tesla T4 (15 GB memory)
- batch_size: 64
- epochs: 3
- max_length: 86
- lr: 3e-4
The results are below; feel free to draw your own conclusions:
| | Full fine-tuning | prefix-tuning | prompt-tuning | p-tuning | LoRA |
|---|---|---|---|---|---|
| Total parameters | 102,269,186 | 102,637,826 | 102,284,546 | 102,498,562 | 102,564,098 |
| Trainable parameters | 102,269,186 | 370,178 | 16,898 | 230,914 | 296,450 |
| Trainable parameters (%) | 100 | 0.3606 | 0.0165 | 0.2252 | 0.2890 |
| GPU memory used (of 15 GB) | 5.5 GB | 4.5 GB | 5.0 GB | 5.1 GB | 4.8 GB |
| Method-specific parameters | / | num_virtual_tokens=20 | num_virtual_tokens=20 | num_virtual_tokens=20, encoder_hidden_size=128 | inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1 |
| Training speed | 1.13 it/s | 1.55 it/s | 1.35 it/s | 1.28 it/s | 1.53 it/s |
| Evaluation speed | 3.36 it/s | 3.26 it/s | 2.70 it/s | 2.72 it/s | 3.11 it/s |
| Training time (minutes) | 4.6838 | 4.3513 | 4.1768 | 4.1798 | 3.6353 |
| Validation loss | 12.2706 | 12.1903 | 13.1484 | 9.1823 | 6.3543 |
| Accuracy | 0.6941 | 0.7617 | 0.7044 | 0.8461 | 0.8976 |
Part 3: Code
Finally, the complete code:
```python
#!pip install peft==0.2.0
#!pip install transformers==4.28.1
#!pip install accelerate
#!pip install loralib
#!pip install evaluate
#!pip install tqdm
#!pip install datasets
import argparse
import os
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PrefixTuningConfig,
    PromptEncoderConfig,
    PromptTuningConfig,
    LoraConfig,
)
import evaluate
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from tqdm import tqdm
import peft
print(peft.__version__)
#!wget https://raw.githubusercontent.com/SophonPlus/ChineseNlpCorpus/master/datasets/ChnSentiCorp_htl_all/ChnSentiCorp_htl_all.csv
data_file = "./ChnSentiCorp_htl_all.csv"  # path to the dataset CSV; download it in advance (e.g. with the wget command above)
# Load the dataset
dataset = load_dataset("csv", data_files=data_file)
dataset = dataset.filter(lambda x: x["review"] is not None)
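# Hold out 20% of the data as the test split (fixed via seed=123)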
datasets = dataset["train"].train_test_split(0.2, seed=123)
model_name_or_path = "hfl/chinese-roberta-wwm-ext"
# Decoder-style models (gpt/opt/bloom) pad on the left; encoder models like RoBERTa pad on the right
if any(k in model_name_or_path for k in ("gpt", "opt", "bloom")):
    padding_side = "left"
else:
    padding_side = "right"
max_length = 86
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)
if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

def process_function(examples):
    # Truncate each review to max_length; padding is applied per batch in collate_fn
    tokenized_examples = tokenizer(examples["review"], truncation=True, max_length=max_length)
    tokenized_examples["labels"] = examples["label"]
    return tokenized_examples
tokenized_datasets = datasets.map(process_function, batched=True, remove_columns=datasets["train"].column_names)
accuracy_metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.argmax(axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

def collate_fn(examples):
    # Dynamically pad each batch to its longest sequence
    return tokenizer.pad(examples, padding="longest", return_tensors="pt")
# Instantiate dataloaders.
batch_size = 64
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)
eval_dataloader = DataLoader(
    tokenized_datasets["test"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size
)
# PEFT configuration
p_type = "lora"
if p_type == "prefix-tuning":
peft_type = PeftType.PREFIX_TUNING
peft_config = PrefixTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20)
elif p_type == "prompt-tuning":
peft_type = PeftType.PROMPT_TUNING
peft_config = PromptTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=20)
elif p_type == "p-tuning":
peft_type = PeftType.P_TUNING
peft_config = PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=20, encoder_hidden_size=128)
elif p_type == "lora":
peft_type = PeftType.LORA
peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
# print(peft_type)
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, num_labels=2)
if p_type is not None:
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
else:
    # Full fine-tuning baseline: no PEFT wrapper, so every parameter stays trainable
    def print_trainable_parameters(model):
        """
        Prints the number of trainable parameters in the model.
        """
        trainable_params = 0
        all_param = 0
        for _, param in model.named_parameters():
            num_params = param.numel()
            # if using DS Zero 3 and the weights are initialized empty
            if num_params == 0 and hasattr(param, "ds_numel"):
                num_params = param.ds_numel
            all_param += num_params
            if param.requires_grad:
                trainable_params += num_params
        print(
            f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
        )

    print_trainable_parameters(model)
lr = 3e-4
num_epochs = 3
optimizer = AdamW(params=model.parameters(), lr=lr)
# Instantiate scheduler
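# Linear warmup over the first 6% of training steps, then linear decay to zero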
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),
    num_training_steps=(len(train_dataloader) * num_epochs),
)
device = "cuda"
model.to(device)
metric = evaluate.load("accuracy")
import time
start = time.time()
for epoch in range(num_epochs):
    model.train()
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = batch.to(device)
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    total_loss = 0.0
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = batch.to(device)
        with torch.no_grad():
            outputs = model(**batch)
            loss = outputs.loss
        total_loss += loss.item()  # accumulate the summed validation loss over batches
        predictions = outputs.logits.argmax(dim=-1)
        references = batch["labels"]
        metric.add_batch(
            predictions=predictions,
            references=references,
        )
    eval_metric = metric.compute()
    print(f"epoch {epoch} loss {total_loss}:", eval_metric)
end = time.time()
print("耗时:{}分钟".format((end-start) / 60))
References:
https://github.com/huggingface/peft/