20. Instruction Tuning
20. Instruction Tuning¶
Overview
Instruction tuning is a method of fine-tuning a pre-trained LLM to follow natural-language instructions. It enables the model to perform a wide variety of tasks zero-shot.
1. Instruction Tuning Overview
1.1 Concept
Before Instruction Tuning:
User: "Translate to French: Hello"
Model: "Translate to French: Hello. How are you? I am..."
(completion λͺ¨λλ‘ λμ)
After Instruction Tuning:
User: "Translate to French: Hello"
Model: "Bonjour"
(instruction following)
Key changes:
- Sentence completion → instruction following
- Improved emergent abilities
- Zero-shot generalization
1.2 Training Data Format
# Instruction tuning data examples: each record has an instruction, an
# optional input (context), and the target output.
# BUGFIX: the second example's Korean output string was split across two
# lines (a syntax error in the original); restored as a single literal.
instruction_data = [
    {
        "instruction": "Summarize the following article.",
        "input": "The stock market experienced significant volatility...",
        "output": "Stock markets showed high volatility due to..."
    },
    {
        "instruction": "Translate the following text to Korean.",
        "input": "Hello, how are you?",
        "output": "안녕하세요, 어떻게 지내세요?"
    },
    {
        # Empty "input" marks tasks that need no extra context.
        "instruction": "Write a poem about autumn.",
        "input": "",
        "output": "Leaves of gold and crimson fall..."
    }
]
# Prompt template
def format_instruction(example):
    """Render one example into the standard instruction-prompt layout.

    The "### Input:" section is emitted only when the example carries a
    non-empty input field; otherwise the prompt goes straight from the
    instruction to the response.
    """
    sections = ["### Instruction:", example["instruction"]]
    if example["input"]:
        sections += ["### Input:", example["input"]]
    sections += ["### Response:", example["output"]]
    return "\n".join(sections)
2. FLAN (Finetuned Language Net)¶
2.1 FLAN-T5¶
FLAN training data:
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β 1,836 tasks from 473 datasets β
β β
β Categories: β
β - NLU (sentiment, NLI, QA) β
β - NLG (summarization, translation) β
β - Reasoning (math, logic) β
β - Dialog β
β β
β Data mixing: β
β - Task proportional mixing β
β - Examples proportional mixing β
β - Temperature-based sampling (T=3) β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
def use_flan_t5():
    """Run a few zero-shot instruction prompts through FLAN-T5.

    Downloads google/flan-t5-xl, so this needs network access and a lot
    of memory; it is a demonstration, not production code.
    """
    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl")
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")

    # One prompt per task family: translation, sentiment, QA, summarization.
    prompts = [
        "Translate to German: The weather is nice today.",
        "What is the sentiment of: I love this product!",
        "Answer the question: What is the capital of France?",
        "Summarize: The quick brown fox jumps over the lazy dog. The dog was sleeping."
    ]

    for question in prompts:
        encoded = tokenizer(question, return_tensors="pt")
        generated = model.generate(**encoded, max_new_tokens=100)
        answer = tokenizer.decode(generated[0], skip_special_tokens=True)
        print(f"Q: {question}")
        print(f"A: {answer}\n")
2.2 Chain-of-Thought FLAN¶
# Include chain-of-thought (CoT) data: the output demonstrates explicit
# step-by-step reasoning rather than just the final answer.
cot_example = {
"instruction": "Solve the math problem step by step.",
"input": "If John has 5 apples and gives 2 to Mary, how many does he have?",
"output": """Let me solve this step by step:
1. John starts with 5 apples
2. John gives 2 apples to Mary
3. Remaining apples = 5 - 2 = 3
Therefore, John has 3 apples."""
}
# Adjust the proportion of CoT data during training
# Typically 9:1 (non-CoT : CoT)
3. Self-Instruct¶
3.1 Concept
Self-Instruct pipeline:
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β 1. Seed Tasks (175κ° μλ μμ±) β
β β β
β 2. Task Generation (LLMμ΄ μ instruction μμ±) β
β β β
β 3. Instance Generation (input/output μ μμ±) β
β β β
β 4. Filtering (νμ§ νν°λ§) β
β β β
β 5. Fine-tuning β
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Advantages:
- Minimal human labeling
- Automatic generation of diverse tasks
- Cost-efficient
import openai
from typing import List, Dict
import json
import random
class SelfInstructGenerator:
    """Self-Instruct data generator.

    Bootstraps an instruction-tuning dataset from a small pool of
    human-written seed tasks: an LLM proposes new instructions, generates
    input/output instances for them, and low-quality or near-duplicate
    results are filtered out.
    """

    def __init__(self, seed_tasks: List[Dict], model: str = "gpt-4"):
        """seed_tasks: human-written tasks, each with an "instruction" key."""
        self.seed_tasks = seed_tasks
        self.generated_tasks = []
        self.model = model

    def generate_instruction(self, num_examples: int = 3) -> str:
        """Ask the LLM for one new task instruction, few-shot prompted."""
        # Sample demonstrations from the combined seed + generated pool.
        pool = self.seed_tasks + self.generated_tasks
        # BUGFIX: cap the sample size by the size of the whole pool — the
        # original capped by len(seed_tasks) only, which under-samples (or
        # could mis-size) once generated tasks join the pool.
        examples = random.sample(pool, min(num_examples, len(pool)))
        examples_text = "\n".join([
            f"Task {i+1}: {ex['instruction']}"
            for i, ex in enumerate(examples)
        ])
        prompt = f"""Here are some example tasks:
{examples_text}
Generate a new and different task instruction. Be creative and diverse.
The task should be clear and specific.
New task instruction:"""
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.8,
            max_tokens=100
        )
        return response.choices[0].message.content.strip()

    def generate_instance(self, instruction: str) -> Dict:
        """Ask the LLM for an input/output pair for *instruction*."""
        prompt = f"""Given the following instruction, generate an appropriate input and output pair.
Instruction: {instruction}
Generate:
1. An input (can be empty if not needed)
2. The expected output
Format:
Input: [your input or "N/A"]
Output: [expected output]"""
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )
        # Parse the "Input:" / "Output:" fields out of the reply text.
        text = response.choices[0].message.content
        input_text = self._extract_field(text, "Input:")
        output_text = self._extract_field(text, "Output:")
        return {
            "instruction": instruction,
            # "N/A" is the prompt's marker for "no input needed".
            "input": input_text if input_text != "N/A" else "",
            "output": output_text
        }

    def _extract_field(self, text: str, field: str) -> str:
        """Return the value following *field*, on the same or the next line."""
        lines = text.split("\n")
        for i, line in enumerate(lines):
            if field in line:
                # Value on the same line; otherwise fall back to next line.
                content = line.replace(field, "").strip()
                if content:
                    return content
                elif i + 1 < len(lines):
                    return lines[i + 1].strip()
        return ""

    def filter_instance(self, instance: Dict) -> bool:
        """Quality filter: reject too-short or near-duplicate instances."""
        # Length checks (characters, not words)
        if len(instance["instruction"]) < 10:
            return False
        if len(instance["output"]) < 5:
            return False
        # Near-duplicate check against previously accepted tasks
        for existing in self.generated_tasks:
            if self._similarity(instance["instruction"],
                                existing["instruction"]) > 0.7:
                return False
        return True

    def _similarity(self, a: str, b: str) -> float:
        """Jaccard similarity over word sets (real systems would use embeddings)."""
        words_a = set(a.lower().split())
        words_b = set(b.lower().split())
        intersection = words_a & words_b
        union = words_a | words_b
        return len(intersection) / len(union) if union else 0

    def generate_dataset(self, num_instances: int = 1000) -> List[Dict]:
        """Generate the dataset: loop instruction -> instance -> filter.

        NOTE(review): this can loop indefinitely if the filter keeps
        rejecting candidates; consider an attempt cap for production use.
        """
        while len(self.generated_tasks) < num_instances:
            # Propose a new instruction
            instruction = self.generate_instruction()
            # Generate an input/output instance for it
            instance = self.generate_instance(instruction)
            # Keep it only if it passes the quality filter
            if self.filter_instance(instance):
                self.generated_tasks.append(instance)
                print(f"Generated {len(self.generated_tasks)}/{num_instances}")
        return self.generated_tasks
4. Evol-Instruct (WizardLM)¶
4.1 Concept
Evol-Instruct: progressively increases the complexity of instructions
Evolution Strategies:
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β In-Depth Evolution: β
β - Add constraints (μ μ½ μΆκ°) β
β - Deepen (λ κΉκ²) β
β - Concretize (ꡬ체ν) β
β - Increase reasoning (μΆλ‘ κ°ν) β
β   - Complicate input                                    β
β β
β In-Breadth Evolution: β
β - Mutation (λ³ν) β
β - Topic extension (μ£Όμ νμ₯) β
β - Method variation (λ°©λ² λ³κ²½) β
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
class EvolInstructGenerator:
    """Evol-Instruct data generation.

    Iteratively rewrites instructions into more complex variants using a
    fixed set of "evolution" prompt templates (WizardLM style).
    """

    # One rewriting prompt per in-depth evolution strategy; each template
    # takes the current instruction via .format(instruction=...).
    EVOLUTION_PROMPTS = {
        "add_constraints": """I want you to make the instruction more complex.
You should add one or more constraints/requirements to the instruction.
Original instruction: {instruction}
Evolved instruction with added constraints:""",
        "deepen": """I want you to make the instruction more complex.
If the original instruction can be solved in a few steps, please rewrite it
to require more steps to solve.
Original instruction: {instruction}
More complex instruction requiring deeper reasoning:""",
        "concretize": """I want you to make the instruction more concrete and specific.
Replace general concepts with specific examples.
Original instruction: {instruction}
More specific instruction:""",
        "reasoning": """I want you to make the instruction require multi-step reasoning.
The answer should require combining multiple pieces of information.
Original instruction: {instruction}
Instruction requiring multi-step reasoning:"""
    }

    def __init__(self, model: str = "gpt-4"):
        self.model = model

    def evolve_instruction(
        self,
        instruction: str,
        strategy: str = "deepen"
    ) -> str:
        """Rewrite one instruction using the named evolution strategy."""
        template = self.EVOLUTION_PROMPTS[strategy]
        request = template.format(instruction=instruction)
        reply = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "user", "content": request}],
            temperature=0.7,
            max_tokens=200
        )
        return reply.choices[0].message.content.strip()

    def multi_round_evolution(
        self,
        instruction: str,
        rounds: int = 3
    ) -> List[str]:
        """Evolve an instruction over several rounds, cycling strategies.

        Returns the full chain [original, round 1, ..., round N].
        """
        order = ["add_constraints", "deepen", "reasoning", "concretize"]
        chain = [instruction]
        for round_idx in range(rounds):
            next_strategy = order[round_idx % len(order)]
            chain.append(self.evolve_instruction(chain[-1], next_strategy))
        return chain
# Example
def evol_instruct_example():
    """Walk one seed instruction through three evolution rounds and print the chain."""
    generator = EvolInstructGenerator()

    # Seed instruction to be evolved
    seed = "Write a function to sort a list."

    # Evolve it (each round calls the LLM once)
    chain = generator.multi_round_evolution(seed, rounds=3)

    print("Evolution chain:")
    for round_no, text in enumerate(chain):
        print(f"\nRound {round_no}: {text}")
# μμ κ²°κ³Ό:
# Round 0: Write a function to sort a list.
# Round 1: Write a function to sort a list of integers in ascending order,
# handling negative numbers and duplicates.
# Round 2: Write a function to sort a list of integers using merge sort,
# with O(n log n) time complexity, handling edge cases like
# empty lists and lists with one element.
# Round 3: Implement a stable merge sort algorithm that sorts a list of
# objects by a given key, maintains relative order of equal
# elements, handles None values, and returns both the sorted
# list and the number of comparisons made.
5. Alpaca/Vicuna-Style Training
5.1 Stanford Alpaca
# Alpaca data format: the standard prompt template used by Stanford Alpaca
# (placeholders are filled with str.format / f-string substitution).
alpaca_format = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input}
### Response:
{output}"""
# νμ΅ μ½λ
from transformers import (
LlamaForCausalLM, LlamaTokenizer,
TrainingArguments, Trainer, DataCollatorForSeq2Seq
)
from datasets import load_dataset
def train_alpaca_style():
    """Fine-tune LLaMA-2-7B on the Alpaca dataset (instruction tuning).

    Renders each example with the Alpaca prompt template (a shorter
    template when the example has no "input" field), tokenizes, and
    trains with HF Trainer. Requires GPU(s) and access to the gated
    meta-llama checkpoint.
    """
    # BUGFIX: torch was referenced below but never imported anywhere in
    # this file; import it locally so the function is self-contained.
    import torch

    # Load model/tokenizer in fp16 to fit the 7B model in memory.
    model = LlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b",
        torch_dtype=torch.float16
    )
    tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b")
    # LLaMA ships without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

    # Load the dataset
    dataset = load_dataset("tatsu-lab/alpaca")

    def format_example(example):
        # Render the Alpaca prompt; append EOS so the model learns to stop.
        if example["input"]:
            text = f"""Below is an instruction that describes a task, paired with an input that provides further context.
### Instruction:
{example["instruction"]}
### Input:
{example["input"]}
### Response:
{example["output"]}{tokenizer.eos_token}"""
        else:
            text = f"""Below is an instruction that describes a task.
### Instruction:
{example["instruction"]}
### Response:
{example["output"]}{tokenizer.eos_token}"""
        tokens = tokenizer(text, truncation=True, max_length=512)
        # BUGFIX: Trainer needs "labels" to compute the LM loss; the
        # original returned only input_ids/attention_mask.
        tokens["labels"] = tokens["input_ids"].copy()
        return tokens

    tokenized_dataset = dataset.map(format_example)

    # Training configuration
    training_args = TrainingArguments(
        output_dir="./alpaca-output",
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=8,   # effective batch size 32/device
        learning_rate=2e-5,
        warmup_ratio=0.03,
        lr_scheduler_type="cosine",
        fp16=True,
        logging_steps=10,
        save_strategy="epoch"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
        # Seq2Seq collator pads both input_ids and labels dynamically.
        data_collator=DataCollatorForSeq2Seq(tokenizer, padding=True)
    )
    trainer.train()
5.2 ShareGPT/Vicuna Format
# ShareGPT conversation format: multi-turn dialogs as a list of
# {"from": "human"|"gpt", "value": ...} turns.
sharegpt_format = {
"conversations": [
{"from": "human", "value": "What is machine learning?"},
{"from": "gpt", "value": "Machine learning is a subset of AI..."},
{"from": "human", "value": "Can you give an example?"},
{"from": "gpt", "value": "Sure! A common example is spam detection..."}
]
}
# Vicuna conversation template
def format_vicuna_conversation(conversations):
    """Convert ShareGPT-style turns into the Vicuna training format.

    Human turns become "USER: ..." lines; all other turns become
    "ASSISTANT: ...</s>" lines (with the end-of-sequence marker).
    """
    rendered = []
    for message in conversations:
        if message["from"] == "human":
            rendered.append(f"USER: {message['value']}\n")
        else:
            rendered.append(f"ASSISTANT: {message['value']}</s>\n")
    return "".join(rendered)
# Chat template (HuggingFace style)
def apply_chat_template(tokenizer, messages):
    """Delegate to the tokenizer's built-in chat template.

    Assumes tokenizer.chat_template is configured on the tokenizer.
    Returns tokenized ids with the generation prompt appended.
    """
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True
    )
    return encoded
6. Training Strategies
6.1 Data Quality vs. Quantity
class DataQualityChecker:
    """Heuristic quality scoring for instruction-tuning examples."""

    def check_quality(self, example: Dict) -> Dict:
        """Score an example on length, format, relevance and safety.

        Returns per-criterion scores in [0, 1] plus their mean under
        the "total" key.
        """
        scores = {}

        # 1. Length adequacy: full credit at 20+ instruction words
        #    and 50+ output words.
        n_inst_words = len(example["instruction"].split())
        n_out_words = len(example["output"].split())
        scores["length"] = min(n_inst_words / 20, 1.0) * min(n_out_words / 50, 1.0)

        # 2. Format consistency
        scores["format"] = 1.0 if self._check_format(example) else 0.5

        # 3. Response relevance (simple lexical-overlap heuristic)
        inst_vocab = set(example["instruction"].lower().split())
        out_vocab = set(example["output"].lower().split())
        if inst_vocab:
            overlap = len(inst_vocab & out_vocab) / len(inst_vocab)
        else:
            overlap = 0
        scores["relevance"] = min(overlap * 2, 1.0)

        # 4. Harmfulness (naive keyword filter)
        scores["safety"] = 0.0 if self._contains_harmful(example["output"]) else 1.0

        # Aggregate: mean of the four criteria above.
        scores["total"] = sum(scores.values()) / len(scores)
        return scores

    def _check_format(self, example: Dict) -> bool:
        """Reject empty fields and canned refusal prefixes."""
        has_content = (
            len(example["instruction"]) > 0 and len(example["output"]) > 0
        )
        is_refusal = (
            example["output"].startswith("I cannot")
            or example["output"].startswith("As an AI")
        )
        return has_content and not is_refusal

    def _contains_harmful(self, text: str) -> bool:
        """Naive harmful-content keyword check (substring match)."""
        harmful_patterns = ["hack", "illegal", "weapon", "drug"]
        lowered = text.lower()
        return any(p in lowered for p in harmful_patterns)
6.2 Data Mixing
def create_instruction_mix(
    datasets: Dict[str, List[Dict]],
    weights: Dict[str, float],
    total_size: int
) -> List[Dict]:
    """Sample from several task datasets according to per-task weights.

    Each task contributes roughly total_size * weight / sum(weights)
    examples (capped at the data available for that task); the combined
    pool is shuffled and truncated to total_size. Tasks missing from
    *weights* default to weight 1.0 (though the denominator only sums
    the weights actually provided).
    """
    weight_sum = sum(weights.values())
    combined = []
    for task_name, examples in datasets.items():
        share = weights.get(task_name, 1.0)
        quota = int(total_size * share / weight_sum)
        combined.extend(random.sample(examples, min(quota, len(examples))))
    random.shuffle(combined)
    return combined[:total_size]
# Mixing example
# NOTE(review): qa_data, summary_data, translation_data, coding_data and
# reasoning_data are assumed to be loaded elsewhere before this runs —
# they are not defined in this file.
datasets = {
"qa": qa_data,
"summarization": summary_data,
"translation": translation_data,
"coding": coding_data,
"reasoning": reasoning_data
}
weights = {
"qa": 1.0,
"summarization": 1.0,
"translation": 0.5,
"coding": 2.0, # extra weight on coding
"reasoning": 1.5
}
mixed_dataset = create_instruction_mix(datasets, weights, total_size=50000)
Key Takeaways
Instruction Tuning Essentials
1. FLAN: diverse task mixing, includes CoT data
2. Self-Instruct: automatic data generation with an LLM
3. Evol-Instruct: progressive complexity increase
4. Alpaca: simple instruction format
5. Vicuna/ShareGPT: training on conversation format
Data Quality Checklist
☐ Is the instruction clear?
☐ Does the output follow the instruction?
☐ Is the format consistent?
☐ Is it free of harmful content?
☐ Is there sufficient diversity?
☐ Is the difficulty distribution appropriate?
References
- Wei et al. (2021). "Finetuned Language Models Are Zero-Shot Learners" (FLAN)
- Wang et al. (2022). "Self-Instruct: Aligning Language Models with Self-Generated Instructions"
- Xu et al. (2023). "WizardLM: Empowering Large Language Models to Follow Complex Instructions"
- Taori et al. (2023). "Stanford Alpaca"
- Zheng et al. (2023). "Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena"