09. RAG 기초
09. RAG 기초¶
학습 목표¶
- RAG (Retrieval-Augmented Generation) 이해
- 문서 임베딩과 검색
- 청킹 전략
- RAG 파이프라인 구현
1. RAG κ°μ¶
μ RAGμΈκ°?¶
LLM의 한계:
- 학습 데이터 이후 정보를 모름 (지식 컷오프)
- 환각 (잘못된 정보 생성)
- 특정 도메인 지식 부족
RAG 해결책:
- 외부 지식 검색 후 답변 생성
- 최신 정보 반영 가능
- 출처 제공으로 신뢰성 향상
RAG μν€ν μ²¶
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β RAG Pipeline β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ€
β β
β μ§λ¬Έ βββΆ μλ² λ© βββΆ λ²‘ν° κ²μ βββΆ κ΄λ ¨ λ¬Έμ β
β β β
β βΌ β
β μ§λ¬Έ + λ¬Έμ βββΆ LLM βββΆ λ΅λ³ β
β β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
2. λ¬Έμ μ μ²λ¦¬¶
μ²νΉ (Chunking)¶
def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into fixed-size chunks with overlap between neighbors.

    Args:
        text: Source string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        List of chunk strings covering the whole text, in order.

    Raises:
        ValueError: If overlap >= chunk_size (the window would never
            advance and the original loop ran forever).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # Final chunk reached; stepping back by `overlap` here would
            # emit a duplicate tail chunk (bug in the original), so stop.
            break
        start = end - overlap
    return chunks
# μ¬μ©
text = "Very long document text here..."
chunks = chunk_text(text, chunk_size=500, overlap=100)
λ¬Έμ₯ κΈ°λ° μ²νΉ¶
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
def chunk_by_sentences(text, max_sentences=5, overlap_sentences=1, tokenizer=None):
    """Split text into chunks of whole sentences with sentence-level overlap.

    Args:
        text: Source text.
        max_sentences: Maximum number of sentences per chunk.
        overlap_sentences: Sentences shared between consecutive chunks.
        tokenizer: Optional callable mapping text -> list of sentences.
            Defaults to NLTK's sent_tokenize (file-level import).

    Returns:
        List of chunk strings, each a space-joined run of sentences.
    """
    if tokenizer is None:
        tokenizer = sent_tokenize
    sentences = tokenizer(text)
    # Guard the range step: the original used max_sentences - overlap_sentences
    # directly, which raises ValueError (step 0) or walks backwards when
    # overlap_sentences >= max_sentences.
    step = max(1, max_sentences - overlap_sentences)
    chunks = []
    for i in range(0, len(sentences), step):
        chunks.append(' '.join(sentences[i:i + max_sentences]))
    return chunks
μλ§¨ν± μ²νΉ¶
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
chunk_overlap=50,
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
)
chunks = splitter.split_text(text)
3. μλ² λ© μμ±¶
Sentence Transformers¶
from sentence_transformers import SentenceTransformer
# λͺ¨λΈ λ‘λ
model = SentenceTransformer('all-MiniLM-L6-v2')
# μλ² λ© μμ±
texts = ["Hello world", "How are you?"]
embeddings = model.encode(texts)
print(embeddings.shape) # (2, 384)
HuggingFace μλ² λ©¶
from transformers import AutoTokenizer, AutoModel
import torch
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
def get_embeddings(texts):
    """Embed texts with mask-aware mean pooling over the last hidden state.

    Args:
        texts: List of strings to embed (batched together with padding).

    Returns:
        numpy array of shape (batch, hidden_dim).
    """
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    # BUG FIX: the original averaged over ALL positions, padding included,
    # which biases embeddings of shorter texts in a padded batch. Weight the
    # mean by the attention mask so only real tokens contribute.
    mask = inputs['attention_mask'].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid 0-division on empty rows
    return (summed / counts).numpy()
OpenAI μλ² λ©¶
from openai import OpenAI
client = OpenAI()
def get_openai_embeddings(texts, model="text-embedding-3-small"):
    """Return one embedding vector (list of floats) per input text."""
    result = client.embeddings.create(input=texts, model=model)
    vectors = []
    for item in result.data:
        vectors.append(item.embedding)
    return vectors
4. λ²‘ν° κ²μ¶
μ½μ¬μΈ μ μ¬λ¶
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def search(query_embedding, document_embeddings, top_k=5):
    """Return indices and cosine-similarity scores of the top_k nearest docs.

    Args:
        query_embedding: 1-D array-like query vector.
        document_embeddings: 2-D array-like, one row per document.
        top_k: Number of results to return (best first).

    Returns:
        (indices, scores): top_k document row indices ordered by descending
        cosine similarity, and the corresponding similarity values.
    """
    docs = np.asarray(document_embeddings, dtype=float)
    query = np.asarray(query_embedding, dtype=float)
    # Cosine similarity via a normalized dot product. Plain numpy is enough
    # for a single query — no need to build sklearn's pairwise matrix — and
    # matches the numpy-based cosine used elsewhere in this file (SimpleRAG).
    denom = np.linalg.norm(docs, axis=1) * np.linalg.norm(query)
    denom = np.where(denom == 0, 1e-12, denom)  # guard zero-norm vectors
    similarities = docs @ query / denom
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return top_indices, similarities[top_indices]
# μ¬μ©
query_emb = model.encode(["What is machine learning?"])[0]
doc_embs = model.encode(documents)
indices, scores = search(query_emb, doc_embs, top_k=3)
FAISS μ¬μ©¶
import faiss
import numpy as np
# μΈλ±μ€ μμ±
dimension = 384 # μλ² λ© μ°¨μ
index = faiss.IndexFlatIP(dimension) # Inner Product (μ½μ¬μΈ μ μ¬λμ© μ κ·ν νμ)
# μ κ·ν ν μΆκ°
embeddings = np.array(embeddings).astype('float32')
faiss.normalize_L2(embeddings)
index.add(embeddings)
# κ²μ
query_emb = model.encode(["query"])[0].astype('float32').reshape(1, -1)
faiss.normalize_L2(query_emb)
distances, indices = index.search(query_emb, k=5)
5. κ°λ¨ν RAG ꡬν¶
from sentence_transformers import SentenceTransformer
from openai import OpenAI
import numpy as np
class SimpleRAG:
    """Minimal retrieval-augmented generation pipeline.

    Documents are embedded with a SentenceTransformer bi-encoder; at query
    time the top-k most cosine-similar documents are placed in a prompt for
    an OpenAI chat model.
    """

    def __init__(self, embedding_model='all-MiniLM-L6-v2'):
        self.embed_model = SentenceTransformer(embedding_model)
        self.client = OpenAI()
        self.documents = []
        self.embeddings = None  # (num_docs, dim) array, or None before any add

    def add_documents(self, documents):
        """Add documents and embed them.

        PERF FIX: only the newly added documents are encoded; the original
        re-encoded the ENTIRE corpus on every call, making repeated adds
        quadratic in total encoding work.
        """
        self.documents.extend(documents)
        new_embeddings = self.embed_model.encode(documents)
        if self.embeddings is None:
            self.embeddings = new_embeddings
        else:
            self.embeddings = np.vstack([self.embeddings, new_embeddings])

    def search(self, query, top_k=3):
        """Return the top_k documents most cosine-similar to the query."""
        query_emb = self.embed_model.encode([query])[0]
        # Cosine similarity against every stored document embedding.
        similarities = np.dot(self.embeddings, query_emb) / (
            np.linalg.norm(self.embeddings, axis=1) * np.linalg.norm(query_emb)
        )
        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.documents[i] for i in top_indices]

    def generate(self, query, top_k=3):
        """Answer the query using retrieved context (the RAG step)."""
        # Retrieve supporting documents and stuff them into the prompt.
        relevant_docs = self.search(query, top_k)
        context = "\n\n".join(relevant_docs)
        prompt = f"""Answer the question based on the context below.
Context:
{context}
Question: {query}
Answer:"""
        # LLM call
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
# μ¬μ©
rag = SimpleRAG()
rag.add_documents([
"Python is a programming language.",
"Machine learning is a subset of AI.",
"RAG combines retrieval with generation."
])
answer = rag.generate("What is RAG?")
print(answer)
6. κ³ κΈ RAG κΈ°λ²¶
Hybrid Search¶
from rank_bm25 import BM25Okapi
class HybridRAG:
    """Hybrid retrieval blending BM25 keyword scores with embedding scores.

    NOTE(review): relies on the module-level `model` (SentenceTransformer),
    `cosine_similarity`, `np`, and `BM25Okapi` being in scope — confirm
    against the surrounding file before reuse.
    """

    def __init__(self):
        self.documents = []
        self.bm25 = None        # BM25Okapi index over lowercased tokens
        self.embeddings = None  # dense embeddings aligned with self.documents

    def add_documents(self, documents):
        """Index documents for both keyword (BM25) and semantic search."""
        self.documents = documents
        # Keyword index: BM25 over lowercased whitespace tokens.
        tokenized = [doc.lower().split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized)
        # Semantic index: dense embeddings.
        self.embeddings = model.encode(documents)

    def hybrid_search(self, query, top_k=5, alpha=0.5):
        """Return top_k documents ranked by a weighted blend of both scores.

        Args:
            query: Search string.
            top_k: Number of documents to return (best first).
            alpha: Weight of the embedding score; (1 - alpha) goes to BM25.
        """
        # BM25 scores, max-normalized to [0, 1]. BUG FIX: when the query
        # shares no tokens with any document all scores are 0 and the
        # original divided by zero (NaN scores); skip normalization then.
        bm25_scores = self.bm25.get_scores(query.lower().split())
        max_bm25 = bm25_scores.max()
        if max_bm25 > 0:
            bm25_scores = bm25_scores / max_bm25
        # Embedding cosine scores.
        query_emb = model.encode([query])[0]
        embed_scores = cosine_similarity([query_emb], self.embeddings)[0]
        # Weighted combination of the two signals.
        combined = alpha * embed_scores + (1 - alpha) * bm25_scores
        top_indices = np.argsort(combined)[-top_k:][::-1]
        return [self.documents[i] for i in top_indices]
Query Expansion¶
def expand_query(query, llm_client):
    """Generate alternative phrasings of a query to improve retrieval recall.

    Returns the original query followed by LLM-generated alternatives
    (parsed by the module-level parse_alternatives helper).
    """
    prompt = (
        "Generate 3 alternative versions of this search query:\n"
        f"Original: {query}\n"
        "Alternatives:\n1."
    )
    completion = llm_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    alternatives = parse_alternatives(completion.choices[0].message.content)
    return [query] + alternatives
Reranking¶
from sentence_transformers import CrossEncoder
class RAGWithReranker:
    """Rerank first-stage retrieval candidates with a cross-encoder."""

    def __init__(self):
        # A cross-encoder scores (query, doc) pairs jointly — more accurate
        # than a bi-encoder but too slow for the whole corpus, so it is only
        # applied to a short candidate list.
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    def search_and_rerank(self, query, candidates, top_k=3):
        """Rerank candidate documents against the query; return the top_k.

        Args:
            query: Search string.
            candidates: Documents from a cheap first-stage retriever
                (fetch more than top_k, e.g. ~20, for the reranker to sift).
            top_k: Number of documents to return, best first.
        """
        # BUG FIX: the original called self.search(...), a method this class
        # does not define, and silently ignored the `candidates` parameter.
        pairs = [[query, doc] for doc in candidates]
        scores = self.reranker.predict(pairs)
        # Keep the top_k highest-scoring candidates.
        top_indices = np.argsort(scores)[-top_k:][::-1]
        return [candidates[i] for i in top_indices]
Multi-Query RAG¶
def multi_query_rag(question, rag, num_queries=3):
    """Retrieve documents with several LLM-generated query reformulations.

    Args:
        question: The user question.
        rag: Retriever exposing search(query, top_k) -> list of documents.
        num_queries: Number of alternative queries to generate.

    Returns:
        Deduplicated list of retrieved documents in first-retrieved order.
    """
    # Generate diverse queries (module-level generate_queries helper).
    prompt = f"""Generate {num_queries} different search queries for:
Question: {question}
Queries:"""
    queries = generate_queries(prompt)
    # Search with each query. FIX: dedupe with an insertion-ordered dict —
    # the original used set(), whose iteration order is nondeterministic,
    # making the returned context ordering unstable between runs.
    unique_docs = {}
    for q in queries:
        for doc in rag.search(q, top_k=3):
            unique_docs.setdefault(doc, None)
    return list(unique_docs)
7. μ²νΉ μ λ΅ λΉκ΅¶
| μ λ΅ | μ₯μ | λ¨μ | μ¬μ© μμ |
|---|---|---|---|
| κ³ μ ν¬κΈ° | ꡬν κ°λ¨ | λ¬Έλ§₯ λ¨μ | μΌλ°μ μΈ ν μ€νΈ |
| λ¬Έμ₯ κΈ°λ° | μλ―Έ λ¨μ | κΈΈμ΄ λΆκ· μΌ | ꡬ쑰νλ ν μ€νΈ |
| μλ§¨ν± | μλ―Έ 보쑴 | κ³μ° λΉμ© | κ³ νμ§ νμ |
| κ³μΈ΅μ | λ€λ¨κ³ κ²μ | 볡μ‘ν¨ | κΈ΄ λ¬Έμ |
8. νκ° λ©νΈλ¦¶
κ²μ νκ°¶
def calculate_recall_at_k(retrieved, relevant, k):
    """Compute Recall@K: fraction of relevant docs found in the top-k retrieved.

    Args:
        retrieved: Ranked list of retrieved document ids.
        relevant: Collection of ground-truth relevant document ids.
        k: Cutoff rank.

    Returns:
        Recall in [0, 1]. Returns 0.0 when `relevant` is empty — the
        original raised ZeroDivisionError in that case.
    """
    relevant_set = set(relevant)
    if not relevant_set:
        return 0.0
    retrieved_k = set(retrieved[:k])
    return len(retrieved_k & relevant_set) / len(relevant_set)
def calculate_mrr(retrieved, relevant):
    """Reciprocal rank of the first relevant document; 0 when none is found."""
    for rank, doc in enumerate(retrieved, start=1):
        if doc in relevant:
            return 1 / rank
    return 0
μμ± νκ°¶
# RAGAS λΌμ΄λΈλ¬λ¦¬ μ¬μ©
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision
results = evaluate(
dataset,
metrics=[faithfulness, answer_relevancy, context_precision]
)
정리¶
RAG 체크리스트¶
☐ 적절한 청킹 크기 선택
☐ 임베딩 모델 선택 (도메인 고려)
☐ 검색 top-k 튜닝
☐ 프롬프트 최적화
☐ 평가 메트릭 설정
ν΅μ¬ μ½λ¶
# μλ² λ©
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents)
# κ²μ
query_emb = model.encode([query])[0]
similarities = cosine_similarity([query_emb], embeddings)
# μμ±
context = "\n".join(relevant_docs)
prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
다음 단계¶
10_LangChain_Basics.md에서 LangChain 프레임워크를 학습합니다.