09. RAG 기초

09. RAG 기초

ν•™μŠ΅ λͺ©ν‘œ

  • RAG (Retrieval-Augmented Generation) 이해
  • λ¬Έμ„œ μž„λ² λ”©κ³Ό 검색
  • μ²­ν‚Ή μ „λž΅
  • RAG νŒŒμ΄ν”„λΌμΈ κ΅¬ν˜„

1. RAG κ°œμš”

μ™œ RAG인가?

LLM의 ν•œκ³„:
- ν•™μŠ΅ 데이터 이후 정보 λͺ¨λ¦„ (지식 μ»·μ˜€ν”„)
- ν™˜κ° (잘λͺ»λœ 정보 생성)
- νŠΉμ • 도메인 지식 λΆ€μ‘±

RAG ν•΄κ²°μ±…:
- μ™ΈλΆ€ 지식 검색 ν›„ λ‹΅λ³€ 생성
- μ΅œμ‹  정보 반영 κ°€λŠ₯
- 좜처 제곡으둜 μ‹ λ’°μ„± ν–₯상

RAG μ•„ν‚€ν…μ²˜

β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
β”‚                     RAG Pipeline                     β”‚
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚                                                      β”‚
β”‚   질문 ──▢ μž„λ² λ”© ──▢ 벑터 검색 ──▢ κ΄€λ ¨ λ¬Έμ„œ      β”‚
β”‚                           β”‚                          β”‚
β”‚                           β–Ό                          β”‚
β”‚               질문 + λ¬Έμ„œ ──▢ LLM ──▢ λ‹΅λ³€          β”‚
β”‚                                                      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜

2. λ¬Έμ„œ μ „μ²˜λ¦¬

μ²­ν‚Ή (Chunking)

def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into fixed-size chunks with `overlap` shared characters.

    Args:
        text: The document text to split.
        chunk_size: Maximum characters per chunk (must be > 0).
        overlap: Characters shared between adjacent chunks
            (must satisfy 0 <= overlap < chunk_size).

    Returns:
        List of chunk strings; [] for empty input.

    Raises:
        ValueError: On invalid chunk_size/overlap. The original version
            looped forever when overlap >= chunk_size (the start index
            never advanced) and appended redundant tail chunks that were
            pure suffixes of the previous chunk.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("require 0 <= overlap < chunk_size")

    chunks = []
    step = chunk_size - overlap  # guaranteed >= 1 by the checks above

    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
        # Stop once the end of the text is covered; further windows
        # would only repeat suffixes of this chunk.
        if start + chunk_size >= len(text):
            break

    return chunks

# Usage example
text = "Very long document text here..."
chunks = chunk_text(text, chunk_size=500, overlap=100)

λ¬Έμž₯ 기반 μ²­ν‚Ή

import nltk
nltk.download('punkt')  # one-time download of the sentence-tokenizer data
from nltk.tokenize import sent_tokenize

def chunk_by_sentences(text, max_sentences=5, overlap_sentences=1):
    """Split *text* into chunks of up to `max_sentences` sentences with
    `overlap_sentences` sentences shared between adjacent chunks.

    Args:
        text: Document text; sentence boundaries come from NLTK's
            `sent_tokenize`.
        max_sentences: Maximum sentences per chunk.
        overlap_sentences: Sentences repeated between neighbors
            (must satisfy 0 <= overlap_sentences < max_sentences).

    Raises:
        ValueError: On invalid arguments. Previously a zero step made
            range() raise a confusing ValueError and a negative step
            silently returned [] instead of flagging the bad call.
    """
    if not 0 <= overlap_sentences < max_sentences:
        raise ValueError("require 0 <= overlap_sentences < max_sentences")

    sentences = sent_tokenize(text)
    chunks = []
    step = max_sentences - overlap_sentences  # >= 1 by the check above

    for i in range(0, len(sentences), step):
        chunks.append(' '.join(sentences[i:i + max_sentences]))
        # Stop once the final sentence is included; later windows would
        # only repeat suffixes of this chunk.
        if i + max_sentences >= len(sentences):
            break

    return chunks

μ‹œλ§¨ν‹± μ²­ν‚Ή

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Tries the separators in order, preferring the largest structural break
# (paragraph, line, sentence punctuation, ...) that fits within chunk_size.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
)

chunks = splitter.split_text(text)

3. μž„λ² λ”© 생성

Sentence Transformers

from sentence_transformers import SentenceTransformer

# Load the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings
texts = ["Hello world", "How are you?"]
embeddings = model.encode(texts)

print(embeddings.shape)  # (2, 384)

HuggingFace μž„λ² λ”©

from transformers import AutoTokenizer, AutoModel
import torch

# Tokenizer and encoder used by get_embeddings (384-dim MiniLM encoder)
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

def get_embeddings(texts):
    """Embed *texts* with the HuggingFace encoder using mask-aware mean pooling.

    The naive `last_hidden_state.mean(dim=1)` averages over padding tokens
    too, which skews embeddings in batches of mixed-length inputs; weighting
    by the attention mask restricts the average to real tokens (the pooling
    recommended by sentence-transformers for HF checkpoints).

    Args:
        texts: List of strings to embed.

    Returns:
        numpy array of shape (len(texts), hidden_dim).
    """
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')

    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling over real tokens only: zero out padding positions,
    # then divide by the per-sequence token count (clamped to avoid 0/0).
    mask = inputs['attention_mask'].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / counts).numpy()

OpenAI μž„λ² λ”©

from openai import OpenAI

client = OpenAI()  # API key is taken from the OPENAI_API_KEY environment variable

def get_openai_embeddings(texts, model="text-embedding-3-small"):
    """Embed *texts* via the OpenAI embeddings API.

    Args:
        texts: List of strings to embed (sent as one batched request).
        model: Embedding model name.

    Returns:
        List of embedding vectors, one per input text, in input order.
    """
    result = client.embeddings.create(input=texts, model=model)
    vectors = []
    for item in result.data:
        vectors.append(item.embedding)
    return vectors

4. 벑터 검색

코사인 μœ μ‚¬λ„

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def search(query_embedding, document_embeddings, top_k=5):
    """Return the indices and scores of the top_k most similar documents.

    Computes cosine similarity directly with numpy instead of wrapping a
    single query in a list for sklearn's `cosine_similarity` — same
    results, no heavyweight dependency for a one-line computation.

    Args:
        query_embedding: 1-D vector for the query.
        document_embeddings: 2-D array (n_docs, dim) of document vectors.
        top_k: Number of results (clamped to n_docs by the slice).

    Returns:
        (top_indices, top_scores): arrays sorted best-first.
    """
    docs = np.asarray(document_embeddings, dtype=float)
    query = np.asarray(query_embedding, dtype=float)

    # Cosine similarity: dot product over the product of norms.
    similarities = docs @ query / (
        np.linalg.norm(docs, axis=1) * np.linalg.norm(query)
    )

    # Ascending argsort, keep the last k, reverse for best-first order.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return top_indices, similarities[top_indices]

# Usage example
query_emb = model.encode(["What is machine learning?"])[0]
doc_embs = model.encode(documents)

indices, scores = search(query_emb, doc_embs, top_k=3)

FAISS μ‚¬μš©

import faiss
import numpy as np

# Build the index
dimension = 384  # embedding dimension (matches all-MiniLM-L6-v2)
index = faiss.IndexFlatIP(dimension)  # inner product (L2-normalize vectors for cosine similarity)

# Normalize, then add
embeddings = np.array(embeddings).astype('float32')
faiss.normalize_L2(embeddings)
index.add(embeddings)

# Search (query must be normalized the same way as the index)
query_emb = model.encode(["query"])[0].astype('float32').reshape(1, -1)
faiss.normalize_L2(query_emb)

distances, indices = index.search(query_emb, k=5)

5. κ°„λ‹¨ν•œ RAG κ΅¬ν˜„

from sentence_transformers import SentenceTransformer
from openai import OpenAI
import numpy as np

class SimpleRAG:
    """Minimal RAG pipeline: embed documents, retrieve by cosine
    similarity, and answer with an OpenAI chat model."""

    def __init__(self, embedding_model='all-MiniLM-L6-v2'):
        self.embed_model = SentenceTransformer(embedding_model)
        self.client = OpenAI()
        self.documents = []      # raw document texts
        self.embeddings = None   # (n_docs, dim) array, kept in sync with documents

    def add_documents(self, documents):
        """Add documents, embedding only the new batch.

        The previous version re-encoded the ENTIRE corpus on every call
        (O(total) work per add); here we encode just the new documents and
        stack them onto the cached embedding matrix.
        """
        if not documents:
            return
        new_embeddings = self.embed_model.encode(documents)
        self.documents.extend(documents)
        if self.embeddings is None:
            self.embeddings = new_embeddings
        else:
            self.embeddings = np.vstack([self.embeddings, new_embeddings])

    def search(self, query, top_k=3):
        """Return the top_k documents most similar to *query* (cosine)."""
        if self.embeddings is None:
            return []  # nothing indexed yet; previously this crashed on np.dot(None, ...)
        query_emb = self.embed_model.encode([query])[0]

        # Cosine similarity against every stored embedding
        similarities = np.dot(self.embeddings, query_emb) / (
            np.linalg.norm(self.embeddings, axis=1) * np.linalg.norm(query_emb)
        )

        top_indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.documents[i] for i in top_indices]

    def generate(self, query, top_k=3):
        """Answer *query* with the LLM, grounded in the retrieved context."""
        # Retrieve
        relevant_docs = self.search(query, top_k)
        context = "\n\n".join(relevant_docs)

        # Build the prompt
        prompt = f"""Answer the question based on the context below.

Context:
{context}

Question: {query}

Answer:"""

        # Call the LLM
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )

        return response.choices[0].message.content

# Usage example
rag = SimpleRAG()
rag.add_documents([
    "Python is a programming language.",
    "Machine learning is a subset of AI.",
    "RAG combines retrieval with generation."
])

answer = rag.generate("What is RAG?")
print(answer)

6. κ³ κΈ‰ RAG 기법

from rank_bm25 import BM25Okapi

class HybridRAG:
    """Hybrid retrieval: BM25 keyword scores blended with dense
    embedding similarity.

    NOTE(review): relies on a module-level `model` (SentenceTransformer)
    and `cosine_similarity` defined earlier in the lesson — confirm they
    are in scope where this class is used.
    """

    def __init__(self):
        self.documents = []
        self.bm25 = None        # BM25Okapi index over whitespace tokens
        self.embeddings = None  # dense embeddings of self.documents

    def add_documents(self, documents):
        """Index *documents* for both keyword and semantic search.

        Note: this REPLACES any previously indexed documents.
        """
        self.documents = documents

        # BM25 keyword index (naive lowercase whitespace tokenization)
        tokenized = [doc.lower().split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized)

        # Dense embeddings for semantic search
        self.embeddings = model.encode(documents)

    def hybrid_search(self, query, top_k=5, alpha=0.5):
        """Return top_k docs scored by alpha*dense + (1-alpha)*bm25."""
        # BM25 scores normalized into [0, 1]. Guard the all-zero case
        # (query sharing no tokens with the corpus): the original divided
        # by max()==0 and produced NaN scores.
        bm25_scores = self.bm25.get_scores(query.lower().split())
        max_score = bm25_scores.max()
        if max_score > 0:
            bm25_scores = bm25_scores / max_score

        # Dense (semantic) scores
        query_emb = model.encode([query])[0]
        embed_scores = cosine_similarity([query_emb], self.embeddings)[0]

        # Weighted combination of the two signals
        combined = alpha * embed_scores + (1 - alpha) * bm25_scores

        top_indices = np.argsort(combined)[-top_k:][::-1]
        return [self.documents[i] for i in top_indices]

Query Expansion

def expand_query(query, llm_client):
    """Expand a search query into alternative phrasings via an LLM.

    The original called an undefined `parse_alternatives` helper; the
    numbered-list parsing is now implemented inline so the snippet is
    self-contained.

    Args:
        query: The original search query.
        llm_client: An OpenAI-style client exposing
            `chat.completions.create`.

    Returns:
        List of queries: the original first, then the alternatives.
    """
    prompt = f"""Generate 3 alternative versions of this search query:
    Original: {query}

    Alternatives:
    1."""

    response = llm_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    # Parse the numbered list out of the completion. The model continues
    # after the "1." already in the prompt, so the first line may arrive
    # without its number.
    alternatives = []
    for line in response.choices[0].message.content.splitlines():
        cleaned = line.strip().lstrip("0123456789.)- ").strip()
        if cleaned:
            alternatives.append(cleaned)

    return [query] + alternatives

Reranking

from sentence_transformers import CrossEncoder

class RAGWithReranker:
    """Two-stage retrieval: a broad candidate search followed by
    cross-encoder reranking."""

    def __init__(self):
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    def search_and_rerank(self, query, candidates, top_k=3):
        """Rerank *candidates* for *query*; return the best top_k, best first.

        Bug fix: the original ignored its `candidates` argument and called
        an undefined `self.search(query, top_k=20)`; it now scores the
        candidates it was actually given.

        Args:
            query: The user query string.
            candidates: Candidate documents from a first-stage retriever.
            top_k: Number of documents to keep after reranking.
        """
        # Score every (query, doc) pair with the cross-encoder
        pairs = [[query, doc] for doc in candidates]
        scores = self.reranker.predict(pairs)

        # Keep the top_k highest-scoring candidates, best first
        top_indices = np.argsort(scores)[-top_k:][::-1]
        return [candidates[i] for i in top_indices]

Multi-Query RAG

def multi_query_rag(question, rag, num_queries=3):
    """Retrieve documents using several LLM-generated query variants.

    Uses an order-preserving dedup (dict keys) instead of the original
    `set()`, which discarded retrieval order and made the result order
    nondeterministic across runs.

    Args:
        question: The user question.
        rag: A retriever exposing `search(query, top_k)`.
        num_queries: Number of query variants to generate.

    Returns:
        De-duplicated documents in first-retrieved order.
    """
    # Generate diverse query phrasings
    prompt = f"""Generate {num_queries} different search queries for:
    Question: {question}

    Queries:"""

    # NOTE(review): `generate_queries` is not defined in this lesson —
    # assumed to be an LLM helper returning a list of query strings.
    queries = generate_queries(prompt)

    # Search with each query; dict keys keep first-seen order (Python 3.7+)
    merged = {}
    for q in queries:
        for doc in rag.search(q, top_k=3):
            merged.setdefault(doc, None)

    return list(merged)

7. μ²­ν‚Ή μ „λž΅ 비ꡐ

| μ „λž΅ | μž₯점 | 단점 | μ‚¬μš© μ‹œμ  |
|------|------|------|-----------|
| κ³ μ • 크기 | κ΅¬ν˜„ 간단 | λ¬Έλ§₯ λ‹¨μ ˆ | 일반적인 ν…μŠ€νŠΈ |
| λ¬Έμž₯ 기반 | 의미 λ‹¨μœ„ | 길이 뢈균일 | κ΅¬μ‘°ν™”λœ ν…μŠ€νŠΈ |
| μ‹œλ§¨ν‹± | 의미 보쑴 | 계산 λΉ„μš© | κ³ ν’ˆμ§ˆ ν•„μš” |
| 계측적 | 닀단계 검색 | λ³΅μž‘ν•¨ | κΈ΄ λ¬Έμ„œ |

8. 평가 λ©”νŠΈλ¦­

검색 평가

def calculate_recall_at_k(retrieved, relevant, k):
    """Compute Recall@K: fraction of relevant docs among the top-k retrieved.

    Args:
        retrieved: Ranked list of retrieved document ids.
        relevant: Collection of relevant (ground-truth) document ids.
        k: Cutoff rank.

    Returns:
        Recall in [0, 1]; 0.0 when `relevant` is empty (the original
        raised ZeroDivisionError in that case).
    """
    relevant_set = set(relevant)
    if not relevant_set:
        return 0.0
    retrieved_k = set(retrieved[:k])
    return len(retrieved_k & relevant_set) / len(relevant_set)

def calculate_mrr(retrieved, relevant):
    """Reciprocal rank of the first relevant document; 0 if none appears.

    Args:
        retrieved: Ranked list of retrieved document ids.
        relevant: Collection of relevant document ids.
    """
    relevant_set = set(relevant)
    for rank, doc in enumerate(retrieved, start=1):
        if doc in relevant_set:
            return 1 / rank
    return 0

생성 평가

# Evaluate generation quality with the RAGAS library
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision

# NOTE(review): `dataset` (questions, contexts, answers) must be prepared
# beforehand — not shown in this lesson.
results = evaluate(
    dataset,
    metrics=[faithfulness, answer_relevancy, context_precision]
)

정리

RAG 체크리슀트

β–‘ μ μ ˆν•œ μ²­ν‚Ή 크기 선택
β–‘ μž„λ² λ”© λͺ¨λΈ 선택 (도메인 κ³ λ €)
β–‘ 검색 top-k νŠœλ‹
β–‘ ν”„λ‘¬ν”„νŠΈ μ΅œμ ν™”
β–‘ 평가 λ©”νŠΈλ¦­ μ„€μ •

핡심 μ½”λ“œ

# Embedding
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(documents)

# Retrieval
query_emb = model.encode([query])[0]
similarities = cosine_similarity([query_emb], embeddings)

# Generation
context = "\n".join(relevant_docs)
prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"

λ‹€μŒ 단계

10_LangChain_Basics.mdμ—μ„œ LangChain ν”„λ ˆμž„μ›Œν¬λ₯Ό ν•™μŠ΅ν•©λ‹ˆλ‹€.

Use the navigation links to navigate between lessons.