06_huggingface_pipeline.py

  1"""
  206. HuggingFace Pipeline 예제
  3
  4다양한 NLP 태스크를 Pipeline으로 수행
  5"""
  6
  7print("=" * 60)
  8print("HuggingFace Pipeline")
  9print("=" * 60)
 10
 11try:
 12    from transformers import pipeline
 13
 14    # ============================================
 15    # 1. 감성 분석
 16    # ============================================
 17    print("\n[1] 감성 분석 (Sentiment Analysis)")
 18    print("-" * 40)
 19
 20    classifier = pipeline("sentiment-analysis")
 21
 22    texts = [
 23        "I love this product! It's amazing.",
 24        "This is terrible. I'm very disappointed.",
 25        "It's okay, nothing special."
 26    ]
 27
 28    results = classifier(texts)
 29    for text, result in zip(texts, results):
 30        print(f"[{result['label']}] ({result['score']:.2%}) {text}")
 31
 32
 33    # ============================================
 34    # 2. 텍스트 생성
 35    # ============================================
 36    print("\n[2] 텍스트 생성 (Text Generation)")
 37    print("-" * 40)
 38
 39    generator = pipeline("text-generation", model="gpt2")
 40
 41    prompt = "Artificial intelligence will"
 42    result = generator(prompt, max_length=50, num_return_sequences=1)
 43    print(f"프롬프트: {prompt}")
 44    print(f"생성: {result[0]['generated_text']}")
 45
 46
 47    # ============================================
 48    # 3. 질의응답 (QA)
 49    # ============================================
 50    print("\n[3] 질의응답 (Question Answering)")
 51    print("-" * 40)
 52
 53    qa = pipeline("question-answering")
 54
 55    context = """
 56    Python is a high-level, general-purpose programming language.
 57    Its design philosophy emphasizes code readability with the use of significant indentation.
 58    Python was created by Guido van Rossum and first released in 1991.
 59    """
 60
 61    questions = [
 62        "Who created Python?",
 63        "When was Python released?",
 64        "What does Python emphasize?"
 65    ]
 66
 67    for question in questions:
 68        result = qa(question=question, context=context)
 69        print(f"Q: {question}")
 70        print(f"A: {result['answer']} (confidence: {result['score']:.2%})")
 71        print()
 72
 73
 74    # ============================================
 75    # 4. 개체명 인식 (NER)
 76    # ============================================
 77    print("\n[4] 개체명 인식 (NER)")
 78    print("-" * 40)
 79
 80    ner = pipeline("ner", grouped_entities=True)
 81
 82    text = "Apple Inc. was founded by Steve Jobs in Cupertino, California in 1976."
 83    entities = ner(text)
 84
 85    print(f"텍스트: {text}")
 86    print("개체:")
 87    for entity in entities:
 88        print(f"  [{entity['entity_group']}] {entity['word']} ({entity['score']:.2%})")
 89
 90
 91    # ============================================
 92    # 5. 텍스트 요약
 93    # ============================================
 94    print("\n[5] 텍스트 요약 (Summarization)")
 95    print("-" * 40)
 96
 97    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 98
 99    article = """
100    Machine learning is a type of artificial intelligence that allows software applications
101    to become more accurate at predicting outcomes without being explicitly programmed to do so.
102    Machine learning algorithms use historical data as input to predict new output values.
103    Recommendation engines are a common use case for machine learning. Other popular uses include
104    fraud detection, spam filtering, malware threat detection, business process automation and
105    predictive maintenance.
106    """
107
108    summary = summarizer(article, max_length=50, min_length=20)
109    print(f"원문 길이: {len(article)} chars")
110    print(f"요약: {summary[0]['summary_text']}")
111
112
113    # ============================================
114    # 6. Zero-shot 분류
115    # ============================================
116    print("\n[6] Zero-shot 분류")
117    print("-" * 40)
118
119    classifier = pipeline("zero-shot-classification")
120
121    texts = [
122        "I need to book a flight to New York",
123        "Can you recommend a good restaurant?",
124        "How do I reset my password?"
125    ]
126    labels = ["travel", "food", "tech_support"]
127
128    for text in texts:
129        result = classifier(text, candidate_labels=labels)
130        top_label = result['labels'][0]
131        top_score = result['scores'][0]
132        print(f"[{top_label}] ({top_score:.2%}) {text}")
133
134
135    # ============================================
136    # 7. Fill-Mask (BERT MLM)
137    # ============================================
138    print("\n[7] Fill-Mask")
139    print("-" * 40)
140
141    fill_mask = pipeline("fill-mask", model="bert-base-uncased")
142
143    text = "Python is a [MASK] programming language."
144    results = fill_mask(text)
145
146    print(f"입력: {text}")
147    print("예측:")
148    for r in results[:3]:
149        print(f"  {r['token_str']}: {r['score']:.2%}")
150
151
152    # ============================================
153    # 8. 번역
154    # ============================================
155    print("\n[8] 번역 (Translation)")
156    print("-" * 40)
157
158    translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
159
160    texts = [
161        "Hello, how are you?",
162        "Machine learning is amazing."
163    ]
164
165    for text in texts:
166        result = translator(text)
167        print(f"EN: {text}")
168        print(f"FR: {result[0]['translation_text']}")
169        print()
170
171
172    # ============================================
173    # 정리
174    # ============================================
175    print("=" * 60)
176    print("Pipeline 정리")
177    print("=" * 60)
178
179    summary = """
180주요 Pipeline:
181    - sentiment-analysis: 감성 분석
182    - text-generation: 텍스트 생성
183    - question-answering: 질의응답
184    - ner: 개체명 인식
185    - summarization: 요약
186    - zero-shot-classification: 레이블 없는 분류
187    - fill-mask: 마스크 예측
188    - translation: 번역
189
190사용법:
191    from transformers import pipeline
192    classifier = pipeline("sentiment-analysis")
193    result = classifier("I love this!")
194"""
195    print(summary)
196
197except ImportError as e:
198    print(f"필요 패키지 미설치: {e}")
199    print("pip install transformers torch")