# 06_huggingface_pipeline.py
#
# Source-viewer header (not part of the script): Download - python, 200 lines, 5.9 KB
  1"""
  206. HuggingFace Pipeline ์˜ˆ์ œ
  3
  4๋‹ค์–‘ํ•œ NLP ํƒœ์Šคํฌ๋ฅผ Pipeline์œผ๋กœ ์ˆ˜ํ–‰
  5"""
  6
  7print("=" * 60)
  8print("HuggingFace Pipeline")
  9print("=" * 60)
 10
 11try:
 12    from transformers import pipeline
 13
 14    # ============================================
 15    # 1. ๊ฐ์„ฑ ๋ถ„์„
 16    # ============================================
 17    print("\n[1] ๊ฐ์„ฑ ๋ถ„์„ (Sentiment Analysis)")
 18    print("-" * 40)
 19
 20    classifier = pipeline("sentiment-analysis")
 21
 22    texts = [
 23        "I love this product! It's amazing.",
 24        "This is terrible. I'm very disappointed.",
 25        "It's okay, nothing special."
 26    ]
 27
 28    results = classifier(texts)
 29    for text, result in zip(texts, results):
 30        print(f"[{result['label']}] ({result['score']:.2%}) {text}")
 31
 32
 33    # ============================================
 34    # 2. ํ…์ŠคํŠธ ์ƒ์„ฑ
 35    # ============================================
 36    print("\n[2] ํ…์ŠคํŠธ ์ƒ์„ฑ (Text Generation)")
 37    print("-" * 40)
 38
 39    generator = pipeline("text-generation", model="gpt2")
 40
 41    prompt = "Artificial intelligence will"
 42    result = generator(prompt, max_length=50, num_return_sequences=1)
 43    print(f"ํ”„๋กฌํ”„ํŠธ: {prompt}")
 44    print(f"์ƒ์„ฑ: {result[0]['generated_text']}")
 45
 46
 47    # ============================================
 48    # 3. ์งˆ์˜์‘๋‹ต (QA)
 49    # ============================================
 50    print("\n[3] ์งˆ์˜์‘๋‹ต (Question Answering)")
 51    print("-" * 40)
 52
 53    qa = pipeline("question-answering")
 54
 55    context = """
 56    Python is a high-level, general-purpose programming language.
 57    Its design philosophy emphasizes code readability with the use of significant indentation.
 58    Python was created by Guido van Rossum and first released in 1991.
 59    """
 60
 61    questions = [
 62        "Who created Python?",
 63        "When was Python released?",
 64        "What does Python emphasize?"
 65    ]
 66
 67    for question in questions:
 68        result = qa(question=question, context=context)
 69        print(f"Q: {question}")
 70        print(f"A: {result['answer']} (confidence: {result['score']:.2%})")
 71        print()
 72
 73
 74    # ============================================
 75    # 4. ๊ฐœ์ฒด๋ช… ์ธ์‹ (NER)
 76    # ============================================
 77    print("\n[4] ๊ฐœ์ฒด๋ช… ์ธ์‹ (NER)")
 78    print("-" * 40)
 79
 80    ner = pipeline("ner", grouped_entities=True)
 81
 82    text = "Apple Inc. was founded by Steve Jobs in Cupertino, California in 1976."
 83    entities = ner(text)
 84
 85    print(f"ํ…์ŠคํŠธ: {text}")
 86    print("๊ฐœ์ฒด:")
 87    for entity in entities:
 88        print(f"  [{entity['entity_group']}] {entity['word']} ({entity['score']:.2%})")
 89
 90
 91    # ============================================
 92    # 5. ํ…์ŠคํŠธ ์š”์•ฝ
 93    # ============================================
 94    print("\n[5] ํ…์ŠคํŠธ ์š”์•ฝ (Summarization)")
 95    print("-" * 40)
 96
 97    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 98
 99    article = """
100    Machine learning is a type of artificial intelligence that allows software applications
101    to become more accurate at predicting outcomes without being explicitly programmed to do so.
102    Machine learning algorithms use historical data as input to predict new output values.
103    Recommendation engines are a common use case for machine learning. Other popular uses include
104    fraud detection, spam filtering, malware threat detection, business process automation and
105    predictive maintenance.
106    """
107
108    summary = summarizer(article, max_length=50, min_length=20)
109    print(f"์›๋ฌธ ๊ธธ์ด: {len(article)} chars")
110    print(f"์š”์•ฝ: {summary[0]['summary_text']}")
111
112
113    # ============================================
114    # 6. Zero-shot ๋ถ„๋ฅ˜
115    # ============================================
116    print("\n[6] Zero-shot ๋ถ„๋ฅ˜")
117    print("-" * 40)
118
119    classifier = pipeline("zero-shot-classification")
120
121    texts = [
122        "I need to book a flight to New York",
123        "Can you recommend a good restaurant?",
124        "How do I reset my password?"
125    ]
126    labels = ["travel", "food", "tech_support"]
127
128    for text in texts:
129        result = classifier(text, candidate_labels=labels)
130        top_label = result['labels'][0]
131        top_score = result['scores'][0]
132        print(f"[{top_label}] ({top_score:.2%}) {text}")
133
134
135    # ============================================
136    # 7. Fill-Mask (BERT MLM)
137    # ============================================
138    print("\n[7] Fill-Mask")
139    print("-" * 40)
140
141    fill_mask = pipeline("fill-mask", model="bert-base-uncased")
142
143    text = "Python is a [MASK] programming language."
144    results = fill_mask(text)
145
146    print(f"์ž…๋ ฅ: {text}")
147    print("์˜ˆ์ธก:")
148    for r in results[:3]:
149        print(f"  {r['token_str']}: {r['score']:.2%}")
150
151
152    # ============================================
153    # 8. ๋ฒˆ์—ญ
154    # ============================================
155    print("\n[8] ๋ฒˆ์—ญ (Translation)")
156    print("-" * 40)
157
158    translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
159
160    texts = [
161        "Hello, how are you?",
162        "Machine learning is amazing."
163    ]
164
165    for text in texts:
166        result = translator(text)
167        print(f"EN: {text}")
168        print(f"FR: {result[0]['translation_text']}")
169        print()
170
171
172    # ============================================
173    # ์ •๋ฆฌ
174    # ============================================
175    print("=" * 60)
176    print("Pipeline ์ •๋ฆฌ")
177    print("=" * 60)
178
179    summary = """
180์ฃผ์š” Pipeline:
181    - sentiment-analysis: ๊ฐ์„ฑ ๋ถ„์„
182    - text-generation: ํ…์ŠคํŠธ ์ƒ์„ฑ
183    - question-answering: ์งˆ์˜์‘๋‹ต
184    - ner: ๊ฐœ์ฒด๋ช… ์ธ์‹
185    - summarization: ์š”์•ฝ
186    - zero-shot-classification: ๋ ˆ์ด๋ธ” ์—†๋Š” ๋ถ„๋ฅ˜
187    - fill-mask: ๋งˆ์Šคํฌ ์˜ˆ์ธก
188    - translation: ๋ฒˆ์—ญ
189
190์‚ฌ์šฉ๋ฒ•:
191    from transformers import pipeline
192    classifier = pipeline("sentiment-analysis")
193    result = classifier("I love this!")
194"""
195    print(summary)
196
197except ImportError as e:
198    print(f"ํ•„์š” ํŒจํ‚ค์ง€ ๋ฏธ์„ค์น˜: {e}")
199    print("pip install transformers torch")