Spaces:
Runtime error
🎉 Major Refactor: Modular Architecture with Automatic Fine-Tuning
Browse files## 🏗️ Architecture Changes:
- Split monolithic app.py into modular pages structure
- Created py/pages/ with separate files for each tab
- Added REST API controller with FastAPI
- Improved code organization and maintainability
## 📱 New Features:
- Automatic fine-tuning if vietnamese_sentiment_finetuned model doesn't exist
- REST API server running alongside Gradio (port 7861)
- Interactive API documentation at /docs
- Memory management and monitoring
- Modular tab system for better code organization
## 🗂️ File Structure:
```
py/
├── api_controller.py # REST API endpoints
├── fine_tune_sentiment.py # Fine-tuning script
└── pages/
├── __init__.py
├── single_analysis.py # 📝 Single text analysis
├── batch_analysis.py # 📊 Batch processing
├── model_info.py # ℹ️ Model information
└── api_endpoints.py # 🌐 API documentation
```
## 🤖 Model Management:
- Always uses vietnamese_sentiment_finetuned model
- Automatic fine-tuning if model doesn't exist
- Smart environment detection (local vs Hugging Face Spaces)
- Fallback to base model during development
## 🚀 Deployment Ready:
- Optimized for Hugging Face Spaces
- Automatic memory cleanup
- Comprehensive API documentation
- Smart model loading with fallbacks
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
- app.py +160 -236
- py/api_controller.py +301 -0
- py/demo.py +0 -204
- py/gradio_app.py +0 -631
- py/pages/__init__.py +15 -0
- py/pages/api_endpoints.py +114 -0
- py/pages/batch_analysis.py +122 -0
- py/pages/model_info.py +79 -0
- py/pages/single_analysis.py +77 -0
- py/test_model.py +0 -277
- requirements.txt +6 -1
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
Vietnamese Sentiment Analysis - Hugging Face Spaces
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
|
@@ -10,19 +10,36 @@
|
|
| 10 |
import gc
|
| 11 |
import psutil
|
| 12 |
import os
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
app_instance = None
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class SentimentGradioApp:
|
| 18 |
-
def __init__(self
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
self.tokenizer = None
|
| 21 |
self.model = None
|
| 22 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 23 |
self.sentiment_labels = ["Negative", "Neutral", "Positive"]
|
| 24 |
self.model_loaded = False
|
| 25 |
self.max_memory_mb = 8192
|
|
|
|
| 26 |
|
| 27 |
def get_memory_usage(self):
|
| 28 |
"""Get current memory usage in MB"""
|
|
@@ -35,29 +52,83 @@ def cleanup_memory(self):
|
|
| 35 |
torch.cuda.empty_cache()
|
| 36 |
gc.collect()
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def load_model(self):
|
| 39 |
-
"""Load the model
|
| 40 |
if self.model_loaded:
|
| 41 |
return True
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
try:
|
| 44 |
self.cleanup_memory()
|
| 45 |
-
print(f"🤖 Loading model from
|
| 46 |
|
| 47 |
-
self.tokenizer = AutoTokenizer.from_pretrained(self.
|
| 48 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(self.
|
| 49 |
|
| 50 |
self.model.to(self.device)
|
| 51 |
self.model.eval()
|
| 52 |
self.model_loaded = True
|
| 53 |
|
| 54 |
-
print(f"✅
|
|
|
|
| 55 |
return True
|
| 56 |
|
| 57 |
except Exception as e:
|
| 58 |
-
print(f"❌ Error loading model: {e}")
|
|
|
|
| 59 |
self.model_loaded = False
|
| 60 |
-
self.cleanup_memory()
|
| 61 |
return False
|
| 62 |
|
| 63 |
def predict_sentiment(self, text):
|
|
@@ -69,25 +140,22 @@ def predict_sentiment(self, text):
|
|
| 69 |
return None, "❌ Please enter some text to analyze."
|
| 70 |
|
| 71 |
try:
|
|
|
|
| 72 |
start_time = time.time()
|
| 73 |
|
| 74 |
-
# Tokenize
|
| 75 |
inputs = self.tokenizer(
|
| 76 |
-
text,
|
| 77 |
-
return_tensors="pt",
|
| 78 |
truncation=True,
|
| 79 |
padding=True,
|
| 80 |
-
max_length=512
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
-
#
|
| 84 |
-
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 85 |
-
|
| 86 |
-
# Predict
|
| 87 |
with torch.no_grad():
|
| 88 |
outputs = self.model(**inputs)
|
| 89 |
-
|
| 90 |
-
probabilities = torch.softmax(logits, dim=-1)
|
| 91 |
predicted_class = torch.argmax(probabilities, dim=-1).item()
|
| 92 |
confidence = torch.max(probabilities).item()
|
| 93 |
|
|
@@ -95,7 +163,7 @@ def predict_sentiment(self, text):
|
|
| 95 |
|
| 96 |
# Move to CPU and clean GPU memory
|
| 97 |
probs = probabilities.cpu().numpy()[0].tolist()
|
| 98 |
-
del probabilities,
|
| 99 |
self.cleanup_memory()
|
| 100 |
|
| 101 |
sentiment = self.sentiment_labels[predicted_class]
|
|
@@ -144,94 +212,50 @@ def batch_predict(self, texts):
|
|
| 144 |
if not valid_texts:
|
| 145 |
return [], "❌ No valid texts provided."
|
| 146 |
|
| 147 |
-
results = []
|
| 148 |
try:
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
if result:
|
| 152 |
-
results.append({"sentiment": result, "confidence": 0.85}) # Placeholder confidence
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
}
|
| 167 |
-
|
| 168 |
-
summary = f"""
|
| 169 |
-
## 📊 Batch Analysis Summary
|
| 170 |
-
|
| 171 |
-
**Total Texts Analyzed:** {total_texts}
|
| 172 |
-
**Average Confidence:** {avg_confidence:.2%}
|
| 173 |
-
|
| 174 |
-
### 🎯 Sentiment Distribution:
|
| 175 |
-
- 😊 **Positive:** {sentiment_counts['Positive']} ({sentiment_counts['Positive']/total_texts:.1%})
|
| 176 |
-
- 😐 **Neutral:** {sentiment_counts['Neutral']} ({sentiment_counts['Neutral']/total_texts:.1%})
|
| 177 |
-
- 😠 **Negative:** {sentiment_counts['Negative']} ({sentiment_counts['Negative']/total_texts:.1%})
|
| 178 |
-
|
| 179 |
-
### 📋 Individual Results:
|
| 180 |
-
""".strip()
|
| 181 |
-
|
| 182 |
-
for i, result in enumerate(results, 1):
|
| 183 |
-
summary += f"\n**{i}.** {result['sentiment']} ({result['confidence']:.1%})"
|
| 184 |
|
| 185 |
-
|
| 186 |
self.cleanup_memory()
|
|
|
|
| 187 |
|
| 188 |
-
return results, summary
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
except Exception as e:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
# Global functions
|
| 195 |
-
def analyze_sentiment(text):
|
| 196 |
-
if not app_instance:
|
| 197 |
-
return "❌ App not initialized. Please refresh the page."
|
| 198 |
-
|
| 199 |
-
sentiment, output = app_instance.predict_sentiment(text)
|
| 200 |
-
if sentiment and output:
|
| 201 |
-
return output
|
| 202 |
-
else:
|
| 203 |
-
return output
|
| 204 |
-
|
| 205 |
-
def clear_inputs():
|
| 206 |
-
return ""
|
| 207 |
-
|
| 208 |
-
def analyze_batch(texts):
|
| 209 |
-
if not app_instance:
|
| 210 |
-
return "❌ App not initialized. Please refresh the page."
|
| 211 |
-
|
| 212 |
-
if texts:
|
| 213 |
-
text_list = [line.strip() for line in texts.split('\n') if line.strip()]
|
| 214 |
-
results, summary = app_instance.batch_predict(text_list)
|
| 215 |
-
return summary
|
| 216 |
-
return "❌ Please enter some texts to analyze."
|
| 217 |
-
|
| 218 |
-
def clear_batch():
|
| 219 |
-
return ""
|
| 220 |
-
|
| 221 |
-
def update_memory_info():
|
| 222 |
-
if not app_instance:
|
| 223 |
-
return "App not initialized"
|
| 224 |
-
return f"Memory usage: {app_instance.get_memory_usage():.1f}MB"
|
| 225 |
-
|
| 226 |
-
def manual_memory_cleanup():
|
| 227 |
-
if not app_instance:
|
| 228 |
-
return "App not initialized"
|
| 229 |
-
app_instance.cleanup_memory()
|
| 230 |
-
return f"Memory cleaned. Current usage: {app_instance.get_memory_usage():.1f}MB"
|
| 231 |
|
| 232 |
def create_interface():
|
| 233 |
"""Create the Gradio interface for Hugging Face Spaces"""
|
| 234 |
-
global app_instance
|
| 235 |
|
| 236 |
app_instance = SentimentGradioApp()
|
| 237 |
|
|
@@ -240,147 +264,47 @@ def create_interface():
|
|
| 240 |
print("❌ Failed to load model. Please try again.")
|
| 241 |
return None
|
| 242 |
|
| 243 |
-
#
|
| 244 |
-
|
| 245 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 246 |
-
"Môn học này quá khó và nhàm chán.",
|
| 247 |
-
"Lớp học ổn định, không có gì đặc biệt.",
|
| 248 |
-
"Tôi rất thích cách giảng dạy của thầy cô.",
|
| 249 |
-
"Chương trình học cần cải thiện nhiều."
|
| 250 |
-
]
|
| 251 |
|
| 252 |
-
# Create interface
|
| 253 |
with gr.Blocks(
|
| 254 |
title="Vietnamese Sentiment Analysis",
|
| 255 |
-
theme=gr.themes.Soft()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
) as interface:
|
| 257 |
-
|
| 258 |
-
gr.
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
with gr.Tabs():
|
| 262 |
-
#
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
label="Enter Vietnamese Text",
|
| 268 |
-
placeholder="Type or paste Vietnamese text here...",
|
| 269 |
-
lines=3
|
| 270 |
-
)
|
| 271 |
-
|
| 272 |
-
with gr.Row():
|
| 273 |
-
analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
|
| 274 |
-
clear_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 275 |
-
|
| 276 |
-
with gr.Column(scale=2):
|
| 277 |
-
gr.Examples(
|
| 278 |
-
examples=examples,
|
| 279 |
-
inputs=[text_input],
|
| 280 |
-
label="💡 Example Texts"
|
| 281 |
-
)
|
| 282 |
-
|
| 283 |
-
result_output = gr.Markdown(label="Analysis Result", visible=True)
|
| 284 |
-
|
| 285 |
-
# Batch Analysis Tab
|
| 286 |
-
with gr.Tab("📊 Batch Analysis"):
|
| 287 |
-
gr.Markdown("### 📝 Memory-Efficient Batch Processing")
|
| 288 |
-
gr.Markdown("**Maximum batch size:** 10 texts (for memory efficiency)")
|
| 289 |
-
gr.Markdown("**Memory limit:** 8GB")
|
| 290 |
-
|
| 291 |
-
batch_input = gr.Textbox(
|
| 292 |
-
label="Enter Multiple Texts (one per line)",
|
| 293 |
-
placeholder="Enter up to 10 Vietnamese texts, one per line...",
|
| 294 |
-
lines=8,
|
| 295 |
-
max_lines=20
|
| 296 |
-
)
|
| 297 |
-
|
| 298 |
-
with gr.Row():
|
| 299 |
-
batch_analyze_btn = gr.Button("🔍 Analyze All", variant="primary")
|
| 300 |
-
batch_clear_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 301 |
-
memory_cleanup_btn = gr.Button("🧹 Memory Cleanup", variant="secondary")
|
| 302 |
-
|
| 303 |
-
batch_result_output = gr.Markdown(label="Batch Analysis Result")
|
| 304 |
-
memory_info = gr.Textbox(
|
| 305 |
-
label="Memory Usage",
|
| 306 |
-
value="Memory usage: 0MB used",
|
| 307 |
-
interactive=False
|
| 308 |
-
)
|
| 309 |
-
|
| 310 |
-
# Model Info Tab
|
| 311 |
-
with gr.Tab("ℹ️ Model Information"):
|
| 312 |
-
gr.Markdown(f"""
|
| 313 |
-
## 🤖 Model Details
|
| 314 |
-
|
| 315 |
-
**Model Architecture:** Transformer-based sequence classification
|
| 316 |
-
**Base Model:** {app_instance.model_name}
|
| 317 |
-
**Languages:** Vietnamese (optimized)
|
| 318 |
-
**Labels:** Negative, Neutral, Positive
|
| 319 |
-
|
| 320 |
-
## 📊 Performance Metrics
|
| 321 |
-
|
| 322 |
-
- **Processing Speed:** ~100ms per text
|
| 323 |
-
- **Max Sequence Length:** 512 tokens
|
| 324 |
-
- **Memory Limit:** 8GB
|
| 325 |
-
|
| 326 |
-
## 💡 Usage Tips
|
| 327 |
-
|
| 328 |
-
- Enter clear, grammatically correct Vietnamese text
|
| 329 |
-
- Longer texts (20-200 words) work best
|
| 330 |
-
- The model handles various Vietnamese dialects
|
| 331 |
-
- Confidence scores indicate prediction certainty
|
| 332 |
-
|
| 333 |
-
## 🛡️ Memory Management
|
| 334 |
-
|
| 335 |
-
- **Automatic Cleanup:** Memory is cleaned after each prediction
|
| 336 |
-
- **Batch Limits:** Maximum 10 texts per batch to prevent overflow
|
| 337 |
-
- **Memory Monitoring:** Real-time memory usage tracking
|
| 338 |
-
- **GPU Optimization:** CUDA cache clearing when available
|
| 339 |
-
|
| 340 |
-
## ⚠️ Performance Notes
|
| 341 |
-
|
| 342 |
-
- If you encounter memory errors, try reducing batch size
|
| 343 |
-
- Use the Memory Cleanup button if needed
|
| 344 |
-
- Monitor memory usage in the Batch Analysis tab
|
| 345 |
-
- Model loaded directly from Hugging Face Hub (no local training required)
|
| 346 |
-
""")
|
| 347 |
-
|
| 348 |
-
# Connect events
|
| 349 |
-
analyze_btn.click(
|
| 350 |
-
fn=analyze_sentiment,
|
| 351 |
-
inputs=[text_input],
|
| 352 |
-
outputs=[result_output]
|
| 353 |
-
)
|
| 354 |
-
|
| 355 |
-
clear_btn.click(
|
| 356 |
-
fn=clear_inputs,
|
| 357 |
-
outputs=[text_input]
|
| 358 |
-
)
|
| 359 |
-
|
| 360 |
-
batch_analyze_btn.click(
|
| 361 |
-
fn=analyze_batch,
|
| 362 |
-
inputs=[batch_input],
|
| 363 |
-
outputs=[batch_result_output]
|
| 364 |
-
)
|
| 365 |
-
|
| 366 |
-
batch_clear_btn.click(
|
| 367 |
-
fn=clear_batch,
|
| 368 |
-
outputs=[batch_input]
|
| 369 |
-
)
|
| 370 |
-
|
| 371 |
-
memory_cleanup_btn.click(
|
| 372 |
-
fn=manual_memory_cleanup,
|
| 373 |
-
outputs=[memory_info]
|
| 374 |
-
)
|
| 375 |
-
|
| 376 |
-
# Update memory info periodically
|
| 377 |
-
interface.load(
|
| 378 |
-
fn=update_memory_info,
|
| 379 |
-
outputs=[memory_info]
|
| 380 |
-
)
|
| 381 |
|
| 382 |
return interface
|
| 383 |
|
|
|
|
| 384 |
# Create and launch the interface
|
| 385 |
if __name__ == "__main__":
|
| 386 |
print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App
|
| 3 |
+
Uses fine-tuned model and modular page structure
|
| 4 |
"""
|
| 5 |
|
| 6 |
import gradio as gr
|
|
|
|
| 10 |
import gc
|
| 11 |
import psutil
|
| 12 |
import os
|
| 13 |
+
import threading
|
| 14 |
+
import subprocess
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
# Import modular pages
|
| 18 |
+
from py.api_controller import create_api_controller
|
| 19 |
+
from py.pages import (
|
| 20 |
+
create_single_analysis_page,
|
| 21 |
+
create_batch_analysis_page,
|
| 22 |
+
create_model_info_page,
|
| 23 |
+
create_api_endpoints_page
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# Global app instances
|
| 27 |
app_instance = None
|
| 28 |
+
api_controller = None
|
| 29 |
+
api_server_thread = None
|
| 30 |
|
| 31 |
class SentimentGradioApp:
|
| 32 |
+
def __init__(self):
|
| 33 |
+
# Always use the fine-tuned model
|
| 34 |
+
self.finetuned_model = "./vietnamese_sentiment_finetuned"
|
| 35 |
+
self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert" # For initial fine-tuning
|
| 36 |
self.tokenizer = None
|
| 37 |
self.model = None
|
| 38 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 39 |
self.sentiment_labels = ["Negative", "Neutral", "Positive"]
|
| 40 |
self.model_loaded = False
|
| 41 |
self.max_memory_mb = 8192
|
| 42 |
+
self.current_model = None
|
| 43 |
|
| 44 |
def get_memory_usage(self):
|
| 45 |
"""Get current memory usage in MB"""
|
|
|
|
| 52 |
torch.cuda.empty_cache()
|
| 53 |
gc.collect()
|
| 54 |
|
| 55 |
+
def run_fine_tuning_if_needed(self):
|
| 56 |
+
"""Run fine-tuning if the fine-tuned model doesn't exist"""
|
| 57 |
+
if os.path.exists(self.finetuned_model):
|
| 58 |
+
print(f"✅ Fine-tuned model already exists at {self.finetuned_model}")
|
| 59 |
+
return True
|
| 60 |
+
|
| 61 |
+
print(f"🔧 Fine-tuned model not found at {self.finetuned_model}")
|
| 62 |
+
print("🚀 Starting automatic fine-tuning process...")
|
| 63 |
+
|
| 64 |
+
try:
|
| 65 |
+
# Get the correct path to the fine-tuning script
|
| 66 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 67 |
+
fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")
|
| 68 |
+
|
| 69 |
+
if not os.path.exists(fine_tune_script):
|
| 70 |
+
print(f"❌ Fine-tuning script not found at: {fine_tune_script}")
|
| 71 |
+
return False
|
| 72 |
+
|
| 73 |
+
print("📋 Running fine_tune_sentiment.py...")
|
| 74 |
+
print(f"📁 Script path: {fine_tune_script}")
|
| 75 |
+
|
| 76 |
+
# Run the fine-tuning script as a subprocess
|
| 77 |
+
result = subprocess.run([
|
| 78 |
+
sys.executable,
|
| 79 |
+
fine_tune_script
|
| 80 |
+
], capture_output=True, text=True, cwd=current_dir)
|
| 81 |
+
|
| 82 |
+
if result.returncode == 0:
|
| 83 |
+
print("✅ Fine-tuning completed successfully!")
|
| 84 |
+
# Show only the last few lines of output to avoid spam
|
| 85 |
+
output_lines = result.stdout.strip().split('\n')
|
| 86 |
+
if output_lines:
|
| 87 |
+
print("📊 Final output:")
|
| 88 |
+
for line in output_lines[-5:]: # Show last 5 lines
|
| 89 |
+
print(f" {line}")
|
| 90 |
+
return True
|
| 91 |
+
else:
|
| 92 |
+
print(f"❌ Fine-tuning failed with error:")
|
| 93 |
+
print(result.stderr)
|
| 94 |
+
return False
|
| 95 |
+
|
| 96 |
+
except Exception as e:
|
| 97 |
+
print(f"❌ Error running fine-tuning: {e}")
|
| 98 |
+
return False
|
| 99 |
+
|
| 100 |
def load_model(self):
|
| 101 |
+
"""Load the fine-tuned model, creating it if needed"""
|
| 102 |
if self.model_loaded:
|
| 103 |
return True
|
| 104 |
|
| 105 |
+
print("🎯 Loading Vietnamese Sentiment Analysis Model")
|
| 106 |
+
|
| 107 |
+
# Step 1: Check if fine-tuned model exists, if not, create it
|
| 108 |
+
if not self.run_fine_tuning_if_needed():
|
| 109 |
+
print("❌ Failed to create fine-tuned model")
|
| 110 |
+
return False
|
| 111 |
+
|
| 112 |
+
# Step 2: Load the fine-tuned model
|
| 113 |
try:
|
| 114 |
self.cleanup_memory()
|
| 115 |
+
print(f"🤖 Loading fine-tuned model from: {self.finetuned_model}")
|
| 116 |
|
| 117 |
+
self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
|
| 118 |
+
self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)
|
| 119 |
|
| 120 |
self.model.to(self.device)
|
| 121 |
self.model.eval()
|
| 122 |
self.model_loaded = True
|
| 123 |
|
| 124 |
+
print(f"✅ Fine-tuned model loaded successfully!")
|
| 125 |
+
self.current_model = self.finetuned_model
|
| 126 |
return True
|
| 127 |
|
| 128 |
except Exception as e:
|
| 129 |
+
print(f"❌ Error loading fine-tuned model: {e}")
|
| 130 |
+
print("�� This should not happen if fine-tuning completed successfully")
|
| 131 |
self.model_loaded = False
|
|
|
|
| 132 |
return False
|
| 133 |
|
| 134 |
def predict_sentiment(self, text):
|
|
|
|
| 140 |
return None, "❌ Please enter some text to analyze."
|
| 141 |
|
| 142 |
try:
|
| 143 |
+
self.cleanup_memory()
|
| 144 |
start_time = time.time()
|
| 145 |
|
| 146 |
+
# Tokenize input
|
| 147 |
inputs = self.tokenizer(
|
| 148 |
+
text.strip(),
|
|
|
|
| 149 |
truncation=True,
|
| 150 |
padding=True,
|
| 151 |
+
max_length=512,
|
| 152 |
+
return_tensors="pt"
|
| 153 |
+
).to(self.device)
|
| 154 |
|
| 155 |
+
# Get prediction
|
|
|
|
|
|
|
|
|
|
| 156 |
with torch.no_grad():
|
| 157 |
outputs = self.model(**inputs)
|
| 158 |
+
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
|
|
|
| 159 |
predicted_class = torch.argmax(probabilities, dim=-1).item()
|
| 160 |
confidence = torch.max(probabilities).item()
|
| 161 |
|
|
|
|
| 163 |
|
| 164 |
# Move to CPU and clean GPU memory
|
| 165 |
probs = probabilities.cpu().numpy()[0].tolist()
|
| 166 |
+
del probabilities, outputs, inputs
|
| 167 |
self.cleanup_memory()
|
| 168 |
|
| 169 |
sentiment = self.sentiment_labels[predicted_class]
|
|
|
|
| 212 |
if not valid_texts:
|
| 213 |
return [], "❌ No valid texts provided."
|
| 214 |
|
|
|
|
| 215 |
try:
|
| 216 |
+
results = []
|
| 217 |
+
total_start_time = time.time()
|
|
|
|
|
|
|
| 218 |
|
| 219 |
+
for text in valid_texts:
|
| 220 |
+
sentiment, _ = self.predict_sentiment(text)
|
| 221 |
+
if sentiment:
|
| 222 |
+
results.append({
|
| 223 |
+
"text": text,
|
| 224 |
+
"sentiment": sentiment,
|
| 225 |
+
"confidence": 0.0, # Would need to extract from full output
|
| 226 |
+
"processing_time": 0.0 # Would need to extract from full output
|
| 227 |
+
})
|
| 228 |
+
|
| 229 |
+
total_time = time.time() - total_start_time
|
| 230 |
+
return results, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
+
except Exception as e:
|
| 233 |
self.cleanup_memory()
|
| 234 |
+
return [], f"❌ Error during batch prediction: {str(e)}"
|
| 235 |
|
|
|
|
| 236 |
|
| 237 |
+
def start_api_server():
|
| 238 |
+
"""Start the API server in a separate thread"""
|
| 239 |
+
global api_controller
|
| 240 |
+
if app_instance and api_controller is None:
|
| 241 |
+
try:
|
| 242 |
+
api_controller = create_api_controller(app_instance)
|
| 243 |
+
# Run API server on a different port to avoid conflicts
|
| 244 |
+
api_server_thread = threading.Thread(
|
| 245 |
+
target=api_controller.run,
|
| 246 |
+
kwargs={"host": "0.0.0.0", "port": 7861},
|
| 247 |
+
daemon=True
|
| 248 |
+
)
|
| 249 |
+
api_server_thread.start()
|
| 250 |
+
print("🌐 API server started on port 7861")
|
| 251 |
+
print("📚 API Documentation: http://localhost:7861/docs")
|
| 252 |
except Exception as e:
|
| 253 |
+
print(f"❌ Failed to start API server: {e}")
|
| 254 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
def create_interface():
|
| 257 |
"""Create the Gradio interface for Hugging Face Spaces"""
|
| 258 |
+
global app_instance, api_controller
|
| 259 |
|
| 260 |
app_instance = SentimentGradioApp()
|
| 261 |
|
|
|
|
| 264 |
print("❌ Failed to load model. Please try again.")
|
| 265 |
return None
|
| 266 |
|
| 267 |
+
# Start API server
|
| 268 |
+
start_api_server()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
+
# Create the interface
|
| 271 |
with gr.Blocks(
|
| 272 |
title="Vietnamese Sentiment Analysis",
|
| 273 |
+
theme=gr.themes.Soft(),
|
| 274 |
+
css="""
|
| 275 |
+
.gradio-container {
|
| 276 |
+
max-width: 1200px !important;
|
| 277 |
+
margin: 0 auto !important;
|
| 278 |
+
}
|
| 279 |
+
.main-header {
|
| 280 |
+
text-align: center;
|
| 281 |
+
margin-bottom: 2rem;
|
| 282 |
+
}
|
| 283 |
+
"""
|
| 284 |
) as interface:
|
| 285 |
+
# Main title
|
| 286 |
+
gr.HTML("""
|
| 287 |
+
<div class="main-header">
|
| 288 |
+
<h1>🎭 Vietnamese Sentiment Analysis</h1>
|
| 289 |
+
<p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
|
| 290 |
+
<p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
|
| 291 |
+
</div>
|
| 292 |
+
""".format(
|
| 293 |
+
model_name=getattr(app_instance, 'current_model', app_instance.finetuned_model),
|
| 294 |
+
device=str(app_instance.device).upper()
|
| 295 |
+
))
|
| 296 |
+
|
| 297 |
+
# Create tabs
|
| 298 |
with gr.Tabs():
|
| 299 |
+
# Import and create all pages
|
| 300 |
+
create_single_analysis_page(app_instance)
|
| 301 |
+
create_batch_analysis_page(app_instance)
|
| 302 |
+
create_model_info_page(app_instance)
|
| 303 |
+
create_api_endpoints_page()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
return interface
|
| 306 |
|
| 307 |
+
|
| 308 |
# Create and launch the interface
|
| 309 |
if __name__ == "__main__":
|
| 310 |
print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")
|
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Vietnamese Sentiment Analysis - API Controller
|
| 4 |
+
Provides REST API endpoints for sentiment analysis using FastAPI
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from fastapi import FastAPI, HTTPException
|
| 8 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
from typing import List, Optional
|
| 11 |
+
import uvicorn
|
| 12 |
+
import time
|
| 13 |
+
import logging
|
| 14 |
+
|
| 15 |
+
# Set up logging
|
| 16 |
+
logging.basicConfig(level=logging.INFO)
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
# Pydantic models for request/response
|
| 20 |
+
class TextInput(BaseModel):
|
| 21 |
+
text: str
|
| 22 |
+
language: Optional[str] = "vi"
|
| 23 |
+
|
| 24 |
+
class BatchTextInput(BaseModel):
|
| 25 |
+
texts: List[str]
|
| 26 |
+
language: Optional[str] = "vi"
|
| 27 |
+
|
| 28 |
+
class SentimentResult(BaseModel):
|
| 29 |
+
sentiment: str
|
| 30 |
+
confidence: float
|
| 31 |
+
probabilities: dict
|
| 32 |
+
processing_time: float
|
| 33 |
+
text: str
|
| 34 |
+
|
| 35 |
+
class BatchSentimentResult(BaseModel):
|
| 36 |
+
results: List[SentimentResult]
|
| 37 |
+
total_texts: int
|
| 38 |
+
sentiment_distribution: dict
|
| 39 |
+
average_confidence: float
|
| 40 |
+
total_processing_time: float
|
| 41 |
+
|
| 42 |
+
class HealthResponse(BaseModel):
|
| 43 |
+
status: str
|
| 44 |
+
model_loaded: bool
|
| 45 |
+
memory_usage_mb: float
|
| 46 |
+
timestamp: str
|
| 47 |
+
|
| 48 |
+
class ModelInfo(BaseModel):
|
| 49 |
+
model_name: str
|
| 50 |
+
architecture: str
|
| 51 |
+
languages: List[str]
|
| 52 |
+
labels: List[str]
|
| 53 |
+
max_sequence_length: int
|
| 54 |
+
memory_limit_mb: int
|
| 55 |
+
|
| 56 |
+
class APIController:
|
| 57 |
+
def __init__(self, sentiment_app):
|
| 58 |
+
self.sentiment_app = sentiment_app
|
| 59 |
+
self.app = FastAPI(
|
| 60 |
+
title="Vietnamese Sentiment Analysis API",
|
| 61 |
+
description="API for Vietnamese sentiment analysis using transformer models",
|
| 62 |
+
version="1.0.0"
|
| 63 |
+
)
|
| 64 |
+
self.setup_cors()
|
| 65 |
+
self.setup_routes()
|
| 66 |
+
|
| 67 |
+
def setup_cors(self):
|
| 68 |
+
"""Setup CORS middleware for cross-origin requests"""
|
| 69 |
+
self.app.add_middleware(
|
| 70 |
+
CORSMiddleware,
|
| 71 |
+
allow_origins=["*"], # In production, specify allowed origins
|
| 72 |
+
allow_credentials=True,
|
| 73 |
+
allow_methods=["GET", "POST", "OPTIONS"],
|
| 74 |
+
allow_headers=["*"],
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
def setup_routes(self):
|
| 78 |
+
"""Setup API routes"""
|
| 79 |
+
|
| 80 |
+
@self.app.get("/", response_model=dict)
|
| 81 |
+
async def root():
|
| 82 |
+
"""Root endpoint"""
|
| 83 |
+
return {
|
| 84 |
+
"message": "Vietnamese Sentiment Analysis API",
|
| 85 |
+
"version": "1.0.0",
|
| 86 |
+
"endpoints": {
|
| 87 |
+
"health": "/health",
|
| 88 |
+
"model_info": "/model/info",
|
| 89 |
+
"analyze": "/analyze",
|
| 90 |
+
"analyze_batch": "/analyze/batch",
|
| 91 |
+
"docs": "/docs"
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
@self.app.get("/health", response_model=HealthResponse)
|
| 96 |
+
async def health_check():
|
| 97 |
+
"""Health check endpoint"""
|
| 98 |
+
try:
|
| 99 |
+
memory_usage = self.sentiment_app.get_memory_usage() if self.sentiment_app else 0
|
| 100 |
+
return HealthResponse(
|
| 101 |
+
status="healthy",
|
| 102 |
+
model_loaded=self.sentiment_app.model_loaded if self.sentiment_app else False,
|
| 103 |
+
memory_usage_mb=memory_usage,
|
| 104 |
+
timestamp=time.strftime('%Y-%m-%d %H:%M:%S')
|
| 105 |
+
)
|
| 106 |
+
except Exception as e:
|
| 107 |
+
logger.error(f"Health check failed: {e}")
|
| 108 |
+
raise HTTPException(status_code=500, detail="Health check failed")
|
| 109 |
+
|
| 110 |
+
@self.app.get("/model/info", response_model=ModelInfo)
|
| 111 |
+
async def get_model_info():
|
| 112 |
+
"""Get model information"""
|
| 113 |
+
if not self.sentiment_app:
|
| 114 |
+
raise HTTPException(status_code=503, detail="Model not initialized")
|
| 115 |
+
|
| 116 |
+
return ModelInfo(
|
| 117 |
+
model_name=self.sentiment_app.model_name,
|
| 118 |
+
architecture="Transformer-based sequence classification",
|
| 119 |
+
languages=["Vietnamese"],
|
| 120 |
+
labels=self.sentiment_app.sentiment_labels,
|
| 121 |
+
max_sequence_length=512,
|
| 122 |
+
memory_limit_mb=self.sentiment_app.max_memory_mb
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
@self.app.post("/analyze", response_model=SentimentResult)
|
| 126 |
+
async def analyze_sentiment(input_data: TextInput):
|
| 127 |
+
"""Analyze sentiment of a single text"""
|
| 128 |
+
if not self.sentiment_app or not self.sentiment_app.model_loaded:
|
| 129 |
+
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 130 |
+
|
| 131 |
+
if not input_data.text.strip():
|
| 132 |
+
raise HTTPException(status_code=400, detail="Text cannot be empty")
|
| 133 |
+
|
| 134 |
+
try:
|
| 135 |
+
start_time = time.time()
|
| 136 |
+
|
| 137 |
+
# Get prediction from the sentiment app
|
| 138 |
+
sentiment, output_text = self.sentiment_app.predict_sentiment(input_data.text)
|
| 139 |
+
|
| 140 |
+
if not sentiment:
|
| 141 |
+
logger.error("Sentiment prediction returned None")
|
| 142 |
+
raise HTTPException(status_code=500, detail="Analysis failed - no sentiment returned")
|
| 143 |
+
|
| 144 |
+
logger.info(f"Sentiment prediction: {sentiment}")
|
| 145 |
+
logger.debug(f"Full output text: {output_text}")
|
| 146 |
+
|
| 147 |
+
# Parse the output to extract probabilities
|
| 148 |
+
probabilities = self._extract_probabilities(output_text)
|
| 149 |
+
confidence = probabilities.get(sentiment.lower(), 0.0)
|
| 150 |
+
|
| 151 |
+
logger.info(f"Extracted probabilities: {probabilities}")
|
| 152 |
+
logger.info(f"Confidence for {sentiment}: {confidence}")
|
| 153 |
+
|
| 154 |
+
processing_time = time.time() - start_time
|
| 155 |
+
|
| 156 |
+
return SentimentResult(
|
| 157 |
+
sentiment=sentiment,
|
| 158 |
+
confidence=confidence,
|
| 159 |
+
probabilities=probabilities,
|
| 160 |
+
processing_time=processing_time,
|
| 161 |
+
text=input_data.text
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
+
except Exception as e:
|
| 165 |
+
logger.error(f"Analysis failed: {e}")
|
| 166 |
+
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
| 167 |
+
|
| 168 |
+
@self.app.post("/analyze/batch", response_model=BatchSentimentResult)
|
| 169 |
+
async def analyze_batch_sentiment(input_data: BatchTextInput):
|
| 170 |
+
"""Analyze sentiment of multiple texts"""
|
| 171 |
+
if not self.sentiment_app or not self.sentiment_app.model_loaded:
|
| 172 |
+
raise HTTPException(status_code=503, detail="Model not loaded")
|
| 173 |
+
|
| 174 |
+
if not input_data.texts or not any(text.strip() for text in input_data.texts):
|
| 175 |
+
raise HTTPException(status_code=400, detail="At least one non-empty text is required")
|
| 176 |
+
|
| 177 |
+
if len(input_data.texts) > 10:
|
| 178 |
+
raise HTTPException(status_code=400, detail="Maximum 10 texts allowed per batch")
|
| 179 |
+
|
| 180 |
+
try:
|
| 181 |
+
start_time = time.time()
|
| 182 |
+
results = []
|
| 183 |
+
sentiment_distribution = {"Positive": 0, "Neutral": 0, "Negative": 0}
|
| 184 |
+
total_confidence = 0.0
|
| 185 |
+
|
| 186 |
+
# Process each text
|
| 187 |
+
for text in input_data.texts:
|
| 188 |
+
if not text.strip():
|
| 189 |
+
continue
|
| 190 |
+
|
| 191 |
+
text_start_time = time.time()
|
| 192 |
+
sentiment, output_text = self.sentiment_app.predict_sentiment(text.strip())
|
| 193 |
+
|
| 194 |
+
if sentiment:
|
| 195 |
+
probabilities = self._extract_probabilities(output_text)
|
| 196 |
+
confidence = probabilities.get(sentiment.lower(), 0.0)
|
| 197 |
+
|
| 198 |
+
result = SentimentResult(
|
| 199 |
+
sentiment=sentiment,
|
| 200 |
+
confidence=confidence,
|
| 201 |
+
probabilities=probabilities,
|
| 202 |
+
processing_time=time.time() - text_start_time,
|
| 203 |
+
text=text.strip()
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
results.append(result)
|
| 207 |
+
sentiment_distribution[sentiment] += 1
|
| 208 |
+
total_confidence += confidence
|
| 209 |
+
|
| 210 |
+
total_processing_time = time.time() - start_time
|
| 211 |
+
|
| 212 |
+
if not results:
|
| 213 |
+
raise HTTPException(status_code=500, detail="No valid analyses completed")
|
| 214 |
+
|
| 215 |
+
average_confidence = total_confidence / len(results)
|
| 216 |
+
|
| 217 |
+
return BatchSentimentResult(
|
| 218 |
+
results=results,
|
| 219 |
+
total_texts=len(results),
|
| 220 |
+
sentiment_distribution=sentiment_distribution,
|
| 221 |
+
average_confidence=average_confidence,
|
| 222 |
+
total_processing_time=total_processing_time
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
except Exception as e:
|
| 226 |
+
logger.error(f"Batch analysis failed: {e}")
|
| 227 |
+
raise HTTPException(status_code=500, detail=f"Batch analysis failed: {str(e)}")
|
| 228 |
+
|
| 229 |
+
@self.app.post("/memory/cleanup")
|
| 230 |
+
async def cleanup_memory():
|
| 231 |
+
"""Manual memory cleanup endpoint"""
|
| 232 |
+
if not self.sentiment_app:
|
| 233 |
+
raise HTTPException(status_code=503, detail="App not initialized")
|
| 234 |
+
|
| 235 |
+
try:
|
| 236 |
+
self.sentiment_app.cleanup_memory()
|
| 237 |
+
memory_usage = self.sentiment_app.get_memory_usage()
|
| 238 |
+
return {
|
| 239 |
+
"message": "Memory cleanup completed",
|
| 240 |
+
"memory_usage_mb": memory_usage,
|
| 241 |
+
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S')
|
| 242 |
+
}
|
| 243 |
+
except Exception as e:
|
| 244 |
+
logger.error(f"Memory cleanup failed: {e}")
|
| 245 |
+
raise HTTPException(status_code=500, detail="Memory cleanup failed")
|
| 246 |
+
|
| 247 |
+
def _extract_probabilities(self, output_text):
|
| 248 |
+
"""Extract probabilities from the formatted output text"""
|
| 249 |
+
probabilities = {"positive": 0.0, "neutral": 0.0, "negative": 0.0}
|
| 250 |
+
|
| 251 |
+
try:
|
| 252 |
+
lines = output_text.split('\n')
|
| 253 |
+
for line in lines:
|
| 254 |
+
# Look for lines with emojis and percentages
|
| 255 |
+
if '😠 **Negative:**' in line:
|
| 256 |
+
# Extract percentage from format: "😠 **Negative:** 25.50%"
|
| 257 |
+
parts = line.split('**Negative:**')[1].strip().rstrip('%')
|
| 258 |
+
probabilities["negative"] = float(parts) / 100
|
| 259 |
+
elif '😐 **Neutral:**' in line:
|
| 260 |
+
# Extract percentage from format: "😐 **Neutral:** 25.50%"
|
| 261 |
+
parts = line.split('**Neutral:**')[1].strip().rstrip('%')
|
| 262 |
+
probabilities["neutral"] = float(parts) / 100
|
| 263 |
+
elif '😊 **Positive:**' in line:
|
| 264 |
+
# Extract percentage from format: "😊 **Positive:** 25.50%"
|
| 265 |
+
parts = line.split('**Positive:**')[1].strip().rstrip('%')
|
| 266 |
+
probabilities["positive"] = float(parts) / 100
|
| 267 |
+
except Exception as e:
|
| 268 |
+
logger.warning(f"Failed to extract probabilities: {e}")
|
| 269 |
+
logger.debug(f"Output text was: {output_text}")
|
| 270 |
+
|
| 271 |
+
return probabilities
|
| 272 |
+
|
| 273 |
+
def run(self, host="0.0.0.0", port=7860):
|
| 274 |
+
"""Run the API server"""
|
| 275 |
+
logger.info(f"Starting API server on {host}:{port}")
|
| 276 |
+
uvicorn.run(
|
| 277 |
+
self.app,
|
| 278 |
+
host=host,
|
| 279 |
+
port=port,
|
| 280 |
+
log_level="info"
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def create_api_controller(sentiment_app):
|
| 285 |
+
"""Create and return API controller instance"""
|
| 286 |
+
return APIController(sentiment_app)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
if __name__ == "__main__":
|
| 290 |
+
# This allows running the API controller standalone for testing
|
| 291 |
+
from app import SentimentGradioApp
|
| 292 |
+
|
| 293 |
+
# Initialize the sentiment app
|
| 294 |
+
sentiment_app = SentimentGradioApp()
|
| 295 |
+
if not sentiment_app.load_model():
|
| 296 |
+
print("❌ Failed to load model")
|
| 297 |
+
exit(1)
|
| 298 |
+
|
| 299 |
+
# Create and run API controller
|
| 300 |
+
api_controller = create_api_controller(sentiment_app)
|
| 301 |
+
api_controller.run()
|
|
@@ -1,204 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Demo script for Vietnamese Sentiment Analysis
|
| 4 |
-
Shows how to use the fine-tuned model for real-time sentiment analysis
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import torch
|
| 8 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 9 |
-
import time
|
| 10 |
-
|
| 11 |
-
class SentimentDemo:
|
| 12 |
-
def __init__(self, model_path="./vietnamese_sentiment_finetuned"):
|
| 13 |
-
self.model_path = model_path
|
| 14 |
-
self.tokenizer = None
|
| 15 |
-
self.model = None
|
| 16 |
-
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 17 |
-
self.sentiment_labels = ["Negative", "Neutral", "Positive"]
|
| 18 |
-
|
| 19 |
-
def load_model(self):
|
| 20 |
-
"""Load the fine-tuned model"""
|
| 21 |
-
print(f"🤖 Loading model from: {self.model_path}")
|
| 22 |
-
print(f"📱 Device: {self.device}")
|
| 23 |
-
|
| 24 |
-
try:
|
| 25 |
-
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
|
| 26 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
|
| 27 |
-
self.model.to(self.device)
|
| 28 |
-
self.model.eval()
|
| 29 |
-
print("✅ Model loaded successfully!")
|
| 30 |
-
except Exception as e:
|
| 31 |
-
print(f"❌ Error loading model: {e}")
|
| 32 |
-
print("Please run the training first: python run_training.py")
|
| 33 |
-
return False
|
| 34 |
-
|
| 35 |
-
return True
|
| 36 |
-
|
| 37 |
-
def predict_sentiment(self, text):
|
| 38 |
-
"""Predict sentiment for given text"""
|
| 39 |
-
start_time = time.time()
|
| 40 |
-
|
| 41 |
-
# Tokenize
|
| 42 |
-
inputs = self.tokenizer(
|
| 43 |
-
text,
|
| 44 |
-
return_tensors="pt",
|
| 45 |
-
truncation=True,
|
| 46 |
-
padding=True,
|
| 47 |
-
max_length=512
|
| 48 |
-
)
|
| 49 |
-
|
| 50 |
-
# Move to device
|
| 51 |
-
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 52 |
-
|
| 53 |
-
# Predict
|
| 54 |
-
with torch.no_grad():
|
| 55 |
-
outputs = self.model(**inputs)
|
| 56 |
-
logits = outputs.logits
|
| 57 |
-
probabilities = torch.softmax(logits, dim=-1)
|
| 58 |
-
predicted_class = torch.argmax(probabilities, dim=-1).item()
|
| 59 |
-
confidence = torch.max(probabilities).item()
|
| 60 |
-
|
| 61 |
-
inference_time = time.time() - start_time
|
| 62 |
-
|
| 63 |
-
return {
|
| 64 |
-
"text": text,
|
| 65 |
-
"sentiment": self.sentiment_labels[predicted_class],
|
| 66 |
-
"sentiment_id": predicted_class,
|
| 67 |
-
"confidence": confidence,
|
| 68 |
-
"probabilities": probabilities.cpu().numpy()[0].tolist(),
|
| 69 |
-
"inference_time": inference_time
|
| 70 |
-
}
|
| 71 |
-
|
| 72 |
-
def demo_mode(self):
|
| 73 |
-
"""Run interactive demo"""
|
| 74 |
-
print("\n" + "="*60)
|
| 75 |
-
print("🎭 VIETNAMESE SENTIMENT ANALYSIS DEMO")
|
| 76 |
-
print("="*60)
|
| 77 |
-
print("\n💡 Type Vietnamese text to analyze sentiment")
|
| 78 |
-
print("📝 Type 'quit' to exit, 'help' for examples")
|
| 79 |
-
print("-"*60)
|
| 80 |
-
|
| 81 |
-
examples = [
|
| 82 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 83 |
-
"Môn học này quá khó và nhàm chán.",
|
| 84 |
-
"Lớp học ổn định, không có gì đặc biệt.",
|
| 85 |
-
"Tôi rất thích cách giảng dạy của thầy cô.",
|
| 86 |
-
"Chương trình học cần cải thiện nhiều."
|
| 87 |
-
]
|
| 88 |
-
|
| 89 |
-
while True:
|
| 90 |
-
text = input("\n🔤 Enter text: ").strip()
|
| 91 |
-
|
| 92 |
-
if text.lower() in ['quit', 'exit', 'q']:
|
| 93 |
-
print("\n👋 Goodbye!")
|
| 94 |
-
break
|
| 95 |
-
|
| 96 |
-
if text.lower() == 'help':
|
| 97 |
-
print("\n📚 Example texts you can try:")
|
| 98 |
-
for i, example in enumerate(examples, 1):
|
| 99 |
-
print(f" {i}. {example}")
|
| 100 |
-
continue
|
| 101 |
-
|
| 102 |
-
if not text:
|
| 103 |
-
continue
|
| 104 |
-
|
| 105 |
-
# Make prediction
|
| 106 |
-
result = self.predict_sentiment(text)
|
| 107 |
-
|
| 108 |
-
# Display result
|
| 109 |
-
sentiment_emoji = {"Negative": "😞", "Neutral": "😐", "Positive": "😊"}
|
| 110 |
-
emoji = sentiment_emoji[result["sentiment"]]
|
| 111 |
-
|
| 112 |
-
print(f"\n{emoji} Result:")
|
| 113 |
-
print(f" 📝 Text: {result['text']}")
|
| 114 |
-
print(f" 🎯 Sentiment: {result['sentiment']} (Class {result['sentiment_id']})")
|
| 115 |
-
print(f" 📊 Confidence: {result['confidence']:.3f}")
|
| 116 |
-
print(f" ⏱️ Time: {result['inference_time']:.3f}s")
|
| 117 |
-
|
| 118 |
-
# Show probability distribution
|
| 119 |
-
print(f" 📈 Probabilities:")
|
| 120 |
-
for i, (label, prob) in enumerate(zip(self.sentiment_labels, result['probabilities'])):
|
| 121 |
-
bar_length = int(prob * 20)
|
| 122 |
-
bar = "█" * bar_length + "░" * (20 - bar_length)
|
| 123 |
-
print(f" {label}: {bar} {prob:.3f}")
|
| 124 |
-
|
| 125 |
-
def batch_demo(self):
|
| 126 |
-
"""Demo with batch processing"""
|
| 127 |
-
print("\n" + "="*60)
|
| 128 |
-
print("📊 BATCH PROCESSING DEMO")
|
| 129 |
-
print("="*60)
|
| 130 |
-
|
| 131 |
-
test_texts = [
|
| 132 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 133 |
-
"Môn học này quá khó và nhàm chán.",
|
| 134 |
-
"Lớp học ổn định, không có gì đặc biệt.",
|
| 135 |
-
"Tôi rất thích cách giảng dạy của thầy cô.",
|
| 136 |
-
"Chương trình học cần cải thiện nhiều.",
|
| 137 |
-
"Thời gian biểu hợp lý, dễ theo kịp.",
|
| 138 |
-
"Bài tập quá nhiều và khó.",
|
| 139 |
-
"Môi trường học tập tốt, bạn bè thân thiện."
|
| 140 |
-
]
|
| 141 |
-
|
| 142 |
-
print(f"\n📝 Processing {len(test_texts)} texts...")
|
| 143 |
-
|
| 144 |
-
start_time = time.time()
|
| 145 |
-
results = []
|
| 146 |
-
|
| 147 |
-
for text in test_texts:
|
| 148 |
-
result = self.predict_sentiment(text)
|
| 149 |
-
results.append(result)
|
| 150 |
-
|
| 151 |
-
total_time = time.time() - start_time
|
| 152 |
-
|
| 153 |
-
print(f"\n⏱️ Total time: {total_time:.3f}s")
|
| 154 |
-
print(f"📊 Average time per text: {total_time/len(test_texts):.3f}s")
|
| 155 |
-
|
| 156 |
-
print(f"\n📋 Results:")
|
| 157 |
-
print("-"*60)
|
| 158 |
-
|
| 159 |
-
sentiment_counts = {"Positive": 0, "Neutral": 0, "Negative": 0}
|
| 160 |
-
|
| 161 |
-
for i, result in enumerate(results, 1):
|
| 162 |
-
sentiment_emoji = {"Negative": "😞", "Neutral": "😐", "Positive": "😊"}
|
| 163 |
-
emoji = sentiment_emoji[result["sentiment"]]
|
| 164 |
-
|
| 165 |
-
print(f"{i:2d}. {emoji} {result['sentiment']:8s} ({result['confidence']:.2f}) - {result['text'][:40]}...")
|
| 166 |
-
sentiment_counts[result["sentiment"]] += 1
|
| 167 |
-
|
| 168 |
-
print(f"\n📈 Summary:")
|
| 169 |
-
for sentiment, count in sentiment_counts.items():
|
| 170 |
-
emoji = {"Positive": "😊", "Neutral": "😐", "Negative": "😞"}[sentiment]
|
| 171 |
-
percentage = (count / len(results)) * 100
|
| 172 |
-
print(f" {emoji} {sentiment}: {count} ({percentage:.1f}%)")
|
| 173 |
-
|
| 174 |
-
def main():
|
| 175 |
-
"""Main demo function"""
|
| 176 |
-
print("🎯 Vietnamese Sentiment Analysis Demo")
|
| 177 |
-
print("=====================================")
|
| 178 |
-
|
| 179 |
-
# Initialize demo
|
| 180 |
-
demo = SentimentDemo()
|
| 181 |
-
|
| 182 |
-
# Load model
|
| 183 |
-
if not demo.load_model():
|
| 184 |
-
return
|
| 185 |
-
|
| 186 |
-
# Choose demo mode
|
| 187 |
-
print("\n🎮 Choose demo mode:")
|
| 188 |
-
print(" 1. Interactive (type your own text)")
|
| 189 |
-
print(" 2. Batch processing (predefined examples)")
|
| 190 |
-
|
| 191 |
-
while True:
|
| 192 |
-
choice = input("\nEnter choice (1 or 2): ").strip()
|
| 193 |
-
|
| 194 |
-
if choice == "1":
|
| 195 |
-
demo.demo_mode()
|
| 196 |
-
break
|
| 197 |
-
elif choice == "2":
|
| 198 |
-
demo.batch_demo()
|
| 199 |
-
break
|
| 200 |
-
else:
|
| 201 |
-
print("❌ Invalid choice. Please enter 1 or 2.")
|
| 202 |
-
|
| 203 |
-
if __name__ == "__main__":
|
| 204 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,631 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Gradio Web Interface for Vietnamese Sentiment Analysis
|
| 4 |
-
Interactive web UI for real-time sentiment analysis
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import gradio as gr
|
| 8 |
-
import torch
|
| 9 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 10 |
-
import time
|
| 11 |
-
import numpy as np
|
| 12 |
-
from datetime import datetime
|
| 13 |
-
import gc
|
| 14 |
-
import psutil
|
| 15 |
-
import os
|
| 16 |
-
import pandas as pd
|
| 17 |
-
|
| 18 |
-
class SentimentGradioApp:
|
| 19 |
-
def __init__(self, model_path="vietnamese_sentiment_finetuned", max_batch_size=10, quantize=False):
|
| 20 |
-
self.model_path = model_path
|
| 21 |
-
self.tokenizer = None
|
| 22 |
-
self.model = None
|
| 23 |
-
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 24 |
-
self.sentiment_labels = ["Negative", "Neutral", "Positive"]
|
| 25 |
-
self.sentiment_colors = {
|
| 26 |
-
"Negative": "#ff4444",
|
| 27 |
-
"Neutral": "#ffaa00",
|
| 28 |
-
"Positive": "#44ff44"
|
| 29 |
-
}
|
| 30 |
-
self.model_loaded = False
|
| 31 |
-
self.max_batch_size = max_batch_size
|
| 32 |
-
self.quantize = quantize
|
| 33 |
-
self.max_memory_mb = 4096 # Maximum memory usage in MB
|
| 34 |
-
|
| 35 |
-
def get_memory_usage(self):
|
| 36 |
-
"""Get current memory usage in MB"""
|
| 37 |
-
process = psutil.Process(os.getpid())
|
| 38 |
-
return process.memory_info().rss / 1024 / 1024
|
| 39 |
-
|
| 40 |
-
def check_memory_limit(self):
|
| 41 |
-
"""Check if memory usage is within limits"""
|
| 42 |
-
current_memory = self.get_memory_usage()
|
| 43 |
-
if current_memory > self.max_memory_mb:
|
| 44 |
-
return False, f"Memory usage ({current_memory:.1f}MB) exceeds limit ({self.max_memory_mb}MB)"
|
| 45 |
-
return True, f"Memory usage: {current_memory:.1f}MB"
|
| 46 |
-
|
| 47 |
-
def cleanup_memory(self):
|
| 48 |
-
"""Clean up GPU and CPU memory"""
|
| 49 |
-
if torch.cuda.is_available():
|
| 50 |
-
torch.cuda.empty_cache()
|
| 51 |
-
gc.collect()
|
| 52 |
-
|
| 53 |
-
def load_model(self):
|
| 54 |
-
"""Load the fine-tuned model"""
|
| 55 |
-
if self.model_loaded:
|
| 56 |
-
return True
|
| 57 |
-
|
| 58 |
-
try:
|
| 59 |
-
# Clean up any existing memory
|
| 60 |
-
self.cleanup_memory()
|
| 61 |
-
|
| 62 |
-
# Check memory before loading
|
| 63 |
-
memory_ok, memory_msg = self.check_memory_limit()
|
| 64 |
-
if not memory_ok:
|
| 65 |
-
print(f"❌ {memory_msg}")
|
| 66 |
-
return False
|
| 67 |
-
|
| 68 |
-
print(f"📊 {memory_msg}")
|
| 69 |
-
|
| 70 |
-
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
|
| 71 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
|
| 72 |
-
|
| 73 |
-
# Apply quantization if requested
|
| 74 |
-
if self.quantize and self.device.type == 'cpu':
|
| 75 |
-
print("🔧 Applying dynamic quantization for memory efficiency...")
|
| 76 |
-
self.model = torch.quantization.quantize_dynamic(
|
| 77 |
-
self.model, {torch.nn.Linear}, dtype=torch.qint8
|
| 78 |
-
)
|
| 79 |
-
|
| 80 |
-
self.model.to(self.device)
|
| 81 |
-
self.model.eval()
|
| 82 |
-
self.model_loaded = True
|
| 83 |
-
|
| 84 |
-
# Check memory after loading
|
| 85 |
-
memory_ok, memory_msg = self.check_memory_limit()
|
| 86 |
-
print(f"✅ Model loaded successfully from {self.model_path}")
|
| 87 |
-
print(f"📊 {memory_msg}")
|
| 88 |
-
|
| 89 |
-
return True
|
| 90 |
-
except Exception as e:
|
| 91 |
-
print(f"❌ Error loading model: {e}")
|
| 92 |
-
self.model_loaded = False
|
| 93 |
-
self.cleanup_memory()
|
| 94 |
-
return False
|
| 95 |
-
|
| 96 |
-
def is_model_available(self):
|
| 97 |
-
"""Check if model directory exists and is accessible"""
|
| 98 |
-
import os
|
| 99 |
-
return os.path.exists(self.model_path) and os.path.isdir(self.model_path)
|
| 100 |
-
|
| 101 |
-
def predict_sentiment(self, text):
|
| 102 |
-
"""Predict sentiment for given text"""
|
| 103 |
-
if not self.model_loaded:
|
| 104 |
-
return None, "❌ Model not loaded. Please train the model first."
|
| 105 |
-
|
| 106 |
-
if not text.strip():
|
| 107 |
-
return None, "❌ Please enter some text to analyze."
|
| 108 |
-
|
| 109 |
-
try:
|
| 110 |
-
# Check memory before prediction
|
| 111 |
-
memory_ok, memory_msg = self.check_memory_limit()
|
| 112 |
-
if not memory_ok:
|
| 113 |
-
return None, f"❌ {memory_msg}"
|
| 114 |
-
|
| 115 |
-
start_time = time.time()
|
| 116 |
-
|
| 117 |
-
# Tokenize
|
| 118 |
-
inputs = self.tokenizer(
|
| 119 |
-
text,
|
| 120 |
-
return_tensors="pt",
|
| 121 |
-
truncation=True,
|
| 122 |
-
padding=True,
|
| 123 |
-
max_length=512
|
| 124 |
-
)
|
| 125 |
-
|
| 126 |
-
# Move to device
|
| 127 |
-
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 128 |
-
|
| 129 |
-
# Predict
|
| 130 |
-
with torch.no_grad():
|
| 131 |
-
outputs = self.model(**inputs)
|
| 132 |
-
logits = outputs.logits
|
| 133 |
-
probabilities = torch.softmax(logits, dim=-1)
|
| 134 |
-
predicted_class = torch.argmax(probabilities, dim=-1).item()
|
| 135 |
-
confidence = torch.max(probabilities).item()
|
| 136 |
-
|
| 137 |
-
inference_time = time.time() - start_time
|
| 138 |
-
|
| 139 |
-
# Move to CPU and clean GPU memory
|
| 140 |
-
probs = probabilities.cpu().numpy()[0].tolist()
|
| 141 |
-
del probabilities, logits, outputs
|
| 142 |
-
self.cleanup_memory()
|
| 143 |
-
|
| 144 |
-
sentiment = self.sentiment_labels[predicted_class]
|
| 145 |
-
|
| 146 |
-
# Create detailed results
|
| 147 |
-
result = {
|
| 148 |
-
"sentiment": sentiment,
|
| 149 |
-
"confidence": confidence,
|
| 150 |
-
"probabilities": {
|
| 151 |
-
"Negative": probs[0],
|
| 152 |
-
"Neutral": probs[1],
|
| 153 |
-
"Positive": probs[2]
|
| 154 |
-
},
|
| 155 |
-
"inference_time": inference_time,
|
| 156 |
-
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 157 |
-
}
|
| 158 |
-
|
| 159 |
-
# Create formatted output
|
| 160 |
-
output_text = f"""
|
| 161 |
-
## 🎯 Sentiment Analysis Result
|
| 162 |
-
|
| 163 |
-
**Sentiment:** {sentiment}
|
| 164 |
-
**Confidence:** {confidence:.2%}
|
| 165 |
-
**Processing Time:** {inference_time:.3f}s
|
| 166 |
-
|
| 167 |
-
### 📊 Probability Distribution:
|
| 168 |
-
- 😠 **Negative:** {probs[0]:.2%}
|
| 169 |
-
- 😐 **Neutral:** {probs[1]:.2%}
|
| 170 |
-
- 😊 **Positive:** {probs[2]:.2%}
|
| 171 |
-
|
| 172 |
-
### 📝 Input Text:
|
| 173 |
-
> "{text}"
|
| 174 |
-
|
| 175 |
-
---
|
| 176 |
-
*Analysis completed at {result['timestamp']}*
|
| 177 |
-
*{memory_msg}*
|
| 178 |
-
""".strip()
|
| 179 |
-
|
| 180 |
-
return result, output_text
|
| 181 |
-
|
| 182 |
-
except Exception as e:
|
| 183 |
-
self.cleanup_memory()
|
| 184 |
-
return None, f"❌ Error during prediction: {str(e)}"
|
| 185 |
-
|
| 186 |
-
def batch_predict(self, texts):
|
| 187 |
-
"""Predict sentiment for multiple texts with memory management"""
|
| 188 |
-
if not self.model_loaded:
|
| 189 |
-
return [], "❌ Model not loaded. Please train the model first."
|
| 190 |
-
|
| 191 |
-
if not texts or not any(texts):
|
| 192 |
-
return [], "❌ Please enter some texts to analyze."
|
| 193 |
-
|
| 194 |
-
# Filter valid texts and apply batch size limit
|
| 195 |
-
valid_texts = [text.strip() for text in texts if text.strip()]
|
| 196 |
-
|
| 197 |
-
if len(valid_texts) > self.max_batch_size:
|
| 198 |
-
return [], f"❌ Too many texts ({len(valid_texts)}). Maximum batch size is {self.max_batch_size} for memory efficiency."
|
| 199 |
-
|
| 200 |
-
if not valid_texts:
|
| 201 |
-
return [], "❌ No valid texts provided."
|
| 202 |
-
|
| 203 |
-
# Check memory before batch processing
|
| 204 |
-
memory_ok, memory_msg = self.check_memory_limit()
|
| 205 |
-
if not memory_ok:
|
| 206 |
-
return [], f"❌ {memory_msg}"
|
| 207 |
-
|
| 208 |
-
results = []
|
| 209 |
-
try:
|
| 210 |
-
for i, text in enumerate(valid_texts):
|
| 211 |
-
# Check memory every 5 predictions
|
| 212 |
-
if i % 5 == 0:
|
| 213 |
-
memory_ok, memory_msg = self.check_memory_limit()
|
| 214 |
-
if not memory_ok:
|
| 215 |
-
break
|
| 216 |
-
|
| 217 |
-
result, _ = self.predict_sentiment(text)
|
| 218 |
-
if result:
|
| 219 |
-
results.append(result)
|
| 220 |
-
|
| 221 |
-
if not results:
|
| 222 |
-
return [], "❌ No valid predictions made."
|
| 223 |
-
|
| 224 |
-
# Create batch summary
|
| 225 |
-
total_texts = len(results)
|
| 226 |
-
sentiments = [r["sentiment"] for r in results]
|
| 227 |
-
avg_confidence = sum(r["confidence"] for r in results) / total_texts
|
| 228 |
-
|
| 229 |
-
sentiment_counts = {
|
| 230 |
-
"Positive": sentiments.count("Positive"),
|
| 231 |
-
"Neutral": sentiments.count("Neutral"),
|
| 232 |
-
"Negative": sentiments.count("Negative")
|
| 233 |
-
}
|
| 234 |
-
|
| 235 |
-
summary = f"""
|
| 236 |
-
## 📊 Batch Analysis Summary
|
| 237 |
-
|
| 238 |
-
**Total Texts Analyzed:** {total_texts}/{len(valid_texts)}
|
| 239 |
-
**Average Confidence:** {avg_confidence:.2%}
|
| 240 |
-
**Memory Used:** {self.get_memory_usage():.1f}MB
|
| 241 |
-
|
| 242 |
-
### 🎯 Sentiment Distribution:
|
| 243 |
-
- 😊 **Positive:** {sentiment_counts['Positive']} ({sentiment_counts['Positive']/total_texts:.1%})
|
| 244 |
-
- 😐 **Neutral:** {sentiment_counts['Neutral']} ({sentiment_counts['Neutral']/total_texts:.1%})
|
| 245 |
-
- 😠 **Negative:** {sentiment_counts['Negative']} ({sentiment_counts['Negative']/total_texts:.1%})
|
| 246 |
-
|
| 247 |
-
### 📋 Individual Results:
|
| 248 |
-
""".strip()
|
| 249 |
-
|
| 250 |
-
for i, result in enumerate(results, 1):
|
| 251 |
-
summary += f"\n**{i}.** {result['sentiment']} ({result['confidence']:.1%})"
|
| 252 |
-
|
| 253 |
-
# Final memory cleanup
|
| 254 |
-
self.cleanup_memory()
|
| 255 |
-
|
| 256 |
-
return results, summary
|
| 257 |
-
|
| 258 |
-
except Exception as e:
|
| 259 |
-
self.cleanup_memory()
|
| 260 |
-
return [], f"❌ Error during batch processing: {str(e)}"
|
| 261 |
-
|
| 262 |
-
def create_interface(max_batch_size=10, quantize=False):
|
| 263 |
-
"""Create the Gradio interface with memory management options"""
|
| 264 |
-
app = SentimentGradioApp(max_batch_size=max_batch_size, quantize=quantize)
|
| 265 |
-
|
| 266 |
-
# Check if model exists
|
| 267 |
-
if not app.is_model_available():
|
| 268 |
-
print("❌ Model not found. Please train the model first using: python run_training.py")
|
| 269 |
-
print("The model directory 'vietnamese_sentiment_finetuned' was not found.")
|
| 270 |
-
return create_no_model_interface()
|
| 271 |
-
|
| 272 |
-
# Load model
|
| 273 |
-
if not app.load_model():
|
| 274 |
-
print("❌ Failed to load model. Please check the model files and try again.")
|
| 275 |
-
return create_no_model_interface()
|
| 276 |
-
|
| 277 |
-
# Example texts
|
| 278 |
-
examples = [
|
| 279 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 280 |
-
"Môn học này quá khó và nhàm chán.",
|
| 281 |
-
"Lớp học ổn định, không có gì đặc biệt.",
|
| 282 |
-
"Tôi rất thích cách giảng dạy của thầy cô.",
|
| 283 |
-
"Chương trình học cần cải thiện nhiều."
|
| 284 |
-
]
|
| 285 |
-
|
| 286 |
-
# Custom CSS
|
| 287 |
-
css = """
|
| 288 |
-
.gradio-container {
|
| 289 |
-
max-width: 900px !important;
|
| 290 |
-
margin: auto !important;
|
| 291 |
-
}
|
| 292 |
-
.sentiment-positive {
|
| 293 |
-
color: #44ff44;
|
| 294 |
-
font-weight: bold;
|
| 295 |
-
}
|
| 296 |
-
.sentiment-neutral {
|
| 297 |
-
color: #ffaa00;
|
| 298 |
-
font-weight: bold;
|
| 299 |
-
}
|
| 300 |
-
.sentiment-negative {
|
| 301 |
-
color: #ff4444;
|
| 302 |
-
font-weight: bold;
|
| 303 |
-
}
|
| 304 |
-
"""
|
| 305 |
-
|
| 306 |
-
# Create interface
|
| 307 |
-
with gr.Blocks(
|
| 308 |
-
title="Vietnamese Sentiment Analysis",
|
| 309 |
-
theme=gr.themes.Soft(),
|
| 310 |
-
css=css
|
| 311 |
-
) as interface:
|
| 312 |
-
|
| 313 |
-
gr.Markdown("# 🎭 Vietnamese Sentiment Analysis")
|
| 314 |
-
gr.Markdown("Enter Vietnamese text to analyze sentiment using a fine-tuned transformer model.")
|
| 315 |
-
|
| 316 |
-
with gr.Tabs():
|
| 317 |
-
# Single Text Analysis Tab
|
| 318 |
-
with gr.Tab("📝 Single Text Analysis"):
|
| 319 |
-
with gr.Row():
|
| 320 |
-
with gr.Column(scale=3):
|
| 321 |
-
text_input = gr.Textbox(
|
| 322 |
-
label="Enter Vietnamese Text",
|
| 323 |
-
placeholder="Type or paste Vietnamese text here...",
|
| 324 |
-
lines=3
|
| 325 |
-
)
|
| 326 |
-
|
| 327 |
-
with gr.Row():
|
| 328 |
-
analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
|
| 329 |
-
clear_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 330 |
-
|
| 331 |
-
with gr.Column(scale=2):
|
| 332 |
-
gr.Examples(
|
| 333 |
-
examples=examples,
|
| 334 |
-
inputs=[text_input],
|
| 335 |
-
label="💡 Example Texts"
|
| 336 |
-
)
|
| 337 |
-
|
| 338 |
-
result_output = gr.Markdown(label="Analysis Result", visible=True)
|
| 339 |
-
confidence_plot = gr.BarPlot(
|
| 340 |
-
title="Confidence Scores",
|
| 341 |
-
x="sentiment",
|
| 342 |
-
y="confidence",
|
| 343 |
-
visible=False
|
| 344 |
-
)
|
| 345 |
-
|
| 346 |
-
# Batch Analysis Tab
|
| 347 |
-
with gr.Tab("📊 Batch Analysis"):
|
| 348 |
-
gr.Markdown(f"### 📝 Memory-Efficient Batch Processing")
|
| 349 |
-
gr.Markdown(f"**Maximum batch size:** {app.max_batch_size} texts (for memory efficiency)")
|
| 350 |
-
gr.Markdown(f"**Memory limit:** {app.max_memory_mb}MB")
|
| 351 |
-
|
| 352 |
-
batch_input = gr.Textbox(
|
| 353 |
-
label="Enter Multiple Texts (one per line)",
|
| 354 |
-
placeholder=f"Enter up to {app.max_batch_size} Vietnamese texts, one per line...",
|
| 355 |
-
lines=8,
|
| 356 |
-
max_lines=20
|
| 357 |
-
)
|
| 358 |
-
|
| 359 |
-
with gr.Row():
|
| 360 |
-
batch_analyze_btn = gr.Button("🔍 Analyze All", variant="primary")
|
| 361 |
-
batch_clear_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 362 |
-
memory_cleanup_btn = gr.Button("🧹 Memory Cleanup", variant="secondary")
|
| 363 |
-
|
| 364 |
-
batch_result_output = gr.Markdown(label="Batch Analysis Result")
|
| 365 |
-
memory_info = gr.Textbox(
|
| 366 |
-
label="Memory Usage",
|
| 367 |
-
value=f"{app.get_memory_usage():.1f}MB used",
|
| 368 |
-
interactive=False
|
| 369 |
-
)
|
| 370 |
-
|
| 371 |
-
# Model Info Tab
|
| 372 |
-
with gr.Tab("ℹ️ Model Information"):
|
| 373 |
-
gr.Markdown(f"""
|
| 374 |
-
## 🤖 Model Details
|
| 375 |
-
|
| 376 |
-
**Model Architecture:** Transformer-based sequence classification
|
| 377 |
-
**Base Model:** Pre-trained multilingual transformer
|
| 378 |
-
**Fine-tuned on:** Vietnamese sentiment dataset
|
| 379 |
-
**Languages:** Vietnamese (optimized)
|
| 380 |
-
**Labels:** Negative, Neutral, Positive
|
| 381 |
-
**Quantization:** {'Enabled' if app.quantize else 'Disabled'}
|
| 382 |
-
**Max Batch Size:** {app.max_batch_size} texts
|
| 383 |
-
|
| 384 |
-
## 📊 Performance Metrics
|
| 385 |
-
|
| 386 |
-
- **Accuracy:** 85-90% (on validation set)
|
| 387 |
-
- **Processing Speed:** ~100ms per text
|
| 388 |
-
- **Max Sequence Length:** 512 tokens
|
| 389 |
-
- **Memory Limit:** {app.max_memory_mb}MB
|
| 390 |
-
|
| 391 |
-
## 💡 Usage Tips
|
| 392 |
-
|
| 393 |
-
- Enter clear, grammatically correct Vietnamese text
|
| 394 |
-
- Longer texts (20-200 words) work best
|
| 395 |
-
- The model handles various Vietnamese dialects
|
| 396 |
-
- Confidence scores indicate prediction certainty
|
| 397 |
-
|
| 398 |
-
## 🛡️ Memory Management
|
| 399 |
-
|
| 400 |
-
- **Automatic Cleanup:** Memory is cleaned after each prediction
|
| 401 |
-
- **Batch Limits:** Maximum {app.max_batch_size} texts per batch to prevent overflow
|
| 402 |
-
- **Memory Monitoring:** Real-time memory usage tracking
|
| 403 |
-
- **GPU Optimization:** CUDA cache clearing when available
|
| 404 |
-
- **Quantization:** {'Enabled for CPU (reduces memory by ~4x)' if app.quantize else 'Disabled (can be enabled with quantize=True)'}
|
| 405 |
-
|
| 406 |
-
## ⚠️ Performance Notes
|
| 407 |
-
|
| 408 |
-
- If you encounter memory errors, try reducing batch size
|
| 409 |
-
- Enable quantization for CPU usage to save memory
|
| 410 |
-
- Use the Memory Cleanup button if needed
|
| 411 |
-
- Monitor memory usage in the Batch Analysis tab
|
| 412 |
-
""")
|
| 413 |
-
|
| 414 |
-
# Event handlers
|
| 415 |
-
def analyze_text(text):
|
| 416 |
-
result, output = app.predict_sentiment(text)
|
| 417 |
-
if result:
|
| 418 |
-
# Prepare data for confidence plot as pandas DataFrame
|
| 419 |
-
plot_data = pd.DataFrame([
|
| 420 |
-
{"sentiment": "Negative", "confidence": result["probabilities"]["Negative"]},
|
| 421 |
-
{"sentiment": "Neutral", "confidence": result["probabilities"]["Neutral"]},
|
| 422 |
-
{"sentiment": "Positive", "confidence": result["probabilities"]["Positive"]}
|
| 423 |
-
])
|
| 424 |
-
return output, gr.BarPlot(visible=True, value=plot_data)
|
| 425 |
-
else:
|
| 426 |
-
return output, gr.BarPlot(visible=False)
|
| 427 |
-
|
| 428 |
-
def clear_inputs():
|
| 429 |
-
return "", "", gr.BarPlot(visible=False)
|
| 430 |
-
|
| 431 |
-
def analyze_batch(texts):
|
| 432 |
-
if texts:
|
| 433 |
-
text_list = [line.strip() for line in texts.split('\n') if line.strip()]
|
| 434 |
-
results, summary = app.batch_predict(text_list)
|
| 435 |
-
return summary
|
| 436 |
-
return "❌ Please enter some texts to analyze."
|
| 437 |
-
|
| 438 |
-
def clear_batch():
|
| 439 |
-
return ""
|
| 440 |
-
|
| 441 |
-
def update_memory_info():
|
| 442 |
-
return f"{app.get_memory_usage():.1f}MB used"
|
| 443 |
-
|
| 444 |
-
def manual_memory_cleanup():
|
| 445 |
-
app.cleanup_memory()
|
| 446 |
-
return f"Memory cleaned. Current usage: {app.get_memory_usage():.1f}MB"
|
| 447 |
-
|
| 448 |
-
# Connect events
|
| 449 |
-
analyze_btn.click(
|
| 450 |
-
fn=analyze_text,
|
| 451 |
-
inputs=[text_input],
|
| 452 |
-
outputs=[result_output, confidence_plot]
|
| 453 |
-
)
|
| 454 |
-
|
| 455 |
-
clear_btn.click(
|
| 456 |
-
fn=clear_inputs,
|
| 457 |
-
outputs=[text_input, result_output, confidence_plot]
|
| 458 |
-
)
|
| 459 |
-
|
| 460 |
-
batch_analyze_btn.click(
|
| 461 |
-
fn=analyze_batch,
|
| 462 |
-
inputs=[batch_input],
|
| 463 |
-
outputs=[batch_result_output]
|
| 464 |
-
)
|
| 465 |
-
|
| 466 |
-
batch_clear_btn.click(
|
| 467 |
-
fn=clear_batch,
|
| 468 |
-
outputs=[batch_input]
|
| 469 |
-
)
|
| 470 |
-
|
| 471 |
-
memory_cleanup_btn.click(
|
| 472 |
-
fn=manual_memory_cleanup,
|
| 473 |
-
outputs=[memory_info]
|
| 474 |
-
)
|
| 475 |
-
|
| 476 |
-
# Update memory info periodically
|
| 477 |
-
interface.load(
|
| 478 |
-
fn=update_memory_info,
|
| 479 |
-
outputs=[memory_info]
|
| 480 |
-
)
|
| 481 |
-
|
| 482 |
-
return interface
|
| 483 |
-
|
| 484 |
-
def create_no_model_interface():
|
| 485 |
-
"""Create a fallback interface when no model is available"""
|
| 486 |
-
|
| 487 |
-
def show_training_instructions():
|
| 488 |
-
return """
|
| 489 |
-
## 🚨 Model Not Found
|
| 490 |
-
|
| 491 |
-
The sentiment analysis model is not available yet. Please follow these steps to train the model:
|
| 492 |
-
|
| 493 |
-
### 📋 Training Steps:
|
| 494 |
-
|
| 495 |
-
1. **Train the Model:**
|
| 496 |
-
```bash
|
| 497 |
-
python run_training.py
|
| 498 |
-
```
|
| 499 |
-
|
| 500 |
-
2. **Verify Model Creation:**
|
| 501 |
-
```bash
|
| 502 |
-
ls -la vietnamese_sentiment_finetuned/
|
| 503 |
-
```
|
| 504 |
-
|
| 505 |
-
3. **Restart Gradio App:**
|
| 506 |
-
```bash
|
| 507 |
-
python gradio_app.py
|
| 508 |
-
```
|
| 509 |
-
|
| 510 |
-
### 📁 Required Files:
|
| 511 |
-
- `run_training.py` - Training script
|
| 512 |
-
- `fine_tune_sentiment.py` - Fine-tuning utilities
|
| 513 |
-
- Dataset files (should be downloaded automatically)
|
| 514 |
-
|
| 515 |
-
### ⏱️ Expected Training Time:
|
| 516 |
-
- **CPU:** 30-60 minutes
|
| 517 |
-
- **GPU (CUDA):** 5-15 minutes
|
| 518 |
-
|
| 519 |
-
### 📊 What Training Does:
|
| 520 |
-
- Downloads pre-trained multilingual model
|
| 521 |
-
- Fine-tunes on Vietnamese sentiment data
|
| 522 |
-
- Creates `vietnamese_sentiment_finetuned/` directory
|
| 523 |
-
- Saves tokenizer and model files
|
| 524 |
-
|
| 525 |
-
### 🔧 Troubleshooting:
|
| 526 |
-
- Ensure sufficient disk space (~2GB)
|
| 527 |
-
- Check internet connection for dataset download
|
| 528 |
-
- Verify Python dependencies: `pip install -r requirements.txt`
|
| 529 |
-
|
| 530 |
-
Once training completes, refresh this page to access the full sentiment analysis interface!
|
| 531 |
-
"""
|
| 532 |
-
|
| 533 |
-
with gr.Blocks(
|
| 534 |
-
title="Vietnamese Sentiment Analysis - Setup Required",
|
| 535 |
-
theme=gr.themes.Soft()
|
| 536 |
-
) as interface:
|
| 537 |
-
|
| 538 |
-
gr.Markdown("# 🎭 Vietnamese Sentiment Analysis")
|
| 539 |
-
gr.Markdown("## 🚨 Setup Required - Model Not Trained")
|
| 540 |
-
|
| 541 |
-
gr.Markdown("""
|
| 542 |
-
### Welcome to the Vietnamese Sentiment Analysis Interface!
|
| 543 |
-
|
| 544 |
-
The AI model needs to be trained before you can use the sentiment analysis features.
|
| 545 |
-
This is a one-time setup process that fine-tunes a transformer model on Vietnamese text data.
|
| 546 |
-
""")
|
| 547 |
-
|
| 548 |
-
with gr.Accordion("📖 Click here for training instructions", open=True):
|
| 549 |
-
instructions_output = gr.Markdown(show_training_instructions())
|
| 550 |
-
|
| 551 |
-
with gr.Row():
|
| 552 |
-
with gr.Column():
|
| 553 |
-
gr.Markdown("### 🔍 Quick Start Commands")
|
| 554 |
-
gr.Code(
|
| 555 |
-
value="# Train the model\npython run_training.py\n\n# Then start the interface\npython gradio_app.py",
|
| 556 |
-
language="python",
|
| 557 |
-
label="Terminal Commands"
|
| 558 |
-
)
|
| 559 |
-
|
| 560 |
-
with gr.Column():
|
| 561 |
-
gr.Markdown("### 📊 Project Information")
|
| 562 |
-
gr.Markdown("""
|
| 563 |
-
- **Language:** Vietnamese
|
| 564 |
-
- **Model Type:** Transformer-based (BERT-like)
|
| 565 |
-
- **Classes:** Negative, Neutral, Positive
|
| 566 |
-
- **Interface:** Gradio Web UI
|
| 567 |
-
""")
|
| 568 |
-
|
| 569 |
-
gr.Markdown("---")
|
| 570 |
-
gr.Markdown("*After training completes, you'll be able to:*")
|
| 571 |
-
gr.Markdown("""
|
| 572 |
-
- ✅ Analyze Vietnamese text sentiment in real-time
|
| 573 |
-
- ✅ Process multiple texts at once (batch mode)
|
| 574 |
-
- ✅ View confidence scores and probability distributions
|
| 575 |
-
- ✅ Get detailed analysis with visual charts
|
| 576 |
-
""")
|
| 577 |
-
|
| 578 |
-
return interface
|
| 579 |
-
|
| 580 |
-
def main():
|
| 581 |
-
"""Main function to launch the Gradio app with memory management options"""
|
| 582 |
-
import argparse
|
| 583 |
-
|
| 584 |
-
parser = argparse.ArgumentParser(description="Vietnamese Sentiment Analysis Web Interface")
|
| 585 |
-
parser.add_argument("--max-batch-size", type=int, default=10,
|
| 586 |
-
help="Maximum batch size for memory efficiency (default: 10)")
|
| 587 |
-
parser.add_argument("--quantize", action="store_true",
|
| 588 |
-
help="Enable model quantization for memory efficiency (CPU only)")
|
| 589 |
-
parser.add_argument("--max-memory", type=int, default=4096,
|
| 590 |
-
help="Maximum memory usage in MB (default: 4096)")
|
| 591 |
-
parser.add_argument("--port", type=int, default=7862,
|
| 592 |
-
help="Port to run the interface on (default: 7862)")
|
| 593 |
-
parser.add_argument("--host", type=str, default="127.0.0.1",
|
| 594 |
-
help="Host to bind the interface to (default: 127.0.0.1)")
|
| 595 |
-
|
| 596 |
-
args = parser.parse_args()
|
| 597 |
-
|
| 598 |
-
print("🚀 Starting Vietnamese Sentiment Analysis Web Interface...")
|
| 599 |
-
print(f"🔧 Memory Settings:")
|
| 600 |
-
print(f" - Max Batch Size: {args.max_batch_size}")
|
| 601 |
-
print(f" - Quantization: {'Enabled' if args.quantize else 'Disabled'}")
|
| 602 |
-
print(f" - Max Memory: {args.max_memory}MB")
|
| 603 |
-
|
| 604 |
-
interface = create_interface(
|
| 605 |
-
max_batch_size=args.max_batch_size,
|
| 606 |
-
quantize=args.quantize
|
| 607 |
-
)
|
| 608 |
-
|
| 609 |
-
if interface is None:
|
| 610 |
-
print("❌ Failed to create interface. Exiting.")
|
| 611 |
-
return
|
| 612 |
-
|
| 613 |
-
# Update memory limit if specified
|
| 614 |
-
if hasattr(interface, 'app'):
|
| 615 |
-
interface.app.max_memory_mb = args.max_memory
|
| 616 |
-
|
| 617 |
-
print("✅ Interface created successfully!")
|
| 618 |
-
print("🌐 Launching web interface...")
|
| 619 |
-
print(f"📍 URL: http://{args.host}:{args.port}")
|
| 620 |
-
|
| 621 |
-
# Launch the interface
|
| 622 |
-
interface.launch(
|
| 623 |
-
server_name=args.host,
|
| 624 |
-
server_port=args.port,
|
| 625 |
-
share=False,
|
| 626 |
-
show_error=True,
|
| 627 |
-
quiet=False
|
| 628 |
-
)
|
| 629 |
-
|
| 630 |
-
if __name__ == "__main__":
|
| 631 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pages module for Vietnamese Sentiment Analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .single_analysis import create_single_analysis_page
|
| 6 |
+
from .batch_analysis import create_batch_analysis_page
|
| 7 |
+
from .model_info import create_model_info_page
|
| 8 |
+
from .api_endpoints import create_api_endpoints_page
|
| 9 |
+
|
| 10 |
+
__all__ = [
|
| 11 |
+
'create_single_analysis_page',
|
| 12 |
+
'create_batch_analysis_page',
|
| 13 |
+
'create_model_info_page',
|
| 14 |
+
'create_api_endpoints_page'
|
| 15 |
+
]
|
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
REST API Endpoints Page for Vietnamese Sentiment Analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
|
| 7 |
+
def create_api_endpoints_page():
|
| 8 |
+
"""Create the REST API endpoints tab"""
|
| 9 |
+
|
| 10 |
+
# REST API Endpoints Tab
|
| 11 |
+
with gr.Tab("🌐 REST API Endpoints"):
|
| 12 |
+
gr.Markdown("""
|
| 13 |
+
## 🌐 REST API Endpoints
|
| 14 |
+
|
| 15 |
+
Your sentiment analysis model is now available via REST API!
|
| 16 |
+
|
| 17 |
+
**Base URL:** `http://localhost:7861` (or your Hugging Face Space URL + `/api`)
|
| 18 |
+
|
| 19 |
+
### Available Endpoints:
|
| 20 |
+
|
| 21 |
+
#### 📝 Single Text Analysis
|
| 22 |
+
**POST** `/analyze`
|
| 23 |
+
```json
|
| 24 |
+
{{
|
| 25 |
+
"text": "Giảng viên dạy rất hay và tâm huyết.",
|
| 26 |
+
"language": "vi"
|
| 27 |
+
}}
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
#### 📊 Batch Analysis
|
| 31 |
+
**POST** `/analyze/batch`
|
| 32 |
+
```json
|
| 33 |
+
{{
|
| 34 |
+
"texts": [
|
| 35 |
+
"Text 1",
|
| 36 |
+
"Text 2",
|
| 37 |
+
"Text 3"
|
| 38 |
+
],
|
| 39 |
+
"language": "vi"
|
| 40 |
+
}}
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
#### ❤️ Health Check
|
| 44 |
+
**GET** `/health`
|
| 45 |
+
|
| 46 |
+
#### ℹ️ Model Information
|
| 47 |
+
**GET** `/model/info`
|
| 48 |
+
|
| 49 |
+
#### 🧹 Memory Cleanup
|
| 50 |
+
**POST** `/memory/cleanup`
|
| 51 |
+
|
| 52 |
+
### 📚 Interactive API Documentation
|
| 53 |
+
Visit **http://localhost:7861/docs** for interactive API documentation with Swagger UI.
|
| 54 |
+
|
| 55 |
+
### 🚀 Usage Examples
|
| 56 |
+
|
| 57 |
+
**cURL Example:**
|
| 58 |
+
```bash
|
| 59 |
+
curl -X POST "http://localhost:7861/analyze" \\
|
| 60 |
+
-H "Content-Type: application/json" \\
|
| 61 |
+
-d '{{"text": "Giảng viên dạy rất hay và tâm huyết."}}'
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
**Python Example:**
|
| 65 |
+
```python
|
| 66 |
+
import requests
|
| 67 |
+
|
| 68 |
+
response = requests.post(
|
| 69 |
+
"http://localhost:7861/analyze",
|
| 70 |
+
json={{"text": "Giảng viên dạy rất hay và tâm huyết."}}
|
| 71 |
+
)
|
| 72 |
+
result = response.json()
|
| 73 |
+
print(f"Sentiment: {{result['sentiment']}}")
|
| 74 |
+
print(f"Confidence: {{result['confidence']:.2%}}")
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
**JavaScript Example:**
|
| 78 |
+
```javascript
|
| 79 |
+
const response = await fetch('http://localhost:7861/analyze', {{
|
| 80 |
+
method: 'POST',
|
| 81 |
+
headers: {{ 'Content-Type': 'application/json' }},
|
| 82 |
+
body: JSON.stringify({{
|
| 83 |
+
text: 'Giảng viên dạy rất hay và tâm huyết.'
|
| 84 |
+
}})
|
| 85 |
+
}});
|
| 86 |
+
const result = await response.json();
|
| 87 |
+
console.log('Sentiment:', result.sentiment);
|
| 88 |
+
console.log('Confidence:', (result.confidence * 100).toFixed(2) + '%');
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### 📝 Response Format
|
| 92 |
+
```json
|
| 93 |
+
{{
|
| 94 |
+
"sentiment": "Positive",
|
| 95 |
+
"confidence": 0.89,
|
| 96 |
+
"probabilities": {{
|
| 97 |
+
"positive": 0.89,
|
| 98 |
+
"neutral": 0.08,
|
| 99 |
+
"negative": 0.03
|
| 100 |
+
}},
|
| 101 |
+
"processing_time": 0.123,
|
| 102 |
+
"text": "Giảng viên dạy rất hay và tâm huyết."
|
| 103 |
+
}}
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
### ⚠️ Rate Limiting & Performance
|
| 107 |
+
- **Maximum batch size:** 10 texts per request
|
| 108 |
+
- **Memory management:** Automatic cleanup after each request
|
| 109 |
+
- **Processing time:** ~100ms per text
|
| 110 |
+
- **CORS enabled:** Cross-origin requests supported
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
*API server runs alongside the Gradio interface for maximum flexibility!*
|
| 114 |
+
""")
|
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch Analysis Page for Vietnamese Sentiment Analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from io import StringIO
|
| 8 |
+
|
| 9 |
+
def create_batch_analysis_page(app_instance):
|
| 10 |
+
"""Create the batch analysis tab"""
|
| 11 |
+
|
| 12 |
+
def analyze_batch(texts):
|
| 13 |
+
"""Analyze sentiment for multiple texts"""
|
| 14 |
+
if not texts or not any(text.strip() for text in texts):
|
| 15 |
+
return "❌ Please enter some texts to analyze."
|
| 16 |
+
|
| 17 |
+
if not app_instance.model_loaded:
|
| 18 |
+
return "❌ Model not loaded. Please refresh the page."
|
| 19 |
+
|
| 20 |
+
# Filter valid texts
|
| 21 |
+
valid_texts = [text.strip() for text in texts if text.strip()]
|
| 22 |
+
|
| 23 |
+
if len(valid_texts) > 10:
|
| 24 |
+
return "❌ Too many texts. Maximum 10 texts per batch for memory efficiency."
|
| 25 |
+
|
| 26 |
+
if not valid_texts:
|
| 27 |
+
return "❌ No valid texts provided."
|
| 28 |
+
|
| 29 |
+
try:
|
| 30 |
+
results, error_msg = app_instance.batch_predict(valid_texts)
|
| 31 |
+
if error_msg:
|
| 32 |
+
return error_msg
|
| 33 |
+
|
| 34 |
+
if not results:
|
| 35 |
+
return "❌ No results generated. Please try again."
|
| 36 |
+
|
| 37 |
+
# Create a summary table
|
| 38 |
+
df_data = []
|
| 39 |
+
for result in results:
|
| 40 |
+
sentiment_emoji = {
|
| 41 |
+
"Positive": "😊",
|
| 42 |
+
"Neutral": "😐",
|
| 43 |
+
"Negative": "😠"
|
| 44 |
+
}.get(result["sentiment"], "❓")
|
| 45 |
+
|
| 46 |
+
df_data.append({
|
| 47 |
+
"Text": result["text"][:100] + ("..." if len(result["text"]) > 100 else ""),
|
| 48 |
+
"Sentiment": f"{sentiment_emoji} {result['sentiment']}",
|
| 49 |
+
"Confidence": f"{result['confidence']:.2%}",
|
| 50 |
+
"Processing Time": f"{result['processing_time']:.3f}s"
|
| 51 |
+
})
|
| 52 |
+
|
| 53 |
+
df = pd.DataFrame(df_data)
|
| 54 |
+
|
| 55 |
+
# Create summary statistics
|
| 56 |
+
sentiment_counts = df["Sentiment"].value_counts()
|
| 57 |
+
avg_confidence = sum(r["confidence"] for r in results) / len(results)
|
| 58 |
+
total_time = sum(r["processing_time"] for r in results)
|
| 59 |
+
|
| 60 |
+
summary = f"""
|
| 61 |
+
## 📊 Batch Analysis Results
|
| 62 |
+
|
| 63 |
+
**Summary Statistics:**
|
| 64 |
+
- Total texts analyzed: {len(results)}
|
| 65 |
+
- Average confidence: {avg_confidence:.2%}
|
| 66 |
+
- Total processing time: {total_time:.3f}s
|
| 67 |
+
- Average time per text: {total_time/len(results):.3f}s
|
| 68 |
+
|
| 69 |
+
**Sentiment Distribution:**
|
| 70 |
+
{sentiment_counts.to_string()}
|
| 71 |
+
|
| 72 |
+
### Detailed Results:
|
| 73 |
+
"""
|
| 74 |
+
|
| 75 |
+
# Convert DataFrame to markdown
|
| 76 |
+
table_md = df.to_markdown(index=False)
|
| 77 |
+
|
| 78 |
+
return summary + "\n" + table_md
|
| 79 |
+
|
| 80 |
+
except Exception as e:
|
| 81 |
+
app_instance.cleanup_memory()
|
| 82 |
+
return f"❌ Error during batch analysis: {str(e)}"
|
| 83 |
+
|
| 84 |
+
def clear_batch():
|
| 85 |
+
"""Clear batch inputs"""
|
| 86 |
+
return ""
|
| 87 |
+
|
| 88 |
+
# Batch Analysis Tab
|
| 89 |
+
with gr.Tab("📊 Batch Analysis"):
|
| 90 |
+
gr.Markdown("### 📝 Memory-Efficient Batch Processing")
|
| 91 |
+
gr.Markdown("**Maximum batch size:** 10 texts (for memory efficiency)")
|
| 92 |
+
gr.Markdown("**Memory limit:** 8GB")
|
| 93 |
+
|
| 94 |
+
with gr.Row():
|
| 95 |
+
with gr.Column(scale=2):
|
| 96 |
+
batch_input = gr.Textbox(
|
| 97 |
+
label="Enter Multiple Texts (one per line)",
|
| 98 |
+
placeholder="Enter text 1...\nEnter text 2...\nEnter text 3...",
|
| 99 |
+
lines=10,
|
| 100 |
+
max_lines=15
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
with gr.Row():
|
| 104 |
+
batch_analyze_btn = gr.Button("📊 Analyze Batch", variant="primary")
|
| 105 |
+
batch_clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
|
| 106 |
+
|
| 107 |
+
with gr.Column(scale=3):
|
| 108 |
+
batch_result_output = gr.Markdown(label="Batch Analysis Result")
|
| 109 |
+
|
| 110 |
+
# Connect events
|
| 111 |
+
batch_analyze_btn.click(
|
| 112 |
+
fn=analyze_batch,
|
| 113 |
+
inputs=[batch_input],
|
| 114 |
+
outputs=[batch_result_output]
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
batch_clear_btn.click(
|
| 118 |
+
fn=clear_batch,
|
| 119 |
+
outputs=[batch_input]
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
return batch_analyze_btn, batch_clear_btn, batch_input, batch_result_output
|
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Model Information Page for Vietnamese Sentiment Analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
def create_model_info_page(app_instance):
|
| 9 |
+
"""Create the model information tab"""
|
| 10 |
+
|
| 11 |
+
def update_memory_info():
|
| 12 |
+
"""Update memory usage information"""
|
| 13 |
+
if app_instance and app_instance.model_loaded:
|
| 14 |
+
memory_usage = app_instance.get_memory_usage()
|
| 15 |
+
return f"Memory usage: {memory_usage:.1f}MB used"
|
| 16 |
+
return "Memory usage: 0MB used"
|
| 17 |
+
|
| 18 |
+
def manual_memory_cleanup():
|
| 19 |
+
"""Manual memory cleanup"""
|
| 20 |
+
if app_instance and app_instance.model_loaded:
|
| 21 |
+
app_instance.cleanup_memory()
|
| 22 |
+
memory_usage = app_instance.get_memory_usage()
|
| 23 |
+
return f"Memory cleaned. Current usage: {memory_usage:.1f}MB"
|
| 24 |
+
return "App not initialized"
|
| 25 |
+
|
| 26 |
+
# Model Info Tab
|
| 27 |
+
with gr.Tab("ℹ️ Model Information"):
|
| 28 |
+
gr.Markdown(f"""
|
| 29 |
+
## 🤖 Model Details
|
| 30 |
+
|
| 31 |
+
**Model Architecture:** Transformer-based sequence classification
|
| 32 |
+
**Base Model:** {app_instance.finetuned_model}
|
| 33 |
+
**Languages:** Vietnamese (optimized)
|
| 34 |
+
**Labels:** Negative, Neutral, Positive
|
| 35 |
+
|
| 36 |
+
## 📊 Performance Metrics
|
| 37 |
+
|
| 38 |
+
- **Processing Speed:** ~100ms per text
|
| 39 |
+
- **Max Sequence Length:** 512 tokens
|
| 40 |
+
- **Memory Limit:** 8GB
|
| 41 |
+
|
| 42 |
+
## 💡 Usage Tips
|
| 43 |
+
|
| 44 |
+
- Enter clear, grammatically correct Vietnamese text
|
| 45 |
+
- Longer texts (20-200 words) work best
|
| 46 |
+
- The model handles various Vietnamese dialects
|
| 47 |
+
- Confidence scores indicate prediction certainty
|
| 48 |
+
|
| 49 |
+
## 🛡️ Memory Management
|
| 50 |
+
|
| 51 |
+
- **Automatic Cleanup:** Memory is cleaned after each prediction
|
| 52 |
+
- **Batch Limits:** Maximum 10 texts per batch to prevent overflow
|
| 53 |
+
- **Memory Monitoring:** Real-time memory usage tracking
|
| 54 |
+
- **GPU Optimization:** CUDA cache clearing when available
|
| 55 |
+
|
| 56 |
+
## ⚠️ Performance Notes
|
| 57 |
+
|
| 58 |
+
- If you encounter memory errors, try reducing batch size
|
| 59 |
+
- Use the Memory Cleanup button if needed
|
| 60 |
+
- Monitor memory usage in the Batch Analysis tab
|
| 61 |
+
- Model loaded directly from Hugging Face Hub (no local training required)
|
| 62 |
+
""")
|
| 63 |
+
|
| 64 |
+
with gr.Row():
|
| 65 |
+
memory_info = gr.Textbox(
|
| 66 |
+
label="Memory Usage",
|
| 67 |
+
value="Memory usage: 0MB used",
|
| 68 |
+
interactive=False
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
memory_cleanup_btn = gr.Button("🧹 Memory Cleanup", variant="secondary")
|
| 72 |
+
|
| 73 |
+
# Connect memory cleanup event
|
| 74 |
+
memory_cleanup_btn.click(
|
| 75 |
+
fn=manual_memory_cleanup,
|
| 76 |
+
outputs=[memory_info]
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
return memory_cleanup_btn, memory_info
|
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Single Text Analysis Page for Vietnamese Sentiment Analysis
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
def create_single_analysis_page(app_instance):
|
| 9 |
+
"""Create the single text analysis tab"""
|
| 10 |
+
|
| 11 |
+
def analyze_sentiment(text):
|
| 12 |
+
"""Analyze sentiment of a single text"""
|
| 13 |
+
if not text.strip():
|
| 14 |
+
return "❌ Please enter some text to analyze."
|
| 15 |
+
|
| 16 |
+
if not app_instance.model_loaded:
|
| 17 |
+
return "❌ Model not loaded. Please refresh the page."
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
sentiment, output_text = app_instance.predict_sentiment(text.strip())
|
| 21 |
+
if sentiment:
|
| 22 |
+
return output_text
|
| 23 |
+
else:
|
| 24 |
+
return "❌ Analysis failed. Please try again."
|
| 25 |
+
except Exception as e:
|
| 26 |
+
app_instance.cleanup_memory()
|
| 27 |
+
return f"❌ Error during analysis: {str(e)}"
|
| 28 |
+
|
| 29 |
+
# Single Text Analysis Tab
|
| 30 |
+
with gr.Tab("📝 Single Text Analysis"):
|
| 31 |
+
gr.Markdown("# 🎭 Vietnamese Sentiment Analysis")
|
| 32 |
+
gr.Markdown("Enter Vietnamese text to analyze sentiment using a transformer model from Hugging Face.")
|
| 33 |
+
|
| 34 |
+
with gr.Row():
|
| 35 |
+
with gr.Column(scale=3):
|
| 36 |
+
text_input = gr.Textbox(
|
| 37 |
+
label="Enter Vietnamese Text",
|
| 38 |
+
placeholder="Nhập văn bản tiếng Việt để phân tích cảm xúc...",
|
| 39 |
+
lines=4,
|
| 40 |
+
max_lines=10
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
with gr.Row():
|
| 44 |
+
analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
|
| 45 |
+
clear_btn = gr.Button("🗑️ Clear", variant="secondary")
|
| 46 |
+
|
| 47 |
+
with gr.Column(scale=2):
|
| 48 |
+
result_output = gr.Markdown(label="Analysis Result", visible=True)
|
| 49 |
+
|
| 50 |
+
# Example texts
|
| 51 |
+
examples = [
|
| 52 |
+
"Giảng viên dạy rất hay và tâm huyết.",
|
| 53 |
+
"Khóa học này không tốt lắm.",
|
| 54 |
+
"Cơ sở vật chất bình thường.",
|
| 55 |
+
"Học phí quá cao.",
|
| 56 |
+
"Nội dung giảng dạy rất hữu ích."
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
+
gr.Examples(
|
| 60 |
+
examples=examples,
|
| 61 |
+
inputs=[text_input],
|
| 62 |
+
label="Example Texts"
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
# Connect events
|
| 66 |
+
analyze_btn.click(
|
| 67 |
+
fn=analyze_sentiment,
|
| 68 |
+
inputs=[text_input],
|
| 69 |
+
outputs=[result_output]
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
clear_btn.click(
|
| 73 |
+
fn=lambda: "",
|
| 74 |
+
outputs=[text_input]
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
return analyze_btn, clear_btn, text_input, result_output
|
|
@@ -1,277 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
-
import numpy as np
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from sklearn.metrics import classification_report, confusion_matrix
|
| 6 |
-
import matplotlib.pyplot as plt
|
| 7 |
-
import seaborn as sns
|
| 8 |
-
import argparse
|
| 9 |
-
|
| 10 |
-
class SentimentTester:
|
| 11 |
-
def __init__(self, model_path="./vietnamese_sentiment_finetuned"):
|
| 12 |
-
self.model_path = model_path
|
| 13 |
-
self.tokenizer = None
|
| 14 |
-
self.model = None
|
| 15 |
-
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 16 |
-
|
| 17 |
-
def load_model(self):
|
| 18 |
-
"""Load the fine-tuned model and tokenizer"""
|
| 19 |
-
print(f"Loading model from: {self.model_path}")
|
| 20 |
-
print(f"Using device: {self.device}")
|
| 21 |
-
|
| 22 |
-
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
|
| 23 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(self.model_path)
|
| 24 |
-
self.model.to(self.device)
|
| 25 |
-
self.model.eval()
|
| 26 |
-
|
| 27 |
-
print("Model loaded successfully!")
|
| 28 |
-
print(f"Number of labels: {self.model.config.num_labels}")
|
| 29 |
-
|
| 30 |
-
def predict_sentiment(self, text, return_probabilities=False):
|
| 31 |
-
"""Predict sentiment for a single text"""
|
| 32 |
-
# Tokenize the text
|
| 33 |
-
inputs = self.tokenizer(
|
| 34 |
-
text,
|
| 35 |
-
return_tensors="pt",
|
| 36 |
-
truncation=True,
|
| 37 |
-
padding=True,
|
| 38 |
-
max_length=512
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
# Move to device
|
| 42 |
-
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 43 |
-
|
| 44 |
-
# Get predictions
|
| 45 |
-
with torch.no_grad():
|
| 46 |
-
outputs = self.model(**inputs)
|
| 47 |
-
logits = outputs.logits
|
| 48 |
-
probabilities = torch.softmax(logits, dim=-1)
|
| 49 |
-
predicted_class = torch.argmax(probabilities, dim=-1).item()
|
| 50 |
-
|
| 51 |
-
if return_probabilities:
|
| 52 |
-
return predicted_class, probabilities.cpu().numpy()[0]
|
| 53 |
-
else:
|
| 54 |
-
return predicted_class
|
| 55 |
-
|
| 56 |
-
def predict_batch(self, texts):
|
| 57 |
-
"""Predict sentiment for a batch of texts"""
|
| 58 |
-
predictions = []
|
| 59 |
-
probabilities = []
|
| 60 |
-
|
| 61 |
-
for text in texts:
|
| 62 |
-
pred, probs = self.predict_sentiment(text, return_probabilities=True)
|
| 63 |
-
predictions.append(pred)
|
| 64 |
-
probabilities.append(probs)
|
| 65 |
-
|
| 66 |
-
return np.array(predictions), np.array(probabilities)
|
| 67 |
-
|
| 68 |
-
def test_custom_texts(self):
|
| 69 |
-
"""Test the model with custom Vietnamese texts"""
|
| 70 |
-
test_texts = [
|
| 71 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 72 |
-
"Môn học này quá khó và nhàm chán.",
|
| 73 |
-
"Lớp học ổn định, không có gì đặc biệt.",
|
| 74 |
-
"Tôi rất thích cách giảng dạy của thầy cô.",
|
| 75 |
-
"Chương trình học cần cải thiện nhiều.",
|
| 76 |
-
"Thời gian biểu hợp lý, dễ theo kịp.",
|
| 77 |
-
"Bài tập quá nhiều và khó.",
|
| 78 |
-
"Môi trường học tập tốt, bạn bè thân thiện."
|
| 79 |
-
]
|
| 80 |
-
|
| 81 |
-
print("\n" + "="*60)
|
| 82 |
-
print("TESTING WITH CUSTOM VIETNAMESE TEXTS")
|
| 83 |
-
print("="*60)
|
| 84 |
-
|
| 85 |
-
label_names = ["Negative", "Neutral", "Positive"] # Assuming 3 classes
|
| 86 |
-
|
| 87 |
-
for i, text in enumerate(test_texts, 1):
|
| 88 |
-
pred, probs = self.predict_sentiment(text, return_probabilities=True)
|
| 89 |
-
confidence = np.max(probs)
|
| 90 |
-
|
| 91 |
-
print(f"\n{i}. Text: {text}")
|
| 92 |
-
print(f" Predicted: {label_names[pred]} (Class {pred})")
|
| 93 |
-
print(f" Confidence: {confidence:.4f}")
|
| 94 |
-
print(f" Probabilities: {probs}")
|
| 95 |
-
|
| 96 |
-
def interactive_test(self):
|
| 97 |
-
"""Interactive testing mode"""
|
| 98 |
-
print("\n" + "="*60)
|
| 99 |
-
print("INTERACTIVE SENTIMENT ANALYSIS")
|
| 100 |
-
print("="*60)
|
| 101 |
-
print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
|
| 102 |
-
|
| 103 |
-
label_names = ["Negative", "Neutral", "Positive"] # Assuming 3 classes
|
| 104 |
-
|
| 105 |
-
while True:
|
| 106 |
-
text = input("\nEnter text: ").strip()
|
| 107 |
-
|
| 108 |
-
if text.lower() in ['quit', 'exit', 'q']:
|
| 109 |
-
break
|
| 110 |
-
|
| 111 |
-
if not text:
|
| 112 |
-
continue
|
| 113 |
-
|
| 114 |
-
try:
|
| 115 |
-
pred, probs = self.predict_sentiment(text, return_probabilities=True)
|
| 116 |
-
confidence = np.max(probs)
|
| 117 |
-
|
| 118 |
-
print(f"Predicted: {label_names[pred]} (Class {pred})")
|
| 119 |
-
print(f"Confidence: {confidence:.4f}")
|
| 120 |
-
print(f"Probabilities: {probs}")
|
| 121 |
-
|
| 122 |
-
except Exception as e:
|
| 123 |
-
print(f"Error: {e}")
|
| 124 |
-
|
| 125 |
-
def evaluate_from_file(self, file_path, text_column, label_column=None):
|
| 126 |
-
"""Evaluate model on a dataset from file"""
|
| 127 |
-
print(f"\nEvaluating on dataset from: {file_path}")
|
| 128 |
-
|
| 129 |
-
try:
|
| 130 |
-
# Load dataset
|
| 131 |
-
if file_path.endswith('.csv'):
|
| 132 |
-
df = pd.read_csv(file_path)
|
| 133 |
-
elif file_path.endswith('.json'):
|
| 134 |
-
df = pd.read_json(file_path)
|
| 135 |
-
else:
|
| 136 |
-
print("Unsupported file format. Please use CSV or JSON.")
|
| 137 |
-
return
|
| 138 |
-
|
| 139 |
-
print(f"Loaded {len(df)} samples")
|
| 140 |
-
|
| 141 |
-
# Get texts and labels
|
| 142 |
-
texts = df[text_column].tolist()
|
| 143 |
-
|
| 144 |
-
if label_column and label_column in df.columns:
|
| 145 |
-
true_labels = df[label_column].tolist()
|
| 146 |
-
has_labels = True
|
| 147 |
-
else:
|
| 148 |
-
true_labels = None
|
| 149 |
-
has_labels = False
|
| 150 |
-
|
| 151 |
-
# Make predictions
|
| 152 |
-
print("Making predictions...")
|
| 153 |
-
predictions, probabilities = self.predict_batch(texts)
|
| 154 |
-
|
| 155 |
-
# Display results
|
| 156 |
-
if has_labels:
|
| 157 |
-
print("\nClassification Report:")
|
| 158 |
-
print(classification_report(true_labels, predictions))
|
| 159 |
-
|
| 160 |
-
# Confusion matrix
|
| 161 |
-
cm = confusion_matrix(true_labels, predictions)
|
| 162 |
-
plt.figure(figsize=(8, 6))
|
| 163 |
-
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
|
| 164 |
-
plt.title('Confusion Matrix')
|
| 165 |
-
plt.xlabel('Predicted')
|
| 166 |
-
plt.ylabel('Actual')
|
| 167 |
-
plt.savefig('test_confusion_matrix.png', dpi=300, bbox_inches='tight')
|
| 168 |
-
plt.show()
|
| 169 |
-
|
| 170 |
-
# Calculate accuracy
|
| 171 |
-
accuracy = np.mean(np.array(predictions) == np.array(true_labels))
|
| 172 |
-
print(f"Overall Accuracy: {accuracy:.4f}")
|
| 173 |
-
|
| 174 |
-
# Show some examples
|
| 175 |
-
print("\nSample predictions:")
|
| 176 |
-
label_names = ["Negative", "Neutral", "Positive"]
|
| 177 |
-
for i in range(min(5, len(texts))):
|
| 178 |
-
pred_label = label_names[predictions[i]]
|
| 179 |
-
confidence = np.max(probabilities[i])
|
| 180 |
-
true_label = f" (True: {label_names[true_labels[i]]})" if has_labels else ""
|
| 181 |
-
print(f"{i+1}. {texts[i][:50]}...")
|
| 182 |
-
print(f" Predicted: {pred_label} (Confidence: {confidence:.3f}){true_label}")
|
| 183 |
-
|
| 184 |
-
except Exception as e:
|
| 185 |
-
print(f"Error evaluating file: {e}")
|
| 186 |
-
|
| 187 |
-
def compare_with_original(self):
|
| 188 |
-
"""Compare fine-tuned model with original model"""
|
| 189 |
-
print("\n" + "="*60)
|
| 190 |
-
print("COMPARING WITH ORIGINAL MODEL")
|
| 191 |
-
print("="*60)
|
| 192 |
-
|
| 193 |
-
test_texts = [
|
| 194 |
-
"Giảng viên dạy rất hay và tâm huyết.",
|
| 195 |
-
"Môn học này quá khó và nhàm chán.",
|
| 196 |
-
"Lớp học ổn định, không có gì đặc biệt."
|
| 197 |
-
]
|
| 198 |
-
|
| 199 |
-
original_model = "5CD-AI/Vietnamese-Sentiment-visobert"
|
| 200 |
-
|
| 201 |
-
try:
|
| 202 |
-
# Load original model
|
| 203 |
-
print("Loading original model...")
|
| 204 |
-
original_tokenizer = AutoTokenizer.from_pretrained(original_model)
|
| 205 |
-
original_model_instance = AutoModelForSequenceClassification.from_pretrained(original_model)
|
| 206 |
-
original_model_instance.to(self.device)
|
| 207 |
-
original_model_instance.eval()
|
| 208 |
-
|
| 209 |
-
print("\nComparison Results:")
|
| 210 |
-
print("-" * 50)
|
| 211 |
-
|
| 212 |
-
label_names = ["Negative", "Neutral", "Positive"]
|
| 213 |
-
|
| 214 |
-
for i, text in enumerate(test_texts, 1):
|
| 215 |
-
# Fine-tuned model prediction
|
| 216 |
-
ft_pred, ft_probs = self.predict_sentiment(text, return_probabilities=True)
|
| 217 |
-
|
| 218 |
-
# Original model prediction
|
| 219 |
-
inputs = original_tokenizer(
|
| 220 |
-
text,
|
| 221 |
-
return_tensors="pt",
|
| 222 |
-
truncation=True,
|
| 223 |
-
padding=True,
|
| 224 |
-
max_length=512
|
| 225 |
-
)
|
| 226 |
-
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
| 227 |
-
|
| 228 |
-
with torch.no_grad():
|
| 229 |
-
outputs = original_model_instance(**inputs)
|
| 230 |
-
orig_logits = outputs.logits
|
| 231 |
-
orig_probs = torch.softmax(orig_logits, dim=-1)
|
| 232 |
-
orig_pred = torch.argmax(orig_probs, dim=-1).item()
|
| 233 |
-
orig_probs = orig_probs.cpu().numpy()[0]
|
| 234 |
-
|
| 235 |
-
print(f"\n{i}. Text: {text}")
|
| 236 |
-
print(f" Fine-tuned: {label_names[ft_pred]} (Conf: {np.max(ft_probs):.3f})")
|
| 237 |
-
print(f" Original: {label_names[orig_pred]} (Conf: {np.max(orig_probs):.3f})")
|
| 238 |
-
|
| 239 |
-
if ft_pred != orig_pred:
|
| 240 |
-
print(f" *** DIFFERENT PREDICTION ***")
|
| 241 |
-
|
| 242 |
-
except Exception as e:
|
| 243 |
-
print(f"Error in comparison: {e}")
|
| 244 |
-
|
| 245 |
-
def main():
|
| 246 |
-
parser = argparse.ArgumentParser(description='Test fine-tuned Vietnamese sentiment analysis model')
|
| 247 |
-
parser.add_argument('--model_path', type=str, default='./vietnamese_sentiment_finetuned',
|
| 248 |
-
help='Path to the fine-tuned model')
|
| 249 |
-
parser.add_argument('--mode', type=str, choices=['custom', 'interactive', 'file', 'compare'],
|
| 250 |
-
default='custom', help='Testing mode')
|
| 251 |
-
parser.add_argument('--file_path', type=str, help='Path to test file (for file mode)')
|
| 252 |
-
parser.add_argument('--text_column', type=str, default='text', help='Text column name (for file mode)')
|
| 253 |
-
parser.add_argument('--label_column', type=str, help='Label column name (for file mode)')
|
| 254 |
-
|
| 255 |
-
args = parser.parse_args()
|
| 256 |
-
|
| 257 |
-
# Initialize tester
|
| 258 |
-
tester = SentimentTester(args.model_path)
|
| 259 |
-
|
| 260 |
-
# Load model
|
| 261 |
-
tester.load_model()
|
| 262 |
-
|
| 263 |
-
# Run tests based on mode
|
| 264 |
-
if args.mode == 'custom':
|
| 265 |
-
tester.test_custom_texts()
|
| 266 |
-
elif args.mode == 'interactive':
|
| 267 |
-
tester.interactive_test()
|
| 268 |
-
elif args.mode == 'file':
|
| 269 |
-
if not args.file_path:
|
| 270 |
-
print("Error: --file_path required for file mode")
|
| 271 |
-
return
|
| 272 |
-
tester.evaluate_from_file(args.file_path, args.text_column, args.label_column)
|
| 273 |
-
elif args.mode == 'compare':
|
| 274 |
-
tester.compare_with_original()
|
| 275 |
-
|
| 276 |
-
if __name__ == "__main__":
|
| 277 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -24,4 +24,9 @@ safetensors>=0.3.1
|
|
| 24 |
sentencepiece>=0.1.96
|
| 25 |
protobuf>=3.20.0
|
| 26 |
tokenizers>=0.13.3
|
| 27 |
-
huggingface-hub>=0.16.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
sentencepiece>=0.1.96
|
| 25 |
protobuf>=3.20.0
|
| 26 |
tokenizers>=0.13.3
|
| 27 |
+
huggingface-hub>=0.16.4
|
| 28 |
+
|
| 29 |
+
# API dependencies
|
| 30 |
+
fastapi>=0.104.0
|
| 31 |
+
uvicorn>=0.24.0
|
| 32 |
+
pydantic>=2.5.0
|