Spaces:

shegga
/

SentimentAnalysisForNMTTNT

Runtime error

App Files Files Community

SentimentAnalysisForNMTTNT / py /pages /batch_analysis.py

shegga

🎉 Major Refactor: Modular Architecture with Automatic Fine-Tuning

b8ae42e about 2 months ago

raw

history blame contribute delete

4.15 kB

	"""
	Batch Analysis Page for Vietnamese Sentiment Analysis
	"""

	import gradio as gr
	import pandas as pd
	from io import StringIO

	# Maximum number of texts accepted per batch request; shared by the
	# validation message and the on-page hint so they cannot drift apart.
	_MAX_BATCH_SIZE = 10


	def _split_batch_input(texts):
	    """Return the non-empty, stripped entries of a batch submission.

	    Args:
	        texts: Either the raw textbox content (one text per line) or an
	            iterable of strings. ``None`` is treated as empty input.

	    Returns:
	        A list of stripped, non-blank strings in their original order.
	    """
	    if texts is None:
	        return []
	    # The Gradio textbox delivers a single string. Iterating it directly
	    # would yield individual characters, so split it into lines first.
	    if isinstance(texts, str):
	        texts = texts.splitlines()
	    return [entry.strip() for entry in texts if entry and entry.strip()]


	def _format_batch_report(results):
	    """Render batch prediction results as a Markdown report.

	    Args:
	        results: Non-empty sequence of mappings, each providing the keys
	            ``"text"``, ``"sentiment"``, ``"confidence"`` and
	            ``"processing_time"``.

	    Returns:
	        Markdown containing summary statistics, the sentiment distribution
	        and a table with one row per analysed text.
	    """
	    from collections import Counter

	    emoji_by_sentiment = {
	        "Positive": "😊",
	        "Neutral": "😐",
	        "Negative": "😠",
	    }

	    rows = []
	    for result in results:
	        text = result["text"]
	        preview = text[:100] + ("..." if len(text) > 100 else "")
	        # A raw pipe or line break inside a cell would break the table row.
	        preview = preview.replace("|", "\\|").replace("\n", " ")
	        emoji = emoji_by_sentiment.get(result["sentiment"], "❓")
	        rows.append((
	            preview,
	            f"{emoji} {result['sentiment']}",
	            f"{result['confidence']:.2%}",
	            f"{result['processing_time']:.3f}s",
	        ))

	    total = len(results)
	    avg_confidence = sum(r["confidence"] for r in results) / total
	    total_time = sum(r["processing_time"] for r in results)
	    # Most frequent label first; ties keep first-seen order.
	    distribution = Counter(row[1] for row in rows).most_common()

	    # The report is assembled line by line so that source indentation can
	    # never leak into the output (indented Markdown renders as a code block).
	    # The distribution is a bullet list because Markdown collapses single
	    # newlines inside a paragraph.
	    report_lines = [
	        "",
	        "## 📊 Batch Analysis Results",
	        "",
	        "Summary Statistics:",
	        "",
	        f"- Total texts analyzed: {total}",
	        f"- Average confidence: {avg_confidence:.2%}",
	        f"- Total processing time: {total_time:.3f}s",
	        f"- Average time per text: {total_time / total:.3f}s",
	        "",
	        "Sentiment Distribution:",
	        "",
	        *(f"- {label}: {count}" for label, count in distribution),
	        "",
	        "### Detailed Results:",
	        "",
	        "| Text | Sentiment | Confidence | Processing Time |",
	        "| --- | --- | --- | --- |",
	        *("| " + " | ".join(row) + " |" for row in rows),
	    ]
	    return "\n".join(report_lines)


	def create_batch_analysis_page(app_instance):
	    """Create the batch analysis tab.

	    Must be called inside an active Gradio layout context, because the tab
	    and its components are created as a side effect.

	    Args:
	        app_instance: Application object. This function reads its
	            ``model_loaded`` attribute and calls ``batch_predict(texts)``
	            (expected to return ``(results, error_msg)``) and
	            ``cleanup_memory()``.

	    Returns:
	        Tuple ``(batch_analyze_btn, batch_clear_btn, batch_input,
	        batch_result_output)`` of the created Gradio components.
	    """

	    def analyze_batch(texts):
	        """Analyze sentiment for multiple texts and return a Markdown report.

	        ``texts`` is the textbox content with one text per line; an iterable
	        of strings is also accepted. Problems are reported as a message
	        string rather than raised, so the result can always be displayed.
	        """
	        valid_texts = _split_batch_input(texts)
	        if not valid_texts:
	            return "❌ Please enter some texts to analyze."

	        if not app_instance.model_loaded:
	            return "❌ Model not loaded. Please refresh the page."

	        if len(valid_texts) > _MAX_BATCH_SIZE:
	            return (
	                f"❌ Too many texts. Maximum {_MAX_BATCH_SIZE} texts per batch "
	                "for memory efficiency."
	            )

	        try:
	            results, error_msg = app_instance.batch_predict(valid_texts)
	            if error_msg:
	                return error_msg

	            if not results:
	                return "❌ No results generated. Please try again."

	            return _format_batch_report(results)
	        except Exception as e:
	            # UI boundary: surface the failure to the user instead of
	            # crashing the callback, and let the app release memory first.
	            app_instance.cleanup_memory()
	            return f"❌ Error during batch analysis: {str(e)}"

	    def clear_batch():
	        """Clear batch inputs."""
	        return ""

	    # Batch Analysis Tab
	    with gr.Tab("📊 Batch Analysis"):
	        gr.Markdown("### 📝 Memory-Efficient Batch Processing")
	        gr.Markdown(
	            f"Maximum batch size: {_MAX_BATCH_SIZE} texts (for memory efficiency)"
	        )
	        gr.Markdown("Memory limit: 8GB")

	        with gr.Row():
	            with gr.Column(scale=2):
	                batch_input = gr.Textbox(
	                    label="Enter Multiple Texts (one per line)",
	                    placeholder="Enter text 1...\nEnter text 2...\nEnter text 3...",
	                    lines=10,
	                    max_lines=15,
	                )

	                with gr.Row():
	                    batch_analyze_btn = gr.Button("📊 Analyze Batch", variant="primary")
	                    batch_clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

	            with gr.Column(scale=3):
	                batch_result_output = gr.Markdown(label="Batch Analysis Result")

	        # Connect events
	        batch_analyze_btn.click(
	            fn=analyze_batch,
	            inputs=[batch_input],
	            outputs=[batch_result_output],
	        )

	        batch_clear_btn.click(
	            fn=clear_batch,
	            outputs=[batch_input],
	        )

	    return batch_analyze_btn, batch_clear_btn, batch_input, batch_result_output