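"""Live meeting-notes demo: streaming Whisper transcription with a rolling
DistilBART summary, served through a Gradio Blocks UI."""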
import gradio as gr
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import time
import numpy as np
# --- Configuration ---
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Device set to use: {device}")
stt_model_id = "openai/whisper-tiny"
summarizer_model_id = "sshleifer/distilbart-cnn-6-6"
SUMMARY_INTERVAL = 30.0  # seconds between summary refreshes
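# Model-choice note (assumption): whisper-tiny and distilbart-cnn-6-6 are the
# smallest checkpoints in their families, picked for low streaming latency;
# larger ones (e.g. whisper-small) transcribe more accurately but run slower.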
# --- Load Models ---
print("Loading Speech-to-Text (STT) model...")
stt_model = AutoModelForSpeechSeq2Seq.from_pretrained(stt_model_id).to(device)
processor = AutoProcessor.from_pretrained(stt_model_id)
stt_pipeline = pipeline(
    "automatic-speech-recognition",
    model=stt_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    generate_kwargs={"max_new_tokens": 128},  # cap tokens generated per chunk
    chunk_length_s=30,
    batch_size=16,
    device=device,
)
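# The ASR pipeline accepts raw numpy audio as {"sampling_rate": int, "raw": array};
# chunk_length_s=30 makes it split audio longer than Whisper's 30-second window
# into chunks and stitch the transcribed text back together.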
print("Loading Summarization model...")
summarizer = pipeline("summarization", model=summarizer_model_id, device=device)
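# Note: this BART-family model accepts at most 1024 input tokens, which is why
# the summarization call below passes truncation=True.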
def format_summary_as_bullets(summary_text):
    """Format summary into bullet points, one per sentence."""
    if not summary_text:
        return ""
    # Start a new bullet after each sentence-ending period.
    return "- " + summary_text.replace(". ", ".\n- ").strip()
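# Example: format_summary_as_bullets("Alice spoke. Bob agreed.")
#   -> "- Alice spoke.\n- Bob agreed."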
def process_audio_stream(new_chunk_tuple, accumulated_transcript_state, last_summary_time_state, current_summary_state):
    """Process one streamed audio chunk into an updated transcript and summary."""
    # No new audio: echo the current state back unchanged.
    if new_chunk_tuple is None:
        return accumulated_transcript_state, current_summary_state, accumulated_transcript_state, last_summary_time_state, current_summary_state

    sample_rate, audio_chunk = new_chunk_tuple
    if audio_chunk is None or sample_rate is None or audio_chunk.size == 0:
        return accumulated_transcript_state, current_summary_state, accumulated_transcript_state, last_summary_time_state, current_summary_state

    # Gradio's microphone stream delivers int16 PCM; normalize to float32 in [-1, 1].
    if audio_chunk.dtype != np.float32:
        audio_chunk = audio_chunk.astype(np.float32) / 32768.0

    # Speech-to-text on the new chunk.
    try:
        result = stt_pipeline({"sampling_rate": sample_rate, "raw": audio_chunk.copy()})
        new_text = result["text"].strip() if result["text"] else ""
    except Exception as e:
        new_text = f"[Transcription Error: {e}]"

    updated_transcript = accumulated_transcript_state + " " + new_text if accumulated_transcript_state else new_text

    # Refresh the summary at most once every SUMMARY_INTERVAL seconds,
    # and only once there is enough transcript to be worth summarizing.
    current_time = time.time()
    new_summary = current_summary_state
    if updated_transcript and len(updated_transcript) > 50 and (current_time - last_summary_time_state > SUMMARY_INTERVAL):
        try:
            # truncation=True clips transcripts that exceed the model's
            # 1024-token input window instead of raising an error.
            summary_result = summarizer(updated_transcript, max_length=150, min_length=30, do_sample=False, truncation=True)
            raw_summary = summary_result[0]["summary_text"]
            new_summary = format_summary_as_bullets(raw_summary)
            last_summary_time_state = current_time
        except Exception as e:
            return updated_transcript, f"[Summarization Error: {e}]\n{current_summary_state}", updated_transcript, last_summary_time_state, current_summary_state

    return updated_transcript, new_summary, updated_transcript, last_summary_time_state, new_summary
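# Return order is (transcript box, summary box, transcript state,
# last-summary-time state, summary state); it must match the outputs
# list wired to .stream() below.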
# --- Gradio UI ---
print("Creating Gradio interface...")
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 AI-Powered Meeting Notes with Google Meet Integration")
    with gr.Row():
        with gr.Column(scale=1):
            mic_input = gr.Audio(sources=["microphone"], streaming=True, label="🎙 Live Audio", type="numpy")
            gr.Image(sources=["webcam"], label="📷 Webcam", streaming=True)
        with gr.Column(scale=2):
            transcription_output = gr.Textbox(label="📝 Full Transcription", lines=10, interactive=False)
            summary_output = gr.Textbox(label=f"🔹 Summary (Updates ~{SUMMARY_INTERVAL:.0f}s)", lines=6, interactive=False)

    # Per-session state carried between stream callbacks.
    transcript_state = gr.State("")
    last_summary_time_state = gr.State(0.0)
    summary_state = gr.State("")

    # Google Meet button (opens in a new tab). `js=` is the Gradio 4+ spelling;
    # older releases used `_js=`.
    google_meet_button = gr.Button("Start Google Meet")
    google_meet_button.click(
        fn=None,
        inputs=[],
        outputs=[],
        js="() => window.open('https://meet.google.com/new', '_blank')",
    )

    # Stream microphone chunks through the STT/summarization callback, reusing
    # the single mic component and the State objects declared above.
    mic_input.stream(
        fn=process_audio_stream,
        inputs=[mic_input, transcript_state, last_summary_time_state, summary_state],
        outputs=[transcription_output, summary_output, transcript_state, last_summary_time_state, summary_state],
    )

    # Clear button resets both textboxes and all three pieces of state.
    clear_button = gr.Button("Clear Transcript & Summary")
    clear_button.click(
        fn=lambda: ("", "", "", 0.0, ""),
        inputs=[],
        outputs=[transcription_output, summary_output, transcript_state, last_summary_time_state, summary_state],
    )
print("Launching Gradio app...")
demo.queue()
demo.launch(debug=True, share=True)