Spaces:

Anupam007
/

google-meet-transcriber

Runtime error

App Files Files Community

Anupam007 committed on Mar 27, 2025

Commit

1c244b4

verified ·

1 Parent(s): 07131a9

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import time
+import numpy as np
+import soundfile as sf
+# --- Configuration ---
+# Use the first CUDA GPU in half precision when available, else CPU in full precision.
+device, torch_dtype = (
+    ("cuda:0", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
+)
+print(f"Using device: {device}")
+# STT Model: load the Whisper checkpoint and move it to the selected device.
+stt_model_id = "openai/whisper-tiny"
+stt_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    stt_model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
+).to(device)
+# The processor supplies both the tokenizer and the feature extractor used below.
+processor = AutoProcessor.from_pretrained(stt_model_id)
+stt_pipeline = pipeline(
+    "automatic-speech-recognition",
+    model=stt_model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=30,
+    batch_size=16,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+# Summarization Model
+summarizer_model_id = "sshleifer/distilbart-cnn-6-6"
+summarizer = pipeline(
+    "summarization",
+    model=summarizer_model_id,
+    device=device,
+)
+# Minimum number of seconds between two summary refreshes.
+SUMMARY_INTERVAL = 30.0
+def format_summary_as_bullets(summary_text):
+    """Format a summary as a bullet list with one sentence per bullet.
+
+    The text is split after every period that is followed by a space, and on
+    any newline already present. Each non-empty piece becomes one line
+    prefixed with ``"- "``.
+
+    Args:
+        summary_text: The summary to format. May be empty or ``None``.
+
+    Returns:
+        The bullet list joined with newlines, or ``""`` when the input is
+        empty, ``None`` or contains only whitespace.
+    """
+    if not summary_text:
+        return ""
+    # Turn each ". " sentence boundary into a line break, then split into lines.
+    pieces = summary_text.replace(". ", ".\n").split("\n")
+    # Skipping blank pieces prevents a dangling "-" bullet when the text ends
+    # with ". " and prevents a lone "- " for whitespace-only input.
+    bullets = [piece.strip() for piece in pieces if piece.strip()]
+    return "\n".join(f"- {bullet}" for bullet in bullets)
+def process_audio_stream(new_chunk_tuple, transcript_state, last_summary_time, summary_state):
+    """Transcribe one streamed audio chunk and refresh the summary when it is due.
+
+    Args:
+        new_chunk_tuple: ``(sample_rate, samples)`` for the new chunk, or ``None``.
+            ``samples`` is a NumPy array (or ``None``).
+        transcript_state: Transcript text accumulated so far.
+        last_summary_time: ``time.time()`` value of the last summarization attempt.
+        summary_state: The last successfully generated bullet summary.
+
+    Returns:
+        A 5-tuple ``(transcript_to_display, summary_to_display, transcript_state,
+        last_summary_time, summary_state)``. Errors are shown in the displayed
+        values only; they are never written into the returned state values.
+    """
+    unchanged = (transcript_state, summary_state, transcript_state, last_summary_time, summary_state)
+    if new_chunk_tuple is None:
+        return unchanged
+    sample_rate, audio_chunk = new_chunk_tuple
+    if audio_chunk is None or audio_chunk.size == 0:
+        return unchanged
+    if np.issubdtype(audio_chunk.dtype, np.integer):
+        # Scale integer PCM by the range of its own dtype (32768 for int16) so
+        # that wider integer types are normalized correctly as well.
+        full_scale = float(np.iinfo(audio_chunk.dtype).max) + 1.0
+        audio_chunk = audio_chunk.astype(np.float32) / full_scale
+    else:
+        # Floating-point audio is only cast; dividing it by 32768 would all
+        # but silence a signal that is already normalized.
+        audio_chunk = audio_chunk.astype(np.float32)
+    if audio_chunk.ndim > 1:
+        # Down-mix to mono for the speech model.
+        # NOTE(review): assumes a (samples, channels) layout - confirm.
+        audio_chunk = audio_chunk.mean(axis=1)
+    try:
+        result = stt_pipeline({"sampling_rate": sample_rate, "raw": audio_chunk})
+    except Exception as e:
+        # Show the failure to the user but keep it out of the stored transcript,
+        # so the error text is not fed to the summarizer later.
+        error_text = f"[Transcription Error: {e}]"
+        shown_transcript = transcript_state + (" " + error_text if transcript_state else error_text)
+        return shown_transcript, summary_state, transcript_state, last_summary_time, summary_state
+    new_text = (result["text"] or "").strip()
+    if not new_text:
+        # Silent chunk: leave the transcript as is instead of appending a space.
+        updated_transcript = transcript_state
+    elif transcript_state:
+        updated_transcript = transcript_state + " " + new_text
+    else:
+        updated_transcript = new_text
+    current_time = time.time()
+    summary_due = current_time - last_summary_time > SUMMARY_INTERVAL
+    if len(updated_transcript) <= 50 or not summary_due:
+        return updated_transcript, summary_state, updated_transcript, last_summary_time, summary_state
+    try:
+        # truncation=True keeps long transcripts within the model's input limit
+        # instead of raising; only the leading part of the text is summarized then.
+        summary_result = summarizer(
+            updated_transcript,
+            max_length=150,
+            min_length=30,
+            do_sample=False,
+            truncation=True,
+        )
+        raw_summary = None
+        if summary_result and isinstance(summary_result, list):
+            raw_summary = summary_result[0]['summary_text']
+    except Exception as e:
+        print(f"Summarization failed: {e}")
+        # Advance the timer on failure too, so a broken summarizer is retried
+        # once per interval rather than on every incoming chunk.
+        return updated_transcript, f"[Summarization Error]\n\n{summary_state}", updated_transcript, current_time, summary_state
+    if raw_summary is None:
+        return updated_transcript, summary_state, updated_transcript, last_summary_time, summary_state
+    new_summary = format_summary_as_bullets(raw_summary)
+    return updated_transcript, new_summary, updated_transcript, current_time, new_summary
+# --- Gradio Interface ---
+# Layout: header and meeting link on top, microphone input on the left, and the
+# transcript and summary text boxes on the right.
+with gr.Blocks() as demo:
+    gr.Markdown("# Real-Time Meeting Notes with Google Meet")
+    gr.Markdown("Click the button below to start a Google Meet session.")
+    # Plain Markdown link. The former "{target=_blank}" suffix is attribute-list
+    # syntax that is not standard Markdown.
+    # NOTE(review): it appeared to be rendered as literal text - confirm in the UI.
+    google_meet_button = gr.Markdown("### [Start Google Meet](https://meet.google.com/new)")
+    # Per-session state carried between streaming callbacks.
+    transcript_state = gr.State("")
+    last_summary_time = gr.State(0.0)
+    summary_state = gr.State("")
+    with gr.Row():
+        with gr.Column(scale=1):
+            audio_stream = gr.Audio(sources=["microphone"], streaming=True, label="Live Microphone Input", type="numpy")
+        with gr.Column(scale=2):
+            transcription_output = gr.Textbox(label="Full Transcription", lines=15, interactive=False)
+            summary_output = gr.Textbox(label=f"Bullet Point Summary (Updates ~every {SUMMARY_INTERVAL}s)", lines=10, interactive=False)
+    # The order of `outputs` must match the tuple returned by process_audio_stream.
+    audio_stream.stream(
+        fn=process_audio_stream,
+        inputs=[audio_stream, transcript_state, last_summary_time, summary_state],
+        outputs=[transcription_output, summary_output, transcript_state, last_summary_time, summary_state],
+    )
+    clear_button = gr.Button("Clear Transcript & Summary")
+    # One value per output, in order: transcript box, summary box, transcript
+    # state, last-summary timestamp, summary state. The handler used to return
+    # four values for these five outputs.
+    clear_button.click(
+        fn=lambda: ("", "", "", 0.0, ""),
+        inputs=[],
+        outputs=[transcription_output, summary_output, transcript_state, last_summary_time, summary_state],
+    )
+demo.queue()
+# NOTE(review): share=True requests a public tunnel and is presumably only needed
+# for local runs - confirm whether the hosting environment needs it.
+demo.launch(debug=True, share=True)