Spaces: Runtime error
import gradio as gr
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import time
import numpy as np
# --- Configuration ---
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Device set to use: {device}")

stt_model_id = "openai/whisper-tiny"
summarizer_model_id = "sshleifer/distilbart-cnn-6-6"
SUMMARY_INTERVAL = 30.0  # Seconds between summarization updates
# --- Load Models ---
print("Loading Speech-to-Text (STT) model...")
stt_model = AutoModelForSpeechSeq2Seq.from_pretrained(stt_model_id).to(device)
processor = AutoProcessor.from_pretrained(stt_model_id)
stt_pipeline = pipeline(
    "automatic-speech-recognition",
    model=stt_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    generate_kwargs={"max_new_tokens": 128},  # FIXED: max_new_tokens moved into generate_kwargs
    chunk_length_s=30,
    batch_size=16,
    device=device,
)

print("Loading Summarization model...")
summarizer = pipeline("summarization", model=summarizer_model_id, device=device)
def format_summary_as_bullets(summary_text):
    """Format a summary string as one bullet point per sentence."""
    if not summary_text:
        return ""
    sentences = summary_text.replace(". ", ".\n- ").split("\n")
    return "- " + "\n".join(sentences).strip()
def process_audio_stream(new_chunk_tuple, accumulated_transcript_state, last_summary_time_state, current_summary_state):
    """Transcribe each incoming audio chunk and periodically refresh the summary."""
    if new_chunk_tuple is None:
        return accumulated_transcript_state, current_summary_state, accumulated_transcript_state, last_summary_time_state, current_summary_state

    sample_rate, audio_chunk = new_chunk_tuple
    if audio_chunk is None or sample_rate is None or audio_chunk.size == 0:
        return accumulated_transcript_state, current_summary_state, accumulated_transcript_state, last_summary_time_state, current_summary_state

    # Collapse stereo to mono and convert int16 PCM to float32 in [-1, 1]
    if audio_chunk.ndim > 1:
        audio_chunk = audio_chunk.mean(axis=1)
    if audio_chunk.dtype != np.float32:
        audio_chunk = audio_chunk.astype(np.float32) / 32768.0

    # Speech-to-text on the new chunk
    try:
        result = stt_pipeline({"sampling_rate": sample_rate, "raw": audio_chunk.copy()})
        new_text = result["text"].strip() if result["text"] else ""
    except Exception as e:
        new_text = f"[Transcription Error: {e}]"

    updated_transcript = accumulated_transcript_state + " " + new_text if accumulated_transcript_state else new_text

    # Refresh the summary at most once every SUMMARY_INTERVAL seconds
    current_time = time.time()
    new_summary = current_summary_state
    if updated_transcript and len(updated_transcript) > 50 and (current_time - last_summary_time_state > SUMMARY_INTERVAL):
        try:
            summary_result = summarizer(updated_transcript, max_length=150, min_length=30, do_sample=False)
            raw_summary = summary_result[0]["summary_text"]
            new_summary = format_summary_as_bullets(raw_summary)
            last_summary_time_state = current_time
        except Exception:
            return updated_transcript, f"[Summarization Error]\n{current_summary_state}", updated_transcript, last_summary_time_state, current_summary_state

    return updated_transcript, new_summary, updated_transcript, last_summary_time_state, new_summary
# --- Gradio UI ---
print("Creating Gradio interface...")
with gr.Blocks() as demo:
    gr.Markdown("# AI-Powered Meeting Notes with Google Meet Integration")

    # Session state shared by the streaming callback and the Clear button
    transcript_state = gr.State("")
    last_summary_time_state = gr.State(0.0)
    summary_state = gr.State("")

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(sources=["microphone"], streaming=True, label="Live Audio", type="numpy")
            gr.Image(sources=["webcam"], label="Webcam", streaming=True)
        with gr.Column(scale=2):
            transcription_output = gr.Textbox(label="Full Transcription", lines=10, interactive=False)
            summary_output = gr.Textbox(label=f"Summary (updates every ~{SUMMARY_INTERVAL:.0f}s)", lines=6, interactive=False)

    # Google Meet button (opens a new tab; Gradio 4 uses js=, older releases used _js=)
    google_meet_button = gr.Button("Start Google Meet")
    google_meet_button.click(
        fn=None,
        inputs=[],
        outputs=[],
        js="() => window.open('https://meet.google.com/new', '_blank')",
    )

    # Stream microphone chunks through the transcription/summarization callback,
    # reusing the same component and State objects so state persists across chunks
    audio_input.stream(
        fn=process_audio_stream,
        inputs=[audio_input, transcript_state, last_summary_time_state, summary_state],
        outputs=[transcription_output, summary_output, transcript_state, last_summary_time_state, summary_state],
    )

    # Clear button resets both textboxes and the session state
    def clear_state():
        return "", "", "", 0.0, ""

    clear_button = gr.Button("Clear Transcript & Summary")
    clear_button.click(
        fn=clear_state,
        inputs=[],
        outputs=[transcription_output, summary_output, transcript_state, last_summary_time_state, summary_state],
    )

print("Launching Gradio app...")
demo.queue()
demo.launch(debug=True, share=True)
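
If this app.py is deployed as a Hugging Face Space, it also needs a requirements.txt next to it; a minimal sketch based only on the imports above (package names only, with no pinned versions assumed):

requirements.txt
gradio
torch
transformers
numpy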