"""WhaleSong: a Streamlit app that transcribes an uploaded audio file with Whisper."""

import os
import tempfile

import streamlit as st
from transformers import pipeline


@st.cache_resource
def load_asr_pipeline():
    """Build the Whisper ASR pipeline once per server process (cached by Streamlit)."""
    return pipeline("automatic-speech-recognition", model="openai/whisper-base")


asr = load_asr_pipeline()

st.title("🗣️ WhaleSong: Voice to Text")
st.markdown("Speech coming soon! Upload an audio file. We'll transcribe it using Whisper.")

# Audio input
audio_file = st.file_uploader("🎙️ Upload audio file (wav, mp3, m4a)", type=["wav", "mp3", "m4a"])

if audio_file:
    # BUG FIX: the original hard-coded a ".wav" suffix even for mp3/m4a uploads,
    # mislabeling the temp file's format. Keep the real extension (fall back to
    # ".wav" if the upload somehow has none).
    suffix = os.path.splitext(audio_file.name)[1] or ".wav"
    # delete=False so the file survives the `with` block for st.audio / the ASR
    # call, which both take a path; we clean it up ourselves below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(audio_file.read())
        tmp_path = tmp.name

    try:
        st.audio(tmp_path)
        with st.spinner("Transcribing..."):
            result = asr(tmp_path)
        st.success("✅ Transcription complete!")
        st.markdown(f"**Transcript:**\n\n{result['text']}")
    finally:
        # BUG FIX: the original only removed the temp file on the success path,
        # leaking it whenever transcription raised.
        os.remove(tmp_path)