""" ElevenLabs client for Pip's voice. Handles: Text-to-speech with emotional tone matching. """ import os from typing import Optional from elevenlabs.client import AsyncElevenLabs from elevenlabs import VoiceSettings class ElevenLabsClient: """ElevenLabs-powered voice synthesis for Pip.""" # Voice model options MODELS = { "flash": "eleven_flash_v2_5", # ~75ms latency - for real-time "turbo": "eleven_turbo_v2_5", # ~250ms - balance "expressive": "eleven_multilingual_v2", # Higher quality, more expressive } # Tone settings for different emotional contexts TONE_SETTINGS = { "calm": VoiceSettings( stability=0.8, similarity_boost=0.7, style=0.3, use_speaker_boost=True ), "warm": VoiceSettings( stability=0.7, similarity_boost=0.75, style=0.5, use_speaker_boost=True ), "excited": VoiceSettings( stability=0.5, similarity_boost=0.8, style=0.8, use_speaker_boost=True ), "gentle": VoiceSettings( stability=0.85, similarity_boost=0.7, style=0.2, use_speaker_boost=True ), "mysterious": VoiceSettings( stability=0.6, similarity_boost=0.6, style=0.6, use_speaker_boost=True ), } def __init__(self): api_key = os.getenv("ELEVENLABS_API_KEY") self.available = bool(api_key) if self.available: self.client = AsyncElevenLabs(api_key=api_key) else: self.client = None print("⚠️ ElevenLabs: No API key found - voice disabled") # Default voice - can be customized or created via Voice Design self.default_voice_id = "21m00Tcm4TlvDq8ikWAM" # Rachel - warm, friendly self.pip_voice_id = None # Will be set if custom voice is created def is_available(self) -> bool: """Check if the client is available.""" return self.available async def speak( self, text: str, tone: str = "warm", use_fast_model: bool = True ) -> Optional[bytes]: """ Generate speech from text with emotional tone matching. Returns audio bytes (mp3). """ if not self.available or not self.client: return None try: model = self.MODELS["flash"] if use_fast_model else self.MODELS["expressive"] voice_settings = self.TONE_SETTINGS.get(tone, self.TONE_SETTINGS["warm"]) voice_id = self.pip_voice_id or self.default_voice_id # Note: text_to_speech.convert returns an async generator directly (no await) audio_gen = self.client.text_to_speech.convert( text=text, voice_id=voice_id, model_id=model, voice_settings=voice_settings, output_format="mp3_44100_128" ) # Collect all audio chunks from the async generator audio_bytes = b"" async for chunk in audio_gen: audio_bytes += chunk return audio_bytes except Exception as e: print(f"ElevenLabs TTS error: {e}") import traceback traceback.print_exc() return None async def speak_stream( self, text: str, tone: str = "warm" ): """ Stream audio generation for lower latency. Yields audio chunks as they're generated. """ if not self.available or not self.client: return try: model = self.MODELS["flash"] voice_settings = self.TONE_SETTINGS.get(tone, self.TONE_SETTINGS["warm"]) voice_id = self.pip_voice_id or self.default_voice_id # Note: text_to_speech.convert returns an async generator directly (no await) audio_stream = self.client.text_to_speech.convert( text=text, voice_id=voice_id, model_id=model, voice_settings=voice_settings, output_format="mp3_44100_128" ) async for chunk in audio_stream: yield chunk except Exception as e: print(f"ElevenLabs streaming error: {e}") return async def get_available_voices(self) -> list: """ Get list of available voices. """ if not self.available or not self.client: return [] try: voices = await self.client.voices.get_all() return [{"id": v.voice_id, "name": v.name} for v in voices.voices] except Exception as e: print(f"Error getting voices: {e}") return [] def get_tone_for_emotion(self, emotion: str, intensity: int) -> str: """ Map emotional state to voice tone. """ emotion_tone_map = { "happy": "excited" if intensity > 7 else "warm", "sad": "gentle", "anxious": "calm", "angry": "calm", # Counterbalance "excited": "excited", "confused": "warm", "hopeful": "warm", "peaceful": "calm", "curious": "mysterious", "neutral": "warm", } return emotion_tone_map.get(emotion.lower(), "warm")