Upload folder using huggingface_hub
Browse files- TTS/melo_handler.py +3 -1
- audio_streaming_client.py +9 -3
- handler.py +1 -1
TTS/melo_handler.py
CHANGED
|
@@ -33,7 +33,7 @@ class MeloTTSHandler(BaseHandler):
|
|
| 33 |
def setup(
|
| 34 |
self,
|
| 35 |
should_listen,
|
| 36 |
-
device="mps",  <!-- NOTE(review): removed-line content was truncated in this rendering to `device="`; reconstructed as `device="mps",` to match the added `device="auto",` counterpart and the `device='mps'` usage in handler.py — confirm against the original diff -->
|
| 37 |
language="en",
|
| 38 |
speaker_to_id="en",
|
| 39 |
gen_kwargs={}, # Unused
|
|
@@ -41,10 +41,12 @@ class MeloTTSHandler(BaseHandler):
|
|
| 41 |
):
|
| 42 |
self.should_listen = should_listen
|
| 43 |
self.device = device
|
|
|
|
| 44 |
self.language = language
|
| 45 |
self.model = TTS(
|
| 46 |
language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
|
| 47 |
)
|
|
|
|
| 48 |
self.speaker_id = self.model.hps.data.spk2id[
|
| 49 |
WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
|
| 50 |
]
|
|
|
|
| 33 |
def setup(
|
| 34 |
self,
|
| 35 |
should_listen,
|
| 36 |
+
device="auto",
|
| 37 |
language="en",
|
| 38 |
speaker_to_id="en",
|
| 39 |
gen_kwargs={}, # Unused
|
|
|
|
| 41 |
):
|
| 42 |
self.should_listen = should_listen
|
| 43 |
self.device = device
|
| 44 |
+
console.print(f"[green]Device: {device}")
|
| 45 |
self.language = language
|
| 46 |
self.model = TTS(
|
| 47 |
language=WHISPER_LANGUAGE_TO_MELO_LANGUAGE[self.language], device=device
|
| 48 |
)
|
| 49 |
+
console.print(f"[green]Model device: {self.model.device}")
|
| 50 |
self.speaker_id = self.model.hps.data.spk2id[
|
| 51 |
WHISPER_LANGUAGE_TO_MELO_SPEAKER[speaker_to_id]
|
| 52 |
]
|
audio_streaming_client.py
CHANGED
|
@@ -57,9 +57,15 @@ class AudioStreamingClient:
|
|
| 57 |
if self.session_state != "processing" and not self.send_queue.empty():
|
| 58 |
chunk = self.send_queue.get().tobytes()
|
| 59 |
buffer += chunk
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
else:
|
| 64 |
self.send_request()
|
| 65 |
time.sleep(0.1)
|
|
|
|
| 57 |
if self.session_state != "processing" and not self.send_queue.empty():
|
| 58 |
chunk = self.send_queue.get().tobytes()
|
| 59 |
buffer += chunk
|
| 60 |
+
|
| 61 |
+
# Calculate energy of the audio chunk
|
| 62 |
+
energy = np.sum(np.square(np.frombuffer(chunk, dtype=np.int16))) / len(chunk)
|
| 63 |
+
print(f"Energy: {energy}")
|
| 64 |
+
|
| 65 |
+
if energy > 0.01: # Threshold for energy detection
|
| 66 |
+
if len(buffer) >= self.args.chunk_size * 2: # * 2 because of int16
|
| 67 |
+
self.send_request(buffer)
|
| 68 |
+
buffer = b''
|
| 69 |
else:
|
| 70 |
self.send_request()
|
| 71 |
time.sleep(0.1)
|
handler.py
CHANGED
|
@@ -23,7 +23,7 @@ class EndpointHandler:
|
|
| 23 |
self.parler_tts_handler_kwargs,
|
| 24 |
self.melo_tts_handler_kwargs,
|
| 25 |
self.chat_tts_handler_kwargs,
|
| 26 |
-
) = get_default_arguments(mode='none', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo')
|
| 27 |
|
| 28 |
setup_logger(self.module_kwargs.log_level)
|
| 29 |
|
|
|
|
| 23 |
self.parler_tts_handler_kwargs,
|
| 24 |
self.melo_tts_handler_kwargs,
|
| 25 |
self.chat_tts_handler_kwargs,
|
| 26 |
+
) = get_default_arguments(mode='none', log_level='DEBUG', lm_model_name='meta-llama/Meta-Llama-3.1-8B-Instruct', tts='melo', device='mps')
|
| 27 |
|
| 28 |
setup_logger(self.module_kwargs.log_level)
|
| 29 |
|