"""Gradio demo: CPU voice cloning with the Pocket TTS ONNX model."""

import os

# Number of CPU threads handed to the BLAS / ONNX Runtime math kernels.
CPU_THREADS = 16

# Thread-count env vars must be set BEFORE numpy/onnxruntime are imported,
# otherwise the native libraries pick their own defaults.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
os.environ["OPENBLAS_NUM_THREADS"] = str(CPU_THREADS)
os.environ["NUMEXPR_NUM_THREADS"] = str(CPU_THREADS)

import sys
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
from huggingface_hub import snapshot_download

MODEL_REPO = "KevinAHM/pocket-tts-onnx"

# Fetch the model repo and make its bundled code importable.
repo_dir = snapshot_download(repo_id=MODEL_REPO)
os.chdir(repo_dir)  # presumably the model code resolves weight paths relative to CWD
sys.path.insert(0, repo_dir)

import onnxruntime as ort

_OriginalInferenceSession = ort.InferenceSession


def _PatchedInferenceSession(*args, **kwargs):
    """Force tuned SessionOptions onto every ONNX Runtime session.

    pocket_tts_onnx creates its sessions internally, so patching the
    constructor is the only hook available to control threading.
    """
    # Fix: .get(key, default) returns None when sess_options=None is passed
    # explicitly, which would crash on the attribute assignments below.
    so = kwargs.get("sess_options") or ort.SessionOptions()
    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    so.intra_op_num_threads = CPU_THREADS
    so.inter_op_num_threads = 1  # sequential execution: no inter-op parallelism
    kwargs["sess_options"] = so
    return _OriginalInferenceSession(*args, **kwargs)


ort.InferenceSession = _PatchedInferenceSession

from pocket_tts_onnx import PocketTTSOnnx

# Cache of model instances keyed by (temperature, lsd_steps); bounded in
# practice by the discrete slider grids defined in the UI below.
tts_cache = {}


def get_tts(temperature: float, lsd_steps: int):
    """Return a cached PocketTTSOnnx model for the given sampling settings."""
    key = (float(temperature), int(lsd_steps))
    if key not in tts_cache:
        tts_cache[key] = PocketTTSOnnx(
            precision="int8",
            temperature=float(temperature),
            lsd_steps=int(lsd_steps),
            device="cpu",
        )
    return tts_cache[key]


def synthesize(ref_audio_path, text, temperature, lsd_steps):
    """Clone the reference voice and speak *text*.

    Returns the path to a WAV file containing the generated speech.

    Raises:
        gr.Error: if the reference audio or the text is missing.
    """
    text = (text or "").strip()
    if not ref_audio_path:
        raise gr.Error("Upload a reference audio file.")
    if not text:
        raise gr.Error("Enter some text.")

    tts = get_tts(temperature, int(lsd_steps))
    audio = tts.generate(text=text, voice=ref_audio_path)
    sr = getattr(tts, "SAMPLE_RATE", 24000)

    audio_np = np.asarray(audio)
    if audio_np.ndim > 1:
        audio_np = audio_np.squeeze()

    # Fix: the original wrote every result to one fixed filename, so
    # concurrent requests clobbered each other's output. Use a unique
    # temp file per request instead.
    fd, out_path = tempfile.mkstemp(suffix=".wav", prefix="pocket_tts_")
    os.close(fd)  # soundfile reopens the file by path
    sf.write(out_path, audio_np, sr)
    return out_path


with gr.Blocks() as demo:
    with gr.Row():
        ref_audio = gr.Audio(type="filepath")
        text = gr.Textbox(lines=6, value="Hello, this is a test.")
    with gr.Row():
        temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05)
        lsd_steps = gr.Slider(1, 20, value=10, step=1)
    generate = gr.Button("Generate")
    out_audio = gr.Audio(type="filepath")
    generate.click(
        fn=synthesize,
        inputs=[ref_audio, text, temperature, lsd_steps],
        outputs=[out_audio],
        api_name="generate",
    )

if __name__ == "__main__":
    demo.launch()