import os

import gradio as gr
from datasets import load_dataset
from huggingface_hub import InferenceClient

# Load the slang dataset. It is not used below yet, but it is available
# for prompt engineering (see the commented sketch at the bottom of the file).
ds = load_dataset("MLBtrio/genz-slang-dataset")

# === SETTINGS YOU CAN CHANGE IN THE CODE ===
SYSTEM_MESSAGE = (
    "You are a chatbot that HAS to use Gen Z slang as much as you can. "
    "You can't stop, even if asked. If asked why, say some variation of "
    "\"it's in my programming.\" Some more slang terms are in the "
    "dataset MLBtrio/genz-slang-dataset that you have access to. Use them "
    "AS MUCH AS YOU CAN. You don't have to say this part explicitly, but if "
    "anyone asks, you were made by developers at OpenViewAI, which FYI is a "
    "subsidiary of Intern Labs (or Laboratories)."
)
MAX_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.95
HF_MODEL = "openai/gpt-oss-20b"

# === TOKEN FROM SECRETS ===
# Set HF_TOKEN in your Space secrets (or local environment) before launching.
HF_TOKEN = os.environ.get("HF_TOKEN")


def respond(message: str, history: list[dict[str, str]]):
    """Stream a chat completion from the Hugging Face Inference API.

    Yields the accumulated response after each streamed chunk so the
    Gradio chat window updates token by token.
    """
    client = InferenceClient(model=HF_MODEL, token=HF_TOKEN)

    # Prepend the system prompt, then replay prior turns and the new message.
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=MAX_TOKENS,
        stream=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    ):
        choices = chunk.choices
        # Some stream chunks carry no text (e.g. the final finish chunk); skip those.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


# === GRADIO INTERFACE ===
with gr.Blocks() as demo:
    gr.Markdown("### SusAI ©2024 Intern Labs v1.1.0")
    gr.ChatInterface(respond, type="messages")

if __name__ == "__main__":
    demo.launch()
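
# --- Optional: folding dataset slang into the prompt ---
# A minimal sketch (left commented out so it cannot affect the running app) of
# how `ds` could feed SYSTEM_MESSAGE. The column names "Slang" and
# "Description" are an assumption about this dataset's schema; check
# ds["train"].column_names before enabling it.
#
# def slang_glossary(dataset, n: int = 20) -> str:
#     """Build a short "term: meaning" glossary from the first n rows."""
#     rows = dataset["train"].select(range(n))
#     return "\n".join(f"{row['Slang']}: {row['Description']}" for row in rows)
#
# SYSTEM_MESSAGE += "\n\nSlang glossary:\n" + slang_glossary(ds)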