import gradio as gr
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint to serve; override with the HF_MODEL_ID environment variable.
MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Two-step loader: first allow custom modelling code shipped with the
# checkpoint, then retry with the stock loader if that attempt fails.
# NOTE(security): trust_remote_code=True executes Python code from the model
# repository, so HF_MODEL_ID should only point at repositories you trust.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
except Exception as load_error:
    # Report why the first attempt failed instead of hiding it; if the retry
    # fails as well, its exception propagates and aborts startup.
    print(f"Loading with trust_remote_code=True failed ({load_error!r}); retrying without it")
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)


# Move the weights to the selected device and switch to evaluation mode.
model.to(DEVICE)
model.eval()

# Default keyword arguments for model.generate(). generate_code() copies this
# dict and overrides max_new_tokens, temperature and top_p per request.
GEN_KWARGS = {
    "max_new_tokens": 256,
    "do_sample": True,
    "temperature": 0.2,
    "top_p": 0.95,
    "top_k": 50,
    "num_return_sequences": 1,
}

# Prompt scaffold; the user's text is substituted for the {instruction} field.
PROMPT_TEMPLATE = (
    "# Instruction:\n"
    "{instruction}\n"
    "\n"
    "# Response (provide a Python module with multiple functions):\n"
)


def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
    """Generate code for a natural-language instruction.

    Args:
        instruction: Problem statement substituted into ``PROMPT_TEMPLATE``.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. Values <= 0 select greedy decoding.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The model's continuation of the prompt with surrounding whitespace
        stripped, or a short help message when ``instruction`` is empty.
    """
    # Treat None like an empty / whitespace-only instruction.
    if not instruction or not instruction.strip():
        return "Please provide an instruction or problem statement."

    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)

    # Start from the module defaults and apply the per-request overrides.
    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs["max_new_tokens"] = int(max_tokens)

    temperature = float(temperature)
    if temperature > 0.0:
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = float(top_p)
    else:
        # transformers rejects a non-positive temperature while sampling, and
        # the UI slider allows 0.0. Greedy decoding is the intended meaning of
        # "temperature 0", so disable sampling and drop sampling-only options.
        gen_kwargs["do_sample"] = False
        for sampling_only in ("temperature", "top_p", "top_k"):
            gen_kwargs.pop(sampling_only, None)

    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Slicing by prompt length removes the prompt reliably,
    # whereas comparing decoded text breaks whenever the tokenizer round-trip
    # does not reproduce the prompt exactly.
    completion_ids = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    # Page header.
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")

    # The instruction box and a column of generation controls share one row.
    with gr.Row():
        instr = gr.Textbox(
            label="Instruction",
            placeholder="Describe the Python module you want...",
            lines=6,
        )
        with gr.Column(scale=1):
            max_t = gr.Slider(
                label="Max new tokens",
                minimum=32,
                maximum=1024,
                step=32,
                value=256,
            )
            temp = gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.2,
            )
            top_p = gr.Slider(
                label="Top-p",
                minimum=0.1,
                maximum=1.0,
                step=0.01,
                value=0.95,
            )
            run_btn = gr.Button("Generate")

    output = gr.Code(label="Generated Python module", language="python")

    def run(instruction, max_tokens, temperature, top_p):
        """Click handler: return the generated code, or the error text on failure."""
        try:
            result = generate_code(instruction, max_tokens, temperature, top_p)
        except Exception as exc:
            # Show the failure in the output panel rather than raising.
            result = f"Error during generation: {exc}"
        return result

    # Wire the button: form values in, generated code out.
    run_btn.click(
        run,
        inputs=[instr, max_t, temp, top_p],
        outputs=[output],
    )

    # Example prompts that populate the instruction box.
    gr.Examples(
        examples=[
            "Implement a Python module that includes: a function to compute Fibonacci sequence, a function to check primality, and a function to compute factorial, all with type hints and docstrings.",
            "Create a Python module for basic matrix operations (add, multiply, transpose) with appropriate error handling and tests.",
        ],
        inputs=instr,
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()