"""Gradio demo that asks a SmolLM causal language model to write Python modules.

The tokenizer and model are loaded once at import time from the Hugging Face
repository named by the ``HF_MODEL_ID`` environment variable (falling back to
a default checkpoint), then served through a small Gradio Blocks UI.
"""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Best-effort loader: first try with trust_remote_code=True, then fall back to
# a plain load if that fails for any reason.
# NOTE(review): trust_remote_code=True lets transformers run Python code that
# ships with the model repository -- confirm MODEL_ID always points at a
# trusted repo, since it can be overridden through the environment.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
except Exception as exc:
    # Report the failure instead of hiding it so a broken checkpoint is
    # diagnosable from the logs; the retry below is still attempted.
    print(f"Loading with trust_remote_code=True failed ({exc!r}); retrying without it")
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

model.to(DEVICE)
model.eval()

# Default generation settings; generate_code() overrides some of them per call.
GEN_KWARGS = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    num_return_sequences=1,
)

PROMPT_TEMPLATE = (
    "# Instruction:\n{instruction}\n\n# Response (provide a Python module with multiple functions):\n"
)


def generate_code(
    instruction: str,
    max_tokens: int = 256,
    temperature: float = 0.2,
    top_p: float = 0.95,
) -> str:
    """Generate Python code for *instruction* with the loaded model.

    Args:
        instruction: Natural-language description of the module to write.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling probability mass; ignored when decoding
            greedily.

    Returns:
        The generated text with the prompt removed and surrounding
        whitespace stripped, or a short hint message when *instruction* is
        empty.
    """
    if not instruction or not instruction.strip():
        return "Please provide an instruction or problem statement."

    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)

    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs["max_new_tokens"] = int(max_tokens)

    temperature = float(temperature)
    if temperature > 0.0:
        gen_kwargs.update(temperature=temperature, top_p=float(top_p))
    else:
        # The UI slider allows temperature == 0, but sampling requires a
        # strictly positive temperature; decode greedily in that case and
        # drop the sampling-only knobs.
        gen_kwargs["do_sample"] = False
        for sampling_key in ("temperature", "top_p", "top_k"):
            gen_kwargs.pop(sampling_key, None)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **gen_kwargs,
        )

    # For a causal LM the returned sequence is the prompt tokens followed by
    # the newly generated ones. Slicing by prompt length removes the prompt
    # reliably, unlike comparing decoded text, which breaks whenever the
    # tokenizer round-trip does not reproduce the prompt byte-for-byte.
    prompt_length = input_ids.shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")

    with gr.Row():
        instr = gr.Textbox(lines=6, placeholder="Describe the Python module you want...", label="Instruction")
        with gr.Column(scale=1):
            max_t = gr.Slider(minimum=32, maximum=1024, value=256, step=32, label="Max new tokens")
            temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p")
            run_btn = gr.Button("Generate")

    output = gr.Code(label="Generated Python module", language="python")

    def run(instruction, max_tokens, temperature, top_p):
        """Click handler: run generation and show any failure as output text."""
        try:
            return generate_code(instruction, max_tokens, temperature, top_p)
        except Exception as e:
            # UI boundary: surface the error in the output box rather than
            # letting the event handler raise.
            return f"Error during generation: {e}"

    run_btn.click(run, inputs=[instr, max_t, temp, top_p], outputs=[output])

    gr.Examples(
        examples=[
            "Implement a Python module that includes: a function to compute Fibonacci sequence, a function to check primality, and a function to compute factorial, all with type hints and docstrings.",
            "Create a Python module for basic matrix operations (add, multiply, transpose) with appropriate error handling and tests.",
        ],
        inputs=instr,
    )


if __name__ == "__main__":
    demo.launch()