"""
Amigo 1.0 - Coding Specialist
Created by Jan Israel
Uses HuggingFace ZeroGPU for FREE GPU inference
"""

import os
# Fix OMP_NUM_THREADS warning
os.environ['OMP_NUM_THREADS'] = '1'

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import spaces

# Model configuration: the LoRA adapter is applied on top of the base checkpoint.
BASE_MODEL = "codellama/CodeLlama-7b-hf"
LORA_MODEL = "swordfish7412/Amigo_1.0"

print("🔧 Loading Amigo 1.0...")

# Load the tokenizer that matches the base checkpoint and reuse the EOS token
# for padding so that generate() always has a pad token available.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Load the base model in half precision; device_map="auto" lets the library
# decide where the weights are placed.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
    # Eager attention is pinned here.
    # NOTE(review): the original comment claimed this is "faster on T4" --
    # unverified; confirm with a benchmark before relying on it.
    attn_implementation="eager",
)

# Wrap the base model with the fine-tuned LoRA adapter and switch to
# inference mode (disables dropout and similar training-only behaviour).
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()

# Keep the key/value cache ENABLED. The cache lets each decoding step reuse the
# attention keys/values of the tokens already processed; turning it off forces
# the whole sequence to be recomputed for every new token, which makes
# generation slower, not faster.
model.config.use_cache = True
if hasattr(model.generation_config, 'use_cache'):
    model.generation_config.use_cache = True

print("✅ Amigo 1.0 loaded successfully!")

@spaces.GPU
def chat(message, history):
    """Generate Amigo's reply to a single user message.

    The function is wrapped with ``spaces.GPU`` so that the call runs with a
    GPU attached when hosted on a ZeroGPU Space.

    Args:
        message: The text the user typed into the chat box.
        history: Previous conversation turns. Accepted so the signature
            matches the chat callback, but not used: every reply is generated
            from ``message`` alone.

    Returns:
        The generated reply as a string, or ``"Please enter a message."`` when
        ``message`` is empty, ``None`` or whitespace-only.
    """
    if not message or not message.strip():
        return "Please enter a message."

    # Instruction/response prompt template the model is queried with.
    prompt = f"### Instruction:\n{message}\n\n### Response:\n"

    # Tokenize and move every tensor to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=384,  # Reduced from 512 for faster responses
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            # Reuse cached keys/values between decoding steps. Without the
            # cache every new token re-runs attention over the full sequence.
            use_cache=True,
        )

    # Decode only the newly generated tokens. Slicing by prompt length is
    # robust where splitting the decoded text on "### Response:" is not:
    # the marker may appear in the user's message, be repeated by the model,
    # or be cut off when a long prompt is truncated.
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # No stop sequence is configured, so the model may carry on and invent a
    # follow-up turn; keep only the text before the next instruction header.
    response = response.split("### Instruction:", 1)[0]

    return response.strip()

# Build the Gradio UI: an introduction panel, the chat widgets, example
# prompts, a footer, and the event wiring that connects the input to chat().
with gr.Blocks(theme=gr.themes.Soft(), title="Amigo 1.0 - Coding Specialist") as demo:
    # Introduction / model card shown above the chat window.
    gr.Markdown("""
    # Amigo 1.0 - Coding Specialist
    
    **Created by Jan Israel**
    
    ## The Swordfish AI Trio
    
    Amigo is part of a powerful trio of specialized AI models designed to work together seamlessly:
    
    **Bandila 1.0** - Reasoning Specialist  
    Expert in system architecture, DevOps strategy, root cause analysis, and strategic planning.
    
    **Amigo 1.0** - Coding Specialist (You are here)  
    Master of code generation, debugging, algorithm design, and software development best practices.
    
    **Amihan 1.2** - Intelligent Orchestrator  
    The AI-powered coordinator with a dedicated brain (Phi-3-mini) that intelligently routes, synthesizes, and answers directly.
    
    Together, they form an advanced AI ecosystem built for **logic, creation, and collaboration**.
    
    ---
    
    ## About Amigo
    
    Amigo 1.0 is a specialized coding assistant fine-tuned on CodeLlama-7B using LoRA (Low-Rank Adaptation) techniques. 
    This model has been trained on over 10,000 curated examples of code generation, debugging, and algorithmic problem-solving, 
    focusing on practical software development assistance.
    
    ### Core Capabilities
    
    **Code Generation**
    - Multi-language support: Python, JavaScript, TypeScript, Java, C++, Go, Rust
    - Framework-specific code: React, Vue, Django, Flask, FastAPI, Express.js
    - Database queries: SQL, MongoDB, PostgreSQL
    - API integration and REST endpoint design
    
    **Debugging & Analysis**
    - Error diagnosis and resolution strategies
    - Performance bottleneck identification
    - Code review and optimization suggestions
    - Security vulnerability detection
    
    **Algorithm Design**
    - Data structures implementation (trees, graphs, heaps, hash tables)
    - Sorting and searching algorithms
    - Dynamic programming solutions
    - Time and space complexity analysis
    
    **Best Practices**
    - Clean code principles and design patterns
    - Testing strategies (unit, integration, e2e)
    - Documentation standards
    - Version control workflows
    
    ### Technical Specifications
    
    - **Base Model:** CodeLlama-7B (Meta AI)
    - **Fine-tuning Method:** LoRA (rank=16, alpha=32)
    - **Training Dataset:** 10,000+ curated coding examples
    - **Context Window:** 2048 tokens
    - **Inference Mode:** GPU (HuggingFace ZeroGPU)
    - **Parameters:** 7 billion (base) + 542MB (LoRA adapter)
    
    ### Use Cases
    
    - Software development assistance
    - Code review and refactoring
    - Learning programming concepts
    - Technical interview preparation
    - Prototyping and rapid development
    
    ---
    
    **Note:** This is a demonstration deployment running on HuggingFace ZeroGPU, where GPU time is shared and allocated per request. 
    For production use with consistent inference times, consider deploying on dedicated GPU infrastructure or using the model via API integration.
    """)
    
    # Conversation display; history is a list of {"role", "content"} dicts.
    chatbot = gr.Chatbot(height=500, show_label=False, type="messages")
    
    # Input row: free-text box plus an explicit send button.
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask me to write code, debug, or explain algorithms...",
            show_label=False,
            scale=4
        )
        submit = gr.Button("Send", scale=1, variant="primary")
    
    # Clickable sample prompts that fill the text box.
    gr.Examples(
        examples=[
            "What is your name?",
            "Write a Python function to calculate Fibonacci numbers",
            "Create a binary search algorithm in Python",
            "Explain how quicksort works"
        ],
        inputs=msg
    )
    
    # Footer: related models, live Spaces, author bio and licence note.
    gr.Markdown("""
    ---
    
    ### The Swordfish AI Trio
    
    **Models:**
    - [Amigo 1.0 - Coding Specialist](https://huggingface.co/swordfish7412/Amigo_1.0) (You are here)
    - [Bandila 1.0 - Reasoning Specialist](https://huggingface.co/swordfish7412/Bandila_1.0)
    - [Amihan 1.2 - Intelligent Orchestrator](https://huggingface.co/swordfish7412/Amihan_1.2)
    
    **Spaces (Try Them Live):**
    - [Amigo Space - Coding Specialist](https://huggingface.co/spaces/swordfish7412/Amigo-Space) (You are here)
    - [Bandila Space - Reasoning Specialist](https://huggingface.co/spaces/swordfish7412/Bandila-Space)
    - [Amihan Space - Intelligent Orchestrator](https://huggingface.co/spaces/swordfish7412/Amihan)
    
    ### About the Creator
    
    **Jan Israel** is a full-stack engineer and AI/ML developer with over 10 years of combined experience in software development 
    and machine learning. Based between the Philippines and Canada, he transitioned from pure full-stack development to specializing in AI-driven solutions.
    
    **Notable Projects:**
    
    **AI Republic** - An intelligent coding assistant and debugging platform powered by fine-tuned language models. The platform provides 
    real-time code analysis, automated debugging, and intelligent code suggestions for enterprise development teams.
    
    **Swordfish Project** - A collection of specialized AI models designed for software development and system operations. These models 
    (Bandila, Amigo, and Amihan) are being integrated into AI Republic as the core reasoning and coding engine.
    
    ### License & Usage
    
    This model is released under the same license as CodeLlama-7B. Free for research and commercial use.
    For API access or custom deployments, contact the creator.
    """)
    
    def respond(message, chat_history):
        """Handle one submitted message and return the updated UI state.

        Args:
            message: Text currently in the input box.
            chat_history: Conversation so far as a list of
                ``{"role": ..., "content": ...}`` dicts (may be ``None``).

        Returns:
            A ``(textbox_value, history)`` pair: an empty string to clear the
            input box, and the history extended with the user turn and the
            assistant reply.
        """
        # Work on a copy so the caller's list is never mutated, and tolerate
        # a missing history.
        updated_history = list(chat_history or [])

        if message and message.strip():
            bot_message = chat(message, updated_history)
        else:
            # chat() is GPU-decorated; answer blank input here with the same
            # text it would return instead of requesting a GPU for nothing.
            bot_message = "Please enter a message."

        updated_history.append({"role": "user", "content": message or ""})
        updated_history.append({"role": "assistant", "content": bot_message})
        return "", updated_history
    
    # Pressing Enter in the text box and clicking "Send" trigger the same handler.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])

# Start the Gradio server only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()