""" Amigo 1.0 - Coding Specialist Created by Jan Israel Uses HuggingFace ZeroGPU for FREE GPU inference """ import os # Fix OMP_NUM_THREADS warning os.environ['OMP_NUM_THREADS'] = '1' import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForCausalLM from peft import PeftModel import spaces # Model configuration BASE_MODEL = "codellama/CodeLlama-7b-hf" LORA_MODEL = "swordfish7412/Amigo_1.0" print("🔧 Loading Amigo 1.0...") # Load tokenizer tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) tokenizer.pad_token = tokenizer.eos_token # Load base model base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True, attn_implementation="eager" # Fix: Use eager attention (faster on T4) ) # Load LoRA adapter model = PeftModel.from_pretrained(base_model, LORA_MODEL) model.eval() # Disable cache for faster inference model.config.use_cache = False if hasattr(model.generation_config, 'use_cache'): model.generation_config.use_cache = False print("✅ Amigo 1.0 loaded successfully!") @spaces.GPU def chat(message, history): if not message.strip(): return "Please enter a message." # Format prompt prompt = f"### Instruction:\n{message}\n\n### Response:\n" # Tokenize inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048) inputs = {k: v.to(model.device) for k, v in inputs.items()} # Generate with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=384, # Reduced from 512 for faster responses temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=tokenizer.eos_token_id, use_cache=False # Explicitly disable cache ) # Decode response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract only the response part if "### Response:" in response: response = response.split("### Response:")[-1].strip() return response with gr.Blocks(theme=gr.themes.Soft(), title="Amigo 1.0 - Coding Specialist") as demo: gr.Markdown(""" # Amigo 1.0 - Coding Specialist **Created by Jan Israel** ## The Swordfish AI Trio Amigo is part of a powerful trio of specialized AI models designed to work together seamlessly: **Bandila 1.0** - Reasoning Specialist Expert in system architecture, DevOps strategy, root cause analysis, and strategic planning. **Amigo 1.0** - Coding Specialist (You are here) Master of code generation, debugging, algorithm design, and software development best practices. **Amihan 1.2** - Intelligent Orchestrator The AI-powered coordinator with a dedicated brain (Phi-3-mini) that intelligently routes, synthesizes, and answers directly. Together, they form an advanced AI ecosystem built for **logic, creation, and collaboration**. --- ## About Amigo Amigo 1.0 is a specialized coding assistant fine-tuned on CodeLlama-7B using LoRA (Low-Rank Adaptation) techniques. This model has been trained on over 10,000 curated examples of code generation, debugging, and algorithmic problem-solving, focusing on practical software development assistance. ### Core Capabilities **Code Generation** - Multi-language support: Python, JavaScript, TypeScript, Java, C++, Go, Rust - Framework-specific code: React, Vue, Django, Flask, FastAPI, Express.js - Database queries: SQL, MongoDB, PostgreSQL - API integration and REST endpoint design **Debugging & Analysis** - Error diagnosis and resolution strategies - Performance bottleneck identification - Code review and optimization suggestions - Security vulnerability detection **Algorithm Design** - Data structures implementation (trees, graphs, heaps, hash tables) - Sorting and searching algorithms - Dynamic programming solutions - Time and space complexity analysis **Best Practices** - Clean code principles and design patterns - Testing strategies (unit, integration, e2e) - Documentation standards - Version control workflows ### Technical Specifications - **Base Model:** CodeLlama-7B (Meta AI) - **Fine-tuning Method:** LoRA (rank=16, alpha=32) - **Training Dataset:** 10,000+ curated coding examples - **Context Window:** 2048 tokens - **Inference Mode:** CPU (30-60 seconds per response) - **Parameters:** 7 billion (base) + 542MB (LoRA adapter) ### Use Cases - Software development assistance - Code review and refactoring - Learning programming concepts - Technical interview preparation - Prototyping and rapid development --- **Note:** This is a demonstration deployment running on CPU. For production use with faster inference times, consider deploying on GPU infrastructure or using the model via API integration. """) chatbot = gr.Chatbot(height=500, show_label=False, type="messages") with gr.Row(): msg = gr.Textbox( placeholder="Ask me to write code, debug, or explain algorithms...", show_label=False, scale=4 ) submit = gr.Button("Send", scale=1, variant="primary") gr.Examples( examples=[ "What is your name?", "Write a Python function to calculate Fibonacci numbers", "Create a binary search algorithm in Python", "Explain how quicksort works" ], inputs=msg ) gr.Markdown(""" --- ### The Swordfish AI Trio **Models:** - [Amigo 1.0 - Coding Specialist](https://huggingface.co/swordfish7412/Amigo_1.0) (You are here) - [Bandila 1.0 - Reasoning Specialist](https://huggingface.co/swordfish7412/Bandila_1.0) - [Amihan 1.2 - Intelligent Orchestrator](https://huggingface.co/swordfish7412/Amihan_1.2) **Spaces (Try Them Live):** - [Amigo Space - Coding Specialist](https://huggingface.co/spaces/swordfish7412/Amigo-Space) (You are here) - [Bandila Space - Reasoning Specialist](https://huggingface.co/spaces/swordfish7412/Bandila-Space) - [Amihan Space - Intelligent Orchestrator](https://huggingface.co/spaces/swordfish7412/Amihan) ### About the Creator **Jan Israel** is a full-stack engineer and AI/ML developer with over 10 years of combined experience in software development and machine learning. Based between the Philippines and Canada, he transitioned from pure full-stack development to specializing in AI-driven solutions. **Notable Projects:** **AI Republic** - An intelligent coding assistant and debugging platform powered by fine-tuned language models. The platform provides real-time code analysis, automated debugging, and intelligent code suggestions for enterprise development teams. **Swordfish Project** - A collection of specialized AI models designed for software development and system operations. These models (Bandila, Amigo, and Amihan) are being integrated into AI Republic as the core reasoning and coding engine. ### License & Usage This model is released under the same license as CodeLlama-7B. Free for research and commercial use. For API access or custom deployments, contact the creator. """) def respond(message, chat_history): bot_message = chat(message, chat_history) chat_history.append({"role": "user", "content": message}) chat_history.append({"role": "assistant", "content": bot_message}) return "", chat_history msg.submit(respond, [msg, chatbot], [msg, chatbot]) submit.click(respond, [msg, chatbot], [msg, chatbot]) if __name__ == "__main__": demo.launch()