import gradio as gr from transformers import pipeline import torch import threading import time # Configuration MODEL_NAME = "trendmicro-ailab/Llama-Primus-Reasoning" class FastModelHandler: def __init__(self): self.pipe = None self.loading = False self.loaded = False self.start_background_loading() def start_background_loading(self): """Model ko background mein automatically load kare""" def load_model(): self.loading = True try: print("š Model automatically loading...") # Fast loading ke liye pipeline use kare self.pipe = pipeline( "text-generation", model=MODEL_NAME, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True ) self.loaded = True self.loading = False print("ā Model loaded successfully!") except Exception as e: print(f"ā Loading error: {e}") # Fallback: CPU pe load kare try: self.pipe = pipeline( "text-generation", model=MODEL_NAME, device_map="cpu" ) self.loaded = True self.loading = False print("ā Model loaded on CPU!") except Exception as e2: print(f"ā CPU loading failed: {e2}") # Background thread start kare thread = threading.Thread(target=load_model, daemon=True) thread.start() def generate_fast(self, prompt, max_length=200): """Fast response generate kare""" if not self.loaded: if self.loading: return "ā³ Model abhi load ho raha hai... 1-2 minute wait karein" else: return "ā Model load nahi ho paya. Page refresh karein" try: # Fast generation start_time = time.time() result = self.pipe( prompt, max_length=max_length, temperature=0.7, do_sample=True, num_return_sequences=1, pad_token_id=50256, repetition_penalty=1.1 ) generation_time = time.time() - start_time response = result[0]['generated_text'] return f"{response}\n\nā” Generation time: {generation_time:.1f}s" except Exception as e: return f"ā Generation error: {str(e)}" # Model automatically initialize ho jayega model_handler = FastModelHandler() # Simple interface - sirf prompt do, response mil jaye with gr.Blocks(theme=gr.themes.Soft(), title="Fast Reasoning Model") as demo: gr.Markdown(""" # š Fast Reasoning Model **Model automatically load ho gaya hai!** š **Bas prompt likho aur Generate dabao - jaldi response mil jayega!** """) # Real-time status display status_display = gr.HTML( value="