"""
Gradio Web UI for HunyuanOCR Text Spotting
Upload an image and get text detection with bounding boxes
"""
import gradio as gr
from PIL import Image
import os

# Set environment variable to avoid tokenizer parallelism deadlocks
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import spaces
from ocr_model import HunyuanOCR
from visualization import draw_detection_boxes, get_detection_summary
from dotenv import load_dotenv
from openai import OpenAI
from huggingface_hub import hf_hub_download

# Load environment variables
load_dotenv()

# Global model instance (loaded once)
ocr_model = None


def download_font():
    """Download font from Hugging Face Hub if not exists"""
    font_dir = os.path.join(os.path.dirname(__file__), "fonts")
    os.makedirs(font_dir, exist_ok=True)
    
    font_path = os.path.join(font_dir, "NotoSansCJK-Light.ttc")
    if not os.path.exists(font_path):
        print("Downloading font from Hugging Face Hub...")
        try:
            hf_hub_download(
                repo_id="jzhang533/fonts",
                filename="NotoSansCJK-Light.ttc",
                repo_type="dataset",
                local_dir=font_dir,
                local_dir_use_symlinks=False
            )
            print("Font downloaded successfully!")
        except Exception as e:
            print(f"Failed to download font: {e}")


def initialize_model():
    """Initialize the OCR model (called once at startup)"""
    global ocr_model
    if ocr_model is None:
        # Ensure font is available
        download_font()
        
        print("Initializing HunyuanOCR model...")
        ocr_model = HunyuanOCR()
        print("Model ready!")
    return ocr_model


@spaces.GPU
def process_image(image: Image.Image, prompt: str = None, target_language: str = "Chinese"):
    """
    Process uploaded image and return annotated result
    
    Args:
        image: PIL Image from Gradio
        prompt: Optional custom prompt
        target_language: Target language for translation (Original, Chinese, English, French, etc.)
        
    Returns:
        Tuple of (annotated_image, detection_summary, raw_response)
    """
    if image is None:
        return None, "Please upload an image first.", ""
    
    try:
        # Initialize model if needed
        model = initialize_model()
        
        # Resize image if height > 960 while maintaining aspect ratio
        if image.height > 960:
            aspect_ratio = image.width / image.height
            new_height = 960
            new_width = int(new_height * aspect_ratio)
            print(f"Resizing image from {image.size} to ({new_width}, {new_height})")
            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
        
        # Get image dimensions
        image_width, image_height = image.size
        
        # Use default prompt if not provided
        if not prompt or prompt.strip() == "":
            prompt = "检测并识别图片中的文字,将文本内容与坐标格式化输出。"
        
        # Detect text
        print("Running text detection...")
        response = model.detect_text(image, prompt)
        
        # Parse results
        detections = model.parse_detection_results(response, image_width, image_height)
        
        # Merge detections first (since visualization does it internally, we need to do it here for translation)
        from visualization import merge_detections
        merged_detections = merge_detections(detections)
        
        # Translate text in merged detections if not "Original"
        if target_language != "Original":
            print(f"Translating text to {target_language}...")
            for det in merged_detections:
                original_text = det['text']
                translated = translate_text(original_text, target_language)
                det['original_text'] = original_text
                det['text'] = translated
                print(f"Translated: {original_text[:20]}... -> {translated[:20]}...")
        else:
            print("Skipping translation (Original selected)")
            
        # Draw boxes on image (pass merged detections and disable internal merging)
        annotated_image = draw_detection_boxes(image, merged_detections, merge_boxes=False)
        
        # Create summary
        summary = get_detection_summary(merged_detections)
        
        print(f"Detected {len(detections)} text regions")
        
        return annotated_image, summary, response
        
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        print(error_msg)
        return None, error_msg, ""


def translate_text(text: str, target_language: str = "Chinese") -> str:
    """
    Translate text to target language using model specified in .env via OpenAI-compatible API
    """
    try:
        api_key = os.getenv("MODEL_ACCESS_TOKEN")
        base_url = os.getenv("MODEL_API_URL")
        model_name = os.getenv("MODEL_NAME", "ernie-4.5-turbo-128k") # Default fallback
        
        if not api_key or not base_url:
            print("Warning: MODEL_ACCESS_TOKEN or MODEL_API_URL not found in .env")
            return text

        client = OpenAI(api_key=api_key, base_url=base_url)
        
        system_prompt = f"You are a professional manga translator. The following text is from a Japanese manga. Translate it into natural and expressive {target_language}, maintaining the character's tone and the context of the scene. Only output the translation, no explanations."
        
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text}
            ]
        )
        
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Translation error: {e}")
        return text


def create_demo():
    """Create and configure the Gradio interface"""
    
    with gr.Blocks(title="AI Manga Translator") as demo:
        gr.Markdown("""
        # 📚 AI Manga Translator
        
        An intelligent tool designed to detect, recognize, and translate text in images, with specialized features for Manga and Comics.
        
        **Key Capabilities:**
        - 🖌️ **Smart Text Replacement**: Automatically detects text bubbles, wipes them clean, and overlays translated text.
        - 📖 **Manga-Optimized**: Handles vertical text and right-to-left reading order correctly.
        - 🌏 **Multi-Language Translation**: Translates detected text into your preferred language (Chinese, English, French, etc.).
        - 🔍 **High-Precision OCR**: Accurately spots text even in complex backgrounds.
        """)
        
        with gr.Row():
            with gr.Column(scale=1):
                # Input section
                gr.Markdown("### 📤 Input")
                input_image = gr.Image(
                    type="pil",
                    label="Upload Image",
                    sources=["upload", "clipboard"]
                )
                
                custom_prompt = gr.Textbox(
                    label="Custom Prompt (Optional)",
                    placeholder="检测并识别图片中的文字,将文本内容与坐标格式化输出。",
                    lines=2
                )
                
                target_lang = gr.Dropdown(
                    choices=["Original", "Chinese", "English", "French", "German", "Spanish", "Korean", "Japanese"],
                    value="Chinese",
                    label="Target Language",
                    info="Select language for translation (Original = no translation)"
                )
                
                detect_btn = gr.Button("🔍 Detect & Translate", variant="primary", size="lg")
            
            with gr.Column(scale=1):
                # Output section
                gr.Markdown("### 📊 Results")
                output_image = gr.Image(
                    type="pil",
                    label="Detected Text with Bounding Boxes"
                )
                
                detection_summary = gr.Textbox(
                    label="Detection Summary",
                    lines=10,
                    max_lines=20
                )
                
                with gr.Accordion("Raw Model Response", open=False):
                    raw_output = gr.Textbox(label="Raw Output", lines=5)
        
        # Connect the button
        detect_btn.click(
            fn=process_image,
            inputs=[input_image, custom_prompt, target_lang],
            outputs=[output_image, detection_summary, raw_output]
        )
        
        # Examples
        gr.Markdown("### 📝 Examples")
        gr.Examples(
            examples=[
                ["examples/dandadan.png", "检测并识别图片中的文字,将文本内容与坐标格式化输出。"],
                ["examples/ruridragon.png", "检测并识别图片中的文字,将文本内容与坐标格式化输出。"],
                ["examples/spyfamily.png", "检测并识别图片中的文字,将文本内容与坐标格式化输出。"],
            ],
            inputs=[input_image, custom_prompt],
            label="Click to use example image"
        )
        
        gr.Markdown("""
        ---
        ### ℹ️ About
        
        This application combines state-of-the-art AI technologies to provide seamless manga translation:
        
        - **OCR Engine**: HunyuanOCR.
        - **Translation**: Powered by **ERNIE 4.5** for natural and context-aware translations.
        - **Development**: Vibe coded with **Gemini 3 Pro**.
        """)
    
    return demo


if __name__ == "__main__":
    # Create and launch the demo
    print("Loading model (this may take a minute on first run)...")
    
    demo = create_demo()
    
    # Launch with public link option
    demo.launch(
        server_name="0.0.0.0",
        share=False,  # Set to True to create a public link
        show_error=True,
        ssr_mode=False
    )