"""Gradio demo that asks a Triton-served vLLM model for a nutrition-table box.

The uploaded image is sent to the inference endpoint together with a
chat-templated prompt; the box found in the reply is drawn on the image.
"""

import base64
import json
import re
from io import BytesIO

import gradio as gr
import requests
from PIL import ImageDraw
from transformers import Qwen2VLProcessor

processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", use_fast=True)
url = "http://localhost:8000/v2/models/vllm_model/generate"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever, so a stalled inference server would hang the UI worker.
REQUEST_TIMEOUT = (10, 300)

# Marker that precedes the assistant turn when the prompt is echoed back.
ASSISTANT_MARKER = "<|im_start|>assistant\n"

# Matches "(x1,y1),(x2,y2)"; whitespace around the numbers is tolerated.
BOX_PATTERN = re.compile(
    r"\(\s*(\d+)\s*,\s*(\d+)\s*\)\s*,\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)"
)


def _encode_png_base64(image) -> str:
    """Return *image* serialized as PNG and encoded as a base64 string."""
    buf = BytesIO()
    image.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def _build_messages(img_b64: str) -> list:
    """Return the system + user conversation used for the detection prompt."""
    return [
        {
            'role': 'system',
            'content': [
                {
                    'type': 'text',
                    'text': "You are a Vision Language Model specialized in product images. Detect nutrition tables.",
                }
            ],
        },
        {
            'role': 'user',
            'content': [
                {'type': 'image', 'image': img_b64},
                {'type': 'text', 'text': "Detect the bounding box of the nutrition table."},
            ],
        },
    ]


def _draw_box(image, output_text: str):
    """Return a copy of *image* with the box found in *output_text* drawn on it.

    The four integers are divided by 1000 and scaled by the image width and
    height. If no box is found, *image* is returned unchanged.
    """
    match = BOX_PATTERN.search(output_text)
    if match is None:
        return image

    x1, y1, x2, y2 = map(int, match.groups())
    # ImageDraw.rectangle requires x1 >= x0 and y1 >= y0, so order the corners
    # in case the model emits them swapped.
    left, right = sorted((x1, x2))
    top, bottom = sorted((y1, y2))

    annotated = image.copy()  # leave the caller's image untouched
    w, h = annotated.size
    ImageDraw.Draw(annotated).rectangle(
        (left / 1000 * w, top / 1000 * h, right / 1000 * w, bottom / 1000 * h),
        outline="green",
        width=10,
    )
    return annotated


# Function to handle the inference and visualization
def ask_triton(image):
    """Run nutrition-table detection on *image* and visualize the result.

    Args:
        image: PIL image supplied by the Gradio input, or ``None`` when the
            user submits without uploading anything.

    Returns:
        A ``(image, text)`` pair matching the interface's two outputs: the
        image (annotated when a box was found) and the model's reply. On
        failure the input image is returned alongside an ``"Error: ..."``
        message, so both outputs are always populated.
    """
    if image is None:
        return None, "Error: no image provided."

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # should be shown to the user as text rather than crash the callback.
    try:
        img_b64 = _encode_png_base64(image)

        # Apply chat template and build payload
        chat_text = processor.apply_chat_template(
            _build_messages(img_b64), tokenize=False, add_generation_prompt=True
        )
        payload = {
            "text_input": chat_text,
            "image": img_b64,
            "parameters": {
                "stream": False,
                "temperature": 0,
                "max_tokens": 2048,
            },
        }

        # Send POST request to the inference endpoint
        response = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT)
        # Surface HTTP errors instead of silently returning an empty reply.
        response.raise_for_status()
        output_text = response.json().get("text_output", "")

        # Keep only the assistant's part; rsplit leaves the text unchanged
        # when the marker is absent.
        output_text = output_text.rsplit(ASSISTANT_MARKER, 1)[-1]

        return _draw_box(image, output_text), output_text
    except Exception as e:
        # Two values are required because the interface has two outputs.
        return image, f"Error: {e}"


# Gradio Interface
demo = gr.Interface(
    fn=ask_triton,
    inputs=[
        gr.Image(type="pil"),
    ],
    outputs=["image", "text"],
    title="Nutrition Table Detection",
    description=(
        "Please upload image containing a nutrition table to visualizes "
        "bounding box prediction."
    ),
)
demo.launch(server_name="0.0.0.0", server_port=7860)