Update app.py
app.py CHANGED
@@ -147,121 +147,6 @@ class ImageStoryteller:



- # def generate_story(self, analysis_result, creativity_level=0.7):
- # """Generate a story based on detected objects and scene using Qwen"""
- # if self.llm_model is None:
- # return "Story generation model not available."
-
- # try:
- # # Extract detected objects and scene
- # objects = [obj['name'] for obj in analysis_result['objects']]
- # scenes = [scene['type'] for scene in analysis_result['scenes']]
-
- # # Create a prompt for the LLM
- # objects_str = ", ".join(objects) # Use top 3 objects
- # scene_str = scenes[0] if scenes else "general scene"
-
- # # FIXED: Convert creativity_level to float if it's a tuple
- # if isinstance(creativity_level, (tuple, list)):
- # creativity_level = float(creativity_level[0])
-
- # # Different prompt templates for creativity
- # if creativity_level > 0.8:
- # prompt = f"""Based on this image containing {objects_str} in a {scene_str}, write a creative and imaginative short story (3-4 paragraphs).
- # Make it engaging and add interesting details about the scene. Story:"""
- # elif creativity_level > 0.5:
- # prompt = f"""Create a short story about an image with {objects_str} in a {scene_str}.
- # Write 2-3 paragraphs that describe what might be happening in this scene. Story:"""
- # else:
- # prompt = f"""Describe what you see in an image containing {objects_str} in a {scene_str}.
- # Write a simple 1-2 paragraph description. Description:"""
-
- # # QWEN 1.8B SPECIFIC FORMATTING - SIMPLE AND EFFECTIVE
- # if "qwen" in self.llm_model_id.lower():
- # # Qwen works best with this simple format
- # formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
- # elif "phi" in self.llm_model_id: # For Phi models
- # # Phi-2 specific formatting
- # formatted_prompt = f"Instruct: {prompt}\nOutput:"
- # elif "gemma" in self.llm_model_id:
- # # Gemma specific formatting
- # formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
- # else:
- # # Generic formatting
- # formatted_prompt = f"{prompt}\n\n"
-
- # # Tokenize and generate
- # inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.llm_model.device)
-
- # with torch.no_grad():
- # # QWEN OPTIMIZED GENERATION PARAMETERS
- # if "qwen" in self.llm_model_id.lower():
- # outputs = self.llm_model.generate(
- # **inputs,
- # max_new_tokens=300, # Good length for stories
- # temperature=creativity_level,
- # do_sample=True,
- # top_p=0.9,
- # repetition_penalty=1.1,
- # eos_token_id=self.tokenizer.eos_token_id,
- # pad_token_id=self.tokenizer.eos_token_id,
- # no_repeat_ngram_size=3 # Prevent repetition
- # )
- # else:
- # outputs = self.llm_model.generate(
- # **inputs,
- # max_new_tokens=250,
- # temperature=creativity_level,
- # do_sample=True,
- # top_p=0.9,
- # repetition_penalty=1.1,
- # pad_token_id=self.tokenizer.eos_token_id
- # )
-
- # # Decode and clean up
- # story = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # # Clean up Qwen specific tokens
- # if "qwen" in self.llm_model_id.lower():
- # # Remove the prompt and Qwen chat tokens
- # story = story.replace(formatted_prompt, "").strip()
- # story = story.replace("<|im_end|>", "").strip()
- # story = story.replace("<|im_start|>", "").strip()
- # story = story.replace("<|endoftext|>", "").strip()
-
- # # Sometimes Qwen repeats, clean that up
- # if "Story:" in story:
- # story = story.split("Story:")[-1].strip()
- # if "Description:" in story:
- # story = story.split("Description:")[-1].strip()
- # elif story.startswith(formatted_prompt):
- # story = story[len(formatted_prompt):].strip()
-
- # # Additional cleanup for any model
- # story = story.strip()
-
- # # If story is too short, try a simpler approach
- # if len(story.split()) < 10:
- # # Fallback: use a direct prompt
- # simple_prompt = f"Tell me a story about {objects_str} in {scene_str}."
- # simple_inputs = self.tokenizer(simple_prompt, return_tensors="pt").to(self.llm_model.device)
- # with torch.no_grad():
- # simple_outputs = self.llm_model.generate(
- # **simple_inputs,
- # max_new_tokens=200,
- # temperature=0.8,
- # do_sample=True
- # )
- # story = self.tokenizer.decode(simple_outputs[0], skip_special_tokens=True)
- # story = story.replace(simple_prompt, "").strip()
-
- # # return story
-
- # except Exception as e:
- # print(f"Story generation failed: {e}")
- # objects_str = ", ".join(objects) if 'objects' in locals() else "unknown"
- # scene_str = scenes[0] if 'scenes' in locals() and scenes else "unknown scene"
- # return f"Failed to generate story. Detected objects: {objects_str} in a {scene_str}. Error: {str(e)}"
def generate_story(self, analysis_result, creativity_level=0.7):
"""Generate a story with caption based on detected objects and scene using Qwen"""
if self.llm_model is None:

@@ -402,65 +287,7 @@ class ImageStoryteller:

return formatted_text

-
- # def create_story_overlay(self, image, story):
- # """Create a simple overlay showing story - returns just the story text"""
- # # If you want to create an image with text, you can implement it here
- # # For now, let's just return the story text
- # return story
-
- # def create_story_overlay(self, image, story):
- # """Create story overlay as separate black image with bigger fonts"""
- # img_np = np.array(image)
- # height, width = 800, 800#img_np.shape[:2]
-
- # # Create a separate black image for the story (1/3 of original height)
- # overlay_height = height // 1
- # overlay = np.zeros((overlay_height, width, 3), dtype=np.uint8)
-
- # # Add text to the black overlay with bigger fonts
- # font = cv2.FONT_HERSHEY_SIMPLEX
- # font_scale = 1 # Much bigger font
- # font_color = (255, 255, 255) # White text
- # thickness = 1 # Thicker text
- # line_spacing = 25 # More spacing for bigger text
-
- # # Split story into lines (max 40 characters per line for bigger text)
- # words = story.split()
- # lines = []
- # current_line = ""
-
- # for word in words:
- # if len(current_line + word) <= 40:
- # current_line += word + " "
- # else:
- # lines.append(current_line.strip())
- # current_line = word + " "
- # if current_line:
- # lines.append(current_line.strip())
-
- # # Limit to 5 lines maximum for bigger text
- # # if len(lines) > 5: # If you want to keep some limit but indicate truncation
- # # lines = lines[:5]
- # # lines.append("... [Story continues]") # Indicate truncation
-
-
- # # Calculate text block height for centering
- # total_text_height = len(lines) * line_spacing
- # start_y = (overlay_height - total_text_height) // + 60
-
- # # Add each line of text, centered
- # y_offset = start_y
- # for line in lines:
- # # Calculate text size for centering
- # text_size = cv2.getTextSize(line, font, font_scale, thickness)[0]
- # text_x = (width - text_size[0]) // 2
-
- # cv2.putText(overlay, line, (text_x, y_offset),
- # font, font_scale, font_color, thickness, cv2.LINE_AA)
- # y_offset += line_spacing
-
- # return Image.fromarray(overlay)
+

def remove_background(self, image):
"""Remove background using rembg"""

@@ -524,7 +351,7 @@ class ImageStoryteller:
analysis_result = self.analyze_image_with_clip(image)

# Generate story
- story = self.generate_story(analysis_result,
+ story = self.generate_story(analysis_result, creativity_level=0.7)

# # Create analysis overlay
# analysis_image = self.create_analysis_overlay(image, analysis_result)

@@ -578,55 +405,7 @@ with gr.Blocks(title="Who says AI isn’t creative? Watch it turn a single image
# Load example images
example_images_list = get_example_images()

-
- # #custom_css = """
- # #.gradio-container {
- # # max-height: 95vh !important;
- # # overflow-y: auto !important;
- # #}
-
- # #blocks-container {
- # max-height: 100% !important;
- # overflow: auto !important;
- # }
-
- # #.gallery .wrap.contain .grid .wrap {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # .gallery .wrap.contain .grid .wrap.selected {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # .gallery .thumbnail {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # .gallery .thumbnail.selected {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # .gallery .wrap.gradio-image {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # .gallery .wrap.gradio-image.selected {
- # border: none !important;
- # box-shadow: none !important;
- # outline: none !important;
- # }
- # /* Prevent infinite expansion */
- # .panel {
- # max-height: 80vh !important;
- # overflow-y: auto !important;
- # }
- # #"""
-
+

custom_css = """
<style>

@@ -655,53 +434,7 @@ with gr.Blocks(title="Who says AI isn’t creative? Watch it turn a single image
</style>
"""

-
- # javascript = """
- # <script>
- # document.addEventListener('DOMContentLoaded', function() {
- # // Force container height immediately
- # const forceHeight = function() {
- # const containers = document.querySelectorAll('.gradio-container, .container, #blocks-container');
- # containers.forEach(container => {
- # container.style.height = '100vh';
- # container.style.maxHeight = '100vh';
- # container.style.overflowY = 'auto';
- # });
- # };
-
- # // Run immediately
- # forceHeight();
-
- # // Run again after a short delay to catch dynamic content
- # setTimeout(forceHeight, 100);
- # setTimeout(forceHeight, 500);
-
- # // Monitor for ANY DOM changes
- # const observer = new MutationObserver(function(mutations) {
- # forceHeight();
- # // Constrain any new elements
- # mutations.forEach(function(mutation) {
- # mutation.addedNodes.forEach(function(node) {
- # if (node.nodeType === 1) {
- # node.style.maxHeight = '100%';
- # if (node.querySelectorAll) {
- # node.querySelectorAll('*').forEach(child => {
- # child.style.maxHeight = '100%';
- # });
- # }
- # }
- # });
- # });
- # });
-
- # observer.observe(document.body, {
- # childList: true,
- # subtree: true,
- # attributes: true,
- # attributeFilter: ['style', 'class']
- # });
- # });
- # </script>
+
javascript = """
<script>
document.addEventListener('DOMContentLoaded', function() {