cb1cyf committed on
Commit
387b7ca
·
1 Parent(s): 9916f36

feat: speedup

Browse files
Files changed (1) hide show
  1. app.py +28 -24
app.py CHANGED
@@ -47,6 +47,8 @@ SAVE_DIR = "output/gradio"
47
  pipeline = None
48
  accelerator = None
49
  save_images = False
 
 
50
 
51
  def load_pipeline(accelerator, weight_dtype, args):
52
  pipeline = OmniGen2Pipeline.from_pretrained(
@@ -83,7 +85,7 @@ def load_pipeline(accelerator, weight_dtype, args):
83
  pipeline = pipeline.to(accelerator.device)
84
  return pipeline
85
 
86
- @spaces.GPU(duration=300)
87
  def run(
88
  instruction,
89
  width_input,
@@ -103,8 +105,13 @@ def run(
103
  max_pixels: int = 1024 * 1024,
104
  seed_input: int = -1,
105
  align_res: bool = True,
106
- progress=gr.Progress(),
107
  ):
 
 
 
 
 
 
108
  input_images = [image_input_1, image_input_2, image_input_3]
109
  input_images = [img for img in input_images if img is not None]
110
 
@@ -116,10 +123,6 @@ def run(
116
 
117
  generator = torch.Generator(device=accelerator.device).manual_seed(seed_input)
118
 
119
- def progress_callback(cur_step, timesteps):
120
- frac = (cur_step + 1) / float(timesteps)
121
- progress(frac)
122
-
123
  if scheduler == 'euler':
124
  pipeline.scheduler = FlowMatchEulerDiscreteScheduler()
125
  elif scheduler == 'dpmsolver++':
@@ -147,11 +150,8 @@ def run(
147
  num_images_per_prompt=num_images_per_prompt,
148
  generator=generator,
149
  output_type="pil",
150
- step_func=progress_callback,
151
  )
152
 
153
- progress(1.0)
154
-
155
  vis_images = [to_tensor(image) * 2 - 1 for image in results.images]
156
  output_image = create_collage(vis_images)
157
 
@@ -185,7 +185,6 @@ def get_examples(base_dir="assets/examples/OmniGen2"):
185
  with open(config_path, "r", encoding="utf-8") as f:
186
  config = json.load(f)
187
  _example = [config.get(k, None) for k in example_keys]
188
- _example.append(50) # steps
189
  examples.append(_example)
190
  return examples
191
 
@@ -215,8 +214,6 @@ tips = """
215
 
216
  💡 We provide step-by-step instructions in our <a href='https://github.com/bytedance/UMO' target='_blank'> Github Repo</a>. Additionally, try the examples and comparison provided below the demo to quickly get familiar with UMO and spark your creativity!
217
 
218
- ❗️ Since the generation of OmniGen2 itself is quite slow with default 50 steps, we recommand using less steps to save your free quota but with some performance degradation.
219
-
220
  <details>
221
  <summary style="cursor: pointer; color: #d34c0e; font-weight: 500;"> ⚡️ Tips from the based OmniGen2</summary>
222
 
@@ -290,6 +287,25 @@ def main(args):
290
  label="Width", minimum=256, maximum=2048, value=1024, step=128
291
  )
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  with gr.Accordion("Advanced Options", open=False):
294
  with gr.Row(equal_height=True):
295
  align_res = gr.Checkbox(
@@ -348,17 +364,6 @@ def main(args):
348
  outputs=[cfg_range_start]
349
  )
350
 
351
- with gr.Row(equal_height=True):
352
- scheduler_input = gr.Dropdown(
353
- label="Scheduler",
354
- choices=["euler", "dpmsolver++"],
355
- value="euler",
356
- info="The scheduler to use for the model.",
357
- )
358
-
359
- num_inference_steps = gr.Slider(
360
- label="Inference Steps", minimum=20, maximum=100, value=50, step=1
361
- )
362
  with gr.Row(equal_height=True):
363
  num_images_per_prompt = gr.Slider(
364
  label="Number of images per prompt",
@@ -448,7 +453,6 @@ def main(args):
448
  align_res,
449
  output_image,
450
  output_image_omnigen2,
451
- num_inference_steps,
452
  ],
453
  label="We provide examples for academic research. The vast majority of images used in this demo are either generated or from open-source datasets. If you have any concerns, please contact us, and we will promptly remove any inappropriate content.",
454
  examples_per_page=15
 
47
  pipeline = None
48
  accelerator = None
49
  save_images = False
50
+ enable_taylorseer = False
51
+ enable_teacache = False
52
 
53
  def load_pipeline(accelerator, weight_dtype, args):
54
  pipeline = OmniGen2Pipeline.from_pretrained(
 
85
  pipeline = pipeline.to(accelerator.device)
86
  return pipeline
87
 
88
+ @spaces.GPU(duration=600)
89
  def run(
90
  instruction,
91
  width_input,
 
105
  max_pixels: int = 1024 * 1024,
106
  seed_input: int = -1,
107
  align_res: bool = True,
 
108
  ):
109
+ if enable_taylorseer:
110
+ pipeline.enable_taylorseer = True
111
+ elif enable_teacache:
112
+ pipeline.transformer.enable_teacache = True
113
+ pipeline.transformer.teacache_rel_l1_thresh = 0.05
114
+
115
  input_images = [image_input_1, image_input_2, image_input_3]
116
  input_images = [img for img in input_images if img is not None]
117
 
 
123
 
124
  generator = torch.Generator(device=accelerator.device).manual_seed(seed_input)
125
 
 
 
 
 
126
  if scheduler == 'euler':
127
  pipeline.scheduler = FlowMatchEulerDiscreteScheduler()
128
  elif scheduler == 'dpmsolver++':
 
150
  num_images_per_prompt=num_images_per_prompt,
151
  generator=generator,
152
  output_type="pil",
 
153
  )
154
 
 
 
155
  vis_images = [to_tensor(image) * 2 - 1 for image in results.images]
156
  output_image = create_collage(vis_images)
157
 
 
185
  with open(config_path, "r", encoding="utf-8") as f:
186
  config = json.load(f)
187
  _example = [config.get(k, None) for k in example_keys]
 
188
  examples.append(_example)
189
  return examples
190
 
 
214
 
215
  💡 We provide step-by-step instructions in our <a href='https://github.com/bytedance/UMO' target='_blank'> Github Repo</a>. Additionally, try the examples and comparison provided below the demo to quickly get familiar with UMO and spark your creativity!
216
 
 
 
217
  <details>
218
  <summary style="cursor: pointer; color: #d34c0e; font-weight: 500;"> ⚡️ Tips from the based OmniGen2</summary>
219
 
 
287
  label="Width", minimum=256, maximum=2048, value=1024, step=128
288
  )
289
 
290
+ with gr.Accordion("Speed Up Options", open=True):
291
+ with gr.Row(equal_height=True):
292
+ global enable_taylorseer
293
+ global enable_teacache
294
+ enable_taylorseer = gr.Checkbox(label="Whether to use TaylorSeer to speed up inference", value=True)
295
+ enable_teacache = gr.Checkbox(label="Whether to use TeaCache to speed up inference", value=False)
296
+
297
+ with gr.Row(equal_height=True):
298
+ scheduler_input = gr.Dropdown(
299
+ label="Scheduler",
300
+ choices=["euler", "dpmsolver++"],
301
+ value="euler",
302
+ info="The scheduler to use for the model.",
303
+ )
304
+
305
+ num_inference_steps = gr.Slider(
306
+ label="Inference Steps", minimum=20, maximum=100, value=50, step=1
307
+ )
308
+
309
  with gr.Accordion("Advanced Options", open=False):
310
  with gr.Row(equal_height=True):
311
  align_res = gr.Checkbox(
 
364
  outputs=[cfg_range_start]
365
  )
366
 
 
 
 
 
 
 
 
 
 
 
 
367
  with gr.Row(equal_height=True):
368
  num_images_per_prompt = gr.Slider(
369
  label="Number of images per prompt",
 
453
  align_res,
454
  output_image,
455
  output_image_omnigen2,
 
456
  ],
457
  label="We provide examples for academic research. The vast majority of images used in this demo are either generated or from open-source datasets. If you have any concerns, please contact us, and we will promptly remove any inappropriate content.",
458
  examples_per_page=15