Spaces:

alexyywwdd
/

postometro-free-demo

Sleeping

App Files Files Community

imabackstabber committed on Mar 21, 2024

Commit

db8354d

1 Parent(s): 4923179

refine layout

Browse files

Files changed (2) hide show

app.py +7 -3
main/inference.py +17 -4

app.py CHANGED Viewed

@@ -32,9 +32,9 @@ def infer(image_input, in_threshold=0.5, num_people="Single person", render_mesh
     inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
     os.system(f'rm -rf {OUT_FOLDER}/*')
     multi_person = False if (num_people == "Single person") else True
-    vis_img, num_bbox, mmdet_box = inferer.infer(image_input, in_threshold, multi_person, not(render_mesh))
-    return vis_img, "bbox num: {}, bbox meta: {}".format(num_bbox, mmdet_box)
 TITLE = '''<h1 align="center">PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</h1>'''
 DESCRIPTION = '''
@@ -43,6 +43,9 @@ DESCRIPTION = '''
 Note: You can drop a image at the panel (or select one of the examples)
 to obtain the 3D parametric reconstructions of the detected humans.
 </p>
 '''
 with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
@@ -71,10 +74,11 @@ with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
             send_button = gr.Button("Infer")
         with gr.Column():
             processed_frames = gr.Image(label="Rendered Results")
             debug_textbox = gr.Textbox(label="Debug information")
     # example_images = gr.Examples([])
-    send_button.click(fn=infer, inputs=[image_input, threshold, num_people, mesh_as_vertices], outputs=[processed_frames, debug_textbox])
     # with gr.Row():
     example_images = gr.Examples([
         ['/home/user/app/assets/01.jpg'],

     inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
     os.system(f'rm -rf {OUT_FOLDER}/*')
     multi_person = False if (num_people == "Single person") else True
+    vis_img, bbox_img, num_bbox, mmdet_box = inferer.infer(image_input, in_threshold, multi_person, not(render_mesh))
+    return vis_img, bbox_img, "bbox num: {}\nbbox meta: {}".format(num_bbox, mmdet_box)
 TITLE = '''<h1 align="center">PostoMETRO: Pose Token Enhanced Mesh Transformer for Robust 3D Human Mesh Recovery</h1>'''
 DESCRIPTION = '''
 Note: You can drop a image at the panel (or select one of the examples)
 to obtain the 3D parametric reconstructions of the detected humans.
 </p>
+<p>
+Check out <a href="https://arxiv.org/abs/2403.12473"><b>our paper on arxiv page</b>!
+</p>
 '''
 with gr.Blocks(title="PostoMETRO", css=".gradio-container") as demo:
             send_button = gr.Button("Infer")
         with gr.Column():
             processed_frames = gr.Image(label="Rendered Results")
+            bbox_frames = gr.Image(label="Bbox Results")
             debug_textbox = gr.Textbox(label="Debug information")
     # example_images = gr.Examples([])
+    send_button.click(fn=infer, inputs=[image_input, threshold, num_people, mesh_as_vertices], outputs=[processed_frames, bbox_frames, debug_textbox])
     # with gr.Row():
     example_images = gr.Examples([
         ['/home/user/app/assets/01.jpg'],

main/inference.py CHANGED Viewed

@@ -57,6 +57,7 @@ class Inferer:
         transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])
         vis_img = original_img.copy()
         original_img_height, original_img_width = original_img.shape[:2]
         # load renderer
@@ -97,13 +98,14 @@ class Inferer:
             # align these pre-processing steps
             bbox = process_bbox(mmdet_box_xywh, original_img_width, original_img_height)
-            ok_bboxes.append(bbox)
             # [DEBUG] test mmdet pipeline
             if bbox is not None:
                 top_left = (int(bbox[0]), int(bbox[1]))
                 bottom_right = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                 cv2.rectangle(vis_img, top_left, bottom_right, (0, 0, 255), 2)
             # human model inference
             img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, self.cfg.input_img_shape)
@@ -136,18 +138,19 @@ class Inferer:
                                                                                pred_cam[2] + cy_delta / (pred_cam[0] / (original_img_height / bbox[3]))],
                                                                                mesh_as_vertices=mesh_as_vertices)
             vis_img = vis_img.astype('uint8')
-        return vis_img, len(ok_bboxes), ok_bboxes
 if __name__ == '__main__':
     from PIL import Image
     inferer = Inferer('postometro', 1, './out_folder') # gpu
-    image_path = f'../assets/07.jpg'
     image = Image.open(image_path)
     # Convert the PIL image to a NumPy array
     image_np = np.array(image)
-    vis_img, _ , _ = inferer.infer(image_np, 0.2, multi_person=True, mesh_as_vertices=True)
     save_path = f'./saved_vis_07.jpg'
     # Ensure the image is in the correct format (PIL expects uint8)
     if vis_img.dtype != np.uint8:
@@ -157,3 +160,13 @@ if __name__ == '__main__':
     image = Image.fromarray(vis_img)
     image.save(save_path)

         transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])
         vis_img = original_img.copy()
+        bbox_img = original_img.copy()
         original_img_height, original_img_width = original_img.shape[:2]
         # load renderer
             # align these pre-processing steps
             bbox = process_bbox(mmdet_box_xywh, original_img_width, original_img_height)
+            ok_bboxes.append(bbox.tolist())
             # [DEBUG] test mmdet pipeline
             if bbox is not None:
                 top_left = (int(bbox[0]), int(bbox[1]))
                 bottom_right = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                 cv2.rectangle(vis_img, top_left, bottom_right, (0, 0, 255), 2)
+                cv2.rectangle(bbox_img, top_left, bottom_right, (0, 0, 255), 2)
             # human model inference
             img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, self.cfg.input_img_shape)
                                                                                pred_cam[2] + cy_delta / (pred_cam[0] / (original_img_height / bbox[3]))],
                                                                                mesh_as_vertices=mesh_as_vertices)
             vis_img = vis_img.astype('uint8')
+        return vis_img, bbox_img, len(ok_bboxes), ok_bboxes
 if __name__ == '__main__':
     from PIL import Image
     inferer = Inferer('postometro', 1, './out_folder') # gpu
+    image_path = f'../assets/06.jpg'
     image = Image.open(image_path)
     # Convert the PIL image to a NumPy array
     image_np = np.array(image)
+    vis_img, bbox_img, num_bbox, mmdet_box = inferer.infer(image_np, 0.2, multi_person=True, mesh_as_vertices=True)
     save_path = f'./saved_vis_07.jpg'
+    bbox_save_path = f'./bbox_saved_vis_07.jpg'
     # Ensure the image is in the correct format (PIL expects uint8)
     if vis_img.dtype != np.uint8:
     image = Image.fromarray(vis_img)
     image.save(save_path)
+    # Ensure the image is in the correct format (PIL expects uint8)
+    if bbox_img.dtype != np.uint8:
+        bbox_img = bbox_img.astype('uint8')
+    # Convert the Numpy array (if RGB) to a PIL image and save
+    image = Image.fromarray(bbox_img)
+    image.save(bbox_save_path)
+    print("bbox num: {}\nbbox meta: {}".format(num_bbox, mmdet_box))