Spaces:
Running
Running
Anthony Liang committed on
Commit ·
8dba63e
1
Parent(s): b267297
more updates for preferences
Browse files
app.py
CHANGED
|
@@ -349,6 +349,9 @@ def process_single_video(
|
|
| 349 |
target_progress = np.linspace(0.0, 1.0, num=num_frames).tolist()
|
| 350 |
success_label = [1.0 if prog > 0.5 else 0.0 for prog in target_progress]
|
| 351 |
|
|
|
|
|
|
|
|
|
|
| 352 |
# Create Trajectory
|
| 353 |
trajectory = Trajectory(
|
| 354 |
task=task_text,
|
|
@@ -356,6 +359,7 @@ def process_single_video(
|
|
| 356 |
frames_shape=frames_shape,
|
| 357 |
target_progress=target_progress,
|
| 358 |
success_label=success_label,
|
|
|
|
| 359 |
metadata={"source": "gradio_app"},
|
| 360 |
)
|
| 361 |
|
|
@@ -468,6 +472,10 @@ def process_two_videos(
|
|
| 468 |
success_label_a = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_a]
|
| 469 |
success_label_b = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_b]
|
| 470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
# Create trajectories
|
| 472 |
trajectory_a = Trajectory(
|
| 473 |
task=task_text,
|
|
@@ -475,6 +483,7 @@ def process_two_videos(
|
|
| 475 |
frames_shape=frames_shape_a,
|
| 476 |
target_progress=target_progress_a,
|
| 477 |
success_label=success_label_a,
|
|
|
|
| 478 |
metadata={"source": "gradio_app", "trajectory": "A"},
|
| 479 |
)
|
| 480 |
|
|
@@ -484,6 +493,7 @@ def process_two_videos(
|
|
| 484 |
frames_shape=frames_shape_b,
|
| 485 |
target_progress=target_progress_b,
|
| 486 |
success_label=success_label_b,
|
|
|
|
| 487 |
metadata={"source": "gradio_app", "trajectory": "B"},
|
| 488 |
)
|
| 489 |
|
|
@@ -724,7 +734,7 @@ with demo:
|
|
| 724 |
value=False,
|
| 725 |
info="If enabled, predict progress per frame rather than feeding the entire video at once",
|
| 726 |
)
|
| 727 |
-
analyze_single_btn = gr.Button("
|
| 728 |
|
| 729 |
gr.Markdown("---")
|
| 730 |
gr.Markdown("**OR Select from Dataset**")
|
|
@@ -977,72 +987,73 @@ with demo:
|
|
| 977 |
)
|
| 978 |
|
| 979 |
with gr.Tab("Preference Analysis"):
|
|
|
|
| 980 |
with gr.Row():
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
video_a_input = gr.Video(label="Video A", height=250)
|
| 984 |
-
video_b_input = gr.Video(label="Video B", height=250)
|
| 985 |
-
task_text_dual = gr.Textbox(
|
| 986 |
-
label="Task Description",
|
| 987 |
-
placeholder="Describe the task",
|
| 988 |
-
value="Complete the task",
|
| 989 |
-
)
|
| 990 |
-
analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
|
| 991 |
-
|
| 992 |
-
gr.Markdown("---")
|
| 993 |
-
gr.Markdown("**OR Select from Dataset**")
|
| 994 |
-
gr.Markdown("---")
|
| 995 |
-
|
| 996 |
-
with gr.Accordion("📁 Video A - Select from Dataset", open=False):
|
| 997 |
-
dataset_name_a = gr.Dropdown(
|
| 998 |
-
choices=PREDEFINED_DATASETS,
|
| 999 |
-
value="jesbu1/oxe_rfm",
|
| 1000 |
-
label="Dataset Name",
|
| 1001 |
-
allow_custom_value=True,
|
| 1002 |
-
)
|
| 1003 |
-
config_name_a = gr.Dropdown(
|
| 1004 |
-
choices=[], value="", label="Configuration Name", allow_custom_value=True
|
| 1005 |
-
)
|
| 1006 |
-
with gr.Row():
|
| 1007 |
-
refresh_configs_btn_a = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
|
| 1008 |
-
load_dataset_btn_a = gr.Button("Load Dataset", variant="secondary", size="sm")
|
| 1009 |
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1043 |
|
| 1044 |
-
|
| 1045 |
-
|
|
|
|
| 1046 |
|
| 1047 |
# State variables for datasets
|
| 1048 |
current_dataset_a = gr.State(None)
|
|
|
|
| 349 |
target_progress = np.linspace(0.0, 1.0, num=num_frames).tolist()
|
| 350 |
success_label = [1.0 if prog > 0.5 else 0.0 for prog in target_progress]
|
| 351 |
|
| 352 |
+
# predict_last_frame_mask: the server expects a list (1.0 per frame); if it is omitted, the server's pad_list_to_max can receive None
|
| 353 |
+
predict_last_frame_mask = [1.0] * num_frames
|
| 354 |
+
|
| 355 |
# Create Trajectory
|
| 356 |
trajectory = Trajectory(
|
| 357 |
task=task_text,
|
|
|
|
| 359 |
frames_shape=frames_shape,
|
| 360 |
target_progress=target_progress,
|
| 361 |
success_label=success_label,
|
| 362 |
+
predict_last_frame_mask=predict_last_frame_mask,
|
| 363 |
metadata={"source": "gradio_app"},
|
| 364 |
)
|
| 365 |
|
|
|
|
| 472 |
success_label_a = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_a]
|
| 473 |
success_label_b = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_b]
|
| 474 |
|
| 475 |
+
# predict_last_frame_mask: server expects a list (1.0 per frame); None causes pad_list_to_max to fail
|
| 476 |
+
mask_a = [1.0] * num_frames_a
|
| 477 |
+
mask_b = [1.0] * num_frames_b
|
| 478 |
+
|
| 479 |
# Create trajectories
|
| 480 |
trajectory_a = Trajectory(
|
| 481 |
task=task_text,
|
|
|
|
| 483 |
frames_shape=frames_shape_a,
|
| 484 |
target_progress=target_progress_a,
|
| 485 |
success_label=success_label_a,
|
| 486 |
+
predict_last_frame_mask=mask_a,
|
| 487 |
metadata={"source": "gradio_app", "trajectory": "A"},
|
| 488 |
)
|
| 489 |
|
|
|
|
| 493 |
frames_shape=frames_shape_b,
|
| 494 |
target_progress=target_progress_b,
|
| 495 |
success_label=success_label_b,
|
| 496 |
+
predict_last_frame_mask=mask_b,
|
| 497 |
metadata={"source": "gradio_app", "trajectory": "B"},
|
| 498 |
)
|
| 499 |
|
|
|
|
| 734 |
value=False,
|
| 735 |
info="If enabled, predict progress per frame rather than feeding the entire video at once",
|
| 736 |
)
|
| 737 |
+
analyze_single_btn = gr.Button("Compute Progress", variant="primary")
|
| 738 |
|
| 739 |
gr.Markdown("---")
|
| 740 |
gr.Markdown("**OR Select from Dataset**")
|
|
|
|
| 987 |
)
|
| 988 |
|
| 989 |
with gr.Tab("Preference Analysis"):
|
| 990 |
+
# Full-width row: two videos side by side
|
| 991 |
with gr.Row():
|
| 992 |
+
video_a_input = gr.Video(label="Video A", height=320)
|
| 993 |
+
video_b_input = gr.Video(label="Video B", height=320)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 994 |
|
| 995 |
+
task_text_dual = gr.Textbox(
|
| 996 |
+
label="Task Description",
|
| 997 |
+
placeholder="Describe the task",
|
| 998 |
+
value="Complete the task",
|
| 999 |
+
)
|
| 1000 |
+
analyze_dual_btn = gr.Button("Compute Preference", variant="primary")
|
| 1001 |
+
|
| 1002 |
+
gr.Markdown("---")
|
| 1003 |
+
gr.Markdown("**OR Select from Dataset**")
|
| 1004 |
+
gr.Markdown("---")
|
| 1005 |
+
|
| 1006 |
+
with gr.Accordion("📁 Video A - Select from Dataset", open=False):
|
| 1007 |
+
dataset_name_a = gr.Dropdown(
|
| 1008 |
+
choices=PREDEFINED_DATASETS,
|
| 1009 |
+
value="jesbu1/oxe_rfm",
|
| 1010 |
+
label="Dataset Name",
|
| 1011 |
+
allow_custom_value=True,
|
| 1012 |
+
)
|
| 1013 |
+
config_name_a = gr.Dropdown(
|
| 1014 |
+
choices=[], value="", label="Configuration Name", allow_custom_value=True
|
| 1015 |
+
)
|
| 1016 |
+
with gr.Row():
|
| 1017 |
+
refresh_configs_btn_a = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
|
| 1018 |
+
load_dataset_btn_a = gr.Button("Load Dataset", variant="secondary", size="sm")
|
| 1019 |
+
|
| 1020 |
+
dataset_status_a = gr.Markdown("", visible=False)
|
| 1021 |
+
with gr.Row():
|
| 1022 |
+
prev_traj_btn_a = gr.Button("⬅️ Prev", variant="secondary", size="sm")
|
| 1023 |
+
trajectory_slider_a = gr.Slider(
|
| 1024 |
+
minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
|
| 1025 |
+
)
|
| 1026 |
+
next_traj_btn_a = gr.Button("Next ➡️", variant="secondary", size="sm")
|
| 1027 |
+
trajectory_metadata_a = gr.Markdown("", visible=False)
|
| 1028 |
+
use_dataset_video_btn_a = gr.Button("Use Selected Video for A", variant="secondary")
|
| 1029 |
+
|
| 1030 |
+
with gr.Accordion("📁 Video B - Select from Dataset", open=False):
|
| 1031 |
+
dataset_name_b = gr.Dropdown(
|
| 1032 |
+
choices=PREDEFINED_DATASETS,
|
| 1033 |
+
value="jesbu1/oxe_rfm",
|
| 1034 |
+
label="Dataset Name",
|
| 1035 |
+
allow_custom_value=True,
|
| 1036 |
+
)
|
| 1037 |
+
config_name_b = gr.Dropdown(
|
| 1038 |
+
choices=[], value="", label="Configuration Name", allow_custom_value=True
|
| 1039 |
+
)
|
| 1040 |
+
with gr.Row():
|
| 1041 |
+
refresh_configs_btn_b = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
|
| 1042 |
+
load_dataset_btn_b = gr.Button("Load Dataset", variant="secondary", size="sm")
|
| 1043 |
+
|
| 1044 |
+
dataset_status_b = gr.Markdown("", visible=False)
|
| 1045 |
+
with gr.Row():
|
| 1046 |
+
prev_traj_btn_b = gr.Button("⬅️ Prev", variant="secondary", size="sm")
|
| 1047 |
+
trajectory_slider_b = gr.Slider(
|
| 1048 |
+
minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
|
| 1049 |
+
)
|
| 1050 |
+
next_traj_btn_b = gr.Button("Next ➡️", variant="secondary", size="sm")
|
| 1051 |
+
trajectory_metadata_b = gr.Markdown("", visible=False)
|
| 1052 |
+
use_dataset_video_btn_b = gr.Button("Use Selected Video for B", variant="secondary")
|
| 1053 |
|
| 1054 |
+
gr.Markdown("---")
|
| 1055 |
+
gr.Markdown("### Preference result")
|
| 1056 |
+
result_text = gr.Markdown("")
|
| 1057 |
|
| 1058 |
# State variables for datasets
|
| 1059 |
current_dataset_a = gr.State(None)
|