Spaces:

rewardfm
/

rewardeval_ui

Build error

App Files Files Community

Anthony Liang committed on 21 days ago

Commit

a4ffa6f

1 Parent(s): 4d32b53

first commit

Browse files

Files changed (3) hide show

README.md +6 -5
app.py +508 -0
requirements.txt +32 -0

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
 ---
-title: Rewardeval Ui
-emoji: 💻
-colorFrom: indigo
-colorTo: indigo
 sdk: gradio
-sdk_version: 6.1.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Rewardfm Eval Ui
+emoji: 🔥
+colorFrom: gray
+colorTo: red
 sdk: gradio
+sdk_version: 6.0.0
 app_file: app.py
 pinned: false
+short_description: UI for rfm evals
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,508 @@

+#!/usr/bin/env python3
+"""
+Gradio app for RFM (Reward Foundation Model) inference visualization.
+Supports single video (progress/success) and dual video (preference/similarity) predictions.
+Uses eval server for inference instead of loading models locally.
+"""
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional, Tuple
+import gradio as gr
+import spaces  # Required for ZeroGPU
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
+import matplotlib.pyplot as plt
+import numpy as np
+import requests
+from PIL import Image
+import decord
+from rfm.data.dataset_types import Trajectory, ProgressSample, PreferenceSample
+from rfm.evals.eval_utils import build_payload, post_batch_npy
+# Module-level connection state: "server_url" is set by check_server_health() after a successful health check
+_server_state = {
+    "server_url": None,
+}
+def check_server_health(server_url: str) -> Tuple[str, Optional[dict]]:
+    """Check server health and get model info."""
+    if not server_url:
+        return "Please provide a server URL.", None
+    try:
+        url = server_url.rstrip("/") + "/health"
+        response = requests.get(url, timeout=5.0)
+        response.raise_for_status()
+        health_data = response.json()
+        # Also try to get GPU status for more info
+        try:
+            status_url = server_url.rstrip("/") + "/gpu_status"
+            status_response = requests.get(status_url, timeout=5.0)
+            if status_response.status_code == 200:
+                status_data = status_response.json()
+                health_data.update(status_data)
+        except:
+            pass
+        _server_state["server_url"] = server_url
+        return f"Server connected: {health_data.get('available_gpus', 0)}/{health_data.get('total_gpus', 0)} GPUs available", health_data
+    except requests.exceptions.RequestException as e:
+        return f"Error connecting to server: {str(e)}", None
+def extract_frames(video_path: str, max_frames: int = 16, fps: float = 1.0) -> np.ndarray:
+    """Extract frames from video file as numpy array (T, H, W, C)."""
+    if video_path is None:
+        return None
+    if isinstance(video_path, tuple):
+        video_path = video_path[0]
+    if not os.path.exists(video_path):
+        return None
+    try:
+        vr = decord.VideoReader(video_path, num_threads=1)
+        total_frames = len(vr)
+        if total_frames <= max_frames:
+            frame_indices = list(range(total_frames))
+        else:
+            frame_indices = [
+                int(i * total_frames / max_frames)
+                for i in range(max_frames)
+            ]
+        frames_array = vr.get_batch(frame_indices).asnumpy()  # Shape: (T, H, W, C)
+        del vr
+        return frames_array
+    except Exception as e:
+        print(f"Error extracting frames: {e}")
+        return None
+def process_single_video(
+    video_path: str,
+    task_text: str = "Complete the task",
+    server_url: str = "",
+    fps: float = 1.0,
+) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+    """Process single video for progress and success predictions using eval server."""
+    if not server_url:
+        return None, None, "Please provide a server URL and check connection first."
+    if not _server_state.get("server_url"):
+        return None, None, "Server not connected. Please check server connection first."
+    if video_path is None:
+        return None, None, "Please provide a video."
+    try:
+        frames_array = extract_frames(video_path, max_frames=16, fps=fps)
+        if frames_array is None or frames_array.size == 0:
+            return None, None, "Could not extract frames from video."
+        # Convert frames to (T, H, W, C) numpy array with uint8 values
+        if frames_array.dtype != np.uint8:
+            frames_array = np.clip(frames_array, 0, 255).astype(np.uint8)
+        num_frames = frames_array.shape[0]
+        frames_shape = frames_array.shape  # (T, H, W, C)
+        # Create target progress (placeholder - would be None in real use)
+        target_progress = np.linspace(0.0, 1.0, num=num_frames).tolist()
+        success_label = [1.0 if prog > 0.5 else 0.0 for prog in target_progress]
+        # Create Trajectory
+        trajectory = Trajectory(
+            task=task_text,
+            frames=frames_array,
+            frames_shape=frames_shape,
+            target_progress=target_progress,
+            success_label=success_label,
+            metadata={"source": "gradio_app"},
+        )
+        # Create ProgressSample
+        progress_sample = ProgressSample(
+            trajectory=trajectory,
+            data_gen_strategy="demo",
+        )
+        # Build payload and send to server
+        files, sample_data = build_payload([progress_sample])
+        response = post_batch_npy(server_url, files, sample_data, timeout_s=120.0)
+        # Process response
+        outputs_progress = response.get("outputs_progress", {})
+        progress_pred = outputs_progress.get("progress_pred", [])
+        # Extract progress predictions
+        if progress_pred and len(progress_pred) > 0:
+            progress_array = np.array(progress_pred[0])  # First sample
+        else:
+            progress_array = np.array([])
+        # Create plots
+        progress_plot = create_progress_plot(progress_array, num_frames)
+        success_plot = None  # Success predictions not always available from server
+        info_text = f"**Frames processed:** {num_frames}\n"
+        if len(progress_array) > 0:
+            info_text += f"**Final progress:** {progress_array[-1]:.3f}\n"
+        return progress_plot, success_plot, info_text
+    except Exception as e:
+        return None, None, f"Error processing video: {str(e)}"
+def process_dual_videos(
+    video_a_path: str,
+    video_b_path: str,
+    task_text: str = "Complete the task",
+    prediction_type: str = "preference",
+    server_url: str = "",
+    fps: float = 1.0,
+) -> Tuple[Optional[str], Optional[str]]:
+    """Process two videos for preference or similarity prediction using eval server."""
+    if not server_url:
+        return "Please provide a server URL and check connection first.", None
+    if not _server_state.get("server_url"):
+        return "Server not connected. Please check server connection first.", None
+    if video_a_path is None or video_b_path is None:
+        return "Please provide both videos.", None
+    try:
+        frames_array_a = extract_frames(video_a_path, max_frames=16, fps=fps)
+        frames_array_b = extract_frames(video_b_path, max_frames=16, fps=fps)
+        if frames_array_a is None or frames_array_a.size == 0:
+            return "Could not extract frames from video A.", None
+        if frames_array_b is None or frames_array_b.size == 0:
+            return "Could not extract frames from video B.", None
+        # Convert frames to uint8
+        if frames_array_a.dtype != np.uint8:
+            frames_array_a = np.clip(frames_array_a, 0, 255).astype(np.uint8)
+        if frames_array_b.dtype != np.uint8:
+            frames_array_b = np.clip(frames_array_b, 0, 255).astype(np.uint8)
+        num_frames_a = frames_array_a.shape[0]
+        num_frames_b = frames_array_b.shape[0]
+        frames_shape_a = frames_array_a.shape
+        frames_shape_b = frames_array_b.shape
+        # Create target progress for both trajectories
+        target_progress_a = np.linspace(0.0, 1.0, num=num_frames_a).tolist()
+        target_progress_b = np.linspace(0.0, 1.0, num=num_frames_b).tolist()
+        success_label_a = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_a]
+        success_label_b = [1.0 if prog > 0.5 else 0.0 for prog in target_progress_b]
+        # Create trajectories
+        trajectory_a = Trajectory(
+            task=task_text,
+            frames=frames_array_a,
+            frames_shape=frames_shape_a,
+            target_progress=target_progress_a,
+            success_label=success_label_a,
+            metadata={"source": "gradio_app", "trajectory": "A"},
+        )
+        trajectory_b = Trajectory(
+            task=task_text,
+            frames=frames_array_b,
+            frames_shape=frames_shape_b,
+            target_progress=target_progress_b,
+            success_label=success_label_b,
+            metadata={"source": "gradio_app", "trajectory": "B"},
+        )
+        if prediction_type == "preference":
+            # Create PreferenceSample (A = chosen, B = rejected)
+            preference_sample = PreferenceSample(
+                chosen_trajectory=trajectory_a,
+                rejected_trajectory=trajectory_b,
+                data_gen_strategy="demo",
+            )
+            # Build payload and send to server
+            files, sample_data = build_payload([preference_sample])
+            response = post_batch_npy(server_url, files, sample_data, timeout_s=120.0)
+            # Process response
+            outputs_preference = response.get("outputs_preference", {})
+            predictions = outputs_preference.get("predictions", [])
+            prediction_probs = outputs_preference.get("prediction_probs", [])
+            result_text = f"**Preference Prediction:**\n"
+            if prediction_probs and len(prediction_probs) > 0:
+                prob = prediction_probs[0]
+                result_text += f"- Probability (A preferred): {prob:.3f}\n"
+                result_text += f"- Interpretation: {'Video A is preferred' if prob > 0.5 else 'Video B is preferred'}\n"
+            else:
+                result_text += "Could not extract preference prediction from server response.\n"
+        else:  # similarity - not yet implemented in eval server response format
+            result_text = "Similarity prediction not yet supported in eval server response format."
+        # Create comparison plot
+        frames_a_list = [Image.fromarray(frame) for frame in frames_array_a]
+        frames_b_list = [Image.fromarray(frame) for frame in frames_array_b]
+        comparison_plot = create_comparison_plot(frames_a_list, frames_b_list, prediction_type)
+        return result_text, comparison_plot
+    except Exception as e:
+        return f"Error processing videos: {str(e)}", None
+def create_progress_plot(progress_pred: np.ndarray, num_frames: int) -> str:
+    """Create progress prediction plot."""
+    plt.rcParams['font.family'] = 'DejaVu Sans'
+    plt.rcParams['font.size'] = 16
+    fig, ax = plt.subplots(figsize=(10, 6))
+    if len(progress_pred) > 0:
+        frame_indices = np.arange(len(progress_pred))
+        ax.plot(frame_indices, progress_pred, 'b-', linewidth=3, marker='o', markersize=8, label='Progress Prediction')
+    else:
+        ax.text(0.5, 0.5, 'No progress prediction available',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=18)
+    ax.set_xlabel('Frame Index', fontsize=18, fontweight='bold')
+    ax.set_ylabel('Progress (0-1)', fontsize=18, fontweight='bold')
+    ax.set_title('Progress Prediction', fontsize=20, fontweight='bold')
+    ax.set_ylim([0, 1])
+    ax.legend(fontsize=14)
+    plt.tight_layout()
+    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
+    plt.savefig(tmp_file.name, dpi=150, bbox_inches='tight')
+    plt.close()
+    return tmp_file.name
+def create_success_plot(success_probs: np.ndarray, num_frames: int) -> str:
+    """Create success probability plot."""
+    plt.rcParams['font.family'] = 'DejaVu Sans'
+    plt.rcParams['font.size'] = 16
+    fig, ax = plt.subplots(figsize=(10, 6))
+    if len(success_probs) > 0:
+        frame_indices = np.arange(len(success_probs))
+        ax.plot(frame_indices, success_probs, 'g-', linewidth=3, marker='s', markersize=8, label='Success Probability')
+        ax.axhline(y=0.5, color='r', linestyle='--', linewidth=2, label='Decision Threshold (0.5)')
+    else:
+        ax.text(0.5, 0.5, 'No success prediction available',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=18)
+    ax.set_xlabel('Frame Index', fontsize=18, fontweight='bold')
+    ax.set_ylabel('Success Probability (0-1)', fontsize=18, fontweight='bold')
+    ax.set_title('Success Prediction', fontsize=20, fontweight='bold')
+    ax.set_ylim([0, 1])
+    ax.legend(fontsize=14)
+    plt.tight_layout()
+    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
+    plt.savefig(tmp_file.name, dpi=150, bbox_inches='tight')
+    plt.close()
+    return tmp_file.name
+def create_comparison_plot(frames_a: list, frames_b: list, prediction_type: str) -> str:
+    """Create side-by-side comparison plot of two videos."""
+    plt.rcParams['font.family'] = 'DejaVu Sans'
+    plt.rcParams['font.size'] = 16
+    fig, axes = plt.subplots(2, min(8, max(len(frames_a), len(frames_b))), figsize=(16, 4))
+    if len(axes.shape) == 1:
+        axes = axes.reshape(2, -1)
+    # Sample frames to display
+    num_display = min(8, max(len(frames_a), len(frames_b)))
+    indices_a = np.linspace(0, len(frames_a) - 1, num_display, dtype=int) if len(frames_a) > 1 else [0]
+    indices_b = np.linspace(0, len(frames_b) - 1, num_display, dtype=int) if len(frames_b) > 1 else [0]
+    # Display frames from video A (top row)
+    for idx, frame_idx in enumerate(indices_a):
+        if frame_idx < len(frames_a):
+            axes[0, idx].imshow(frames_a[frame_idx])
+            axes[0, idx].axis('off')
+            axes[0, idx].set_title(f'Frame {frame_idx}', fontsize=12)
+    # Display frames from video B (bottom row)
+    for idx, frame_idx in enumerate(indices_b):
+        if frame_idx < len(frames_b):
+            axes[1, idx].imshow(frames_b[frame_idx])
+            axes[1, idx].axis('off')
+            axes[1, idx].set_title(f'Frame {frame_idx}', fontsize=12)
+    # Add row labels
+    fig.text(0.02, 0.75, 'Video A', rotation=90, fontsize=18, fontweight='bold', va='center')
+    fig.text(0.02, 0.25, 'Video B', rotation=90, fontsize=18, fontweight='bold', va='center')
+    title = f"{prediction_type.capitalize()} Comparison: Video A vs Video B"
+    fig.suptitle(title, fontsize=20, fontweight='bold', y=0.98)
+    plt.tight_layout()
+    # Save to temporary file
+    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
+    plt.savefig(tmp_file.name, dpi=150, bbox_inches='tight')
+    plt.close()
+    return tmp_file.name
+# Create the Gradio Blocks app object; the layout is attached in the "with demo:" section
+try:
+    # Preferred path: build Blocks with the Soft theme (original note: Gradio 4.0+)
+    demo = gr.Blocks(title="RFM Inference Visualizer", theme=gr.themes.Soft())
+except TypeError:
+    # Blocks() rejected an argument (assumed to be `theme` — TODO confirm); build without a theme
+    demo = gr.Blocks(title="RFM Inference Visualizer")
+with demo:
+    gr.Markdown(
+        """
+        # RFM (Reward Foundation Model) Inference Visualizer
+        Visualize progress, success, preference, and similarity predictions from the Reward Foundation Model.
+        **Features:**
+        - **Single Video**: Get progress and success predictions
+        - **Dual Videos**: Compare two videos with preference or similarity predictions
+        **Note:** This app connects to an eval server. Please provide the server URL and check connection before use.
+        """
+    )
+    with gr.Tab("Server Setup"):
+        gr.Markdown("### Connect to Eval Server")
+        gr.Markdown("Enter the eval server URL and check connection.")
+        with gr.Row():
+            with gr.Column(scale=3):
+                server_url_input = gr.Textbox(
+                    label="Server URL",
+                    placeholder="http://40.119.56.66:8000",
+                    value="http://40.119.56.66:8000",  # NOTE(review): hard-coded default server address; consider making it configurable
+                    interactive=True,
+                )
+            with gr.Column(scale=1):
+                check_connection_btn = gr.Button("Check Connection", variant="primary", size="sm")
+        server_status = gr.Markdown("Enter server URL and click 'Check Connection'")
+        def on_check_connection(server_url: str):
+            """Run the server health check and return only its status message for display."""
+            status, health_data = check_server_health(server_url)  # health_data is not used here
+            return status
+        check_connection_btn.click(
+            fn=on_check_connection,
+            inputs=[server_url_input],
+            outputs=[server_status],
+        )
+    with gr.Tab("Progress Prediction"):
+        gr.Markdown("### Progress & Success Prediction")
+        with gr.Row():
+            with gr.Column():
+                single_video_input = gr.Video(label="Upload Video", height=300)
+                task_text_input = gr.Textbox(
+                    label="Task Description",
+                    placeholder="Describe the task (e.g., 'Pick up the red block')",
+                    value="Complete the task",
+                )
+                fps_input_single = gr.Slider(  # NOTE(review): extract_frames() accepts fps but never reads it, so this slider does not affect sampling
+                    label="FPS (Frames Per Second)",
+                    minimum=0.1,
+                    maximum=10.0,
+                    value=1.0,
+                    step=0.1,
+                    info="Frames per second to extract from video (higher = more frames)",
+                )
+                analyze_single_btn = gr.Button("Analyze Video", variant="primary")
+            with gr.Column():
+                progress_plot = gr.Image(label="Progress Prediction", height=400)
+                success_plot = gr.Image(label="Success Prediction", height=400)  # process_single_video always returns None for this output
+                info_output = gr.Markdown("")
+        analyze_single_btn.click(
+            fn=process_single_video,
+            inputs=[single_video_input, task_text_input, server_url_input, fps_input_single],  # order matches process_single_video's parameters
+            outputs=[progress_plot, success_plot, info_output],
+        )
+    with gr.Tab("Preference/Similarity Analysis"):
+        gr.Markdown("### Preference & Similarity Prediction")
+        with gr.Row():
+            with gr.Column():
+                video_a_input = gr.Video(label="Video A", height=250)
+                video_b_input = gr.Video(label="Video B", height=250)
+                task_text_dual = gr.Textbox(
+                    label="Task Description",
+                    placeholder="Describe the task",
+                    value="Complete the task",
+                )
+                prediction_type = gr.Radio(
+                    choices=["preference", "similarity"],  # "similarity" only yields a "not yet supported" message from process_dual_videos
+                    value="preference",
+                    label="Prediction Type",
+                )
+                fps_input_dual = gr.Slider(  # NOTE(review): extract_frames() accepts fps but never reads it, so this slider does not affect sampling
+                    label="FPS (Frames Per Second)",
+                    minimum=0.1,
+                    maximum=10.0,
+                    value=1.0,
+                    step=0.1,
+                    info="Frames per second to extract from videos (higher = more frames)",
+                )
+                analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
+            with gr.Column():
+                result_text = gr.Markdown("")
+                comparison_plot = gr.Image(label="Video Comparison", height=500)
+        analyze_dual_btn.click(
+            fn=process_dual_videos,
+            inputs=[video_a_input, video_b_input, task_text_dual, prediction_type, server_url_input, fps_input_dual],  # order matches process_dual_videos's parameters
+            outputs=[result_text, comparison_plot],
+        )
+def main():
+    """Launch the Gradio app."""
+    import sys
+    # Check if reload mode is requested
+    watch_files = os.getenv("GRADIO_WATCH", "0") == "1" or "--reload" in sys.argv
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True,  # Show full error messages
+    )
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Requirements for RFM Eval UI Gradio App
+# Core dependencies
+matplotlib>=3.5.0
+numpy>=1.21.0
+torch>=2.0.0
+PyYAML>=6.0
+Pillow>=9.0.0
+# HuggingFace
+huggingface-hub>=0.16.0
+transformers>=4.30.0
+# Sentence transformers (for ReWiND models)
+sentence-transformers>=2.2.0
+# Qwen VL utilities
+qwen-vl-utils
+# Video processing
+opencv-python-headless>=4.5.0
+decord>=0.6.0  # For video frame extraction (same as preprocess_datasets.py)
+# Development tools (optional, for auto-reload)
+watchfiles  # For file watching during development
+# RFM package (installed from git repository)
+# For local development, you can also install with: pip install -e ../ (from parent directory)
+git+https://github.com/aliang8/reward_fm.git@93b1ad4b5a530fb32c234bf926b659105e676d00
+# Pin the Gradio version explicitly (note: README.md declares sdk_version 6.0.0, which does not match this pin)
+gradio==4.44.0