gabriel-melki committed
Commit 2ee9679 · 1 Parent(s): 77c169c

Reorganize project structure: move files to src/ directory and remove old files
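The new entry-point wiring is not part of this diff, so the sketch below is inferred only from the paths touched here; the import lines are hypothetical and are meant purely to show where the moved modules now live.

```python
# Layout implied by the paths in this commit (inferred, not shown in full here):
#   src/eval/submission.py   <- was submission.py
#   src/tools/...            <- was tools/ (audio, file, image, wikipedia, youtube tools)
#   src/ui/builder.py
#
# Hypothetical imports an entry point might use after the move (assumed module paths,
# not confirmed by this diff):
from src.eval.submission import build_gradio_interface
from src.tools.file_tools import read_file_as_text
```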

.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,5 +1,6 @@
  # Python
  __pycache__/
+ .gradio/
  *.py[cod]
  *$py.class
  *.so
agent.py DELETED
@@ -1,51 +0,0 @@
- import os
- import glob
- from smolagents import CodeAgent
- from prompt import get_prompt
-
- class QuestionAnsweringAgent(CodeAgent):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-
-     def get_current_files(self):
-         """Get a set of all files in the current working directory"""
-         try:
-             # Get all files in current directory (including hidden files)
-             all_files = set()
-             for pattern in ['*', '.*']:
-                 all_files.update(glob.glob(pattern))
-             # Filter to only include actual files (not directories)
-             files = {f for f in all_files if os.path.isfile(f)}
-             return files
-         except Exception as e:
-             print(f"Error getting current files: {e}")
-             return set()
-
-     def cleanup_created_files(self, files_before):
-         """Remove files that were created during execution"""
-         try:
-             files_after = self.get_current_files()
-             newly_created_files = files_after - files_before
-
-             for file_path in newly_created_files:
-                 try:
-                     if os.path.exists(file_path):
-                         os.remove(file_path)
-                         print(f"Cleaned up file: {file_path}")
-                 except Exception as e:
-                     print(f"Error cleaning up file {file_path}: {e}")
-
-         except Exception as e:
-             print(f"Error during cleanup: {e}")
-
-     def __call__(self, question_text, file_name) -> str:
-         # Take snapshot of files before execution
-         files_before = self.get_current_files()
-
-         try:
-             enhanced_question = get_prompt(question_text, file_name)
-             response = self.run(enhanced_question, reset=True)
-             return response
-         finally:
-             # Always clean up files, even if there's an exception
-             self.cleanup_created_files(files_before)
app.py DELETED
@@ -1,43 +0,0 @@
- from smolagents import (
-     InferenceClientModel,
-     FinalAnswerTool
- )
-
- from tools.wikipedia_tools import wikipedia_summary, read_wikipedia_page
- from tools.file_tools import read_file_as_text
- from tools.youtube_tools import download_youtube_url_images, download_youtube_url_audio
- from tools.image_processing_tools import ask_question_about_image
-
- from agent import QuestionAnsweringAgent
-
- from submission import build_gradio_interface
-
- model = InferenceClientModel(
-     provider="auto",
-     model_id="Qwen/Qwen3-Coder-30B-A3B-Instruct",
-     temperature=0,
-     top_p=1.0,
-     seed=42
- )
-
- agent_tools = [
-     FinalAnswerTool(),
-     wikipedia_summary, read_wikipedia_page,
-     read_file_as_text,
-     download_youtube_url_images, download_youtube_url_audio,
-     ask_question_about_image
- ]
-
- agent = QuestionAnsweringAgent(
-     name="question_answering_expert",
-     model=model,
-     tools=agent_tools,
-     add_base_tools=True,
-     planning_interval=None,
-     additional_authorized_imports=["os", "bs4", "PIL", "transformers", "torch", "requests", "glob"],
-     max_steps=10,
-     verbosity_level=2, # For better debugging
- )
-
- if __name__ == "__main__":
-     build_gradio_interface(agent)
prompt.py DELETED
@@ -1,46 +0,0 @@
- def get_prompt(question_text, file_name):
-     PROMPT = f"""
-     You are a highly precise question-answering agent.
-     When given a question:
-     - If necessary, start by performing a wikipedia search using the `wikipedia_summary` tool to find possible sources of information.
-     For the `query` parameter of the `wikipedia_summary` tool, you MUST think about the entity being searched for and ALWAYS pass exactly the entity name (person/place/event) with no qualifiers.
-     NEVER include words like: career, albums, list, biography, years, numbers, prepositions, or date ranges.
-     Examples:
-     - "Lionel Messi career" → use: wikipedia_summary("Lionel Messi")
-     - "Battle of Hastings timeline" → use: wikipedia_summary("Battle of Hastings")
-     - "Population of Paris in 2010" → use: wikipedia_summary("Paris")
-     - If necessary, visit the wikipedia page listed in the wikipedia summary tool to read the full content. You will find the page url in the output of the wikipedia summary tool at the end after the **Read more:** section. Use the `read_wikipedia_page` tool to visit the page.
-     - When using the `read_wikipedia_page` tool, you may find tables in the page. To analyze the tables, please use a code snippet to read the tables into a pandas dataframe and analyze the data.
-     - If necessary, download a youtube video using the `download_youtube_url_audio` or `download_youtube_url_images` tool to find possible sources of information. For the parameter `num_images`, use a large number if you need to have comprehensive information about the video.
-     - If necessary, analyze the audio or images downloaded from youtube using the `ask_question_about_image` tool to find possible sources of information.
-     - If necessary, perform a web search using the `web_search` tool to find possible sources of information.
-     - If necessary, please analyze the images downloaded using the `ask_question_about_image` tool to find possible sources of information.
-     - If the web search only returns titles and short snippets, you MUST visit the actual webpage using the `visit_webpage` tool to read the full content before answering.
-     - If the task requires reading, listening, or analyzing a file, you must use the file specified after the question, NOT the file name mentioned casually inside the question text.
-     - Comma separated lists MUST contain a single space after each comma.
-     - If you are asked for a number, don't use comma to write your number, nor use units such as $ or percent sign unless specified otherwise.
-     - If you are asked for a string, don't use articles, nor abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-     - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-     - Only answer after you have gathered enough information by reading the actual page contents.
-     - Only answer after you have printed out the final answer first.
-     - Once you have obtained the final answer, you MUST make a code call as follows:
-     <code>
-     final_answer("your_answer")
-     </code>
-     to submit the final answer.
-     - Do not retry or execute anything else after calling `final_answer`. STOP IMMEDIATELY.
-     - Calling `final_answer` terminates the task completely. No further steps are needed.
-     - The function `final_answer` must wrap the exact printed value.
-     - Provide ONLY the precise answer requested.
-     - Do not include explanations, steps, reasoning, or additional text when calling `final_answer`.
-     - Be direct and specific. The GAIA benchmark requires exactly matching answers.
-
-     Based on the above guidelines, answer the following question:
-     -- beginning of question --
-     {question_text}
-     -- end of question --
-
-     IMPORTANT: If the question mentions the need to use a file, the file name is provided below.
-     file_name: {file_name}"""
-
-     return PROMPT
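The answer-formatting rules embedded in this deleted prompt (numbers without thousands separators or units, strings without articles, a single space after each comma in lists) can also be read as a small post-processing step. A hedged sketch of such a normalizer is below; this helper is not part of the repository, it is purely illustrative.

```python
def normalize_answer(value) -> str:
    """Illustrative only: apply the GAIA-style formatting rules from the deleted prompt."""
    if isinstance(value, (list, tuple)):
        # Comma-separated lists: exactly one space after each comma.
        return ", ".join(normalize_answer(v) for v in value)
    if isinstance(value, bool):
        return str(value)
    if isinstance(value, int):
        # Numbers: no comma separators, no units.
        return str(value)
    if isinstance(value, float):
        return str(int(value)) if value.is_integer() else str(value)
    # Strings: drop a leading article (the prompt also forbids abbreviations).
    text = str(value).strip()
    for article in ("the ", "a ", "an "):
        if text.lower().startswith(article):
            text = text[len(article):]
            break
    return text

print(normalize_answer(1234567))             # -> 1234567
print(normalize_answer(["Paris", 42]))       # -> Paris, 42
print(normalize_answer("The Eiffel Tower"))  # -> Eiffel Tower
```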
src/eval/submission.py CHANGED
@@ -1,4 +1,3 @@
- import os
  import gradio as gr
  import requests
  import pandas as pd
@@ -6,7 +5,7 @@ import numpy as np


  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- SELECTED_QUESTIONS = [3]
+ SELECTED_QUESTIONS = None #[3]

  def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
      """
@@ -24,6 +23,9 @@ def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
      questions_url = f"{api_url}/questions"
      submit_url = f"{api_url}/submit"

+     # 0. Define Agent Code
+     agent_code = f"https://huggingface.co/spaces/{profile.username}/tree/main"
+
      # 1. Fetch Questions
      print(f"Fetching questions from: {questions_url}")
      try:
@@ -50,7 +52,13 @@
      answers_payload = []
      is_correct_answers = []
      print(f"Running agent on {len(questions_data)} questions...")
-     for item in np.array(questions_data).take(SELECTED_QUESTIONS):
+
+     selected_questions_data = (
+         np.array(questions_data).take(SELECTED_QUESTIONS)
+         if SELECTED_QUESTIONS
+         else questions_data
+     )
+     for item in selected_questions_data:
          task_id = item.get("task_id")
          question_text = item.get("question")
          file_name = item.get("file_name")
@@ -66,7 +74,7 @@
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
          individual_submission_data = {
              "username": username.strip(),
-             "agent_code": agent_code,
+             "agent_code": agent_code.strip(),
              "answers": [{"task_id": task_id, "submitted_answer": submitted_answer}]
          }

@@ -79,7 +87,7 @@
          return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

      # 3. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     submission_data = {"username": username.strip(), "agent_code": agent_code.strip(), "answers": answers_payload}
      status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
      print(status_update)
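The behavioral change above is the SELECTED_QUESTIONS toggle: None now means "run every fetched question", while a list of indices still restricts the run to a subset. A minimal sketch of that selection logic in isolation, using dummy question dicts rather than the real /questions payload:

```python
import numpy as np

# Dummy stand-ins for the items returned by the scoring API.
questions_data = [{"task_id": str(i), "question": f"Q{i}"} for i in range(5)]

SELECTED_QUESTIONS = None  # None -> run all questions; e.g. [3] -> run only index 3

# Mirrors the guard introduced in this commit: only index into the array
# when an explicit selection is given.
selected_questions_data = (
    np.array(questions_data).take(SELECTED_QUESTIONS)
    if SELECTED_QUESTIONS
    else questions_data
)

for item in selected_questions_data:
    print(item["task_id"], item["question"])  # with None, prints all five dummy questions
```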
src/tools/audio_processing_tools.py CHANGED
@@ -1,6 +1,6 @@
  from whisper import load_model
  from smolagents.tools import tool
-
+ # TODO: Add a tool to ask a question about an audio
  @tool
  def ask_question_about_audio(question: str, path_to_audio: str) -> str:
      """
src/ui/builder.py CHANGED
@@ -4,7 +4,6 @@ import re
  import shutil
  from typing import Optional
  import gradio as gr
- from functools import partial

  from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
  from smolagents.agents import ActionStep, MultiStepAgent
submission.py DELETED
@@ -1,198 +0,0 @@
- import os
- import gradio as gr
- import requests
- import pandas as pd
- import numpy as np
-
-
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
- SELECTED_QUESTIONS = [3]
- def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
-     """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
-     """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-
-     if profile:
-         username= f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
-
-     # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = agent
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
-
-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
-
-     # 3. Run your Agent
-     results_log = []
-     answers_payload = []
-     is_correct_answers = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in np.array(questions_data).take(SELECTED_QUESTIONS):
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         file_name = item.get("file_name")
-         if not task_id or question_text is None:
-             print(f"Skipping item with missing task_id or question: {item}")
-             continue
-         try:
-             submitted_answer = agent(question_text, file_name)
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-         individual_submission_data = {
-             "username": username.strip(),
-             "agent_code": agent_code,
-             "answers": [{"task_id": task_id, "submitted_answer": submitted_answer}]
-         }
-
-         individual_response = requests.post(submit_url, json=individual_submission_data, timeout=60)
-         individual_response.raise_for_status()
-         individual_result_data = individual_response.json()
-         is_correct_answers.append(True if individual_result_data.get("correct_count", 0) == 1 else False)
-     if not answers_payload:
-         print("Agent did not produce any answers to submit.")
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-     print(status_update)
-
-     # 5. Submit
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-     try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
-         response.raise_for_status()
-         result_data = response.json()
-         final_status = (
-             f"Submission Successful!\n"
-             f"User: {result_data.get('username')}\n"
-             f"Overall Score: {result_data.get('score', 'N/A')}% "
-             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-             f"Message: {result_data.get('message', 'No message received.')}"
-         )
-         print("Submission successful.")
-         results_df = pd.DataFrame(results_log)
-         results_df["Is Correct"] = is_correct_answers
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         error_detail = f"Server responded with status {e.response.status_code}."
-         try:
-             error_json = e.response.json()
-             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
-             error_detail += f" Response: {e.response.text[:500]}"
-         status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         results_df["Is Correct"] = is_correct_answers
-         return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except Exception as e:
-         status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-
-
- def build_gradio_interface(agent):
-     # --- Build Gradio Interface using Blocks ---
-     with gr.Blocks() as demo:
-         gr.Markdown("# Basic Agent Evaluation Runner")
-         gr.Markdown(
-             """
-             **Instructions:**
-             1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-             2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-             3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-             ---
-             **Disclaimers:**
-             Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-             This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-             """
-         )
-
-         gr.LoginButton()
-
-         run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-         status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-         # Removed max_rows=10 from DataFrame constructor
-         results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-         def run_with_login_state(profile: gr.OAuthProfile):
-             return run_and_submit_all(agent, profile)
-
-         run_button.click(
-             fn=run_with_login_state,
-             outputs=[status_output, results_table]
-         )
-
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
-     space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-
-     if space_host_startup:
-         print(f"✅ SPACE_HOST found: {space_host_startup}")
-         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-     else:
-         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id_startup: # Print repo URLs if SPACE_ID is found
-         print(f"✅ SPACE_ID found: {space_id_startup}")
-         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-     else:
-         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-
-     print("-"*(60 + len(" App Starting ")) + "\n")
-
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
tools/audio_processing_tools.py DELETED
@@ -1,11 +0,0 @@
- from whisper import load_model
- from smolagents.tools import tool
-
- @tool
- def ask_question_about_audio(question: str, path_to_audio: str) -> str:
-     """
-     Ask a question about an audio and return the answer.
-     """
-     model = load_model("base")
-     res = model.transcribe(path_to_audio)
-     return res["text"]
tools/file_tools.py DELETED
@@ -1,60 +0,0 @@
-
- import json
- import csv
- import openpyxl
- import whisper
- import os
- import requests
- from smolagents.tools import tool
-
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
- def _download_file(file_name: str) -> None:
-     if not os.path.exists(file_name):
-         url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[-2]}"
-         r = requests.get(url)
-         with open(file_name, "wb") as f:
-             f.write(r.content)
-
- @tool
- def read_file_as_text(file_name: str) -> str:
-     """
-     Opens a file and returns its content as readable text.
-     Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (for mp3, it transcribes speech to text).
-     Args:
-         file_name (str): The path or name of the file.
-     Returns:
-         str: The content of the file as text, or transcribed speech if 'mp3'.
-     """
-     _download_file(file_name)
-     file_type = file_name.split(".")[-1]
-     try:
-         if file_type in {"txt", "py"}:
-             with open(file_name, "r", encoding="utf-8") as f:
-                 return f.read()
-         elif file_type == "json":
-             with open(file_name, "r", encoding="utf-8") as f:
-                 data = json.load(f)
-                 return json.dumps(data, indent=2)
-         elif file_type == "csv":
-             with open(file_name, "r", encoding="utf-8") as f:
-                 reader = csv.reader(f)
-                 rows = list(reader)
-                 return "\n".join([", ".join(row) for row in rows])
-         elif file_type == "xlsx":
-             wb = openpyxl.load_workbook(file_name, data_only=True)
-             sheet = wb.active
-             content = []
-             for row in sheet.iter_rows(values_only=True):
-                 content.append(", ".join(str(cell) if cell is not None else "" for cell in row))
-             return "\n".join(content)
-         elif file_type == "mp3":
-             w = whisper.load_model("base")
-             res = w.transcribe(file_name)
-             return res["text"]
-         else:
-             return f"File type '{file_type}' not supported."
-     except FileNotFoundError:
-         return f"File '{file_name}' not found."
-     except Exception as e:
-         return f"Error opening file '{file_name}': {str(e)}"
tools/image_processing_tools.py DELETED
@@ -1,59 +0,0 @@
- from PIL import Image
- from transformers import BlipProcessor, BlipForQuestionAnswering
- from smolagents.tools import tool
- import torch
- import requests
- import os
-
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
- def _download_file(file_name: str) -> None:
-     """Download file if it doesn't exist locally"""
-     try:
-         # Try to open the file to check if it exists
-         with open(file_name, 'rb') as f:
-             pass # File exists, do nothing
-     except FileNotFoundError:
-         # File doesn't exist, download it
-         url = f"{DEFAULT_API_URL}/files/{file_name.split('.')[-2]}"
-         r = requests.get(url)
-         with open(file_name, "wb") as f:
-             f.write(r.content)
-
- @tool
- def ask_question_about_image(question: str, path_to_image: str) -> str:
-     """
-     Ask a question about an image and return the answer.
-     Args:
-         question: the question to ask about the image.
-         path_to_image: The path to the image to ask the question about.
-     Returns:
-         A string with the answer to the question.
-     """
-     # Download the file if it doesn't exist
-     _download_file(path_to_image)
-
-     # Check if CUDA is available and use GPU if possible, otherwise use CPU
-     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-     # Load the processor and model (using BLIP for more stable VQA)
-     processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-     model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
-     model = model.to(device)
-
-     # Load and process the image
-     image = Image.open(path_to_image).convert('RGB')
-
-     # Process the inputs
-     inputs = processor(image, question, return_tensors="pt")
-     inputs = {k: v.to(device) for k, v in inputs.items()}
-
-     # Generate the answer
-     with torch.no_grad():
-         outputs = model.generate(**inputs, max_length=50, num_beams=5)
-
-     # Decode and return the answer
-     answer = processor.decode(outputs[0], skip_special_tokens=True)
-
-     return answer
-
tools/wikipedia_tools.py DELETED
@@ -1,323 +0,0 @@
- import os
- import requests
- from io import StringIO
- import pandas as pd
- from bs4 import BeautifulSoup
- from smolagents.tools import tool
- import wikipediaapi
-
-
- def fetch_wikipedia_page(url: str) -> str:
-     """Fetch raw HTML of a Wikipedia page."""
-     headers = {
-         "User-Agent": "GAIA_benchmark_agent/1.0 (contact: [email protected])",
-         "Accept-Language": "en-US,en;q=0.9",
-     }
-     resp = requests.get(url, headers=headers, timeout=50)
-     resp.raise_for_status()
-     return resp.text
-
-
- def _normalize_title(value: str) -> str:
-     """Lowercase, collapse whitespace for robust title comparisons."""
-     return " ".join(value.lower().split()) if isinstance(value, str) else ""
-
-
- def _remove_sections_by_titles(soup: BeautifulSoup, titles: list[str]) -> None:
-     """Remove sections (header + content until next header of same/higher level) whose
-     header text matches any of `titles` (case-insensitive). Mutates `soup` in-place.
-     """
-     if not titles:
-         return
-     excluded = {_normalize_title(t) for t in titles}
-     header_tags = ["h1", "h2", "h3", "h4", "h5", "h6"]
-
-     # Find all headers that match excluded titles
-     headers_to_remove = []
-     for header in soup.find_all(header_tags):
-         title_text = _normalize_title(header.get_text(" ", strip=True))
-         if title_text in excluded:
-             headers_to_remove.append(header)
-
-     # Remove each matching section (header + content)
-     for header in headers_to_remove:
-         # Skip if header was already removed as part of another section
-         if not header.parent:
-             continue
-
-         level = int(header.name[1])
-
-         # Determine the container to remove - could be the header itself or its parent wrapper
-         header_container = header
-         # If header is wrapped in a heading container (like div.mw-heading), use that as the starting point
-         if (header.parent and
-                 header.parent.name == 'div' and
-                 header.parent.get('class') and
-                 any('heading' in cls.lower() for cls in header.parent.get('class', []))):
-             header_container = header.parent
-
-         nodes_to_remove = [header_container]
-
-         # Collect all content after the header container until next header of same/higher level
-         current = header_container
-         while current.next_sibling:
-             current = current.next_sibling
-             sib_name = getattr(current, "name", None)
-
-             # If we hit another header (directly or within a heading container), check its level
-             next_header = None
-             if sib_name in header_tags:
-                 next_header = current
-             elif (sib_name == 'div' and
-                     current.get('class') and
-                     any('heading' in cls.lower() for cls in current.get('class', []))):
-                 # This is a heading container, find the header inside it
-                 for child in current.find_all(header_tags):
-                     next_header = child
-                     break
-
-             if next_header:
-                 next_level = int(next_header.name[1])
-                 if next_level <= level:
-                     # This is a header of same or higher level - stop here
-                     break
-
-             # Add this node to removal list
-             nodes_to_remove.append(current)
-
-         # Remove all collected nodes
-         for node in nodes_to_remove:
-             try:
-                 node.decompose()
-             except Exception:
-                 try:
-                     node.extract()
-                 except Exception:
-                     pass
-
-
- def _cleanup_non_content(root: BeautifulSoup) -> None:
-     """Remove Wikipedia UI/maintenance blocks from the main content area."""
-     selectors = [
-         "div#toc",
-         "div.toc",
-         "div.hatnote",
-         "div.shortdescription",
-         "div.reflist",
-         "ol.references",
-         "div.navbox",
-         "table.navbox",
-         "table.vertical-navbox",
-         "table.sidebar",
-         "table.ambox",
-         "table.metadata",
-         "div#catlinks",
-         "div.mw-authority-control",
-         "div.printfooter",
-         "div.portal",
-         "table.infobox", # avoid dumping infobox into text
-     ]
-     for sel in selectors:
-         for el in root.select(sel):
-             try:
-                 el.decompose()
-             except Exception:
-                 try:
-                     el.extract()
-                 except Exception:
-                     pass
-
-
- def extract_text(soup: BeautifulSoup) -> str:
-     """Extract main text (paragraphs + headers + lists) from article body only, preserving document order.
-     Excludes content that's inside tables and excludes headers that are also used as
-     table names (either as <caption> or the nearest previous header) to avoid duplication
-     with extract_tables."""
-     content_root = soup.select_one("div.mw-parser-output") or soup
-
-     for elem in content_root(["script", "style", "sup", "aside", "nav"]):
-         elem.decompose()
-     _cleanup_non_content(content_root)
-
-     # Identify table names (from captions or nearest previous headers) to avoid duplicating them in text
-     table_names_normalized = set()
-     for table in content_root.find_all("table"):
-         # Skip non-content tables (same logic as extract_tables)
-         classes = table.get("class", [])
-         if isinstance(classes, list) and any(
-             c.lower() in {"navbox", "vertical-navbox", "sidebar", "mbox", "metadata"}
-             for c in classes
-         ):
-             continue
-
-         name_text = None
-         caption_el = table.find("caption")
-         if caption_el:
-             caption_text = caption_el.get_text(" ", strip=True)
-             if caption_text:
-                 name_text = caption_text
-             else:
-                 # Empty caption: treat as no caption and fallback to previous header
-                 prev_header = table.find_previous(["h1", "h2", "h3", "h4", "h5", "h6"])
-                 if prev_header:
-                     name_text = prev_header.get_text(" ", strip=True)
-         else:
-             prev_header = table.find_previous(["h1", "h2", "h3", "h4", "h5", "h6"])
-             if prev_header:
-                 name_text = prev_header.get_text(" ", strip=True)
-
-         if not name_text and isinstance(classes, list) and any(c.lower() == "infobox" for c in classes):
-             name_text = "Infobox"
-
-         if name_text:
-             table_names_normalized.add(_normalize_title(name_text))
-
-     # Find all text elements in document order, but exclude duplicates
-     text_elements = []
-     for element in content_root.find_all(["h1", "h2", "h3", "h4", "h5", "h6", "p", "li"]):
-         # Skip elements that are inside a table (to avoid duplication with extract_tables)
-         if element.find_parent("table"):
-             continue
-
-         # Skip headers that match any table name (to avoid duplication with extract_tables)
-         if element.name in {"h1", "h2", "h3", "h4", "h5", "h6"}:
-             header_text_norm = _normalize_title(element.get_text(" ", strip=True))
-             if header_text_norm in table_names_normalized:
-                 continue
-
-         # Skip list items that are exactly a table name (common for inline mini-TOCs within sections)
-         if element.name == "li":
-             li_text_norm = _normalize_title(element.get_text(" ", strip=True))
-             if li_text_norm in table_names_normalized:
-                 continue
-
-         text = element.get_text(" ", strip=True)
-         if text: # Only include non-empty text
-             text_elements.append(text)
-
-     return "\n\n".join(text_elements)
-
-
- def extract_tables(soup: BeautifulSoup) -> list[dict]:
-     """Extract all HTML tables as dicts: {name, df}."""
-     content_root = soup.select_one("div.mw-parser-output") or soup
-
-     tables = []
-     for table_idx, table in enumerate(content_root.find_all("table")):
-         # Skip non-content tables (navboxes, sidebars, etc.)
-         classes = table.get("class", [])
-         if isinstance(classes, list) and any(
-             c.lower() in {"navbox", "vertical-navbox", "sidebar", "mbox", "metadata"}
-             for c in classes
-         ):
-             continue
-
-         # Prefer explicit <caption>
-         caption_el = table.find("caption")
-         name = caption_el.get_text(" ", strip=True) if caption_el else None
-
-         # Fallback: nearest previous section header
-         if not name:
-             prev_header = table.find_previous(["h1", "h2", "h3", "h4", "h5", "h6"])
-             if prev_header:
-                 name = prev_header.get_text(" ", strip=True)
-
-         # Fallback: class-based hints (e.g., infobox)
-         if not name:
-             if isinstance(classes, list) and any(c.lower() == "infobox" for c in classes):
-                 name = "Infobox"
-
-         # Final fallback
-         if not name:
-             name = f"Table {table_idx + 1}"
-
-         try:
-             dfs = pd.read_html(StringIO(str(table)))
-             if len(dfs) == 1:
-                 tables.append({"name": name, "df": dfs[0]})
-             else:
-                 for part_idx, df in enumerate(dfs, start=1):
-                     tables.append({"name": f"{name} (part {part_idx})", "df": df})
-         except ValueError:
-             continue
-     return tables
-
-
- def format_for_llm(text: str, tables: list[dict], sections_to_exclude: list[str]) -> str:
-     """Combine text + tables into a single string for LLM input."""
-     output = []
-     output.append("=== ARTICLE TEXT ===\n")
-     output.append(text)
-
-     excluded = {_normalize_title(s) for s in sections_to_exclude}
-     filtered_tables = [
-         t for t in tables if _normalize_title(t.get("name", "")) not in excluded
-     ]
-
-     for i, t in enumerate(filtered_tables, start=1):
-         tname = t.get("name") or f"Table {i}"
-         df = t["df"]
-         output.append(f"\n\n=== TABLE {i}: {tname} ===\n")
-         output.append(df.to_markdown(index=False))
-
-     return "\n".join(output)
-
-
- @tool
- def wikipedia_summary(entity: str) -> dict:
-     """
-     Search Wikipedia for a query and return a dictionary with the summary of the page and the url of the page.
-     Args:
-         entity: the entity being searched for and ALWAYS pass exactly the entity name (person/place/event/concept) with no qualifiers.
-     Returns:
-         A dictionary with the summary of the page and the url of the page.
-     """
-     import wikipedia
-     summary_tool = wikipediaapi.Wikipedia(
-         user_agent=f"My research agent ({os.getenv('USER_EMAIL')})",
-     )
-     page = summary_tool.page(entity)
-     if not page.exists():
-         raise ValueError(f"No Wikipedia page found for '{entity}'. Try a different query.")
-     sections = [section._title for section in page.sections]
-     return {
-         "summary": f'''The sections inside the page are {", ".join(sections)} and the summary of the page is {page.summary}
-         ''',
-         "url": wikipedia.page(pageid=page.pageid).url
-     }
-
-
- @tool
- def read_wikipedia_page(
-     url: str,
-     sections_to_exclude: list[str] = [
-         "External links",
-         "References",
-         "Further reading",
-         "See also",
-         "Notes",
-     ]) -> str:
-     """
-     Read a Wikipedia page and return a string with the text of the page.
-     Args:
-         url: The URL of the Wikipedia page to read.
-         sections_to_exclude: A list of sections to exclude from the page.
-     Returns:
-         A string with the text of the page.
-     """
-     if "https://en.wikipedia.org/wiki/" not in url:
-         raise ValueError("URL is required")
-     # Fetch the page
-     html = fetch_wikipedia_page(url)
-     # Parse the page
-     soup = BeautifulSoup(html, "html.parser")
-     # Remove unwanted sections
-     _remove_sections_by_titles(soup, sections_to_exclude)
-
-     # Extract after pruning unwanted sections
-     text = extract_text(soup)
-     tables = extract_tables(soup)
-
-     # Combine
-     llm_ready = format_for_llm(text, tables, sections_to_exclude)
-     return llm_ready
tools/youtube_tools.py DELETED
@@ -1,112 +0,0 @@
-
- import os
- import subprocess
- from yt_dlp import YoutubeDL
-
- from smolagents.tools import tool
-
- # Use FFmpeg to extract frames from the video
- def extract_frames_with_ffmpeg(video_path: str, num_frames: int) -> [str]:
-     """Extract frames from video using FFmpeg"""
-     if not os.path.exists(video_path):
-         raise FileNotFoundError(f"Video file not found: {video_path}")
-
-     # Get video duration using ffprobe
-     duration_cmd = [
-         'ffprobe', '-v', 'quiet', '-print_format', 'json',
-         '-show_format', video_path
-     ]
-
-     try:
-         result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
-         import json
-         metadata = json.loads(result.stdout)
-         duration = float(metadata['format']['duration'])
-
-         # Calculate time intervals for frame extraction
-         time_intervals = [duration * i / (num_frames + 1) for i in range(1, num_frames + 1)]
-
-         extracted_files = []
-         for i, time_pos in enumerate(time_intervals):
-             output_filename = f"{os.path.splitext(os.path.basename(video_path))[0]}_frame_{i+1:03d}.jpg"
-
-             # Extract frame at specific time
-             ffmpeg_cmd = [
-                 'ffmpeg', '-i', video_path, '-ss', str(time_pos),
-                 '-vframes', '1', '-q:v', '2', '-y', output_filename
-             ]
-
-             subprocess.run(ffmpeg_cmd, capture_output=True, check=True)
-             extracted_files.append(output_filename)
-
-         return extracted_files
-
-     except subprocess.CalledProcessError as e:
-         print(f"Error running FFmpeg: {e}")
-         return []
-     except Exception as e:
-         print(f"Error: {e}")
-         return []
-
-
- @tool
- def download_youtube_url_audio(url: str) -> str:
-     """
-     Download a YouTube video using the url, extract the audio and return the path to the downloaded file.
-
-     Args:
-         url (str): The URL of the YouTube video to download.
-
-     Returns:
-         str: The path to the downloaded audio file.
-     """
-     ydl_audio_opts = {
-         'format': 'bestaudio/best',
-         'postprocessors': [{
-             'key': 'FFmpegExtractAudio',
-             'preferredcodec': 'mp3',
-             'preferredquality': '192',
-         }],
-         'quiet': True,
-         'no_verbose_header': True,
-         'no_warnings': True,
-     }
-
-     with YoutubeDL(ydl_audio_opts) as ydl:
-         file_path = ydl.extract_info(url)
-
-     return file_path['requested_downloads'][0]['filepath']
-
-
- @tool
- def download_youtube_url_images(url: str, num_images: int = 3) -> str:
-     """
-     Download a YouTube video using the url, extract the frames and return the path to the downloaded files.
-
-     Args:
-         url (str): The URL of the YouTube video to download.
-         num_images (int): The number of images to download. The images are extracted from the video at regular intervals.
-
-     Returns:
-         str: The different paths to the downloaded frames, separated by newlines.
-     """
-     # First, download the video
-     ydl_images_opts = {
-         'format': 'best[height<=720]', # Download video in reasonable quality
-         'outtmpl': '%(title)s.%(ext)s', # Save with title as filename
-         'quiet': True,
-         'no_verbose_header': True,
-         'no_warnings': True,
-     }
-
-     with YoutubeDL(ydl_images_opts) as ydl:
-         info = ydl.extract_info(url, download=True)
-         video_filepath = ydl.prepare_filename(info)
-
-     # Extract frames from the downloaded video
-     if os.path.exists(video_filepath):
-         extracted_frames = extract_frames_with_ffmpeg(video_filepath, num_images)
-         return "\n".join(extracted_frames)
-
-     return ""
-