Unit4-Final-Certificate

Running

App Files Community

nismamjad committed on May 25

Commit

0de4662

verified ·

1 Parent(s): 013637a

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -51

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import gradio as gr
 from datasets import load_dataset, Dataset
-from datetime import datetime, date # Combined datetime imports
 import io
 import os
 from PIL import Image, ImageDraw, ImageFont
 from huggingface_hub import login
-import requests # For API calls
-import json # For handling JSON data
 # Attempt to login using environment token
 try:
@@ -141,8 +142,7 @@ def get_gaia_api_questions():
     try:
         questions_url = f"{GAIA_API_BASE_URL}/questions"
         print(f"Attempting to fetch questions from: {questions_url}")
-        # Adding a timeout to the GET request as well, for consistency
-        response = requests.get(questions_url, timeout=30) # 30-second timeout for fetching questions
         response.raise_for_status()
         return response.json(), None
     except requests.exceptions.RequestException as e:
@@ -152,65 +152,157 @@ def get_gaia_api_questions():
         print(f"An unexpected error occurred while fetching questions: {e}")
         return None, f"An unexpected error occurred: {e}"
-def my_agent_logic(task_id: str, question: str, files: list = None):
     """
-    Uses the Gemini API to generate an answer for the given question.
     """
-    print(f"Agent (Gemini) processing Task ID: {task_id}, Question: {question}")
-    if files:
-        print(f"Files associated with this task: {files}")
     gemini_api_key = os.environ.get("GEMINI_API_KEY")
     if not gemini_api_key:
         print("Error: GEMINI_API_KEY not found in environment variables. Please set it in Space Secrets.")
         return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
-    prompt_parts = [
-        "You are an AI assistant answering questions for the GAIA benchmark.",
-        "Your goal is to provide the single, exact, concise, and factual answer to the question below.",
-        "Do not include any conversational fluff, disclaimers, explanations, or any introductory phrases like 'The answer is:'.",
-        "Do not use markdown formatting unless the question explicitly asks for it.",
-        "If the question implies a specific format (e.g., a number, a date, a comma-separated list), provide the answer in that format.",
-        "Do NOT include the phrase 'FINAL ANSWER' in your response.",
-        f"\nQuestion: {question}"
     ]
-    if files:
-        prompt_parts.append(f"\nNote: The following file(s) are associated with this question, but you may not have direct access to their content: {files}. Answer based on the question text and your general knowledge. If the question is unanswerable without the file content, state that you cannot answer without file access.")
-    full_prompt = "\n".join(prompt_parts)
     payload = {
-        "contents": [{
-            "role": "user",
-            "parts": [{"text": full_prompt}]
-        }],
         "generationConfig": {
-            "temperature": 0.4,
-            "maxOutputTokens": 250,
         }
     }
     api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
     agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
     try:
         headers = {"Content-Type": "application/json"}
         print(f"Calling Gemini API for task {task_id}...")
-        # --- MODIFIED LINE: Added timeout=60 ---
         response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=60)
-        # --- END OF MODIFIED LINE ---
         response.raise_for_status()
         result = response.json()
         if (result.get("candidates") and
             result["candidates"][0].get("content") and
             result["candidates"][0]["content"].get("parts") and
             result["candidates"][0]["content"]["parts"][0].get("text")):
-            agent_computed_answer = result["candidates"][0]["content"]["parts"][0]["text"].strip()
-            if agent_computed_answer.upper().startswith("FINAL ANSWER:"):
-                agent_computed_answer = agent_computed_answer[len("FINAL ANSWER:"):].strip()
         else:
             print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {result}")
             if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
@@ -219,7 +311,6 @@ def my_agent_logic(task_id: str, question: str, files: list = None):
                 agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
             else:
                 agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
     except requests.exceptions.Timeout:
         print(f"Timeout error calling Gemini API for task {task_id}.")
         agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
@@ -227,16 +318,14 @@ def my_agent_logic(task_id: str, question: str, files: list = None):
         print(f"Error calling Gemini API for task {task_id}: {e}")
         if e.response is not None:
             print(f"Gemini API Error Response Status: {e.response.status_code}")
-            try:
-                print(f"Gemini API Error Response Body: {e.response.json()}")
-            except json.JSONDecodeError:
-                print(f"Gemini API Error Response Body (text): {e.response.text}")
         agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
     except Exception as e:
         print(f"An unexpected error occurred in my_agent_logic for task {task_id}: {e}")
         agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
-    print(f"Agent (Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
     return agent_computed_answer
 def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
@@ -265,13 +354,14 @@ def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
     for task in tasks_to_process:
         task_id = task.get("task_id")
         question = task.get("question")
-        associated_files = task.get("files", [])
         if task_id and question:
             log_messages.append(f"\nProcessing Task ID: {task_id}")
             log_messages.append(f"Question: {question}")
-            if associated_files:
-                 log_messages.append(f"Associated files: {associated_files}")
-            submitted_answer = my_agent_logic(task_id, question, associated_files)
             log_messages.append(f"Agent's Answer: {submitted_answer}")
             answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
         else:
@@ -289,26 +379,21 @@ def submit_agent_answers(profile: gr.OAuthProfile, answers_for_submission_state)
     space_id = os.getenv('SPACE_ID', '')
     agent_code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"
     submission_log_messages = [f"Preparing to submit answers for user: {username}"]
     if not space_id:
         your_space_name_guess = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
         if not your_space_name_guess or your_space_name_guess == 'app':
              your_space_name_guess = "YOUR_SPACE_NAME_HERE"
         agent_code_link = f"https://huggingface.co/spaces/{username}/{your_space_name_guess}/tree/main"
         submission_log_messages.append(f"Warning: SPACE_ID not found. Constructed agent_code_link as: {agent_code_link}. Please verify this link is correct.")
     submission_log_messages.append(f"Agent Code Link: {agent_code_link}")
     payload = {
         "username": username,
         "agent_code": agent_code_link,
         "answers": answers_for_submission_state
     }
     try:
         submit_url = f"{GAIA_API_BASE_URL}/submit"
         print(f"Attempting to submit answers to: {submit_url} with payload: {payload}")
-        # Adding a timeout to the POST request for submission as well
         response = requests.post(submit_url, json=payload, timeout=60)
         response.raise_for_status()
         submission_response = response.json()
@@ -337,7 +422,7 @@ def submit_agent_answers(profile: gr.OAuthProfile, answers_for_submission_state)
         submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
         return "\n".join(submission_log_messages)
-# --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
     gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
@@ -350,7 +435,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Tabs():
         with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
             gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
-            gr.Markdown("This agent uses the Gemini API to generate answers. Implement your custom logic in `my_agent_logic` if desired.")
             run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
             run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
             gr.Markdown("### Agent Run Log & Generated Answers:")

 import gradio as gr
 from datasets import load_dataset, Dataset
+from datetime import datetime, date
 import io
 import os
 from PIL import Image, ImageDraw, ImageFont
 from huggingface_hub import login
+import requests
+import json
+import base64 # <-- ADDED IMPORT for image handling
 # Attempt to login using environment token
 try:
     try:
         questions_url = f"{GAIA_API_BASE_URL}/questions"
         print(f"Attempting to fetch questions from: {questions_url}")
+        response = requests.get(questions_url, timeout=30)
         response.raise_for_status()
         return response.json(), None
     except requests.exceptions.RequestException as e:
         print(f"An unexpected error occurred while fetching questions: {e}")
         return None, f"An unexpected error occurred: {e}"
def get_gaia_file_data_for_task(task_id_for_file_fetch, associated_file_metadata_list):
    """
    Download the file the GAIA API serves for a task.

    Issues a GET to ``{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}``
    with a 30-second timeout.

    Args:
        task_id_for_file_fetch: Task identifier interpolated into the file URL.
        associated_file_metadata_list: The 'files' value from the question
            data. Only used to pick a display name: if it is a non-empty list
            whose first element is a dict containing 'file_name', that value
            is used; otherwise the name defaults to "attached_file".

    Returns:
        A ``(raw_bytes, mime_type, file_name)`` tuple on success, where
        ``mime_type`` is the Content-Type header with any ``;``-separated
        parameters stripped (empty string when the header is absent).
        ``(None, None, None)`` when the request fails for any reason; the
        failure is printed rather than raised.
    """
    nothing_fetched = (None, None, None)

    # NOTE(review): assumes the /files/{task_id} endpoint always returns the
    # one relevant file for the task, if any exists -- confirm against the API.
    file_url = f"{GAIA_API_BASE_URL}/files/{task_id_for_file_fetch}"
    print(f"Attempting to fetch file for task {task_id_for_file_fetch} from {file_url}")

    try:
        reply = requests.get(file_url, timeout=30)
        # Raises HTTPError on 4xx/5xx, including the 404 "no file" case.
        reply.raise_for_status()

        raw_bytes = reply.content
        content_type_header = reply.headers.get('Content-Type', '')
        # Keep only the media type, dropping parameters such as charset.
        detected_mime_type = content_type_header.split(';')[0].strip()

        # Prefer the name recorded in the first metadata entry, if present.
        file_name = "attached_file"
        metadata = associated_file_metadata_list
        if metadata and isinstance(metadata, list):
            leading_entry = metadata[0]
            if isinstance(leading_entry, dict) and 'file_name' in leading_entry:
                file_name = leading_entry['file_name']

        print(f"File fetched for task {task_id_for_file_fetch}. Mime-type: {detected_mime_type}, Name: {file_name}, Size: {len(raw_bytes)} bytes")
        return raw_bytes, detected_mime_type, file_name
    except requests.exceptions.HTTPError as http_err:
        # A 404 just means the task has no file; log it differently from
        # genuine HTTP failures.
        missing = http_err.response.status_code == 404
        if missing:
            print(f"No file found (404) for task {task_id_for_file_fetch} at {file_url}.")
        else:
            print(f"HTTP error fetching file for task {task_id_for_file_fetch}: {http_err}")
        return nothing_fetched
    except requests.exceptions.RequestException as e:
        print(f"Could not fetch file for task {task_id_for_file_fetch}: {e}. Proceeding without file content.")
        return nothing_fetched
    except Exception as e_gen:
        print(f"Unexpected error fetching file for task {task_id_for_file_fetch}: {e_gen}")
        return nothing_fetched
+def my_agent_logic(task_id: str, question: str, files_metadata: list = None): # files_metadata is the list from task.get("files")
+    """
+    Uses the Gemini API, with GAIA-specific prompting and basic file handling,
+    to generate an answer for the given question.
+    """
+    print(f"Agent (GAIA-Grounded Gemini) processing Task ID: {task_id}, Question: {question}")
+    if files_metadata: # This is the list of file metadata dicts
+        print(f"File metadata associated with this task: {files_metadata}")
     gemini_api_key = os.environ.get("GEMINI_API_KEY")
     if not gemini_api_key:
         print("Error: GEMINI_API_KEY not found in environment variables. Please set it in Space Secrets.")
         return f"ERROR_GEMINI_KEY_MISSING_FOR_TASK_{task_id}"
+    # --- GAIA-specific System Prompt ---
+    # Adapted from Figure 2 of the GAIA paper.
+    system_prompt_lines = [
+        "You are a general AI assistant. I will ask you a question.",
+        "Report your thoughts (for your own processing, not for the final answer), and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].", # Instructing the LLM about the template it should "think" in
+        "However, your actual returned response to me (the user) should ONLY be [YOUR FINAL ANSWER] part, without the 'FINAL ANSWER:' prefix.", # Clarification for our use case
+        "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.",
+        "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.",
+        "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.",
+        "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
+        "Be precise and ensure the answer strictly adheres to any format requested in the question.",
+        "If external files are mentioned or provided, use their content if relevant and accessible to answer the question.",
     ]
+    # We won't send this as a separate "system" message in Gemini's typical API structure,
+    # but rather prepend it to the user question for a single turn.
+    # --- Prepare parts for Gemini API payload ---
+    gemini_parts = []
+    # Prepend system prompt guidelines to the main question text part
+    user_question_text = "\n".join(system_prompt_lines) + f"\n\nGAIA Question: {question}"
+    # --- File Handling ---
+    file_content_bytes, detected_mime_type, file_name = None, None, None
+    if files_metadata: # If the question has associated file(s) metadata
+        file_content_bytes, detected_mime_type, file_name = get_gaia_file_data_for_task(task_id, files_metadata)
+    if file_content_bytes:
+        if detected_mime_type and detected_mime_type.startswith("image/"): # Handle images
+            try:
+                base64_image = base64.b64encode(file_content_bytes).decode('utf-8')
+                gemini_parts.append({"text": user_question_text}) # Question text first
+                gemini_parts.append({
+                    "inline_data": {
+                        "mime_type": detected_mime_type,
+                        "data": base64_image
+                    }
+                })
+                print(f"Added image {file_name} ({detected_mime_type}) to Gemini prompt for task {task_id}.")
+            except Exception as e_img:
+                print(f"Error processing image file {file_name} for task {task_id}: {e_img}")
+                gemini_parts.append({"text": user_question_text + f"\n[Agent note: An image file '{file_name}' was associated but could not be processed: {e_img}]"})
+        elif detected_mime_type and detected_mime_type == "text/plain": # Handle plain text files
+            try:
+                text_content = file_content_bytes.decode('utf-8')
+                user_question_text += f"\n\nContent of attached text file '{file_name}':\n{text_content}"
+                gemini_parts.append({"text": user_question_text})
+                print(f"Added text file content '{file_name}' to Gemini prompt for task {task_id}.")
+            except Exception as e_txt:
+                print(f"Error decoding text file {file_name} for task {task_id}: {e_txt}")
+                gemini_parts.append({"text": user_question_text + f"\n[Agent note: A text file '{file_name}' was associated but could not be decoded: {e_txt}]"})
+        else: # Other file types, just mention them
+            user_question_text += f"\n\nNote: A file named '{file_name}' (type: {detected_mime_type or 'unknown'}) is associated with this question. Its content is not directly viewable in this text prompt."
+            gemini_parts.append({"text": user_question_text})
+            print(f"Noted non-image/text file {file_name} ({detected_mime_type}) in Gemini prompt for task {task_id}.")
+    else: # No file content fetched or no files associated
+        gemini_parts.append({"text": user_question_text})
     payload = {
+        "contents": [{"role": "user", "parts": gemini_parts}],
         "generationConfig": {
+            "temperature": 0.2, # Lower temperature for more factual/deterministic GAIA answers
+            "maxOutputTokens": 300, # Increased slightly for potentially more complex answers
         }
     }
     api_url_with_key = f"{GEMINI_API_URL_BASE}?key={gemini_api_key}"
     agent_computed_answer = f"ERROR_CALLING_GEMINI_FOR_TASK_{task_id}"
     try:
         headers = {"Content-Type": "application/json"}
         print(f"Calling Gemini API for task {task_id}...")
         response = requests.post(api_url_with_key, headers=headers, json=payload, timeout=60)
         response.raise_for_status()
         result = response.json()
         if (result.get("candidates") and
             result["candidates"][0].get("content") and
             result["candidates"][0]["content"].get("parts") and
             result["candidates"][0]["content"]["parts"][0].get("text")):
+            raw_answer = result["candidates"][0]["content"]["parts"][0]["text"].strip()
+            # Remove the "FINAL ANSWER:" prefix if the LLM included it, despite instructions
+            if raw_answer.upper().startswith("FINAL ANSWER:"):
+                agent_computed_answer = raw_answer[len("FINAL ANSWER:"):].strip()
+            else:
+                agent_computed_answer = raw_answer
+            # Further cleaning: sometimes LLMs might still add subtle quotes if the answer is a simple string
+            if len(agent_computed_answer) > 1 and ((agent_computed_answer.startswith('"') and agent_computed_answer.endswith('"')) or \
+               (agent_computed_answer.startswith("'") and agent_computed_answer.endswith("'"))):
+                agent_computed_answer = agent_computed_answer[1:-1]
         else:
             print(f"Warning: Unexpected response structure from Gemini API for task {task_id}: {result}")
             if result.get("promptFeedback") and result["promptFeedback"].get("blockReason"):
                 agent_computed_answer = f"ERROR_GEMINI_PROMPT_BLOCKED_{block_reason}_FOR_TASK_{task_id}"
             else:
                 agent_computed_answer = f"ERROR_PARSING_GEMINI_RESPONSE_FOR_TASK_{task_id}"
     except requests.exceptions.Timeout:
         print(f"Timeout error calling Gemini API for task {task_id}.")
         agent_computed_answer = f"ERROR_GEMINI_TIMEOUT_FOR_TASK_{task_id}"
         print(f"Error calling Gemini API for task {task_id}: {e}")
         if e.response is not None:
             print(f"Gemini API Error Response Status: {e.response.status_code}")
+            try: print(f"Gemini API Error Response Body: {e.response.json()}")
+            except json.JSONDecodeError: print(f"Gemini API Error Response Body (text): {e.response.text}")
         agent_computed_answer = f"ERROR_GEMINI_REQUEST_FAILED_FOR_TASK_{task_id}"
     except Exception as e:
         print(f"An unexpected error occurred in my_agent_logic for task {task_id}: {e}")
         agent_computed_answer = f"ERROR_UNEXPECTED_IN_AGENT_LOGIC_FOR_TASK_{task_id}"
+    print(f"Agent (GAIA-Grounded Gemini) computed answer for Task ID {task_id}: {agent_computed_answer}")
     return agent_computed_answer
 def run_agent_on_gaia(profile: gr.OAuthProfile, run_all_questions: bool = True):
     for task in tasks_to_process:
         task_id = task.get("task_id")
         question = task.get("question")
+        associated_files_metadata = task.get("files", []) # This is the list of file metadata dicts
         if task_id and question:
             log_messages.append(f"\nProcessing Task ID: {task_id}")
             log_messages.append(f"Question: {question}")
+            if associated_files_metadata:
+                 log_messages.append(f"Associated files metadata: {associated_files_metadata}")
+            # Pass the files_metadata to the agent logic
+            submitted_answer = my_agent_logic(task_id, question, associated_files_metadata)
             log_messages.append(f"Agent's Answer: {submitted_answer}")
             answers_to_submit.append({"task_id": task_id, "submitted_answer": submitted_answer})
         else:
     space_id = os.getenv('SPACE_ID', '')
     agent_code_link = f"https://huggingface.co/spaces/{space_id}/tree/main"
     submission_log_messages = [f"Preparing to submit answers for user: {username}"]
     if not space_id:
         your_space_name_guess = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
         if not your_space_name_guess or your_space_name_guess == 'app':
              your_space_name_guess = "YOUR_SPACE_NAME_HERE"
         agent_code_link = f"https://huggingface.co/spaces/{username}/{your_space_name_guess}/tree/main"
         submission_log_messages.append(f"Warning: SPACE_ID not found. Constructed agent_code_link as: {agent_code_link}. Please verify this link is correct.")
     submission_log_messages.append(f"Agent Code Link: {agent_code_link}")
     payload = {
         "username": username,
         "agent_code": agent_code_link,
         "answers": answers_for_submission_state
     }
     try:
         submit_url = f"{GAIA_API_BASE_URL}/submit"
         print(f"Attempting to submit answers to: {submit_url} with payload: {payload}")
         response = requests.post(submit_url, json=payload, timeout=60)
         response.raise_for_status()
         submission_response = response.json()
         submission_log_messages.append(f"An unexpected error occurred during submission: {e}")
         return "\n".join(submission_log_messages)
+# --- Gradio Interface (largely unchanged from your latest version) ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎓 Agents Course - Unit 4 Final Project")
     gr.Markdown("⚠️ **Note**: Due to high demand, you might experience occasional bugs. If something doesn't work, please try again after a moment!")
     with gr.Tabs():
         with gr.TabItem("🤖 Run Agent on GAIA Benchmark"):
             gr.Markdown("## Step 1: Run Your Agent & Generate Answers")
+            gr.Markdown("This agent uses the Gemini API (with GAIA-specific prompting and basic file handling) to generate answers.")
             run_all_questions_checkbox = gr.Checkbox(label="Process all questions (unchecked processes 1 random question for testing)", value=True)
             run_agent_button = gr.Button("🔎 Fetch Questions & Run My Agent")
             gr.Markdown("### Agent Run Log & Generated Answers:")