laverdes committed
Commit f0a8f60 · verified · 1 Parent(s): 1853d1e

feat: basic tool-use langchain agent alongside langgraph

Files changed (1):
basic_agent.py +156 -116
basic_agent.py CHANGED
@@ -8,13 +8,9 @@ from rich.markdown import Markdown
 from rich.json import JSON
 
 from typing import TypedDict, Sequence, Annotated
-from langchain_core.messages import BaseMessage
-from langgraph.graph.message import add_messages
-from langgraph.graph import StateGraph, START, END
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
 from langchain_openai import ChatOpenAI
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
-from tqdm import tqdm
+
 
 
 def print_conversation(messages):
@@ -22,18 +18,55 @@ def print_conversation(messages):
 
     for msg in messages:
         role = msg.get("role", "unknown").capitalize()
+
         content = msg.get("content", "")
 
         try:
-            parsed_json = json.loads(content)
-            rendered_content = JSON.from_data(parsed_json)
+            if isinstance(content, str):
+                content = json.loads(content)
+
+            elif isinstance(content, dict) and 'output' in content.keys():
+                if isinstance(content['output'], HumanMessage):
+                    content['output'] = content['output'].content
+
+            elif isinstance(content, HumanMessage):
+                content = content.content
+
+            rendered_content = JSON.from_data(content)
+
         except (json.JSONDecodeError, TypeError):
-            rendered_content = Markdown(content.strip())
+            try:
+                rendered_content = Markdown(content.strip())
+            except AttributeError:
+                # from gemini
+                try:
+                    rendered_content = {
+                        'query': content.get('query', 'QueryKeyNotFound').content[0]['text'],
+                        'output': content.get('output', 'OutputKeyNotFound'),
+                    }
+                    rendered_content = JSON.from_data(rendered_content)
+
+                except Exception as e:
+                    print(f"Failed to render content for role: {role}. Content: {content}")
+                    print("Error:", e)
+
+
+        border_style_color = "red"
+        if "Assistant" in role:
+            border_style_color = "magenta"
+        elif "User" in role:
+            border_style_color = "green"
+        elif "System" in role:
+            border_style_color = "blue"
+        elif "Tool" in role:
+            border_style_color = "yellow"
+        elif "Token" in role:
+            border_style_color = "white"
 
         panel = Panel(
            rendered_content,
            title=f"[bold blue]{role}[/]",
-           border_style="green" if role == "User" else "magenta",
+           border_style=border_style_color,
            expand=True
        )
 
@@ -43,18 +76,44 @@
 def generate_final_answer(qa: dict[str, str]) -> str:
     """Invokes gpt-4o-mini to generate a final answer based on the query, response, and metadata"""
 
-    final_answer_llm = ChatOpenAI(model="gpt-4o", temperature=0)
+    final_answer_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
 
     system_prompt = (
-        "You will receive a JSON string containing a user's query, a response, and metadata. "
-        "Extract and return only the final answer to the query as a plain string. "
-        "Do not return anything else. "
-        "Avoid any labels, prefixes, or explanation. "
-        "Return only the exact value that satisfies the query, suitable for string comparison."
-        "If the query is not answerable due to a missing file in the input and is reflected in the response, answer with 'File not found'. "
+        "You will be given a JSON object containing a user's query, a response from an AI assistant, and optional metadata. "
+        "Your task is to extract and return a final answer to the query as a plain string, strictly suitable for exact match evaluation. "
+
+        "Do NOT answer the query yourself. Use the response as the source of truth. "
+        "Use the query only as context to interpret the response and extract a final, normalized answer. "
+
+        "Your output must be:\n"
+        "- A **single plain string** with **no prefixes, labels, or explanations**.\n"
+        "- Suitable for exact string comparison.\n"
+        "- Clean and deterministic: no variation in formatting, casing, or punctuation.\n"
+
+        "Special rules:\n"
+        "- If the response shows inability to process attached media (images, audio, video), return: **'File not found'**.\n"
+        "- If the response is a list of search results, aggregate the information before constructing an answer.\n"
+        "- If the query is quantitative (How many...?), **aggregate the results of the tool(s) call(s) and return the numeric answer** only.\n"
+        "- If the query is unanswerable from the response, return: **'No answer found: <brief reason>'**.\n"
+
+        "Examples:\n"
+        "- Query: 'What’s in the attached image?'\n"
+        "  Response: 'I'm unable to view images directly...'\n"
+        "  Output: 'File not found'\n\n"
+        "- Query: 'What’s the total population of X'\n"
+        "  Response: '{title: demographics of X, content: 1. City A: 2M, 2. City B: 3M, title: history of X, content: currently there are Y number of inhabitants in X...'\n"
+        "  Output: '5000000'\n"
+
+        "Strictly follow these rules. Some final answers will require more analysis of the provided response. "
+        "You can reason to get to the answer but always consider the response as the base knowledge (keep coherence). "
+        "Return only the final string answer. Do not include any other content."
    )
 
     system_message = SystemMessage(content=system_prompt)
+
+    if isinstance(qa['response']['query'], HumanMessage):
+        qa['response'] = qa['response']['output']
+
     messages = [
         system_message,
         HumanMessage(content=f'Generate the final answer for the following query:\n\n{json.dumps(qa)}')
@@ -63,124 +122,105 @@ def generate_final_answer(qa: dict[str, str]) -> str:
     response = final_answer_llm.invoke(messages)
 
     return response.content
-
 
-class AgentState(TypedDict):
-    messages: Annotated[Sequence[BaseMessage], add_messages]
 
-
-class BasicOpenAIAgentWorkflow:
+class ToolAgent:
     """Basic custom class from an agent prompted for tool-use pattern"""
 
     def __init__(self, tools: list, model='gpt-4o', backstory:str="", streaming=False):
-        self.name = "Basic OpenAI Agent Workflow"
+        self.name = "GAIA Tool-Use Agent"
         self.tools = tools
-        self.llm = ChatOpenAI(model=model, temperature=0, streaming=streaming)
-        self.graph = None
-        self.history = []
-        self.history_messages = []  # Store messages in LangChain format
+        self.llm = ChatOpenAI(model=model, temperature=0, streaming=streaming, max_retries=5)
+        self.executor = None
         self.backstory = backstory if backstory else "You are a helpful assistant that can use tools to answer questions. Your name is Gaia."
 
-        role_message = {'role': 'system', 'content': self.backstory}
-        self.history.append(role_message)
-
-
-    def _call_llm(self, state: AgentState):
-        """invokes the assigned llm"""
-        return {'messages': [self.llm.invoke(state['messages'])]}
-
-
-    def _convert_history_to_messages(self):
-        """Convert self.history to LangChain-compatible messages"""
-        converted = []
-        for msg in self.history:
-            content = msg['content']
-
-            if not isinstance(content, str):
-                raise ValueError(f"Expected string content, got: {type(content)} — {content}")
-
-            if msg['role'] == 'user':
-                converted.append(HumanMessage(content=content))
-            elif msg['role'] == 'assistant':
-                converted.append(AIMessage(content=content))
-            elif msg['role'] == 'system':
-                converted.append(SystemMessage(content=content))
-            else:
-                raise ValueError(f"Unknown role in message: {msg}")
-        self.history_messages = converted
-
 
     def create_basic_tool_use_agent_state_graph(self, custom_tools_nm="tools"):
         """Binds tools, creates and compiles graph"""
-        self.llm = self.llm.bind_tools(self.tools)
-
-        # Graph Init
-        graph = StateGraph(AgentState)
-
-        # Nodes
-        graph.add_node('agent', self._call_llm)
-        tools_node = ToolNode(self.tools)
-        graph.add_node(custom_tools_nm, tools_node)
 
-        # Edges
-        graph.add_edge(START, "agent")
-        graph.add_conditional_edges('agent', tools_condition, {'tools': custom_tools_nm, END: END})
-
-        self.graph = graph.compile()
-
-
-    def chat(self, query, verbose=2, only_final_answer=False):
-        """Simple agent call"""
-        if isinstance(query, dict):
-            query = query["messages"]
-
-        user_message = {'role': 'user', 'content': query}
-        self.history.append(user_message)
-
-        # Ensure history has at least 1 message
-        if not self.history:
-            raise ValueError("History is empty. Cannot proceed.")
-
-        self._convert_history_to_messages()
-
-        if not self.history_messages:
-            raise ValueError("Converted message history is empty. Something went wrong.")
-
-        response = self.graph.invoke({'messages': self.history_messages})  # invoke with all the history to keep context (dummy mem)
-        response = response['messages'][-1].content
-
-        if only_final_answer:
-            final_answer_content = {
-                'query': query,
-                'response': response,
-                'metadata': {}
-            }
-            response = generate_final_answer(final_answer_content)
-
-        assistant_message = {'role': 'assistant', 'content': response}
-        self.history.append(assistant_message)
-
-        if verbose==2:
-            print_conversation(self.history)
-        elif verbose==1:
-            print_conversation([assistant_message])
-
-        return response
-
-
-    def invoke(self, input_str: str):
-        """Invoke the compiled graph with the input data"""
-        _ = self.chat(input_str)  # prints response in terminal
-        self._convert_history_to_messages()
-        return {'messages': self.history_messages}
-
-
-    def chat_batch(self, queries=None, only_final_answer=False):
-        """Send several simple agent calls to the llm using the compiled graph"""
-        if queries is None:
-            queries = []
-        for i, query in tqdm(enumerate(queries, start=1)):
-            if i == len(queries):
-                self.chat(query, verbose=2, only_final_answer=only_final_answer)
-            else:
-                self.chat(query, verbose=0, only_final_answer=only_final_answer)
+        tools_info = '\n\n'.join([f'{tool.name}: {tool.description}: {tool.args}' for tool in self.tools])
+        chatgpt_with_tools = self.llm.bind_tools(self.tools)
+
+        prompt_template = ChatPromptTemplate.from_messages(
+            [
+                ("system", self.backstory),
+                MessagesPlaceholder(variable_name="history", optional=True),
+                ("human", "{query}"),
+                MessagesPlaceholder(variable_name="agent_scratchpad"),
+            ]
+        )
+
+        agent = create_tool_calling_agent(self.llm, self.tools, prompt_template)
+        self.executor = AgentExecutor(
+            agent=agent,
+            tools=self.tools,
+            early_stopping_method='force',
+            max_iterations=10
+        )
+
+
+    def chat(self, query: str, metadata):
+        """Perform a single step in the conversation with the tool agent executor."""
+        if metadata is None:
+            metadata = {}
+
+        with_attachments = False
+        query_message = HumanMessage(content=query)
+
+        if "image_path" in metadata:
+
+            # Create a HumanMessage with image content
+            query_message = HumanMessage(
+                content=[
+                    {"type": "text", "text": query},
+                    {"type": "text", "text": f"image_path: {metadata['image_path']}"},
+                ]
+            )
+            with_attachments = True
+
+        user_message = {'role': 'user', 'content': query if not with_attachments else query_message}
+        print_conversation([user_message])
+
+        response = self.executor.invoke({
+            "query": query if not with_attachments else query_message,
+        })
+        response_message = {'role': 'assistant', 'content': response}
+        print_conversation([response_message])
+
+        final_answer = generate_final_answer({
+            'query': query,
+            'response': response,
+        })
+        final_answer_message = {'role': 'Final Answer', 'content': final_answer}
+        print_conversation([final_answer_message])
+        return final_answer
+
+
+    def invoke(self, q_data):
+        """Invoke the executor with the input data"""
+        query = q_data.get("query", "")
+        metadata = q_data.get("metadata", None)
+
+        try:
+            response = self.chat(query, metadata)
+            time.sleep(3)
+        except RateLimitError:
+            response = 'Rate limit error encountered. Retrying after a short pause...'
+            error_message = {'role': 'Rate-limit-hit', 'content': response}
+            print_conversation([error_message])
+            time.sleep(5)
+
+            try:
+                response = self.chat(query, metadata)
+            except RateLimitError:
+                response = 'Rate limit error encountered again. Skipping this query.'
+                error_message = {'role': 'Rate-limit-hit', 'content': response}
+                print_conversation([error_message])
+
+        print()
+        return response
+
+
+    def __call__(self, q_data):
+        """Call the invoke method from the agent executor."""
+        return self.invoke(q_data)
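Note that the new `ToolAgent` path depends on names that never appear in the visible import hunk (`ChatPromptTemplate`, `MessagesPlaceholder`, `create_tool_calling_agent`, `AgentExecutor`, `time`, `RateLimitError`); presumably they are imported above line 8 of `basic_agent.py`, outside the diff context. Below is a minimal usage sketch, assuming those imports and a hypothetical `multiply` tool; only the `ToolAgent` API itself comes from this commit.

import time
from openai import RateLimitError
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool

from basic_agent import ToolAgent  # the class added in this commit


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the product."""
    return a * b


# Build the executor; the method keeps its old LangGraph-era name.
agent = ToolAgent(tools=[multiply], model="gpt-4o")
agent.create_basic_tool_use_agent_state_graph()

# __call__ -> invoke -> chat -> AgentExecutor.invoke({"query": ...})
final_answer = agent({"query": "What is 6 times 7?", "metadata": {}})
print(final_answer)  # expected: '42'

Since `invoke` sleeps three seconds after every successful call and retries once on `RateLimitError`, the agent is already paced for back-to-back use.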
 
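The commit also drops the old `chat_batch` helper, so batching now has to live outside the class. A sketch of an equivalent external loop, continuing the example above (the question list and `image_path` are illustrative):

questions = [
    {"query": "What is 6 times 7?", "metadata": {}},
    {"query": "What's in the attached image?", "metadata": {"image_path": "data/chart.png"}},
]

# ToolAgent.invoke already handles RateLimitError and paces calls with
# time.sleep, so a plain loop is enough to replace chat_batch.
answers = [agent(q_data) for q_data in questions]

for q_data, answer in zip(questions, answers):
    print(f"{q_data['query']!r} -> {answer!r}")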