Spaces:
Running
Running
| import os | |
| import pandas as pd | |
| import requests | |
| from functools import lru_cache | |
| from pydantic import Field, BaseModel | |
| from typing import Any, Optional | |
| from omegaconf import OmegaConf | |
| from vectara_agentic.agent import Agent | |
| from vectara_agentic.tools import ToolsFactory, VectaraToolFactory | |
| from vectara_agentic.agent_config import AgentConfig | |
| from vectara_agentic.sub_query_workflow import SubQuestionQueryWorkflow | |
| from dotenv import load_dotenv | |
| load_dotenv(override=True) | |
# Companies the assistant can answer questions about, keyed by ticker symbol.
# Insertion order matters: it is used when rendering the ticker list and the
# demo description.
tickers = {
    "C": "Citigroup",
    "COF": "Capital One",
    "JPM": "JPMorgan Chase",
    "AAPL": "Apple Computer",
    "GOOG": "Google",
    "AMZN": "Amazon",
    "SNOW": "Snowflake",
    "TEAM": "Atlassian",
    "TSLA": "Tesla",
    "NVDA": "Nvidia",
    "MSFT": "Microsoft",
    "AMD": "Advanced Micro Devices",
    "INTC": "Intel",
    "NFLX": "Netflix",
    "STT": "State Street",
    "BK": "Bank of New York Mellon",
}

# Years accepted by the tools: 2015 through 2024 inclusive (range end is exclusive).
years = range(2015, 2025)

# Greeting text for the chat UI.
initial_prompt = "How can I help you today?"
| # Tool to get the income statement for a given company and year using the FMP API | |
def fmp_income_statement(
    ticker: str = Field(description="the ticker symbol of the company.", examples=["AAPL", "GOOG", "AMZN"]),
    year: int = Field(description="the year for which to get the income statement.", examples=[2020, 2021, 2022]),
) -> str:
    """
    Get the income statement for a given company and year using the FMP (https://financialmodelingprep.com) API.

    Args:
        ticker (str): the ticker symbol of the company.
        year (int): the year for which to get the income statement.

    Returns:
        A string with the income statement data as comma-separated key=value pairs,
        or a message explaining why no data could be returned.
        All data is in USD, but you can convert it to more compact form like K, M, B.
    """
    if ticker not in tickers or year not in years:
        return "Invalid ticker or year. Please call this tool with a valid company ticker and year."

    fmp_api_key = os.environ.get("FMP_API_KEY")
    if fmp_api_key is None:
        return "FMP_API_KEY environment variable not set. This tool does not work."

    url = f"https://financialmodelingprep.com/api/v3/income-statement/{ticker}"
    try:
        # Pass the key via params (keeps it out of the hand-built URL) and bound
        # the wait so a stalled connection cannot hang the agent.
        response = requests.get(url, params={"apikey": fmp_api_key}, timeout=30)
    except requests.RequestException as exc:
        # Report only the exception type: the exception text can contain the
        # full request URL, which includes the API key.
        return f"FMP API request failed ({type(exc).__name__}). This tool does not work."

    if response.status_code != 200:
        return f"FMP API returned error {response.status_code}. This tool does not work."

    try:
        data = response.json()
    except ValueError:
        return "FMP API returned a response that is not valid JSON. This tool does not work."

    # The code below expects a list of per-period records; anything else
    # (e.g. a dict carrying an error message) cannot be tabulated.
    if not isinstance(data, list) or not data:
        return "No data found for the given ticker symbol."

    income_statement = pd.DataFrame(data)
    if len(income_statement) == 0 or "date" not in income_statement.columns:
        return "No data found for the given ticker symbol."

    income_statement["date"] = pd.to_datetime(income_statement["date"])
    records = income_statement[
        income_statement["date"].dt.year == int(year)
    ].to_dict(orient="records")
    if not records:
        # Previously this case raised IndexError on records[0].
        return f"No income statement found for {ticker} in {year}."

    excluded_keys = {"date", "cik", "link", "finalLink"}
    results = ", ".join(
        f"{key}={value}" for key, value in records[0].items() if key not in excluded_keys
    )
    return f"Financial results: {results}"
def get_company_info() -> dict[str, str]:
    """
    Returns a dictionary of companies you can query about. Always check this before using any other tool.
    The output is a dictionary of valid ticker symbols mapped to company names.
    You can use this to identify the companies you can query about, and their ticker information.
    """
    # Return a copy so callers cannot mutate the module-level table that the
    # other tools validate against. The return annotation now matches the
    # dict actually returned (it previously said list[str]).
    return dict(tickers)
def get_valid_years() -> list[int]:
    """
    Returns a list of the years for which financial reports are available.
    Always check this before using any other tool.
    """
    # `years` is a range object; materialize it so the caller receives the
    # explicit list of integer years that the docstring and annotation promise.
    return list(years)
class AgentTools:
    """Builds the list of tools handed to the financial assistant agent."""

    def __init__(self, _cfg, agent_config):
        """
        Store the configuration and create the tool factories.

        Args:
            _cfg: configuration object exposing `api_key` and `corpus_key`.
            agent_config: agent configuration; stored on the instance.
        """
        self.tools_factory = ToolsFactory()
        self.agent_config = agent_config
        self.cfg = _cfg
        self.vec_factory = VectaraToolFactory(
            vectara_api_key=_cfg.api_key,
            vectara_corpus_key=_cfg.corpus_key,
        )

    def get_tools(self):
        """
        Create and return all tools for the agent.

        Returns:
            A list with the three function tools (company info, valid years,
            income statement) followed by the 'ask_transcripts' RAG tool and
            the 'search_transcripts' search tool.
        """
        # Field metadata shared by both argument schemas below.
        query_description = "The user query, always in the form of a question"
        query_examples = ["what are the risks reported?", "who are the competitors?"]
        year_description = (
            f"The year this query relates to. An integer between {min(years)} and {max(years)} "
            "or a string specifying a condition on the year"
        )
        year_examples = [2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
        ticker_description = (
            "The company ticker this query relates to. "
            f"Must be a valid ticket symbol from the list {list(tickers.keys())}."
        )

        class QueryTranscriptsArgs(BaseModel):
            query: str = Field(..., description=query_description, examples=list(query_examples))
            year: int | str = Field(
                default=None,
                description=year_description,
                examples=list(year_examples),
            )
            ticker: str = Field(..., description=ticker_description)

        class SearchTranscriptsArgs(BaseModel):
            query: str = Field(..., description=query_description, examples=list(query_examples))
            top_k: int = Field(..., description="The number of results to return.")
            year: int | str = Field(
                default=None,
                description=year_description,
                examples=list(year_examples),
            )
            ticker: str = Field(..., description=ticker_description)

        summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'

        # Reuse the factories created in __init__ rather than constructing
        # second, identically configured instances.
        ask_transcripts = self.vec_factory.create_rag_tool(
            tool_name="ask_transcripts",
            tool_description=(
                "\n"
                "Given a company name and year, responds to a user question about the company, based on analyst call transcripts about the company's financial reports for that year.\n"
                "You can ask this tool any question about the company including risks, opportunities, financial performance, competitors and more.\n"
            ),
            tool_args_schema=QueryTranscriptsArgs,
            reranker="multilingual_reranker_v1",
            rerank_k=100,
            rerank_cutoff=0.1,
            n_sentences_before=2,
            n_sentences_after=4,
            lambda_val=0.005,
            summary_num_results=15,
            vectara_summarizer=summarizer,
            include_citations=True,
            verbose=False,
        )

        search_transcripts = self.vec_factory.create_search_tool(
            tool_name="search_transcripts",
            tool_description=(
                "\n"
                "Given a company name and year, and a user query, retrieves relevant documents about the company.\n"
            ),
            tool_args_schema=SearchTranscriptsArgs,
            reranker="multilingual_reranker_v1",
            rerank_k=100,
            lambda_val=0.005,
            verbose=False,
        )

        function_tools = [
            self.tools_factory.create_tool(tool)
            for tool in (get_company_info, get_valid_years, fmp_income_statement)
        ]
        return function_tools + [ask_transcripts, search_transcripts]
def initialize_agent(_cfg, agent_progress_callback=None):
    """
    Build the financial assistant agent and return it.

    Args:
        _cfg: configuration object passed through to AgentTools.
        agent_progress_callback: optional callback forwarded to the Agent.

    Returns:
        The constructed Agent, after `agent.report()` has been called on it.
    """

    def query_logging(query: str, response: str):
        # Logs every query/response pair to stdout.
        print(f"Logging query={query}, response={response}")

    # Custom instructions for the agent, one line of text per literal.
    financial_bot_instructions = (
        "\n"
        "- You are a helpful financial assistant, with expertise in financial reporting, in conversation with a user.\n"
        "- Use the 'fmp_income_statement' tool (with the company ticker and year) to obtain financial data.\n"
        "- Always check the 'get_company_info' and 'get_valid_years' tools to validate company and year are valid.\n"
        "- Use the 'ask_transcripts' tool to answer most questions about the company's financial performance, risks, opportunities, strategy, competitors, and more.\n"
        "- Respond in a compact format by using appropriate units of measure (e.g., K for thousands, M for millions, B for billions).\n"
        "Do not report the same number twice (e.g. $100K and 100,000 USD).\n"
        "- Do not include URLs unless they are provided in the output of a tool response and are valid URLs.\n"
        "Ignore references or citations in the 'ask_transcripts' tool output if they have an empty URL (for example \"[2]()\").\n"
        "- When querying a tool for a numeric value or KPI, use a concise and non-ambiguous description of what you are looking for.\n"
        "- If you calculate a metric, make sure you have all the necessary information to complete the calculation. Don't guess.\n"
        "- Do not response in markdown.\n"
    )

    agent_config = AgentConfig()
    tools = AgentTools(_cfg, agent_config).get_tools()

    agent_kwargs = dict(
        tools=tools,
        topic="Financial data, annual reports and 10-K filings",
        custom_instructions=financial_bot_instructions,
        agent_progress_callback=agent_progress_callback,
        query_logging_callback=query_logging,
        verbose=True,
        # workflow_cls=SubQuestionQueryWorkflow is intentionally left disabled.
    )
    agent = Agent(**agent_kwargs)
    agent.report()
    return agent
def get_agent_config() -> OmegaConf:
    """
    Assemble the demo configuration from environment variables.

    VECTARA_CORPUS_KEY and VECTARA_API_KEY are required (a missing one raises
    KeyError); QUERY_EXAMPLES is optional and defaults to None.

    Returns:
        The object produced by `OmegaConf.create` with the corpus key, API key,
        example queries, and demo name/welcome/description texts.
    """
    company_names = ", ".join(tickers.values())
    description = (
        "This assistant can help you with any questions about the financials of several companies:"
        f"\n\n **{company_names}**.\n"
    )
    settings = {
        'corpus_key': str(os.environ['VECTARA_CORPUS_KEY']),
        'api_key': str(os.environ['VECTARA_API_KEY']),
        'examples': os.environ.get('QUERY_EXAMPLES', None),
        'demo_name': "finance-chat",
        'demo_welcome': "Financial Assistant demo.",
        'demo_description': description,
    }
    return OmegaConf.create(settings)