laverdes committed on
Commit
5cfc619
·
verified ·
1 Parent(s): 2b7e98c

feat: add image query tool

Browse files
Files changed (1) hide show
  1. tools.py +40 -1
tools.py CHANGED
@@ -182,7 +182,46 @@ def aggregate_information(results: list[str], query: str) -> str:
182
  print_tool_response(output_text)
183
 
184
  return output_text
185
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  def extract_video_id(url: str) -> str:
188
  parsed = urlparse(url)
 
182
  print_tool_response(output_text)
183
 
184
  return output_text
185
+
186
+
187
+ gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
188
+
189
+
190
@tool
def image_query_tool(image_path: str, question: str) -> str:
    """
    Uses Gemini Vision to answer a question about an image.
    - image_path: file path to the image to analyze (.png)
    - question: the query to ask about the image
    """
    # NOTE(review): the docstring above is kept verbatim on purpose. `@tool` is
    # presumably LangChain's decorator, which would expose the docstring to the
    # agent as the tool description -- confirm before rewording it.
    #
    # Failures are returned as plain strings instead of being raised, so the
    # caller always receives a textual answer.
    try:
        base64_img = encode_image_to_base64(image_path)
    except OSError:
        error_text = f"OSError: Invalid argument (invalid image path or file format): {image_path}. Please provide a valid PNG image."
        print_tool_response(error_text)
        return error_text

    # The payload is sent as a data URL that is always labelled image/png.
    base64_img_str = f"data:image/png;base64,{base64_img}"
    if CUSTOM_DEBUG:
        print_tool_call(
            image_query_tool,
            tool_name='image_query_tool',
            # Log only the first 100 characters of the data URL, not the
            # whole encoded image.
            args={'base64_image': base64_img_str[:100], 'question': question},
        )
    msg = HumanMessage(content=[
        {"type": "text", "text": question},
        {"type": "image_url", "image_url": base64_img_str},
    ])
    try:
        response = gemini.invoke([msg])
    except ChatGoogleGenerativeAIError as err:
        # Report the actual failure. The previous hard-coded
        # "400 Provided image is not valid" text was returned for every
        # Gemini error, which mislabelled unrelated failures as a bad image.
        error_text = f"ChatGoogleGenerativeAIError: {err}"
        print_tool_response(error_text)
        return error_text
    if CUSTOM_DEBUG:
        print_tool_response(response.content)
    return response.content
224
+
225
 
226
  def extract_video_id(url: str) -> str:
227
  parsed = urlparse(url)