G.E.N.I.EmarkII

Sleeping

App Files Files Community

nihalaninihal committed on Apr 13

Commit

76fb0a8

verified ·

1 Parent(s): 0f005f1

Update handler.py

Browse files

Files changed (1) hide show

handler.py +32 -117

handler.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# updated_handler.py
 import asyncio
 import base64
 import json
@@ -21,16 +21,11 @@ class AudioLoop:
         self.audio_in_queue = asyncio.Queue()
         # Flag to signal shutdown
         self.shutdown_event = asyncio.Event()
-        # Voice configuration
-        self.voice_name = "Puck"  # Default voice name
-        # Track if we're in audio mode
-        self.audio_mode = True
-    async def startup(self, tools=None, api_key=None):
         """Send the model setup message to Gemini.
         Args:
-            tools: Optional list of tools to enable for the model
             api_key: API key to use (overrides environment variable)
         """
         # Use provided API key or fallback to environment variable
@@ -41,31 +36,12 @@ class AudioLoop:
         uri = uri_template.format(api_key=key)
         self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
-        # Configure speech output with specified voice
-        speech_config = {
-            "speech_config": {
-                "voice_config": {
-                    "prebuilt_voice_config": {
-                        "voice_name": self.voice_name
-                    }
-                }
-            }
-        }
-        # Add speech and audio configuration to setup
         setup_msg = {
             "setup": {
-                "model": f"models/{model}",
-                "live_connect_config": {
-                    "response_modalities": ["AUDIO"],
-                    **speech_config
-                }
             }
         }
-        # Add tools if provided
-        if tools:
-            setup_msg["setup"]["tools"] = tools
         await self.ws.send(json.dumps(setup_msg))
@@ -73,22 +49,6 @@ class AudioLoop:
         setup_response = json.loads(raw_response)
         print("[AudioLoop] Setup response from Gemini:", setup_response)
-    def set_voice(self, voice_name):
-        """Set the voice to use for audio responses.
-        Args:
-            voice_name: Name of the voice to use (e.g. "Puck", "Charon", etc.)
-        """
-        self.voice_name = voice_name
-    def set_audio_mode(self, enabled=True):
-        """Enable or disable audio mode.
-        Args:
-            enabled: True to enable audio mode, False to use text only
-        """
-        self.audio_mode = enabled
     async def send_realtime(self):
         """Read from out_queue and forward those messages to Gemini in real time."""
         try:
@@ -96,17 +56,6 @@ class AudioLoop:
                 # Get next message from queue with timeout
                 try:
                     msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
-                    # If we're in audio-only mode and this is a text message,
-                    # add flag to request audio output
-                    if self.audio_mode and "client_content" in msg:
-                        # Ensure there's a configuration section for this message
-                        if "config" not in msg:
-                            msg["config"] = {}
-                        # Set response modality to audio
-                        msg["config"]["response_modalities"] = ["AUDIO"]
                     await self.ws.send(json.dumps(msg))
                 except asyncio.TimeoutError:
                     # No message in queue, continue checking
@@ -120,41 +69,49 @@ class AudioLoop:
             print("[AudioLoop] send_realtime task ended")
     async def receive_audio(self):
-        """Read from Gemini websocket and push PCM data into audio_in_queue."""
         try:
             while not self.shutdown_event.is_set():
                 try:
                     raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
                     response = json.loads(raw_response)
-                    # Debug log all responses (optional)
-                    # print("Gemini raw response:", response)
-                    # Process audio data
                     try:
                         # Check for inline PCM data
-                        if "serverContent" in response and "modelTurn" in response["serverContent"] and "parts" in response["serverContent"]["modelTurn"]:
                             parts = response["serverContent"]["modelTurn"]["parts"]
                             for part in parts:
                                 if "inlineData" in part and "data" in part["inlineData"]:
                                     b64data = part["inlineData"]["data"]
                                     pcm_data = base64.b64decode(b64data)
                                     await self.audio_in_queue.put(pcm_data)
-                        # Check for text data
-                        if "serverContent" in response and "modelTurn" in response["serverContent"] and "parts" in response["serverContent"]["modelTurn"]:
-                            parts = response["serverContent"]["modelTurn"]["parts"]
-                            for part in parts:
-                                if "text" in part:
-                                    print(f"[AudioLoop] Text response: {part['text']}")
-                                    # You could add text handling here if needed
-                    except KeyError as e:
-                        print(f"[AudioLoop] KeyError while parsing response: {e}")
-                    # Handle tool calls
                     tool_call = response.pop('toolCall', None)
-                    if tool_call is not None:
-                        await self.handle_tool_call(tool_call)
                 except asyncio.TimeoutError:
                     # No message received, continue checking
                     continue
@@ -168,54 +125,12 @@ class AudioLoop:
             traceback.print_exc()
         finally:
             print("[AudioLoop] receive_audio task ended")
-    async def handle_tool_call(self, tool_call):
-        print("[AudioLoop] Tool call received:", tool_call)
-        for fc in tool_call['functionCalls']:
-            msg = {
-                'tool_response': {
-                    'function_responses': [{
-                        'id': fc['id'],
-                        'name': fc['name'],
-                        'response': {'result': {'string_value': 'ok'}}
-                    }]
-                }
-            }
-            await self.ws.send(json.dumps(msg))
     async def run(self):
         """Main entry point: connects to Gemini, starts send/receive tasks."""
         try:
-            # Define tools that Gemini can use
-            turn_on_the_lights_schema = {'name': 'turn_on_the_lights'}
-            turn_off_the_lights_schema = {'name': 'turn_off_the_lights'}
-            github_repo_info_schema = {
-                'name': 'get_github_repo_info',
-                'description': 'Get information about a GitHub repository',
-                'parameters': {
-                    'type': 'object',
-                    'properties': {
-                        'repo_url': {
-                            'type': 'string',
-                            'description': 'Full URL of the GitHub repository'
-                        }
-                    },
-                    'required': ['repo_url']
-                }
-            }
-            tools = [
-                {'google_search': {}},
-                {'function_declarations': [
-                    turn_on_the_lights_schema,
-                    turn_off_the_lights_schema,
-                    github_repo_info_schema
-                ]},
-                {'code_execution': {}},
-            ]
             # Initialize the connection with Gemini
-            await self.startup(tools)
             # Start processing tasks
             try:

+# basic_handler.py
 import asyncio
 import base64
 import json
         self.audio_in_queue = asyncio.Queue()
         # Flag to signal shutdown
         self.shutdown_event = asyncio.Event()
+    async def startup(self, api_key=None):
         """Send the model setup message to Gemini.
         Args:
             api_key: API key to use (overrides environment variable)
         """
         # Use provided API key or fallback to environment variable
         uri = uri_template.format(api_key=key)
         self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
+        # Absolutely minimal setup message
         setup_msg = {
             "setup": {
+                "model": f"models/{model}"
             }
         }
         await self.ws.send(json.dumps(setup_msg))
         setup_response = json.loads(raw_response)
         print("[AudioLoop] Setup response from Gemini:", setup_response)
     async def send_realtime(self):
         """Read from out_queue and forward those messages to Gemini in real time."""
         try:
                 # Get next message from queue with timeout
                 try:
                     msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
                     await self.ws.send(json.dumps(msg))
                 except asyncio.TimeoutError:
                     # No message in queue, continue checking
             print("[AudioLoop] send_realtime task ended")
     async def receive_audio(self):
+        """Read from Gemini websocket and process responses."""
         try:
             while not self.shutdown_event.is_set():
                 try:
                     raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
                     response = json.loads(raw_response)
+                    # Print for debugging
+                    print(f"[AudioLoop] Received response: {json.dumps(response)[:500]}...")
+                    # Process audio data if present
                     try:
                         # Check for inline PCM data
+                        if ("serverContent" in response and
+                            "modelTurn" in response["serverContent"] and
+                            "parts" in response["serverContent"]["modelTurn"]):
                             parts = response["serverContent"]["modelTurn"]["parts"]
                             for part in parts:
                                 if "inlineData" in part and "data" in part["inlineData"]:
                                     b64data = part["inlineData"]["data"]
                                     pcm_data = base64.b64decode(b64data)
                                     await self.audio_in_queue.put(pcm_data)
+                    except Exception as e:
+                        print(f"[AudioLoop] Error extracting audio: {e}")
+                    # Handle tool calls if present
                     tool_call = response.pop('toolCall', None)
+                    if tool_call:
+                        print(f"[AudioLoop] Tool call received: {tool_call}")
+                        # Send simple OK response for now
+                        for fc in tool_call.get('functionCalls', []):
+                            resp_msg = {
+                                'tool_response': {
+                                    'function_responses': [{
+                                        'id': fc.get('id', ''),
+                                        'name': fc.get('name', ''),
+                                        'response': {'result': {'string_value': 'ok'}}
+                                    }]
+                                }
+                            }
+                            await self.ws.send(json.dumps(resp_msg))
                 except asyncio.TimeoutError:
                     # No message received, continue checking
                     continue
             traceback.print_exc()
         finally:
             print("[AudioLoop] receive_audio task ended")
     async def run(self):
         """Main entry point: connects to Gemini, starts send/receive tasks."""
         try:
             # Initialize the connection with Gemini
+            await self.startup()
             # Start processing tasks
             try: