nihalaninihal committed on
Commit
76fb0a8
·
verified ·
1 Parent(s): 0f005f1

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +32 -117
handler.py CHANGED
@@ -1,4 +1,4 @@
1
- # updated_handler.py
2
  import asyncio
3
  import base64
4
  import json
@@ -21,16 +21,11 @@ class AudioLoop:
21
  self.audio_in_queue = asyncio.Queue()
22
  # Flag to signal shutdown
23
  self.shutdown_event = asyncio.Event()
24
- # Voice configuration
25
- self.voice_name = "Puck" # Default voice name
26
- # Track if we're in audio mode
27
- self.audio_mode = True
28
-
29
- async def startup(self, tools=None, api_key=None):
30
  """Send the model setup message to Gemini.
31
 
32
  Args:
33
- tools: Optional list of tools to enable for the model
34
  api_key: API key to use (overrides environment variable)
35
  """
36
  # Use provided API key or fallback to environment variable
@@ -41,31 +36,12 @@ class AudioLoop:
41
  uri = uri_template.format(api_key=key)
42
  self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
43
 
44
- # Configure speech output with specified voice
45
- speech_config = {
46
- "speech_config": {
47
- "voice_config": {
48
- "prebuilt_voice_config": {
49
- "voice_name": self.voice_name
50
- }
51
- }
52
- }
53
- }
54
-
55
- # Add speech and audio configuration to setup
56
  setup_msg = {
57
  "setup": {
58
- "model": f"models/{model}",
59
- "live_connect_config": {
60
- "response_modalities": ["AUDIO"],
61
- **speech_config
62
- }
63
  }
64
  }
65
-
66
- # Add tools if provided
67
- if tools:
68
- setup_msg["setup"]["tools"] = tools
69
 
70
  await self.ws.send(json.dumps(setup_msg))
71
 
@@ -73,22 +49,6 @@ class AudioLoop:
73
  setup_response = json.loads(raw_response)
74
  print("[AudioLoop] Setup response from Gemini:", setup_response)
75
 
76
- def set_voice(self, voice_name):
77
- """Set the voice to use for audio responses.
78
-
79
- Args:
80
- voice_name: Name of the voice to use (e.g. "Puck", "Charon", etc.)
81
- """
82
- self.voice_name = voice_name
83
-
84
- def set_audio_mode(self, enabled=True):
85
- """Enable or disable audio mode.
86
-
87
- Args:
88
- enabled: True to enable audio mode, False to use text only
89
- """
90
- self.audio_mode = enabled
91
-
92
  async def send_realtime(self):
93
  """Read from out_queue and forward those messages to Gemini in real time."""
94
  try:
@@ -96,17 +56,6 @@ class AudioLoop:
96
  # Get next message from queue with timeout
97
  try:
98
  msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
99
-
100
- # If we're in audio-only mode and this is a text message,
101
- # add flag to request audio output
102
- if self.audio_mode and "client_content" in msg:
103
- # Ensure there's a configuration section for this message
104
- if "config" not in msg:
105
- msg["config"] = {}
106
-
107
- # Set response modality to audio
108
- msg["config"]["response_modalities"] = ["AUDIO"]
109
-
110
  await self.ws.send(json.dumps(msg))
111
  except asyncio.TimeoutError:
112
  # No message in queue, continue checking
@@ -120,41 +69,49 @@ class AudioLoop:
120
  print("[AudioLoop] send_realtime task ended")
121
 
122
  async def receive_audio(self):
123
- """Read from Gemini websocket and push PCM data into audio_in_queue."""
124
  try:
125
  while not self.shutdown_event.is_set():
126
  try:
127
  raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
128
  response = json.loads(raw_response)
129
 
130
- # Debug log all responses (optional)
131
- # print("Gemini raw response:", response)
132
 
133
- # Process audio data
134
  try:
135
  # Check for inline PCM data
136
- if "serverContent" in response and "modelTurn" in response["serverContent"] and "parts" in response["serverContent"]["modelTurn"]:
 
 
 
137
  parts = response["serverContent"]["modelTurn"]["parts"]
138
  for part in parts:
139
  if "inlineData" in part and "data" in part["inlineData"]:
140
  b64data = part["inlineData"]["data"]
141
  pcm_data = base64.b64decode(b64data)
142
  await self.audio_in_queue.put(pcm_data)
143
-
144
- # Check for text data
145
- if "serverContent" in response and "modelTurn" in response["serverContent"] and "parts" in response["serverContent"]["modelTurn"]:
146
- parts = response["serverContent"]["modelTurn"]["parts"]
147
- for part in parts:
148
- if "text" in part:
149
- print(f"[AudioLoop] Text response: {part['text']}")
150
- # You could add text handling here if needed
151
- except KeyError as e:
152
- print(f"[AudioLoop] KeyError while parsing response: {e}")
153
 
154
- # Handle tool calls
155
  tool_call = response.pop('toolCall', None)
156
- if tool_call is not None:
157
- await self.handle_tool_call(tool_call)
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  except asyncio.TimeoutError:
159
  # No message received, continue checking
160
  continue
@@ -168,54 +125,12 @@ class AudioLoop:
168
  traceback.print_exc()
169
  finally:
170
  print("[AudioLoop] receive_audio task ended")
171
-
172
- async def handle_tool_call(self, tool_call):
173
- print("[AudioLoop] Tool call received:", tool_call)
174
- for fc in tool_call['functionCalls']:
175
- msg = {
176
- 'tool_response': {
177
- 'function_responses': [{
178
- 'id': fc['id'],
179
- 'name': fc['name'],
180
- 'response': {'result': {'string_value': 'ok'}}
181
- }]
182
- }
183
- }
184
- await self.ws.send(json.dumps(msg))
185
 
186
  async def run(self):
187
  """Main entry point: connects to Gemini, starts send/receive tasks."""
188
  try:
189
- # Define tools that Gemini can use
190
- turn_on_the_lights_schema = {'name': 'turn_on_the_lights'}
191
- turn_off_the_lights_schema = {'name': 'turn_off_the_lights'}
192
- github_repo_info_schema = {
193
- 'name': 'get_github_repo_info',
194
- 'description': 'Get information about a GitHub repository',
195
- 'parameters': {
196
- 'type': 'object',
197
- 'properties': {
198
- 'repo_url': {
199
- 'type': 'string',
200
- 'description': 'Full URL of the GitHub repository'
201
- }
202
- },
203
- 'required': ['repo_url']
204
- }
205
- }
206
-
207
- tools = [
208
- {'google_search': {}},
209
- {'function_declarations': [
210
- turn_on_the_lights_schema,
211
- turn_off_the_lights_schema,
212
- github_repo_info_schema
213
- ]},
214
- {'code_execution': {}},
215
- ]
216
-
217
  # Initialize the connection with Gemini
218
- await self.startup(tools)
219
 
220
  # Start processing tasks
221
  try:
 
1
+ # basic_handler.py
2
  import asyncio
3
  import base64
4
  import json
 
21
  self.audio_in_queue = asyncio.Queue()
22
  # Flag to signal shutdown
23
  self.shutdown_event = asyncio.Event()
24
+
25
+ async def startup(self, api_key=None):
 
 
 
 
26
  """Send the model setup message to Gemini.
27
 
28
  Args:
 
29
  api_key: API key to use (overrides environment variable)
30
  """
31
  # Use provided API key or fallback to environment variable
 
36
  uri = uri_template.format(api_key=key)
37
  self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
38
 
39
+ # Absolutely minimal setup message
 
 
 
 
 
 
 
 
 
 
 
40
  setup_msg = {
41
  "setup": {
42
+ "model": f"models/{model}"
 
 
 
 
43
  }
44
  }
 
 
 
 
45
 
46
  await self.ws.send(json.dumps(setup_msg))
47
 
 
49
  setup_response = json.loads(raw_response)
50
  print("[AudioLoop] Setup response from Gemini:", setup_response)
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  async def send_realtime(self):
53
  """Read from out_queue and forward those messages to Gemini in real time."""
54
  try:
 
56
  # Get next message from queue with timeout
57
  try:
58
  msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
 
 
 
 
 
 
 
 
 
 
 
59
  await self.ws.send(json.dumps(msg))
60
  except asyncio.TimeoutError:
61
  # No message in queue, continue checking
 
69
  print("[AudioLoop] send_realtime task ended")
70
 
71
  async def receive_audio(self):
72
+ """Read from Gemini websocket and process responses."""
73
  try:
74
  while not self.shutdown_event.is_set():
75
  try:
76
  raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
77
  response = json.loads(raw_response)
78
 
79
+ # Print for debugging
80
+ print(f"[AudioLoop] Received response: {json.dumps(response)[:500]}...")
81
 
82
+ # Process audio data if present
83
  try:
84
  # Check for inline PCM data
85
+ if ("serverContent" in response and
86
+ "modelTurn" in response["serverContent"] and
87
+ "parts" in response["serverContent"]["modelTurn"]):
88
+
89
  parts = response["serverContent"]["modelTurn"]["parts"]
90
  for part in parts:
91
  if "inlineData" in part and "data" in part["inlineData"]:
92
  b64data = part["inlineData"]["data"]
93
  pcm_data = base64.b64decode(b64data)
94
  await self.audio_in_queue.put(pcm_data)
95
+ except Exception as e:
96
+ print(f"[AudioLoop] Error extracting audio: {e}")
 
 
 
 
 
 
 
 
97
 
98
+ # Handle tool calls if present
99
  tool_call = response.pop('toolCall', None)
100
+ if tool_call:
101
+ print(f"[AudioLoop] Tool call received: {tool_call}")
102
+ # Send simple OK response for now
103
+ for fc in tool_call.get('functionCalls', []):
104
+ resp_msg = {
105
+ 'tool_response': {
106
+ 'function_responses': [{
107
+ 'id': fc.get('id', ''),
108
+ 'name': fc.get('name', ''),
109
+ 'response': {'result': {'string_value': 'ok'}}
110
+ }]
111
+ }
112
+ }
113
+ await self.ws.send(json.dumps(resp_msg))
114
+
115
  except asyncio.TimeoutError:
116
  # No message received, continue checking
117
  continue
 
125
  traceback.print_exc()
126
  finally:
127
  print("[AudioLoop] receive_audio task ended")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  async def run(self):
130
  """Main entry point: connects to Gemini, starts send/receive tasks."""
131
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  # Initialize the connection with Gemini
133
+ await self.startup()
134
 
135
  # Start processing tasks
136
  try: