Spaces: Sleeping
Update handler.py
Browse files
Files changed: handler.py (+32 -117)
handler.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
#
|
2 |
import asyncio
|
3 |
import base64
|
4 |
import json
|
@@ -21,16 +21,11 @@ class AudioLoop:
|
|
21 |
self.audio_in_queue = asyncio.Queue()
|
22 |
# Flag to signal shutdown
|
23 |
self.shutdown_event = asyncio.Event()
|
24 |
-
|
25 |
-
|
26 |
-
# Track if we're in audio mode
|
27 |
-
self.audio_mode = True
|
28 |
-
|
29 |
-
async def startup(self, tools=None, api_key=None):
|
30 |
"""Send the model setup message to Gemini.
|
31 |
|
32 |
Args:
|
33 |
-
tools: Optional list of tools to enable for the model
|
34 |
api_key: API key to use (overrides environment variable)
|
35 |
"""
|
36 |
# Use provided API key or fallback to environment variable
|
@@ -41,31 +36,12 @@ class AudioLoop:
|
|
41 |
uri = uri_template.format(api_key=key)
|
42 |
self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
|
43 |
|
44 |
-
#
|
45 |
-
speech_config = {
|
46 |
-
"speech_config": {
|
47 |
-
"voice_config": {
|
48 |
-
"prebuilt_voice_config": {
|
49 |
-
"voice_name": self.voice_name
|
50 |
-
}
|
51 |
-
}
|
52 |
-
}
|
53 |
-
}
|
54 |
-
|
55 |
-
# Add speech and audio configuration to setup
|
56 |
setup_msg = {
|
57 |
"setup": {
|
58 |
-
"model": f"models/{model}"
|
59 |
-
"live_connect_config": {
|
60 |
-
"response_modalities": ["AUDIO"],
|
61 |
-
**speech_config
|
62 |
-
}
|
63 |
}
|
64 |
}
|
65 |
-
|
66 |
-
# Add tools if provided
|
67 |
-
if tools:
|
68 |
-
setup_msg["setup"]["tools"] = tools
|
69 |
|
70 |
await self.ws.send(json.dumps(setup_msg))
|
71 |
|
@@ -73,22 +49,6 @@ class AudioLoop:
|
|
73 |
setup_response = json.loads(raw_response)
|
74 |
print("[AudioLoop] Setup response from Gemini:", setup_response)
|
75 |
|
76 |
-
def set_voice(self, voice_name):
|
77 |
-
"""Set the voice to use for audio responses.
|
78 |
-
|
79 |
-
Args:
|
80 |
-
voice_name: Name of the voice to use (e.g. "Puck", "Charon", etc.)
|
81 |
-
"""
|
82 |
-
self.voice_name = voice_name
|
83 |
-
|
84 |
-
def set_audio_mode(self, enabled=True):
|
85 |
-
"""Enable or disable audio mode.
|
86 |
-
|
87 |
-
Args:
|
88 |
-
enabled: True to enable audio mode, False to use text only
|
89 |
-
"""
|
90 |
-
self.audio_mode = enabled
|
91 |
-
|
92 |
async def send_realtime(self):
|
93 |
"""Read from out_queue and forward those messages to Gemini in real time."""
|
94 |
try:
|
@@ -96,17 +56,6 @@ class AudioLoop:
|
|
96 |
# Get next message from queue with timeout
|
97 |
try:
|
98 |
msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
|
99 |
-
|
100 |
-
# If we're in audio-only mode and this is a text message,
|
101 |
-
# add flag to request audio output
|
102 |
-
if self.audio_mode and "client_content" in msg:
|
103 |
-
# Ensure there's a configuration section for this message
|
104 |
-
if "config" not in msg:
|
105 |
-
msg["config"] = {}
|
106 |
-
|
107 |
-
# Set response modality to audio
|
108 |
-
msg["config"]["response_modalities"] = ["AUDIO"]
|
109 |
-
|
110 |
await self.ws.send(json.dumps(msg))
|
111 |
except asyncio.TimeoutError:
|
112 |
# No message in queue, continue checking
|
@@ -120,41 +69,49 @@ class AudioLoop:
|
|
120 |
print("[AudioLoop] send_realtime task ended")
|
121 |
|
122 |
async def receive_audio(self):
|
123 |
-
"""Read from Gemini websocket and
|
124 |
try:
|
125 |
while not self.shutdown_event.is_set():
|
126 |
try:
|
127 |
raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
|
128 |
response = json.loads(raw_response)
|
129 |
|
130 |
-
#
|
131 |
-
|
132 |
|
133 |
-
# Process audio data
|
134 |
try:
|
135 |
# Check for inline PCM data
|
136 |
-
if "serverContent" in response and
|
|
|
|
|
|
|
137 |
parts = response["serverContent"]["modelTurn"]["parts"]
|
138 |
for part in parts:
|
139 |
if "inlineData" in part and "data" in part["inlineData"]:
|
140 |
b64data = part["inlineData"]["data"]
|
141 |
pcm_data = base64.b64decode(b64data)
|
142 |
await self.audio_in_queue.put(pcm_data)
|
143 |
-
|
144 |
-
|
145 |
-
if "serverContent" in response and "modelTurn" in response["serverContent"] and "parts" in response["serverContent"]["modelTurn"]:
|
146 |
-
parts = response["serverContent"]["modelTurn"]["parts"]
|
147 |
-
for part in parts:
|
148 |
-
if "text" in part:
|
149 |
-
print(f"[AudioLoop] Text response: {part['text']}")
|
150 |
-
# You could add text handling here if needed
|
151 |
-
except KeyError as e:
|
152 |
-
print(f"[AudioLoop] KeyError while parsing response: {e}")
|
153 |
|
154 |
-
# Handle tool calls
|
155 |
tool_call = response.pop('toolCall', None)
|
156 |
-
if tool_call
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
except asyncio.TimeoutError:
|
159 |
# No message received, continue checking
|
160 |
continue
|
@@ -168,54 +125,12 @@ class AudioLoop:
|
|
168 |
traceback.print_exc()
|
169 |
finally:
|
170 |
print("[AudioLoop] receive_audio task ended")
|
171 |
-
|
172 |
-
async def handle_tool_call(self, tool_call):
|
173 |
-
print("[AudioLoop] Tool call received:", tool_call)
|
174 |
-
for fc in tool_call['functionCalls']:
|
175 |
-
msg = {
|
176 |
-
'tool_response': {
|
177 |
-
'function_responses': [{
|
178 |
-
'id': fc['id'],
|
179 |
-
'name': fc['name'],
|
180 |
-
'response': {'result': {'string_value': 'ok'}}
|
181 |
-
}]
|
182 |
-
}
|
183 |
-
}
|
184 |
-
await self.ws.send(json.dumps(msg))
|
185 |
|
186 |
async def run(self):
|
187 |
"""Main entry point: connects to Gemini, starts send/receive tasks."""
|
188 |
try:
|
189 |
-
# Define tools that Gemini can use
|
190 |
-
turn_on_the_lights_schema = {'name': 'turn_on_the_lights'}
|
191 |
-
turn_off_the_lights_schema = {'name': 'turn_off_the_lights'}
|
192 |
-
github_repo_info_schema = {
|
193 |
-
'name': 'get_github_repo_info',
|
194 |
-
'description': 'Get information about a GitHub repository',
|
195 |
-
'parameters': {
|
196 |
-
'type': 'object',
|
197 |
-
'properties': {
|
198 |
-
'repo_url': {
|
199 |
-
'type': 'string',
|
200 |
-
'description': 'Full URL of the GitHub repository'
|
201 |
-
}
|
202 |
-
},
|
203 |
-
'required': ['repo_url']
|
204 |
-
}
|
205 |
-
}
|
206 |
-
|
207 |
-
tools = [
|
208 |
-
{'google_search': {}},
|
209 |
-
{'function_declarations': [
|
210 |
-
turn_on_the_lights_schema,
|
211 |
-
turn_off_the_lights_schema,
|
212 |
-
github_repo_info_schema
|
213 |
-
]},
|
214 |
-
{'code_execution': {}},
|
215 |
-
]
|
216 |
-
|
217 |
# Initialize the connection with Gemini
|
218 |
-
await self.startup(
|
219 |
|
220 |
# Start processing tasks
|
221 |
try:
|
|
|
1 |
+
# basic_handler.py
|
2 |
import asyncio
|
3 |
import base64
|
4 |
import json
|
|
|
21 |
self.audio_in_queue = asyncio.Queue()
|
22 |
# Flag to signal shutdown
|
23 |
self.shutdown_event = asyncio.Event()
|
24 |
+
|
25 |
+
async def startup(self, api_key=None):
|
|
|
|
|
|
|
|
|
26 |
"""Send the model setup message to Gemini.
|
27 |
|
28 |
Args:
|
|
|
29 |
api_key: API key to use (overrides environment variable)
|
30 |
"""
|
31 |
# Use provided API key or fallback to environment variable
|
|
|
36 |
uri = uri_template.format(api_key=key)
|
37 |
self.ws = await connect(uri, additional_headers={"Content-Type": "application/json"})
|
38 |
|
39 |
+
# Absolutely minimal setup message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
setup_msg = {
|
41 |
"setup": {
|
42 |
+
"model": f"models/{model}"
|
|
|
|
|
|
|
|
|
43 |
}
|
44 |
}
|
|
|
|
|
|
|
|
|
45 |
|
46 |
await self.ws.send(json.dumps(setup_msg))
|
47 |
|
|
|
49 |
setup_response = json.loads(raw_response)
|
50 |
print("[AudioLoop] Setup response from Gemini:", setup_response)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
async def send_realtime(self):
|
53 |
"""Read from out_queue and forward those messages to Gemini in real time."""
|
54 |
try:
|
|
|
56 |
# Get next message from queue with timeout
|
57 |
try:
|
58 |
msg = await asyncio.wait_for(self.out_queue.get(), 0.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
await self.ws.send(json.dumps(msg))
|
60 |
except asyncio.TimeoutError:
|
61 |
# No message in queue, continue checking
|
|
|
69 |
print("[AudioLoop] send_realtime task ended")
|
70 |
|
71 |
async def receive_audio(self):
|
72 |
+
"""Read from Gemini websocket and process responses."""
|
73 |
try:
|
74 |
while not self.shutdown_event.is_set():
|
75 |
try:
|
76 |
raw_response = await asyncio.wait_for(self.ws.recv(), 0.5)
|
77 |
response = json.loads(raw_response)
|
78 |
|
79 |
+
# Print for debugging
|
80 |
+
print(f"[AudioLoop] Received response: {json.dumps(response)[:500]}...")
|
81 |
|
82 |
+
# Process audio data if present
|
83 |
try:
|
84 |
# Check for inline PCM data
|
85 |
+
if ("serverContent" in response and
|
86 |
+
"modelTurn" in response["serverContent"] and
|
87 |
+
"parts" in response["serverContent"]["modelTurn"]):
|
88 |
+
|
89 |
parts = response["serverContent"]["modelTurn"]["parts"]
|
90 |
for part in parts:
|
91 |
if "inlineData" in part and "data" in part["inlineData"]:
|
92 |
b64data = part["inlineData"]["data"]
|
93 |
pcm_data = base64.b64decode(b64data)
|
94 |
await self.audio_in_queue.put(pcm_data)
|
95 |
+
except Exception as e:
|
96 |
+
print(f"[AudioLoop] Error extracting audio: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
+
# Handle tool calls if present
|
99 |
tool_call = response.pop('toolCall', None)
|
100 |
+
if tool_call:
|
101 |
+
print(f"[AudioLoop] Tool call received: {tool_call}")
|
102 |
+
# Send simple OK response for now
|
103 |
+
for fc in tool_call.get('functionCalls', []):
|
104 |
+
resp_msg = {
|
105 |
+
'tool_response': {
|
106 |
+
'function_responses': [{
|
107 |
+
'id': fc.get('id', ''),
|
108 |
+
'name': fc.get('name', ''),
|
109 |
+
'response': {'result': {'string_value': 'ok'}}
|
110 |
+
}]
|
111 |
+
}
|
112 |
+
}
|
113 |
+
await self.ws.send(json.dumps(resp_msg))
|
114 |
+
|
115 |
except asyncio.TimeoutError:
|
116 |
# No message received, continue checking
|
117 |
continue
|
|
|
125 |
traceback.print_exc()
|
126 |
finally:
|
127 |
print("[AudioLoop] receive_audio task ended")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
async def run(self):
|
130 |
"""Main entry point: connects to Gemini, starts send/receive tasks."""
|
131 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
# Initialize the connection with Gemini
|
133 |
+
await self.startup()
|
134 |
|
135 |
# Start processing tasks
|
136 |
try:
|