ai: Implementing parallel request with first-success strategy.
jarvis.py CHANGED
@@ -55,8 +55,6 @@ META_TAGS = os.getenv("META_TAGS")
 
 ALLOWED_EXTENSIONS = json.loads(os.getenv("ALLOWED_EXTENSIONS", "[]"))
 
-ACTIVE_CANDIDATE = None
-
 class SessionWithID(requests.Session):
     def __init__(sess):
         super().__init__()
@@ -222,13 +220,19 @@ async def fetch_response_async(host, key, model, msgs, cfg, sid):
         marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
         return None
 
+async def candidate_task(h, k, model, msgs, cfg, sid):
+    r = await fetch_response_async(h, k, model, msgs, cfg, sid)
+    return r, (h, k)
+
 async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt):
     ensure_stop_event(sess)
     if not get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED) or not get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_ATTEMPTS):
         return RESPONSES["RESPONSE_3"]
-    if not hasattr(sess, "session_id"):
+    if not hasattr(sess, "session_id") or not sess.session_id:
         sess.session_id = str(uuid.uuid4())
         sess.stop_event = asyncio.Event()
+    if not hasattr(sess, "active_candidate"):
+        sess.active_candidate = None
     model_key = get_model_key(model_display)
     cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
     msgs = [{"role": "user", "content": u} for u, _ in history] + [{"role": "assistant", "content": a} for _, a in history if a]
@@ -238,21 +242,32 @@ async def chat_with_model_async(history, user_input, model_display, sess, custom
     prompt = custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)
     msgs.insert(0, {"role": "system", "content": prompt})
     msgs.append({"role": "user", "content": user_input})
-    global ACTIVE_CANDIDATE
-    if ACTIVE_CANDIDATE:
-        res = await fetch_response_async(ACTIVE_CANDIDATE[0], ACTIVE_CANDIDATE[1], model_key, msgs, cfg, sess.session_id)
+    if sess.active_candidate:
+        res = await fetch_response_async(sess.active_candidate[0], sess.active_candidate[1], model_key, msgs, cfg, sess.session_id)
         if res:
             return res
-        ACTIVE_CANDIDATE = None
+        sess.active_candidate = None
     keys = get_available_items(LINUX_SERVER_PROVIDER_KEYS, LINUX_SERVER_PROVIDER_KEYS_MARKED)
     hosts = get_available_items(LINUX_SERVER_HOSTS, LINUX_SERVER_HOSTS_ATTEMPTS)
     cands = [(h, k) for h in hosts for k in keys]
     random.shuffle(cands)
-    for h, k in cands:
-        res = await fetch_response_async(h, k, model_key, msgs, cfg, sess.session_id)
-        if res:
-            ACTIVE_CANDIDATE = (h, k)
-            return res
+    tasks = [asyncio.create_task(candidate_task(h, k, model_key, msgs, cfg, sess.session_id)) for h, k in cands]
+    first_success = None
+    candidate = None
+    for task in asyncio.as_completed(tasks):
+        try:
+            res, cand = await task
+            if res is not None:
+                first_success = res
+                candidate = cand
+                break
+        except:
+            continue
+    for t in tasks:
+        t.cancel()
+    if first_success:
+        sess.active_candidate = candidate
+        return first_success
     return RESPONSES["RESPONSE_2"]
 
 async def respond_async(multi, history, model_display, sess, custom_prompt):
@@ -287,7 +302,7 @@ async def respond_async(multi, history, model_display, sess, custom_prompt):
                 buffer.clear()
                 last_update = current_time
                 yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
-            await asyncio.sleep(0.
+            await asyncio.sleep(0.020)
         if buffer:
             history[-1][1] += "".join(buffer)
             yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
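For reference, the first-success race in this commit reduces to a small reusable asyncio pattern. The sketch below is a minimal, self-contained approximation rather than the Space's actual code: probe() is a hypothetical stand-in for fetch_response_async(), and the (host, key) list stands in for the shuffled cands. All candidates are started at once, the first non-None result wins, and every remaining task is cancelled.

```python
# Minimal sketch of a first-success race over candidate endpoints.
# probe() is a hypothetical stand-in for the real request function:
# it returns a result string on success or None on failure.
import asyncio
import random

async def probe(host: str, key: str) -> str | None:
    # Simulate a provider call with variable latency and a chance of failure.
    await asyncio.sleep(random.uniform(0.1, 1.0))
    if random.random() < 0.5:
        return None
    return f"response via {host}/{key}"

async def first_success(candidates: list[tuple[str, str]]):
    # Wrap each probe so the winner reports which (host, key) pair it used.
    async def attempt(h: str, k: str):
        return await probe(h, k), (h, k)

    # Launch one task per candidate; they all run concurrently.
    tasks = [asyncio.create_task(attempt(h, k)) for h, k in candidates]
    winner = None
    try:
        # as_completed yields futures in finish order, so the first
        # non-None result is the fastest successful candidate.
        for fut in asyncio.as_completed(tasks):
            try:
                res, cand = await fut
            except Exception:
                continue  # an error in one probe should not abort the race
            if res is not None:
                winner = (res, cand)
                break
    finally:
        # Cancel the losers so no request keeps running in the background.
        for t in tasks:
            t.cancel()
    return winner  # (result, (host, key)) or None if every candidate failed

async def main():
    cands = [("host-a", "key-1"), ("host-b", "key-2"), ("host-c", "key-3")]
    print(await first_success(cands))

if __name__ == "__main__":
    asyncio.run(main())
```

Caching the winning pair, as the commit does with sess.active_candidate, lets the next turn try the known-good candidate alone before paying for another full race; the race is only re-run once that cached candidate fails.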