Update app.py
app.py CHANGED

@@ -308,61 +308,28 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
         chat_model = inf_provider()
         start_time = time.time()
         async def process_stream():
-            nonlocal answer_yet
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            response_text = response.choices[0].message.content
-            words = response_text.split()
-            for word in words:
-                answer_yet += word + " "
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
+            # Iterate over the streaming response chunks
+            response = chat_model.chat.completions.create(
+                model=model_config.get("reader","INF_PROVIDER_MODEL"),
+                messages = messages,
+                stream= True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            async for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
                 parsed_answer = parse_output_llm_with_sources(answer_yet)
                 history[-1] = (query, parsed_answer)
-                # Update logs_data with current answer (and get a new timestamp)
                 logs_data["answer"] = parsed_answer
                 yield [tuple(x) for x in history], docs_html, logs_data, session_id
-                await asyncio.sleep(0.05)
-
-        except Exception as e:
-            raise

+        # Stream the response updates
         async for update in process_stream():
             yield update
-
-        # async def process_stream():
-        #     nonlocal answer_yet # Use the outer scope's answer_yet variable
-        #     # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
-        #     # instead of modifying the one from the outer scope.
-        #     # Iterate over the streaming response chunks
-        #     response = chat_model.chat.completions.create(
-        #         model=model_config.get("reader","INF_PROVIDER_MODEL"),
-        #         messages = messages,
-        #         stream= True,
-        #         max_tokens=int(model_config.get('reader','MAX_TOKENS')),
-        #     )
-        #     for message in response:
-        #         token = message.choices[0].delta.content
-        #         if token:
-        #             answer_yet += token
-        #             parsed_answer = parse_output_llm_with_sources(answer_yet)
-        #             history[-1] = (query, parsed_answer)
-        #             logs_data["answer"] = parsed_answer
-        #             yield [tuple(x) for x in history], docs_html, logs_data, session_id
-        #
-        # # Stream the response updates
-        # async for update in process_stream():
-        #     yield update


     elif model_config.get('reader','TYPE') == 'DEDICATED':
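Read from the diff above, the change in the INF_PROVIDERS branch is this: the old code took an already complete response and re-emitted it word by word (response_text.split() plus await asyncio.sleep(0.05)) to imitate streaming, while the update switches to real provider-side streaming by calling chat.completions.create(..., stream=True) and iterating the chunk deltas with async for; the previously commented-out draft of that loop is deleted. Below is a minimal, self-contained sketch of the same pattern. It assumes an OpenAI-compatible AsyncOpenAI client and a placeholder model name, and it stands in print() for the Space's own inf_provider(), parse_output_llm_with_sources() and Gradio yield plumbing, which are not reproduced here.

# Sketch of the streaming pattern used by the new code, under the assumptions above.
import asyncio
from openai import AsyncOpenAI

MODEL_NAME = "your-provider/your-model"  # placeholder, not the Space's configured model

async def stream_answer(client: AsyncOpenAI, messages: list[dict]) -> str:
    """Accumulate the answer token by token, emitting a partial result per chunk."""
    answer_yet = ""
    # stream=True returns an async iterator of chunks instead of one full response
    stream = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True,
        max_tokens=512,
    )
    async for chunk in stream:
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None on role-only or finish chunks
            answer_yet += token
            print(token, end="", flush=True)  # app.py instead parses sources and yields to Gradio
    return answer_yet

async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY; set base_url for a non-OpenAI provider
    messages = [{"role": "user", "content": "Say hello in one short sentence."}]
    final = await stream_answer(client, messages)
    print("\nfinal answer:", final)

if __name__ == "__main__":
    asyncio.run(main())

Note that async for message in response only works because inf_provider() evidently returns an asynchronous client; the earlier commented-out draft used a plain for. With a synchronous OpenAI-style client the stream would be consumed with a regular for loop instead.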