ppsingh commited on
Commit
08db2a3
·
verified ·
1 Parent(s): cf374ac

trying streaming in a new endpoint

Browse files
Files changed (1) hide show
  1. app.py +48 -15
app.py CHANGED
@@ -308,28 +308,61 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
308
  chat_model = inf_provider()
309
  start_time = time.time()
310
  async def process_stream():
311
- nonlocal answer_yet # Use the outer scope's answer_yet variable
312
- # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
313
- # instead of modifying the one from the outer scope.
314
- # Iterate over the streaming response chunks
315
- response = chat_model.chat.completions.create(
316
- model=model_config.get("reader","INF_PROVIDER_MODEL"),
317
- messages = messages,
318
- stream= True,
319
- max_tokens=int(model_config.get('reader','MAX_TOKENS')),
320
- )
321
- for message in response:
322
- token = message.choices[0].delta.content
323
- if token:
324
- answer_yet += token
 
 
 
 
 
325
  parsed_answer = parse_output_llm_with_sources(answer_yet)
326
  history[-1] = (query, parsed_answer)
 
327
  logs_data["answer"] = parsed_answer
328
  yield [tuple(x) for x in history], docs_html, logs_data, session_id
 
 
 
 
329
 
330
- # Stream the response updates
331
  async for update in process_stream():
332
  yield update
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
 
335
  elif model_config.get('reader','TYPE') == 'DEDICATED':
 
308
  chat_model = inf_provider()
309
  start_time = time.time()
310
  async def process_stream():
311
+ nonlocal answer_yet
312
+ try:
313
+ formatted_messages = [
314
+ {
315
+ "role": msg.type if hasattr(msg, 'type') else msg.role,
316
+ "content": msg.content
317
+ }
318
+ for msg in messages
319
+ ]
320
+
321
+ response = chat_model.chat_completion(
322
+ messages=formatted_messages,
323
+ max_tokens=int(model_config.get('reader', 'MAX_TOKENS'))
324
+ )
325
+
326
+ response_text = response.choices[0].message.content
327
+ words = response_text.split()
328
+ for word in words:
329
+ answer_yet += word + " "
330
  parsed_answer = parse_output_llm_with_sources(answer_yet)
331
  history[-1] = (query, parsed_answer)
332
+ # Update logs_data with current answer (and get a new timestamp)
333
  logs_data["answer"] = parsed_answer
334
  yield [tuple(x) for x in history], docs_html, logs_data, session_id
335
+ await asyncio.sleep(0.05)
336
+
337
+ except Exception as e:
338
+ raise
339
 
 
340
  async for update in process_stream():
341
  yield update
342
+
343
+ # async def process_stream():
344
+ # nonlocal answer_yet # Use the outer scope's answer_yet variable
345
+ # # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
346
+ # # instead of modifying the one from the outer scope.
347
+ # # Iterate over the streaming response chunks
348
+ # response = chat_model.chat.completions.create(
349
+ # model=model_config.get("reader","INF_PROVIDER_MODEL"),
350
+ # messages = messages,
351
+ # stream= True,
352
+ # max_tokens=int(model_config.get('reader','MAX_TOKENS')),
353
+ # )
354
+ # for message in response:
355
+ # token = message.choices[0].delta.content
356
+ # if token:
357
+ # answer_yet += token
358
+ # parsed_answer = parse_output_llm_with_sources(answer_yet)
359
+ # history[-1] = (query, parsed_answer)
360
+ # logs_data["answer"] = parsed_answer
361
+ # yield [tuple(x) for x in history], docs_html, logs_data, session_id
362
+ #
363
+ # # Stream the response updates
364
+ # async for update in process_stream():
365
+ # yield update
366
 
367
 
368
  elif model_config.get('reader','TYPE') == 'DEDICATED':