Update app.py
app.py CHANGED

@@ -308,61 +308,28 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
         chat_model = inf_provider()
         start_time = time.time()
         async def process_stream():
-            nonlocal answer_yet
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            response_text = response.choices[0].message.content
-            words = response_text.split()
-            for word in words:
-                answer_yet += word + " "
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
+            # Iterate over the streaming response chunks
+            response = chat_model.chat.completions.create(
+                model=model_config.get("reader","INF_PROVIDER_MODEL"),
+                messages = messages,
+                stream= True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            async for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
                 parsed_answer = parse_output_llm_with_sources(answer_yet)
                 history[-1] = (query, parsed_answer)
-                # Update logs_data with current answer (and get a new timestamp)
                 logs_data["answer"] = parsed_answer
                 yield [tuple(x) for x in history], docs_html, logs_data, session_id
-                await asyncio.sleep(0.05)
-
-        except Exception as e:
-            raise

+        # Stream the response updates
         async for update in process_stream():
             yield update
-
-        # async def process_stream():
-        #     nonlocal answer_yet # Use the outer scope's answer_yet variable
-        #     # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
-        #     # instead of modifying the one from the outer scope.
-        #     # Iterate over the streaming response chunks
-        #     response = chat_model.chat.completions.create(
-        #         model=model_config.get("reader","INF_PROVIDER_MODEL"),
-        #         messages = messages,
-        #         stream= True,
-        #         max_tokens=int(model_config.get('reader','MAX_TOKENS')),
-        #     )
-        #     for message in response:
-        #         token = message.choices[0].delta.content
-        #         if token:
-        #             answer_yet += token
-        #             parsed_answer = parse_output_llm_with_sources(answer_yet)
-        #             history[-1] = (query, parsed_answer)
-        #             logs_data["answer"] = parsed_answer
-        #             yield [tuple(x) for x in history], docs_html, logs_data, session_id
-        #
-        # # Stream the response updates
-        # async for update in process_stream():
-        #     yield update


     elif model_config.get('reader','TYPE') == 'DEDICATED':
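Read from the diff above, the change in the INF_PROVIDERS branch is this: the old code took an already complete response and re-emitted it word by word (response_text.split() plus await asyncio.sleep(0.05)) to imitate streaming, while the update switches to real provider-side streaming by calling chat.completions.create(..., stream=True) and iterating the chunk deltas with async for; the previously commented-out draft of that loop is deleted. Below is a minimal, self-contained sketch of the same pattern. It assumes an OpenAI-compatible AsyncOpenAI client and a placeholder model name, and it stands in print() for the Space's own inf_provider(), parse_output_llm_with_sources() and Gradio yield plumbing, which are not reproduced here.

# Sketch of the streaming pattern used by the new code, under the assumptions above.
import asyncio
from openai import AsyncOpenAI

MODEL_NAME = "your-provider/your-model"  # placeholder, not the Space's configured model

async def stream_answer(client: AsyncOpenAI, messages: list[dict]) -> str:
    """Accumulate the answer token by token, emitting a partial result per chunk."""
    answer_yet = ""
    # stream=True returns an async iterator of chunks instead of one full response
    stream = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True,
        max_tokens=512,
    )
    async for chunk in stream:
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None on role-only or finish chunks
            answer_yet += token
            print(token, end="", flush=True)  # app.py instead parses sources and yields to Gradio
    return answer_yet

async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY; set base_url for a non-OpenAI provider
    messages = [{"role": "user", "content": "Say hello in one short sentence."}]
    final = await stream_answer(client, messages)
    print("\nfinal answer:", final)

if __name__ == "__main__":
    asyncio.run(main())

Note that async for message in response only works because inf_provider() evidently returns an asynchronous client; the earlier commented-out draft used a plain for. With a synchronous OpenAI-style client the stream would be consumed with a regular for loop instead.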