Update app.py
app.py CHANGED

@@ -41,7 +41,7 @@ def encode_image(image_path):
 
 def respond(
     message,
-    image_files,
+    image_files,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -83,79 +83,79 @@ def respond(
     if seed == -1:
         seed = None
 
+    # Prepare messages for the API
+    user_content = []
+
+    # Add text if there is any
+    if message and message.strip():
+        user_content.append({
+            "type": "text",
+            "text": message
+        })
+
+    # Add images if any
     if image_files and len(image_files) > 0:
-                        "url": f"data:image/jpeg;base64,{encoded_image}"
-                    }
-                })
-            except Exception as e:
-                print(f"Error encoding image: {e}")
-    else:
-        # Text-only message
-        user_content = message
+        for file_path in image_files:
+            if not file_path:
+                continue
+
+            try:
+                print(f"Processing image file: {file_path}")
+                # For direct file paths, no need to encode as base64
+                user_content.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"file://{file_path}"
+                    }
+                })
+            except Exception as e:
+                print(f"Error processing image file: {e}")
+
+    # If empty content, set to text only
+    if not user_content:
+        user_content = ""
+
     # Prepare messages in the format expected by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
+        user_msg = val[0]
+        assistant_msg = val[1]
+
+        # Process user message
+        if user_msg:
+            if isinstance(user_msg, dict) and "text" in user_msg:
+                # This is a MultimodalTextbox message
+                hist_text = user_msg.get("text", "")
+                hist_files = user_msg.get("files", [])
+
+                hist_content = []
+                if hist_text:
+                    hist_content.append({
                         "type": "text",
-                        "text":
+                        "text": hist_text
                     })
 
-                            "url": f"data:image/jpeg;base64,{encoded_img}"
-                        }
-                    })
-            except Exception as e:
-                print(f"Error encoding history image: {e}")
+                for hist_file in hist_files:
+                    if hist_file:
+                        hist_content.append({
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"file://{hist_file}"
+                            }
+                        })
 
+                if hist_content:
+                    messages.append({"role": "user", "content": hist_content})
             else:
                 # Regular text message
-                messages.append({"role": "user", "content":
-                print(f"Added user message to context (type: {type(user_part)})")
+                messages.append({"role": "user", "content": user_msg})
 
+            # Process assistant message
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
 
     # Append the latest user message
     messages.append({"role": "user", "content": user_content})
@@ -409,36 +409,26 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             print("Empty message, skipping")
             return history
 
+        # Extract data from the MultimodalTextbox
         text_content = user_message.get("text", "").strip()
+        file_paths = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
-        print(f"Files: {
+        print(f"Files: {file_paths}")
 
-        # Process multimodal content
-        if files:
-            # For multimodal messages with files
-            for file_path in files:
-                print(f"Processing file: {file_path}")
-                if not file_path:
-                    continue
-                # Add a combined message with text and file
-                history.append([(text_content, file_path), None])
-                # Reset text content for subsequent files if there are multiple
-                text_content = ""
+        # Process the message
+        if file_paths and len(file_paths) > 0:
+            # We have files - create a multimodal message
+            file_path = file_paths[0]  # For simplicity, use the first file
+            print(f"Using file: {file_path}")
 
+            # Add the message with both text and file as separate components
+            history.append([user_message, None])  # Keep the original format for processing
         else:
-            history.append([text_content, None])
+            # Text-only message
+            history.append([{"text": text_content, "files": []}, None])
+
+        return history
 
     # Define bot response function
     def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
@@ -451,29 +441,19 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         user_message = history[-1][0]
         print(f"Processing user message: {user_message}")
 
-        if isinstance(user_message, tuple):
-            # Tuple format: (text, image_path)
-            text_content = user_message[0] if user_message[0] else ""
-            # Handle both single image path and list of paths
-            if isinstance(user_message[1], list):
-                image_files = user_message[1]
-            else:
-                image_files = [user_message[1]]
-            print(f"Multimodal message detected - Text: {text_content}, Images: {image_files}")
+        # Get text and files from the message
+        if isinstance(user_message, dict) and "text" in user_message:
+            text_content = user_message.get("text", "")
+            image_files = user_message.get("files", [])
         else:
-            print(f"Text-only message detected: {text_content}")
+            text_content = ""
+            image_files = []
 
         # Process message through respond function
         history[-1][1] = ""
         for response in respond(
             text_content,
-            image_files
+            image_files,
             history[:-1],
             system_msg,
             max_tokens,