Nymbo committed
Commit 4c304f3 · verified · 1 Parent(s): bc17fe3

Update app.py

Files changed (1)
  1. app.py +150 -100
app.py CHANGED

@@ -41,7 +41,7 @@ def encode_image(image_path):
 
 def respond(
     message,
-    image_files,
+    image_files,  # Changed parameter name and structure
     history: list[tuple[str, str]],
     system_message,
     max_tokens,

@@ -83,79 +83,79 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Prepare messages for the API
-    user_content = []
-
-    # Add text if there is any
-    if message and message.strip():
-        user_content.append({
-            "type": "text",
-            "text": message
-        })
-
-    # Add images if any
+    # Create multimodal content if images are present
     if image_files and len(image_files) > 0:
-        for file_path in image_files:
-            if not file_path:
-                continue
-
-            try:
-                print(f"Processing image file: {file_path}")
-                # For direct file paths, no need to encode as base64
-                user_content.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"file://{file_path}"
-                    }
-                })
-            except Exception as e:
-                print(f"Error processing image file: {e}")
-
-    # If empty content, set to text only
-    if not user_content:
-        user_content = ""
-
+        # Process the user message to include images
+        user_content = []
+
+        # Add text part if there is any
+        if message and message.strip():
+            user_content.append({
+                "type": "text",
+                "text": message
+            })
+
+        # Add image parts
+        for img in image_files:
+            if img is not None:
+                # Get raw image data from path
+                try:
+                    encoded_image = encode_image(img)
+                    if encoded_image:
+                        user_content.append({
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encoded_image}"
+                            }
+                        })
+                except Exception as e:
+                    print(f"Error encoding image: {e}")
+    else:
+        # Text-only message
+        user_content = message
+
     # Prepare messages in the format expected by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
-        user_msg = val[0]
-        assistant_msg = val[1]
-
-        # Process user message
-        if user_msg:
-            if isinstance(user_msg, dict) and "text" in user_msg:
-                # This is a MultimodalTextbox message
-                hist_text = user_msg.get("text", "")
-                hist_files = user_msg.get("files", [])
-
-                hist_content = []
-                if hist_text:
-                    hist_content.append({
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            # Handle both text-only and multimodal messages in history
+            if isinstance(user_part, tuple) and len(user_part) == 2:
+                # This is a multimodal message with text and images
+                history_content = []
+                if user_part[0]:  # Text
+                    history_content.append({
                         "type": "text",
-                        "text": hist_text
+                        "text": user_part[0]
                     })
 
-                for hist_file in hist_files:
-                    if hist_file:
-                        hist_content.append({
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"file://{hist_file}"
-                            }
-                        })
+                for img in user_part[1]:  # Images
+                    if img:
+                        try:
+                            encoded_img = encode_image(img)
+                            if encoded_img:
+                                history_content.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{encoded_img}"
+                                    }
+                                })
+                        except Exception as e:
+                            print(f"Error encoding history image: {e}")
 
-                if hist_content:
-                    messages.append({"role": "user", "content": hist_content})
+                messages.append({"role": "user", "content": history_content})
             else:
                 # Regular text message
-                messages.append({"role": "user", "content": user_msg})
+                messages.append({"role": "user", "content": user_part})
+                print(f"Added user message to context (type: {type(user_part)})")
 
-        # Process assistant message
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
     messages.append({"role": "user", "content": user_content})

@@ -409,26 +409,39 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             print("Empty message, skipping")
             return history
 
-        # Extract data from the MultimodalTextbox
+        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
-        file_paths = user_message.get("files", [])
+        files = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
-        print(f"Files: {file_paths}")
+        print(f"Files: {files}")
 
-        # Process the message
-        if file_paths and len(file_paths) > 0:
-            # We have files - create a multimodal message
-            file_path = file_paths[0]  # For simplicity, use the first file
-            print(f"Using file: {file_path}")
+        # If both text and files are empty, skip
+        if not text_content and not files:
+            print("No content to display")
+            return history
+
+        # Add message with images to history
+        if files and len(files) > 0:
+            # Add text message first if it exists
+            if text_content:
+                # Add a separate text message
+                print(f"Adding text message: {text_content}")
+                history.append([text_content, None])
 
-            # Add the message with both text and file as separate components
-            history.append([user_message, None])  # Keep the original format for processing
-        else:
-            # Text-only message
-            history.append([{"text": text_content, "files": []}, None])
+            # Then add each image file separately
+            for file_path in files:
+                if file_path and isinstance(file_path, str):
+                    print(f"Adding image: {file_path}")
+                    # Add image as a separate message with no text
+                    history.append([f"![Image]({file_path})", None])
 
-        return history
+            return history
+        else:
+            # For text-only messages
+            print(f"Adding text-only message: {text_content}")
+            history.append([text_content, None])
+            return history
 
     # Define bot response function
     def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):

@@ -437,38 +450,75 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             print("No history to process")
             return history
 
-        # Extract the last user message
+        # Get the most recent message and detect if it's an image
         user_message = history[-1][0]
         print(f"Processing user message: {user_message}")
 
-        # Get text and files from the message
-        if isinstance(user_message, dict) and "text" in user_message:
-            text_content = user_message.get("text", "")
-            image_files = user_message.get("files", [])
-        else:
-            text_content = ""
-            image_files = []
+        is_image = False
+        image_path = None
+        text_content = user_message
+
+        # Check if this is an image message (marked with ![Image])
+        if isinstance(user_message, str) and user_message.startswith("![Image]("):
+            is_image = True
+            # Extract image path from markdown format ![Image](path)
+            image_path = user_message.replace("![Image](", "").replace(")", "")
+            print(f"Image detected: {image_path}")
+            text_content = ""  # No text for image-only messages
+
+        # Look back for text context if this is an image
+        text_context = ""
+        if is_image and len(history) > 1:
+            # Use the previous message as context if it's text
+            prev_message = history[-2][0]
+            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
+                text_context = prev_message
+                print(f"Using text context from previous message: {text_context}")
 
         # Process message through respond function
         history[-1][1] = ""
-        for response in respond(
-            text_content,
-            image_files,
-            history[:-1],
-            system_msg,
-            max_tokens,
-            temperature,
-            top_p,
-            freq_penalty,
-            seed,
-            provider,
-            api_key,
-            custom_model,
-            search_term,
-            selected_model
-        ):
-            history[-1][1] = response
-            yield history
+
+        # Use either the image or text for the API
+        if is_image:
+            # For image messages
+            for response in respond(
+                text_context,  # Text context from previous message if any
+                [image_path],  # Current image
+                history[:-1],  # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
+            ):
+                history[-1][1] = response
+                yield history
+        else:
+            # For text-only messages
+            for response in respond(
+                text_content,  # Text message
+                None,  # No image
+                history[:-1],  # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
+            ):
+                history[-1][1] = response
+                yield history
 
     # Event handlers - only using the MultimodalTextbox's built-in submit functionality
     msg.submit(
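
For reference, the respond() change above switches image handling from file:// URLs to base64 data URLs in the OpenAI-style multimodal content format. Below is a minimal standalone sketch (not part of the commit) of the message shape it now builds; the placeholder bytes stand in for what app.py's encode_image() reads from the uploaded file, and the sample prompts are hypothetical.

import base64

# Placeholder bytes instead of a real upload; app.py's encode_image()
# produces the same kind of base64 string from an image file on disk.
fake_jpeg_bytes = b"\xff\xd8\xff\xe0 placeholder image data"
encoded_image = base64.b64encode(fake_jpeg_bytes).decode("utf-8")

# Multimodal user content: one text part plus one image part carried
# as a data URL, as respond() now assembles when files are attached.
user_content = [
    {"type": "text", "text": "What is in this photo?"},
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
    },
]

# The full messages list that respond() passes on to the chat-completions call.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": user_content},
]
print(messages[1]["content"][1]["image_url"]["url"][:40])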
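
The user()/bot() changes thread uploaded images through the Gradio chat history as plain strings of the form ![Image](path). A minimal sketch of that round-trip follows (the helper names and the sample path are hypothetical; only the marker format and the str.replace extraction come from the commit):

# Hypothetical helpers mirroring the marker logic in user() and bot().
def pack_image(file_path):
    # Store an uploaded image in history as a markdown-style image message.
    return f"![Image]({file_path})"

def unpack_image(message):
    # Return (is_image, image_path) for a history entry, as bot() does.
    if isinstance(message, str) and message.startswith("![Image]("):
        return True, message.replace("![Image](", "").replace(")", "")
    return False, None

history = [
    ["Describe this picture", None],          # text message added first
    [pack_image("/tmp/example.png"), None],   # image added as its own entry
]

is_image, image_path = unpack_image(history[-1][0])
# bot() then uses the preceding text entry as context for the image.
text_context = history[-2][0] if is_image else ""
print(is_image, image_path, text_context)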