Nymbo committed
Commit 4c304f3 · verified · 1 Parent(s): bc17fe3

Update app.py

Files changed (1)
  1. app.py +150 -100
app.py CHANGED

@@ -41,7 +41,7 @@ def encode_image(image_path):
 
 def respond(
     message,
-    image_files,
+    image_files,  # Changed parameter name and structure
     history: list[tuple[str, str]],
     system_message,
     max_tokens,

@@ -83,79 +83,79 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Prepare messages for the API
-    user_content = []
-
-    # Add text if there is any
-    if message and message.strip():
-        user_content.append({
-            "type": "text",
-            "text": message
-        })
-
-    # Add images if any
+    # Create multimodal content if images are present
     if image_files and len(image_files) > 0:
-        for file_path in image_files:
-            if not file_path:
-                continue
-
-            try:
-                print(f"Processing image file: {file_path}")
-                # For direct file paths, no need to encode as base64
-                user_content.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"file://{file_path}"
-                    }
-                })
-            except Exception as e:
-                print(f"Error processing image file: {e}")
-
-    # If empty content, set to text only
-    if not user_content:
-        user_content = ""
-
+        # Process the user message to include images
+        user_content = []
+
+        # Add text part if there is any
+        if message and message.strip():
+            user_content.append({
+                "type": "text",
+                "text": message
+            })
+
+        # Add image parts
+        for img in image_files:
+            if img is not None:
+                # Get raw image data from path
+                try:
+                    encoded_image = encode_image(img)
+                    if encoded_image:
+                        user_content.append({
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encoded_image}"
+                            }
+                        })
+                except Exception as e:
+                    print(f"Error encoding image: {e}")
+    else:
+        # Text-only message
+        user_content = message
+
     # Prepare messages in the format expected by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
     for val in history:
-        user_msg = val[0]
-        assistant_msg = val[1]
-
-        # Process user message
-        if user_msg:
-            if isinstance(user_msg, dict) and "text" in user_msg:
-                # This is a MultimodalTextbox message
-                hist_text = user_msg.get("text", "")
-                hist_files = user_msg.get("files", [])
-
-                hist_content = []
-                if hist_text:
-                    hist_content.append({
+        user_part = val[0]
+        assistant_part = val[1]
+        if user_part:
+            # Handle both text-only and multimodal messages in history
+            if isinstance(user_part, tuple) and len(user_part) == 2:
+                # This is a multimodal message with text and images
+                history_content = []
+                if user_part[0]:  # Text
+                    history_content.append({
                         "type": "text",
-                        "text": hist_text
+                        "text": user_part[0]
                     })
 
-                for hist_file in hist_files:
-                    if hist_file:
-                        hist_content.append({
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"file://{hist_file}"
-                            }
-                        })
+                for img in user_part[1]:  # Images
+                    if img:
+                        try:
+                            encoded_img = encode_image(img)
+                            if encoded_img:
+                                history_content.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{encoded_img}"
+                                    }
+                                })
+                        except Exception as e:
+                            print(f"Error encoding history image: {e}")
 
-                if hist_content:
-                    messages.append({"role": "user", "content": hist_content})
+                messages.append({"role": "user", "content": history_content})
             else:
                 # Regular text message
-                messages.append({"role": "user", "content": user_msg})
+                messages.append({"role": "user", "content": user_part})
+                print(f"Added user message to context (type: {type(user_part)})")
 
-        # Process assistant message
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+        if assistant_part:
+            messages.append({"role": "assistant", "content": assistant_part})
+            print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
     messages.append({"role": "user", "content": user_content})

@@ -409,26 +409,39 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             print("Empty message, skipping")
             return history
 
-        # Extract data from the MultimodalTextbox
+        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
-        file_paths = user_message.get("files", [])
+        files = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
-        print(f"Files: {file_paths}")
+        print(f"Files: {files}")
 
-        # Process the message
-        if file_paths and len(file_paths) > 0:
-            # We have files - create a multimodal message
-            file_path = file_paths[0]  # For simplicity, use the first file
-            print(f"Using file: {file_path}")
+        # If both text and files are empty, skip
+        if not text_content and not files:
+            print("No content to display")
+            return history
+
+        # Add message with images to history
+        if files and len(files) > 0:
+            # Add text message first if it exists
+            if text_content:
+                # Add a separate text message
+                print(f"Adding text message: {text_content}")
+                history.append([text_content, None])
 
-            # Add the message with both text and file as separate components
-            history.append([user_message, None])  # Keep the original format for processing
-        else:
-            # Text-only message
-            history.append([{"text": text_content, "files": []}, None])
+            # Then add each image file separately
+            for file_path in files:
+                if file_path and isinstance(file_path, str):
+                    print(f"Adding image: {file_path}")
+                    # Add image as a separate message with no text
+                    history.append([f"![Image]({file_path})", None])
 
-        return history
+            return history
+        else:
+            # For text-only messages
+            print(f"Adding text-only message: {text_content}")
+            history.append([text_content, None])
+            return history
 
     # Define bot response function
     def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):

@@ -437,38 +450,75 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             print("No history to process")
             return history
 
-        # Extract the last user message
+        # Get the most recent message and detect if it's an image
         user_message = history[-1][0]
         print(f"Processing user message: {user_message}")
 
-        # Get text and files from the message
-        if isinstance(user_message, dict) and "text" in user_message:
-            text_content = user_message.get("text", "")
-            image_files = user_message.get("files", [])
-        else:
-            text_content = ""
-            image_files = []
+        is_image = False
+        image_path = None
+        text_content = user_message
+
+        # Check if this is an image message (marked with ![Image])
+        if isinstance(user_message, str) and user_message.startswith("![Image]("):
+            is_image = True
+            # Extract image path from markdown format ![Image](path)
+            image_path = user_message.replace("![Image](", "").replace(")", "")
+            print(f"Image detected: {image_path}")
+            text_content = ""  # No text for image-only messages
+
+        # Look back for text context if this is an image
+        text_context = ""
+        if is_image and len(history) > 1:
+            # Use the previous message as context if it's text
+            prev_message = history[-2][0]
+            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
+                text_context = prev_message
+                print(f"Using text context from previous message: {text_context}")
 
         # Process message through respond function
         history[-1][1] = ""
-        for response in respond(
-            text_content,
-            image_files,
-            history[:-1],
-            system_msg,
-            max_tokens,
-            temperature,
-            top_p,
-            freq_penalty,
-            seed,
-            provider,
-            api_key,
-            custom_model,
-            search_term,
-            selected_model
-        ):
-            history[-1][1] = response
-            yield history
+
+        # Use either the image or text for the API
+        if is_image:
+            # For image messages
+            for response in respond(
+                text_context,  # Text context from previous message if any
+                [image_path],  # Current image
+                history[:-1],  # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
+            ):
+                history[-1][1] = response
+                yield history
+        else:
+            # For text-only messages
+            for response in respond(
+                text_content,  # Text message
+                None,  # No image
+                history[:-1],  # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
+            ):
+                history[-1][1] = response
+                yield history
 
     # Event handlers - only using the MultimodalTextbox's built-in submit functionality
     msg.submit(
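
For reference, the respond() change above switches image handling from file:// URLs to base64 data URLs in the OpenAI-style multimodal content format. Below is a minimal standalone sketch (not part of the commit) of the message shape it now builds; the placeholder bytes stand in for what app.py's encode_image() reads from the uploaded file, and the sample prompts are hypothetical.

import base64

# Placeholder bytes instead of a real upload; app.py's encode_image()
# produces the same kind of base64 string from an image file on disk.
fake_jpeg_bytes = b"\xff\xd8\xff\xe0 placeholder image data"
encoded_image = base64.b64encode(fake_jpeg_bytes).decode("utf-8")

# Multimodal user content: one text part plus one image part carried
# as a data URL, as respond() now assembles when files are attached.
user_content = [
    {"type": "text", "text": "What is in this photo?"},
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
    },
]

# The full messages list that respond() passes on to the chat-completions call.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": user_content},
]
print(messages[1]["content"][1]["image_url"]["url"][:40])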
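
The user()/bot() changes thread uploaded images through the Gradio chat history as plain strings of the form ![Image](path). A minimal sketch of that round-trip follows (the helper names and the sample path are hypothetical; only the marker format and the str.replace extraction come from the commit):

# Hypothetical helpers mirroring the marker logic in user() and bot().
def pack_image(file_path):
    # Store an uploaded image in history as a markdown-style image message.
    return f"![Image]({file_path})"

def unpack_image(message):
    # Return (is_image, image_path) for a history entry, as bot() does.
    if isinstance(message, str) and message.startswith("![Image]("):
        return True, message.replace("![Image](", "").replace(")", "")
    return False, None

history = [
    ["Describe this picture", None],          # text message added first
    [pack_image("/tmp/example.png"), None],   # image added as its own entry
]

is_image, image_path = unpack_image(history[-1][0])
# bot() then uses the preceding text entry as context for the image.
text_context = history[-2][0] if is_image else ""
print(is_image, image_path, text_context)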