gpaasch committed
Commit c7fb41b · 1 Parent(s): f960da5

hit the token limit

Files changed (1):
app.py +21 -1
app.py CHANGED
@@ -8,6 +8,7 @@ import torchaudio.transforms as T
 import re
 import logging, sys
 import json
+from llama_cpp import Llama
 
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
@@ -51,6 +52,25 @@ SYSTEM_PROMPT = (
     "If you need to ask the user a follow-up question, do so clearly.\n",
 )
 
+def truncate_prompt(prompt, max_tokens=2048):
+    # Use your model's tokenizer here; this is a placeholder
+    tokens = prompt.split()  # Replace with actual tokenization
+    if len(tokens) > max_tokens:
+        tokens = tokens[:max_tokens]
+    return " ".join(tokens)
+
+# Initialize your model (adjust path and params as needed)
+llm = Llama(model_path=model_path)
+
+def truncate_prompt_llama(prompt, max_tokens=2048):
+    # Tokenize the prompt using llama_cpp's tokenizer
+    tokens = llm.tokenize(prompt.encode("utf-8"))
+    if len(tokens) > max_tokens:
+        # Truncate tokens and decode back to string
+        tokens = tokens[:max_tokens]
+        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
+    return prompt
+
 # ========== Generator handler ==========
 def on_submit(symptoms_text, history):
     log = []
@@ -103,8 +123,8 @@ def on_submit(symptoms_text, history):
         f"{SYSTEM_PROMPT}",
         f"User symptoms: '{cleaned}'",
         f"Relevant ICD-10 context:\n{context_text}",
-        "Respond with your top 3 ICD-10 codes and their confidence scores.",
     ])
+    prompt = truncate_prompt_llama(prompt, max_tokens=2048)
 
     msg = "✏️ Prompt built"
     log.append(msg)
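
Note on the change above: the first helper, truncate_prompt, clips on whitespace-split words, which only approximates the model's real token count (subword tokenizers usually emit more tokens than words), so the commit routes the assembled prompt through truncate_prompt_llama, which counts actual llama_cpp tokens. Below is a minimal usage sketch of the same head-truncation approach, budgeted against the context window; the model path, n_ctx value, and 512-token output reserve are illustrative assumptions, not values from this repo.

from llama_cpp import Llama

# Hypothetical model path and context size; adjust to your setup.
llm = Llama(model_path="./models/model.gguf", n_ctx=4096)

def truncate_prompt_llama(prompt, max_tokens=2048):
    # Same approach as the commit: tokenize, clip, detokenize.
    tokens = llm.tokenize(prompt.encode("utf-8"))
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
    return prompt

# Reserve headroom so the model's reply also fits in the window
# (512 is an assumed value; tune it for your output lengths).
budget = llm.n_ctx() - 512

long_prompt = "fever and persistent cough " * 2000  # deliberately oversized
safe_prompt = truncate_prompt_llama(long_prompt, max_tokens=budget)
print(len(llm.tokenize(safe_prompt.encode("utf-8"))))  # roughly <= budget

One design consequence worth noting: clipping tokens[:max_tokens] keeps the head of the prompt, so the system prompt survives but the tail of the retrieved ICD-10 context is what gets dropped. Trimming the retrieved context before assembling the prompt is a common alternative when material at the end of the prompt must be preserved.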