Commit: hit the token limit
app.py (CHANGED)
@@ -8,6 +8,7 @@ import torchaudio.transforms as T
 import re
 import logging, sys
 import json
+from llama_cpp import Llama


 logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
@@ -51,6 +52,25 @@ SYSTEM_PROMPT = (
     "If you need to ask the user a follow-up question, do so clearly.\n",
 )

+def truncate_prompt(prompt, max_tokens=2048):
+    # Use your model's tokenizer here; this is a placeholder
+    tokens = prompt.split()  # Replace with actual tokenization
+    if len(tokens) > max_tokens:
+        tokens = tokens[:max_tokens]
+    return " ".join(tokens)
+
+# Initialize your model (adjust path and params as needed)
+llm = Llama(model_path=model_path)
+
+def truncate_prompt_llama(prompt, max_tokens=2048):
+    # Tokenize the prompt using llama_cpp's tokenizer
+    tokens = llm.tokenize(prompt.encode("utf-8"))
+    if len(tokens) > max_tokens:
+        # Truncate tokens and decode back to string
+        tokens = tokens[:max_tokens]
+        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
+    return prompt
+
 # ========== Generator handler ==========
 def on_submit(symptoms_text, history):
     log = []
@@ -103,8 +123,8 @@ def on_submit(symptoms_text, history):
         f"{SYSTEM_PROMPT}",
         f"User symptoms: '{cleaned}'",
         f"Relevant ICD-10 context:\n{context_text}",
-        "Respond with your top 3 ICD-10 codes and their confidence scores.",
     ])
+    prompt = truncate_prompt_llama(prompt, max_tokens=2048)

     msg = "✏️ Prompt built"
     log.append(msg)
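The move from whitespace splitting to the model's own tokenizer is the substantive fix here: subword tokenizers usually emit more tokens than whitespace-separated words, so a word count under-estimates prompt length and can still overflow the context window. A minimal sketch of the gap, assuming the llm instance created in the diff is loaded (the sample text is hypothetical):

# Hypothetical comparison; assumes `llm` is the Llama instance created above.
text = "Patient reports intermittent dyspnea, productive cough, and low-grade fever."

word_count = len(text.split())                         # what the placeholder counts
token_count = len(llm.tokenize(text.encode("utf-8")))  # what the model actually sees

# Subword tokenizers typically break rare words like "dyspnea" into several
# pieces, so token_count generally exceeds word_count for clinical text.
print(word_count, token_count)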
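A quick way to sanity-check the new helper, assuming llm and truncate_prompt_llama from app.py are in scope; the oversized input is a placeholder:

# Hypothetical smoke test (not part of the commit).
long_prompt = "shortness of breath, chest pain, " * 1000  # far beyond 2048 tokens
short_prompt = truncate_prompt_llama(long_prompt, max_tokens=2048)

# Re-tokenizing the truncated prompt should land at or near the cap; a
# detokenize/tokenize round trip can shift the count by a token (e.g. BOS).
print(len(llm.tokenize(short_prompt.encode("utf-8"))))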