HF embeddings for LlamaIndex had to be installed: https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings/
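The missing integration can be installed up front (a sketch of the setup step; the exact pip command is an assumption based on the packages added to requirements.txt below):

pip install llama-index-embeddings-huggingface sentence-transformers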
Files changed:
- data/processed/icd_to_description.json (+0 -0)
- requirements.txt (+2 -0)
- src/__init__.py (+0 -0)
- src/app.py (+6 -4)
- src/parse_tabular.py (+43 -6)
- src/symptom_to_icd.json (+5 -0)
- utils/__init__.py (+0 -0)
data/processed/icd_to_description.json
ADDED
The diff for this file is too large to render.
requirements.txt
CHANGED

@@ -4,6 +4,7 @@ gradio
 
 # core Llama-Index + HF model support
 llama-index>=0.9.0 # Specify minimum version
+llama-index-embeddings-huggingface
 openai
 transformers
 torch
@@ -12,6 +13,7 @@ accelerate
 # optional extras
 langchain
 langchain-community
+sentence-transformers>=2.2.0
 
 # system requirement for audio I/O (ffmpeg must be installed):
 # • Debian/Ubuntu: sudo apt install ffmpeg
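With the new dependencies in place, the local embedding model can be smoke-tested independently of the app (a minimal sketch; the model name matches the one used in src/parse_tabular.py below, and the 384-dimension check is specific to all-MiniLM-L6-v2):

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# downloads the model on first use, then runs fully locally
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector = embed_model.get_text_embedding("persistent cough with mild fever")
print(len(vector))  # all-MiniLM-L6-v2 produces 384-dimensional vectors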
src/__init__.py
ADDED
File without changes
src/app.py
CHANGED

@@ -1,10 +1,12 @@
 import os
 import gradio as gr
-from llama_index import …
-from …
+from llama_index.core import VectorStoreIndex
+from llama_index.llms import HuggingFaceLLMPredictor
+from llama_index.readers import SimpleDirectoryReader
 
-# …
-from …
+# Relative imports should be explicit
+from parse_tabular import symptom_index  # Changed from relative import
+from ..utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms
 
 # --- System prompt ---
 SYSTEM_PROMPT = """
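One caveat on the new imports: `from ..utils.llama_index_utils import ...` only resolves when src is imported as part of a package; if app.py is launched directly as a script (as Spaces typically do), Python raises "attempted relative import beyond top-level package". A hedged workaround sketch, assuming the repo root contains both src/ and utils/ as in the file list above:

import os
import sys

# make the repo root importable so `utils` resolves when app.py runs as a script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms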
src/parse_tabular.py
CHANGED

@@ -2,8 +2,19 @@ import xml.etree.ElementTree as ET
 import json
 import sys
 import os
+from llama_index.core import VectorStoreIndex, Document, Settings
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+# Update path constants
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+DATA_DIR = os.path.join(BASE_DIR, "data")
+ICD_DIR = os.path.join(DATA_DIR, "icd10cm_tabular_2025")
+DEFAULT_XML_PATH = os.path.join(ICD_DIR, "icd10cm_tabular_2025.xml")
+PROCESSED_DIR = os.path.join(DATA_DIR, "processed")
 
-def main(xml_path):
+def main(xml_path=DEFAULT_XML_PATH):
+    # Create processed directory if it doesn't exist
+    os.makedirs(PROCESSED_DIR, exist_ok=True)
+
     if not os.path.isfile(xml_path):
         print(f"ERROR: cannot find tabular XML at '{xml_path}'")
         sys.exit(1)
@@ -34,15 +45,41 @@ def main(xml_path):
         icd_to_description[code] = description
 
     # Write out a flat JSON mapping code → description
-    out_path = "icd_to_description.json"
+    out_path = os.path.join(PROCESSED_DIR, "icd_to_description.json")
     with open(out_path, "w", encoding="utf-8") as fp:
         json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
 
     print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
 
+def create_symptom_index():
+    # Configure to use local HuggingFace embeddings
+    Settings.embed_model = HuggingFaceEmbedding(
+        model_name="sentence-transformers/all-MiniLM-L6-v2"
+    )
+
+    # Load and process data
+    json_path = os.path.join(PROCESSED_DIR, "icd_to_description.json")
+    with open(json_path, "r") as f:
+        icd_data = json.load(f)
+
+    # Convert to Document objects
+    documents = [
+        Document(
+            text=f"ICD-10 Code {code}: {desc}",
+            metadata={"code": code}
+        )
+        for code, desc in icd_data.items()
+    ]
+
+    # Create and return the index
+    return VectorStoreIndex.from_documents(documents)
+
+# Move this outside the main() function
+symptom_index = None
 
 if __name__ == "__main__":
-    if len(sys.argv) …
-    …
-    …
-    …
+    if len(sys.argv) > 1:
+        main(sys.argv[1])
+    else:
+        main()  # Use default path
+    symptom_index = create_symptom_index()
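For reference, the index returned by create_symptom_index() can be exercised without an LLM by using a plain retriever (a minimal sketch; it assumes main() has already written data/processed/icd_to_description.json and that parse_tabular is importable from src/):

from parse_tabular import create_symptom_index

# build the vector index over the ICD-10 code descriptions
index = create_symptom_index()

# retrieve the closest ICD-10 descriptions for a free-text symptom query
retriever = index.as_retriever(similarity_top_k=3)
for hit in retriever.retrieve("persistent dry cough"):
    print(hit.node.metadata["code"], hit.score, hit.node.get_content())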
src/symptom_to_icd.json
ADDED

@@ -0,0 +1,5 @@
+{
+  "cough": ["R05"],
+  "fever": ["R50.9"],
+  "headache": ["R51"]
+}
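The new symptom_to_icd.json is not referenced by the other files in this commit; one plausible use (an assumption, not something this commit wires up) is as a fast exact-match lookup before falling back to the vector index:

import json
import os

# load the small hand-written symptom → ICD-10 mapping added in this commit
with open(os.path.join("src", "symptom_to_icd.json"), encoding="utf-8") as f:
    symptom_to_icd = json.load(f)

print(symptom_to_icd.get("fever", []))  # ['R50.9']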
utils/__init__.py
ADDED
File without changes