Spaces:

gpaasch
/

MedCodeMCP

Running

App Files Files Community

gpaasch committed on Jun 7

Commit

357828f

1 Parent(s): 36b0f3d

The Hugging Face embeddings package for LlamaIndex had to be installed separately: https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings/

Browse files

Files changed (7) hide show

data/processed/icd_to_description.json +0 -0
requirements.txt +2 -0
src/__init__.py +0 -0
src/app.py +6 -4
src/parse_tabular.py +43 -6
src/symptom_to_icd.json +5 -0
utils/__init__.py +0 -0

data/processed/icd_to_description.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -4,6 +4,7 @@ gradio
 # core Llama-Index + HF model support
 llama-index>=0.9.0  # Specify minimum version
 openai
 transformers
 torch
@@ -12,6 +13,7 @@ accelerate
 # optional extras
 langchain
 langchain-community
 # system requirement for audio I/O (ffmpeg must be installed):
 #   • Debian/Ubuntu: sudo apt install ffmpeg

 # core Llama-Index + HF model support
 llama-index>=0.9.0  # Specify minimum version
+llama-index-embeddings-huggingface
 openai
 transformers
 torch
 # optional extras
 langchain
 langchain-community
+sentence-transformers>=2.2.0
 # system requirement for audio I/O (ffmpeg must be installed):
 #   • Debian/Ubuntu: sudo apt install ffmpeg

src/__init__.py ADDED Viewed

File without changes

src/app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import os
 import gradio as gr
-from llama_index import HuggingFaceLLMPredictor
-from src.parse_tabular import symptom_index
-# --- LlamaIndex utils import ---
-from utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms
 # --- System prompt ---
 SYSTEM_PROMPT = """

 import os
 import gradio as gr
+from llama_index.core import VectorStoreIndex
+from llama_index.llms import HuggingFaceLLMPredictor
+from llama_index.readers import SimpleDirectoryReader
+# NOTE: parse_tabular is imported as a top-level module, while utils is reached through an explicit relative import
+from parse_tabular import symptom_index  # Changed from relative import
+from ..utils.llama_index_utils import get_llm_predictor, build_index, query_symptoms
 # --- System prompt ---
 SYSTEM_PROMPT = """

src/parse_tabular.py CHANGED Viewed

@@ -2,8 +2,19 @@ import xml.etree.ElementTree as ET
 import json
 import sys
 import os
-def main(xml_path):
     if not os.path.isfile(xml_path):
         print(f"ERROR: cannot find tabular XML at '{xml_path}'")
         sys.exit(1)
@@ -34,15 +45,41 @@ def main(xml_path):
             icd_to_description[code] = description
     # Write out a flat JSON mapping code → description
-    out_path = "icd_to_description.json"
     with open(out_path, "w", encoding="utf-8") as fp:
         json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
     print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
 if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>")
-        sys.exit(1)
-    main(sys.argv[1])

 import json
 import sys
 import os
+from llama_index.core import VectorStoreIndex, Document, Settings
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+# Update path constants
+BASE_DIR = os.path.dirname(os.path.dirname(__file__))
+DATA_DIR = os.path.join(BASE_DIR, "data")
+ICD_DIR = os.path.join(DATA_DIR, "icd10cm_tabular_2025")
+DEFAULT_XML_PATH = os.path.join(ICD_DIR, "icd10cm_tabular_2025.xml")
+PROCESSED_DIR = os.path.join(DATA_DIR, "processed")
+def main(xml_path=DEFAULT_XML_PATH):
+    # Create processed directory if it doesn't exist
+    os.makedirs(PROCESSED_DIR, exist_ok=True)
     if not os.path.isfile(xml_path):
         print(f"ERROR: cannot find tabular XML at '{xml_path}'")
         sys.exit(1)
             icd_to_description[code] = description
     # Write out a flat JSON mapping code → description
+    out_path = os.path.join(PROCESSED_DIR, "icd_to_description.json")
     with open(out_path, "w", encoding="utf-8") as fp:
         json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
     print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
+def create_symptom_index():
+    """Build a vector index over the processed ICD-10 code descriptions.
+
+    Sets ``Settings.embed_model`` to a HuggingFace embedding model, loads the
+    code -> description mapping from ``icd_to_description.json`` in
+    ``PROCESSED_DIR``, and wraps each entry in a ``Document`` whose metadata
+    carries the code.
+
+    Returns:
+        The ``VectorStoreIndex`` built from those documents.
+    """
+    # Configure to use local HuggingFace embeddings
+    Settings.embed_model = HuggingFaceEmbedding(
+        model_name="sentence-transformers/all-MiniLM-L6-v2"
+    )
+    # Load and process data. main() writes this file as UTF-8 with
+    # ensure_ascii=False, so read it back with the same encoding instead of
+    # relying on the platform default.
+    json_path = os.path.join(PROCESSED_DIR, "icd_to_description.json")
+    with open(json_path, "r", encoding="utf-8") as f:
+        icd_data = json.load(f)
+    # Convert to Document objects
+    documents = [
+        Document(
+            text=f"ICD-10 Code {code}: {desc}",
+            metadata={"code": code}
+        )
+        for code, desc in icd_data.items()
+    ]
+    # Create and return the index
+    return VectorStoreIndex.from_documents(documents)
+# Module-level placeholder; it is only assigned a real index when this file is run as a script
+symptom_index = None
 if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        main(sys.argv[1])
+    else:
+        main()  # Use default path
+    symptom_index = create_symptom_index()

src/symptom_to_icd.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "cough": ["R05"],
+  "fever": ["R50.9"],
+  "headache": ["R51"]
+}

utils/__init__.py ADDED Viewed

File without changes