Spaces:
Build error
Build error
File size: 1,575 Bytes
0d38280 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import xml.etree.ElementTree as ET
import json
import sys
import os
def _collect_codes(root):
    """Return a ``{code: description}`` dict built from every <diag> under *root*."""
    mapping = {}
    # iter() walks the entire tree, so nested <diag> sub-codes are visited too.
    for diag in root.iter("diag"):
        # findtext() only inspects direct children; it yields None when the
        # child element is absent and "" when it is present but has no text.
        # Both cases collapse to "" here and are skipped below.
        code = (diag.findtext("name") or "").strip()
        description = (diag.findtext("desc") or "").strip()
        # Only store non-empty strings. If a code appears more than once,
        # the last occurrence in document order wins.
        if code and description:
            mapping[code] = description
    return mapping


def main(xml_path, out_path="icd_to_description.json"):
    """Convert a tabular XML file into a flat JSON mapping code -> description.

    Every <diag> element in the file is examined. Each <diag> is expected to
    have a <name> child (the ICD-10 code), a <desc> child (the human-readable
    description) and zero or more nested <diag> children (sub-codes). Entries
    whose <name> or <desc> is missing, empty or whitespace-only are skipped.

    Args:
        xml_path: Path to the tabular XML file to read.
        out_path: Path of the JSON file to write. Defaults to
            "icd_to_description.json" in the current working directory.

    Raises:
        SystemExit: With exit code 1 when *xml_path* is not an existing file.
    """
    if not os.path.isfile(xml_path):
        print(f"ERROR: cannot find tabular XML at '{xml_path}'")
        sys.exit(1)

    root = ET.parse(xml_path).getroot()
    icd_to_description = _collect_codes(root)

    # Write out a flat JSON mapping code → description. ensure_ascii=False
    # keeps non-ASCII characters readable in the UTF-8 output file.
    with open(out_path, "w", encoding="utf-8") as fp:
        json.dump(icd_to_description, fp, indent=2, ensure_ascii=False)
    print(f"Wrote {len(icd_to_description)} code entries to {out_path}")
if __name__ == "__main__":
    # The script takes exactly one positional argument: the XML path.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: python parse_tabular.py <path/to/icd10cm_tabular_2025.xml>")
        sys.exit(1)
|