Spaces:

atharvasc27112001
/

medical_chatbot

Runtime error

App Files Files Community

atharvasc27112001 committed on Apr 23

Commit

d1af83e

verified ·

1 Parent(s): 29aa409

Create app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import pandas as pd
+import torch
+import transformers
+from torch.nn.functional import cosine_similarity
+import gradio as gr
+# ── 1) Constants & Device ────────────────────────────────────────────────
+# Hugging Face model id used for both the tokenizer and the encoder below.
+MODEL_NAME = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
+# Minimum number of rows a focus area needs in the CSV to be kept as a label.
+MIN_FREQ   = 4
+# Default token cap applied by encode_text() when no max_length is given.
+MAX_LEN    = 256
+# Run on the GPU when torch reports one is available, otherwise on the CPU.
+DEVICE     = "cuda" if torch.cuda.is_available() else "cpu"
+# ── 2) Load & Filter Dataset ─────────────────────────────────────────────
+# Read at import time; the path is relative to the working directory.
+df = pd.read_csv("medquad.csv")
+# Stripped question and answer joined by a space.
+# NOTE(review): "text" is not read again anywhere in the visible code.
+df["text"] = df["question"].str.strip() + " " + df["answer"].str.strip()
+# Keep only the rows whose focus_area occurs at least MIN_FREQ times.
+vc   = df["focus_area"].value_counts()
+keep = vc[vc >= MIN_FREQ].index
+df   = df[df["focus_area"].isin(keep)].reset_index(drop=True)
+# Sorting fixes the label order, so the index <-> label mappings below are
+# reproducible between runs over the same CSV.
+labels   = sorted(df["focus_area"].unique())
+label2id = {lbl:i for i,lbl in enumerate(labels)}
+id2label = {i:l for l,i in label2id.items()}
+# ── 3) Load Tokenizer & Frozen BERT ─────────────────────────────────────
+tokenizer  = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
+# The encoder is moved to DEVICE and switched to eval mode right after loading.
+bert_model = transformers.AutoModel.from_pretrained(MODEL_NAME).to(DEVICE).eval()
+@torch.no_grad()
+def encode_text(s: str, max_length=MAX_LEN):
+    toks = tokenizer(
+        s,
+        return_tensors="pt",
+        truncation=True,
+        max_length=max_length,
+        add_special_tokens=True
+    ).to(DEVICE)
+    hidden = bert_model(**toks).last_hidden_state
+    return hidden[:,0].squeeze().cpu()   # CLS token
+# ── 4) Precompute Static Label Embeddings ─────────────────────────────────
+label_embs = torch.stack([
+    encode_text(lbl, max_length=16)
+    for lbl in labels
+])
+# ── 5) Classification Function ────────────────────────────────────────────
+def predict_disease(symptoms: str):
+    """
+    Encode the user's input, compute cosine similarity
+    to each label embedding, and return the top label.
+    """
+    q_emb = encode_text(symptoms).unsqueeze(0)      # [1, hidden_size]
+    sims  = cosine_similarity(q_emb, label_embs)   # [1, num_labels]
+    idx   = sims.argmax(dim=1).item()
+    return id2label[idx]
+# ── 6) Gradio Interface ───────────────────────────────────────────────────
+# Single-input, single-output UI: the text typed into the textbox is passed to
+# predict_disease() and the returned label is shown as plain text.
+iface = gr.Interface(
+    fn=predict_disease,
+    inputs=gr.Textbox(
+        lines=3,
+        placeholder="Enter your symptoms here, e.g.\n'I have eye pain and blurred vision...'"
+    ),
+    outputs="text",
+    title="🔬 Medical Symptom→Disease Chatbot",
+    description="Type your symptoms; PubMed‐BERT + cosine similarity predicts the most likely disease category."
+)
+# The interface is built at import time, but the server is only launched when
+# this file is executed as a script.
+if __name__ == "__main__":
+    iface.launch()