Spaces:

mbalvi
/

Multilingual_Sentiment_Analysis_v01

Runtime error

App Files Files Community

mbalvi committed on Aug 30

Commit

0f5bc63

verified ·

1 Parent(s): 133afa1

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -46

app.py CHANGED Viewed

@@ -1,12 +1,11 @@
 """
 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
 -------------------------------------------------------------
-• Uses Hugging Face model: nlptown/bert-base-multilingual-uncased-sentiment (5-star output)
-• Maps 5-star probabilities to 3 classes:
-      Negative = P(1★) + P(2★)
-      Neutral  = P(3★)
-      Positive = P(4★) + P(5★)
-• Saves each query to sentiment_logs.xlsx (downloadable)
 """
 import os
@@ -16,7 +15,6 @@ import gradio as gr
 from transformers import pipeline
 # -------- Model & Pipeline --------
-# This model supports many languages (incl. English/Urdu/Roman Urdu)
 MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
 clf = pipeline("sentiment-analysis", model=MODEL_NAME)
@@ -24,47 +22,39 @@ clf = pipeline("sentiment-analysis", model=MODEL_NAME)
 LOG_PATH = "sentiment_logs.xlsx"
 if not os.path.exists(LOG_PATH):
     pd.DataFrame(columns=[
-        "timestamp", "text", "predicted_label_3class", "confidence_3class",
         "stars_probs", "top_star_label"
     ]).to_excel(LOG_PATH, index=False)
 def _aggregate_to_3class(star_scores):
-    """
-    star_scores: list of dicts like:
-        [{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
-    Returns: (pred_label, confidence, probs_dict, top_star_label)
-    """
-    # Normalize keys (some labels are singular/plural)
     scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
-    s1 = scores.get("1 star", 0.0)
-    s2 = scores.get("2 stars", 0.0)
-    s3 = scores.get("3 stars", 0.0)
-    s4 = scores.get("4 stars", 0.0)
-    s5 = scores.get("5 stars", 0.0)
-    neg = s1 + s2
-    neu = s3
-    pos = s4 + s5
     probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
     pred_label = max(probs3, key=probs3.get)
     confidence = probs3[pred_label]
-    # Top star label for reference
     top_star_label = max(
         ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
         key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
     )
     return pred_label, confidence, probs3, top_star_label
-def analyze(text):
     if not text or not text.strip():
         return "❌ Please enter some text.", "", "", LOG_PATH
-    # Ask pipeline for all class scores (needed to aggregate)
-    star_results = clf(text, return_all_scores=True)[0]  # list of 5 dicts
     pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
     polarity = {
@@ -73,17 +63,19 @@ def analyze(text):
         "Negative": "☹️ Negative",
     }[pred_label]
-    # Log to Excel
     try:
         df = pd.read_excel(LOG_PATH)
     except Exception:
         df = pd.DataFrame(columns=[
-            "timestamp", "text", "predicted_label_3class", "confidence_3class",
             "stars_probs", "top_star_label"
         ])
     new_row = {
         "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
         "text": text,
         "predicted_label_3class": pred_label,
         "confidence_3class": round(conf, 4),
@@ -93,13 +85,38 @@ def analyze(text):
     df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
     df.to_excel(LOG_PATH, index=False)
-    # Display nicely
-    return (
-        f"Sentiment: {pred_label}",
-        f"Confidence: {conf:.3f}",   # 0..1
-        f"Polarity: {polarity}",
-        LOG_PATH
-    )
 # -------- Gradio UI --------
 with gr.Blocks() as demo:
@@ -109,15 +126,31 @@ with gr.Blocks() as demo:
         "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
     )
-    user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
-    btn = gr.Button("Analyze")
-    out_sent = gr.Textbox(label="Sentiment")
-    out_conf = gr.Textbox(label="Confidence (0–1)")
-    out_pol  = gr.Textbox(label="Polarity")
-    out_file = gr.File(label="Download logs (.xlsx)")
-    btn.click(analyze, inputs=user_text, outputs=[out_sent, out_conf, out_pol, out_file])
 if __name__ == "__main__":
-    demo.launch()

 """
 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
 -------------------------------------------------------------
+Features:
+• Single text sentiment analysis with language hint.
+• Batch analysis from CSV/XLSX file.
+• 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
+• Saves logs to sentiment_logs.xlsx.
 """
 import os
 from transformers import pipeline
 # -------- Model & Pipeline --------
+# The pipeline is built once at import time; analyze_single and analyze_batch
+# both call this shared `clf` object.
 MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
 clf = pipeline("sentiment-analysis", model=MODEL_NAME)
+# Excel workbook that receives one appended row per single-text query.
 LOG_PATH = "sentiment_logs.xlsx"
+# On first run, write a workbook containing only the header row so that later
+# pd.read_excel(LOG_PATH) calls find the expected columns.
 if not os.path.exists(LOG_PATH):
     pd.DataFrame(columns=[
+        "timestamp", "language_hint", "text",
+        "predicted_label_3class", "confidence_3class",
         "stars_probs", "top_star_label"
     ]).to_excel(LOG_PATH, index=False)
+# -------- Helper function: aggregate 5★ → 3-class --------
 def _aggregate_to_3class(star_scores):
+    """Collapse per-star scores into Negative / Neutral / Positive.
+
+    Args:
+        star_scores: iterable of dicts, each with a "label" key ("1 star",
+            "2 stars", ... "5 stars"; matched case-insensitively) and a
+            "score" key convertible to float.
+
+    Returns:
+        Tuple ``(pred_label, confidence, probs3, top_star_label)``: the
+        winning 3-class label, its aggregated score, the dict holding all
+        three aggregated scores, and the highest-scoring star label.
+    """
+    # Lower-case the labels so the lookups below do not depend on casing.
     scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
+    # A star label that is absent from the input contributes 0.0.
+    s1, s2, s3, s4, s5 = (
+        scores.get("1 star", 0.0),
+        scores.get("2 stars", 0.0),
+        scores.get("3 stars", 0.0),
+        scores.get("4 stars", 0.0),
+        scores.get("5 stars", 0.0),
+    )
+    # 1-2 stars -> Negative, 3 stars -> Neutral, 4-5 stars -> Positive.
+    neg, neu, pos = s1 + s2, s3, s4 + s5
     probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
+    # On a tie, max() keeps the first key in insertion order
+    # (Negative, then Neutral, then Positive).
     pred_label = max(probs3, key=probs3.get)
     confidence = probs3[pred_label]
+    # Highest single star bucket, reported alongside the 3-class result.
     top_star_label = max(
         ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
         key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
     )
     return pred_label, confidence, probs3, top_star_label
+# -------- Single text analysis --------
+def analyze_single(text, lang_hint):
     if not text or not text.strip():
         return "❌ Please enter some text.", "", "", LOG_PATH
+    star_results = clf(text, return_all_scores=True)[0]
     pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
     polarity = {
         "Negative": "☹️ Negative",
     }[pred_label]
+    # Log
     try:
         df = pd.read_excel(LOG_PATH)
     except Exception:
         df = pd.DataFrame(columns=[
+            "timestamp", "language_hint", "text",
+            "predicted_label_3class", "confidence_3class",
             "stars_probs", "top_star_label"
         ])
     new_row = {
         "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
+        "language_hint": lang_hint,
         "text": text,
         "predicted_label_3class": pred_label,
         "confidence_3class": round(conf, 4),
     df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
     df.to_excel(LOG_PATH, index=False)
+    return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH
+# -------- Batch analysis --------
+def analyze_batch(file, lang_hint):
+    if file is None:
+        return "❌ Please upload a CSV/XLSX file.", None
+    ext = os.path.splitext(file.name)[-1].lower()
+    if ext == ".csv":
+        df = pd.read_csv(file.name)
+    elif ext in [".xls", ".xlsx"]:
+        df = pd.read_excel(file.name)
+    else:
+        return "❌ Only CSV or Excel files are supported.", None
+    if "text" not in df.columns:
+        return "❌ The file must contain a 'text' column.", None
+    results = []
+    for t in df["text"]:
+        if not isinstance(t, str) or not t.strip():
+            results.append(("N/A", 0.0, "Invalid text"))
+            continue
+        star_results = clf(t, return_all_scores=True)[0]
+        pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
+        results.append((pred_label, conf, top_star))
+    df["predicted_label_3class"], df["confidence_3class"], df["top_star_label"] = zip(*results)
+    out_path = "batch_results.xlsx"
+    df.to_excel(out_path, index=False)
+    return "✅ Batch analysis complete.", out_path
 # -------- Gradio UI --------
 with gr.Blocks() as demo:
         "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
     )
+    with gr.Tab("🔹 Single Text"):
+        user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
+        lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
+        btn = gr.Button("Analyze")
+        out_sent = gr.Textbox(label="Sentiment")
+        out_conf = gr.Textbox(label="Confidence (0–1)")
+        out_pol  = gr.Textbox(label="Polarity")
+        out_file = gr.File(label="Download logs (.xlsx)")
+        btn.click(analyze_single, inputs=[user_text, lang_dropdown],
+                  outputs=[out_sent, out_conf, out_pol, out_file])
+    with gr.Tab("🔹 Batch Upload"):
+        gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.")
+        file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"])
+        lang_dropdown_batch = gr.Dropdown(["English", "Urdu", "Roman Urdu"],
+                                          label="Language Hint", value="English")
+        btn_batch = gr.Button("Analyze Batch")
+        batch_status = gr.Textbox(label="Status")
+        batch_file   = gr.File(label="Download Batch Results")
+        btn_batch.click(analyze_batch, inputs=[file_in, lang_dropdown_batch],
+                        outputs=[batch_status, batch_file])
 if __name__ == "__main__":
+    demo.launch()