mbalvi committed on
Commit
0f5bc63
·
verified ·
1 Parent(s): 133afa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -46
app.py CHANGED
@@ -1,12 +1,11 @@
1
  """
2
  Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
3
  -------------------------------------------------------------
4
- • Uses Hugging Face model: nlptown/bert-base-multilingual-uncased-sentiment (5-star output)
5
- • Maps 5-star probabilities to 3 classes:
6
- Negative = P(1★) + P(2★)
7
- Neutral = P(3★)
8
- Positive = P(4★) + P(5★)
9
- • Saves each query to sentiment_logs.xlsx (downloadable)
10
  """
11
 
12
  import os
@@ -16,7 +15,6 @@ import gradio as gr
16
  from transformers import pipeline
17
 
18
  # -------- Model & Pipeline --------
19
- # This model supports many languages (incl. English/Urdu/Roman Urdu)
20
  MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
21
  clf = pipeline("sentiment-analysis", model=MODEL_NAME)
22
 
@@ -24,47 +22,39 @@ clf = pipeline("sentiment-analysis", model=MODEL_NAME)
24
  LOG_PATH = "sentiment_logs.xlsx"
25
  if not os.path.exists(LOG_PATH):
26
  pd.DataFrame(columns=[
27
- "timestamp", "text", "predicted_label_3class", "confidence_3class",
 
28
  "stars_probs", "top_star_label"
29
  ]).to_excel(LOG_PATH, index=False)
30
 
 
31
  def _aggregate_to_3class(star_scores):
32
- """
33
- star_scores: list of dicts like:
34
- [{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
35
- Returns: (pred_label, confidence, probs_dict, top_star_label)
36
- """
37
- # Normalize keys (some labels are singular/plural)
38
  scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
39
- s1 = scores.get("1 star", 0.0)
40
- s2 = scores.get("2 stars", 0.0)
41
- s3 = scores.get("3 stars", 0.0)
42
- s4 = scores.get("4 stars", 0.0)
43
- s5 = scores.get("5 stars", 0.0)
44
-
45
- neg = s1 + s2
46
- neu = s3
47
- pos = s4 + s5
48
 
 
49
  probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
50
  pred_label = max(probs3, key=probs3.get)
51
  confidence = probs3[pred_label]
52
 
53
- # Top star label for reference
54
  top_star_label = max(
55
  ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
56
  key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
57
  )
58
-
59
  return pred_label, confidence, probs3, top_star_label
60
 
61
- def analyze(text):
 
62
  if not text or not text.strip():
63
  return "❌ Please enter some text.", "", "", LOG_PATH
64
 
65
- # Ask pipeline for all class scores (needed to aggregate)
66
- star_results = clf(text, return_all_scores=True)[0] # list of 5 dicts
67
-
68
  pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
69
 
70
  polarity = {
@@ -73,17 +63,19 @@ def analyze(text):
73
  "Negative": "☹️ Negative",
74
  }[pred_label]
75
 
76
- # Log to Excel
77
  try:
78
  df = pd.read_excel(LOG_PATH)
79
  except Exception:
80
  df = pd.DataFrame(columns=[
81
- "timestamp", "text", "predicted_label_3class", "confidence_3class",
 
82
  "stars_probs", "top_star_label"
83
  ])
84
 
85
  new_row = {
86
  "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
 
87
  "text": text,
88
  "predicted_label_3class": pred_label,
89
  "confidence_3class": round(conf, 4),
@@ -93,13 +85,38 @@ def analyze(text):
93
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
94
  df.to_excel(LOG_PATH, index=False)
95
 
96
- # Display nicely
97
- return (
98
- f"Sentiment: {pred_label}",
99
- f"Confidence: {conf:.3f}", # 0..1
100
- f"Polarity: {polarity}",
101
- LOG_PATH
102
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  # -------- Gradio UI --------
105
  with gr.Blocks() as demo:
@@ -109,15 +126,31 @@ with gr.Blocks() as demo:
109
  "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
110
  )
111
 
112
- user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
113
- btn = gr.Button("Analyze")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- out_sent = gr.Textbox(label="Sentiment")
116
- out_conf = gr.Textbox(label="Confidence (0–1)")
117
- out_pol = gr.Textbox(label="Polarity")
118
- out_file = gr.File(label="Download logs (.xlsx)")
119
 
120
- btn.click(analyze, inputs=user_text, outputs=[out_sent, out_conf, out_pol, out_file])
 
121
 
122
  if __name__ == "__main__":
123
- demo.launch()
 
1
  """
2
  Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
3
  -------------------------------------------------------------
4
+ Features:
5
+ • Single text sentiment analysis with language hint.
6
+ • Batch analysis from CSV/XLSX file.
7
+ • 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
8
+ • Saves logs to sentiment_logs.xlsx.
 
9
  """
10
 
11
  import os
 
15
  from transformers import pipeline
16
 
17
  # -------- Model & Pipeline --------
 
18
  MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
19
  clf = pipeline("sentiment-analysis", model=MODEL_NAME)
20
 
 
22
  LOG_PATH = "sentiment_logs.xlsx"
23
  if not os.path.exists(LOG_PATH):
24
  pd.DataFrame(columns=[
25
+ "timestamp", "language_hint", "text",
26
+ "predicted_label_3class", "confidence_3class",
27
  "stars_probs", "top_star_label"
28
  ]).to_excel(LOG_PATH, index=False)
29
 
30
+ # -------- Helper function: aggregate 5★ → 3-class --------
31
  def _aggregate_to_3class(star_scores):
 
 
 
 
 
 
32
  scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
33
+ s1, s2, s3, s4, s5 = (
34
+ scores.get("1 star", 0.0),
35
+ scores.get("2 stars", 0.0),
36
+ scores.get("3 stars", 0.0),
37
+ scores.get("4 stars", 0.0),
38
+ scores.get("5 stars", 0.0),
39
+ )
 
 
40
 
41
+ neg, neu, pos = s1 + s2, s3, s4 + s5
42
  probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
43
  pred_label = max(probs3, key=probs3.get)
44
  confidence = probs3[pred_label]
45
 
 
46
  top_star_label = max(
47
  ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
48
  key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
49
  )
 
50
  return pred_label, confidence, probs3, top_star_label
51
 
52
+ # -------- Single text analysis --------
53
+ def analyze_single(text, lang_hint):
54
  if not text or not text.strip():
55
  return "❌ Please enter some text.", "", "", LOG_PATH
56
 
57
+ star_results = clf(text, return_all_scores=True)[0]
 
 
58
  pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
59
 
60
  polarity = {
 
63
  "Negative": "☹️ Negative",
64
  }[pred_label]
65
 
66
+ # Log
67
  try:
68
  df = pd.read_excel(LOG_PATH)
69
  except Exception:
70
  df = pd.DataFrame(columns=[
71
+ "timestamp", "language_hint", "text",
72
+ "predicted_label_3class", "confidence_3class",
73
  "stars_probs", "top_star_label"
74
  ])
75
 
76
  new_row = {
77
  "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
78
+ "language_hint": lang_hint,
79
  "text": text,
80
  "predicted_label_3class": pred_label,
81
  "confidence_3class": round(conf, 4),
 
85
  df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
86
  df.to_excel(LOG_PATH, index=False)
87
 
88
+ return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH
89
+
90
+ # -------- Batch analysis --------
91
+ def analyze_batch(file, lang_hint):
92
+ if file is None:
93
+ return "❌ Please upload a CSV/XLSX file.", None
94
+
95
+ ext = os.path.splitext(file.name)[-1].lower()
96
+ if ext == ".csv":
97
+ df = pd.read_csv(file.name)
98
+ elif ext in [".xls", ".xlsx"]:
99
+ df = pd.read_excel(file.name)
100
+ else:
101
+ return "❌ Only CSV or Excel files are supported.", None
102
+
103
+ if "text" not in df.columns:
104
+ return "❌ The file must contain a 'text' column.", None
105
+
106
+ results = []
107
+ for t in df["text"]:
108
+ if not isinstance(t, str) or not t.strip():
109
+ results.append(("N/A", 0.0, "Invalid text"))
110
+ continue
111
+ star_results = clf(t, return_all_scores=True)[0]
112
+ pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)
113
+ results.append((pred_label, conf, top_star))
114
+
115
+ df["predicted_label_3class"], df["confidence_3class"], df["top_star_label"] = zip(*results)
116
+ out_path = "batch_results.xlsx"
117
+ df.to_excel(out_path, index=False)
118
+
119
+ return "✅ Batch analysis complete.", out_path
120
 
121
  # -------- Gradio UI --------
122
  with gr.Blocks() as demo:
 
126
  "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
127
  )
128
 
129
+ with gr.Tab("🔹 Single Text"):
130
+ user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
131
+ lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
132
+ btn = gr.Button("Analyze")
133
+
134
+ out_sent = gr.Textbox(label="Sentiment")
135
+ out_conf = gr.Textbox(label="Confidence (0–1)")
136
+ out_pol = gr.Textbox(label="Polarity")
137
+ out_file = gr.File(label="Download logs (.xlsx)")
138
+
139
+ btn.click(analyze_single, inputs=[user_text, lang_dropdown],
140
+ outputs=[out_sent, out_conf, out_pol, out_file])
141
+
142
+ with gr.Tab("🔹 Batch Upload"):
143
+ gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.")
144
+ file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"])
145
+ lang_dropdown_batch = gr.Dropdown(["English", "Urdu", "Roman Urdu"],
146
+ label="Language Hint", value="English")
147
+ btn_batch = gr.Button("Analyze Batch")
148
 
149
+ batch_status = gr.Textbox(label="Status")
150
+ batch_file = gr.File(label="Download Batch Results")
 
 
151
 
152
+ btn_batch.click(analyze_batch, inputs=[file_in, lang_dropdown_batch],
153
+ outputs=[batch_status, batch_file])
154
 
155
  if __name__ == "__main__":
156
+ demo.launch()