Spaces:
Runtime error
Runtime error
""" | |
Multilingual Sentiment Analysis (English β’ Urdu β’ Roman Urdu) | |
------------------------------------------------------------- | |
Features: | |
β’ Single text sentiment analysis with language hint. | |
β’ Batch analysis from CSV/XLSX file. | |
β’ 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores. | |
β’ Saves logs to sentiment_logs.xlsx. | |
""" | |
import os | |
from datetime import datetime | |
import pandas as pd | |
import gradio as gr | |
from transformers import pipeline | |
# -------- Model & Pipeline --------
# Hugging Face model id; the rest of this file expects it to emit the
# five labels "1 star" … "5 stars".
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Built once at import time and reused by every analysis call in this file.
clf = pipeline(task="sentiment-analysis", model=MODEL_NAME)
# -------- Logging setup --------
LOG_PATH = "sentiment_logs.xlsx"

# On first run, write a header-only workbook so the log file already exists
# before any prediction has been recorded.
if not os.path.exists(LOG_PATH):
    _log_header = [
        "timestamp",
        "language_hint",
        "text",
        "predicted_label_3class",
        "confidence_3class",
        "stars_probs",
        "top_star_label",
    ]
    pd.DataFrame(columns=_log_header).to_excel(LOG_PATH, index=False)
# -------- Helper function: aggregate 5β β 3-class -------- | |
def _aggregate_to_3class(star_scores): | |
scores = {d["label"].lower(): float(d["score"]) for d in star_scores} | |
s1, s2, s3, s4, s5 = ( | |
scores.get("1 star", 0.0), | |
scores.get("2 stars", 0.0), | |
scores.get("3 stars", 0.0), | |
scores.get("4 stars", 0.0), | |
scores.get("5 stars", 0.0), | |
) | |
neg, neu, pos = s1 + s2, s3, s4 + s5 | |
probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos} | |
pred_label = max(probs3, key=probs3.get) | |
confidence = probs3[pred_label] | |
top_star_label = max( | |
["1 star", "2 stars", "3 stars", "4 stars", "5 stars"], | |
key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k] | |
) | |
return pred_label, confidence, probs3, top_star_label | |
# -------- Single text analysis --------
def analyze_single(text, lang_hint):
    """Classify one piece of text and append the prediction to the Excel log.

    Args:
        text: The text to classify.
        lang_hint: Language label chosen by the user. It is only written to
            the log; it does not influence the prediction.

    Returns:
        A 4-tuple ``(sentiment_msg, confidence_msg, polarity_msg, log_path)``.
        For blank input the first item asks the user to enter text and the
        two middle items are empty strings; nothing is logged in that case.
    """
    # Local import: the file header only imports ``datetime`` from this module.
    from datetime import timezone

    if not text or not text.strip():
        return "β Please enter some text.", "", "", LOG_PATH

    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is cut instead of crashing inference.
    star_results = clf(text, return_all_scores=True, truncation=True)[0]
    pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)

    # NOTE(review): the emoji prefixes below look mis-encoded (mojibake) in
    # the source this was reviewed from — confirm the intended glyphs.
    polarity = {
        "Positive": "π Positive",
        "Neutral": "π Neutral",
        "Negative": "βΉοΈ Negative",
    }[pred_label]

    # Log: the workbook is re-read and fully rewritten on every call. If it
    # cannot be read (missing, corrupt, ...), start over with an empty log
    # rather than failing the request.
    try:
        df = pd.read_excel(LOG_PATH)
    except Exception:
        df = pd.DataFrame(columns=[
            "timestamp", "language_hint", "text",
            "predicted_label_3class", "confidence_3class",
            "stars_probs", "top_star_label",
        ])

    new_row = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is unchanged.
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "language_hint": lang_hint,
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str(
            {d["label"]: round(float(d["score"]), 4) for d in star_results}
        ),
        "top_star_label": top_star,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    df.to_excel(LOG_PATH, index=False)

    return (
        f"Sentiment: {pred_label}",
        f"Confidence: {conf:.3f}",
        f"Polarity: {polarity}",
        LOG_PATH,
    )
# -------- Batch analysis --------
def analyze_batch(file, lang_hint):
    """Run sentiment analysis over the ``text`` column of an uploaded file.

    Args:
        file: The upload to read — either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.
        lang_hint: Language label chosen by the user; not used by this
            function.

    Returns:
        ``(status_message, output_path)``. ``output_path`` is
        ``"batch_results.xlsx"`` on success and ``None`` when the input is
        rejected (no file, unsupported extension, or no ``text`` column).
    """
    if file is None:
        return "β Please upload a CSV/XLSX file.", None

    # Accept a bare path as well as a wrapper object carrying ``.name``, so
    # the handler works whichever form the upload component hands over.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    ext = os.path.splitext(path)[-1].lower()
    if ext == ".csv":
        df = pd.read_csv(path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(path)
    else:
        return "β Only CSV or Excel files are supported.", None

    if "text" not in df.columns:
        return "β The file must contain a 'text' column.", None

    # Collect each output column separately: unlike ``zip(*rows)``, this also
    # works when the file has a header but zero data rows.
    labels, confidences, top_stars = [], [], []
    for cell in df["text"]:
        if not isinstance(cell, str) or not cell.strip():
            # Empty cells (read back as NaN), numbers and blank strings.
            labels.append("N/A")
            confidences.append(0.0)
            top_stars.append("Invalid text")
            continue
        # truncation=True keeps one over-long row from aborting the batch.
        star_results = clf(cell, return_all_scores=True, truncation=True)[0]
        pred_label, conf, _probs3, top_star = _aggregate_to_3class(star_results)
        labels.append(pred_label)
        confidences.append(conf)
        top_stars.append(top_star)

    df["predicted_label_3class"] = labels
    df["confidence_3class"] = confidences
    df["top_star_label"] = top_stars

    # Fixed output name in the working directory; each run overwrites the last.
    out_path = "batch_results.xlsx"
    df.to_excel(out_path, index=False)
    return "β Batch analysis complete.", out_path
# -------- Gradio UI --------
# Two tabs: one for classifying a single text (also offers the log workbook
# for download) and one for classifying every row of an uploaded file.
# NOTE(review): several user-facing strings below contain what looks like
# mis-encoded emoji/punctuation — confirm the intended glyphs.
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "## π Multilingual Sentiment Analysis (Positive β’ Neutral β’ Negative)\n"
            "**Languages:** English, Urdu, Roman Urdu \n"
            "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5β β 3 classes)"
        )
    )

    with gr.Tab(label="πΉ Single Text"):
        user_text = gr.Textbox(
            label="Enter text",
            placeholder="Type in English, Urdu, or Roman Urdu...",
        )
        lang_dropdown = gr.Dropdown(
            choices=["English", "Urdu", "Roman Urdu"],
            label="Language Hint",
            value="English",
        )
        btn = gr.Button(value="Analyze")
        out_sent = gr.Textbox(label="Sentiment")
        out_conf = gr.Textbox(label="Confidence (0β1)")
        out_pol = gr.Textbox(label="Polarity")
        out_file = gr.File(label="Download logs (.xlsx)")
        # Outputs are listed in the order analyze_single returns them.
        btn.click(
            fn=analyze_single,
            inputs=[user_text, lang_dropdown],
            outputs=[out_sent, out_conf, out_pol, out_file],
        )

    with gr.Tab(label="πΉ Batch Upload"):
        gr.Markdown(
            value="Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis."
        )
        file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"])
        lang_dropdown_batch = gr.Dropdown(
            choices=["English", "Urdu", "Roman Urdu"],
            label="Language Hint",
            value="English",
        )
        btn_batch = gr.Button(value="Analyze Batch")
        batch_status = gr.Textbox(label="Status")
        batch_file = gr.File(label="Download Batch Results")
        # Outputs are listed in the order analyze_batch returns them.
        btn_batch.click(
            fn=analyze_batch,
            inputs=[file_in, lang_dropdown_batch],
            outputs=[batch_status, batch_file],
        )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()