{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL:  http://127.0.0.1:7870\n",
      "* Running on public URL: https://a94e18f722148a0463.gradio.live\n",
      "\n",
      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"https://a94e18f722148a0463.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline\n",
    "import torch\n",
    "import gradio as gr\n",
    "from openpyxl import load_workbook\n",
    "from numpy import mean\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Gradio theme handed to gr.Blocks(theme=theme) further down: Soft base, amber accents, stone neutrals.\n",
    "theme = gr.themes.Soft(\n",
    "    primary_hue=\"amber\",\n",
    "    secondary_hue=\"amber\",\n",
    "    neutral_hue=\"stone\",\n",
    ")\n",
    "\n",
    "# Load tokenizers and models\n",
    "# Summarization pair: calculate_results / analyze_review tokenize text with it and decode model.generate() output as the summary.\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
    "model = AutoModelForSeq2SeqLM.from_pretrained(\"suriya7/bart-finetuned-text-summarization\")\n",
    "\n",
    "# Keyword pair: used with the same tokenize -> generate -> decode pattern; the decoded text is shown as the keywords.\n",
    "tokenizer_keywords = AutoTokenizer.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
    "model_keywords = AutoModelForSeq2SeqLM.from_pretrained(\"transformer3/H2-keywordextractor\")\n",
    "\n",
    "# Only the classification pipeline below receives `device`; the two seq2seq models above are never moved with .to(device).\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "# NOTE(review): 'roberta-rating' has no namespace prefix -- presumably a local checkpoint directory; confirm it exists next to the notebook.\n",
    "new_model = AutoModelForSequenceClassification.from_pretrained('roberta-rating')\n",
    "new_tokenizer = AutoTokenizer.from_pretrained('roberta-rating')\n",
    "\n",
    "# Rating classifier. Callers in this cell split its labels on '_' and read the second part as an integer rating ('LABEL_<n>').\n",
    "classifier = TextClassificationPipeline(model=new_model, tokenizer=new_tokenizer, device=device)\n",
    "\n",
    "# NOTE(review): label_mapping is not referenced anywhere else in this cell.\n",
    "label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}\n",
    "\n",
    "# Function to display and filter the Excel workbook\n",
    "def filter_xl(file, keywords):\n",
    "    \"\"\"Load the active sheet of an Excel workbook and keep the rows matching every keyword.\n",
    "\n",
    "    Args:\n",
    "        file: Path or file-like object accepted by ``openpyxl.load_workbook``.\n",
    "            ``None`` (nothing uploaded yet) yields an empty DataFrame.\n",
    "        keywords: Comma-separated search terms. A row is kept only when each\n",
    "            term occurs, case-insensitively, in at least one of its cells.\n",
    "            Blank terms are skipped; an empty or ``None`` value disables filtering.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame whose column names are taken from the sheet's first row.\n",
    "    \"\"\"\n",
    "    if file is None:\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    # Load the workbook and convert it to a DataFrame\n",
    "    workbook = load_workbook(filename=file)\n",
    "    rows = workbook.active.values\n",
    "    header = next(rows, None)\n",
    "    if header is None:\n",
    "        # Completely empty sheet: there is no header row to build columns from.\n",
    "        return pd.DataFrame()\n",
    "    df = pd.DataFrame(rows, columns=header)\n",
    "\n",
    "    # Drop blank terms (e.g. from a trailing comma) so they cannot act as match-everything filters.\n",
    "    terms = [term.strip() for term in (keywords or \"\").split(',')]\n",
    "    terms = [term for term in terms if term]\n",
    "\n",
    "    for term in terms:\n",
    "        if df.empty:\n",
    "            break\n",
    "        # regex=False: terms are literal text, so input such as \"c++\" or \"(\" cannot raise a regex error.\n",
    "        row_matches = df.apply(\n",
    "            lambda row: row.astype(str).str.contains(term, case=False, regex=False).any(),\n",
    "            axis=1,\n",
    "        )\n",
    "        df = df[row_matches]\n",
    "\n",
    "    return df\n",
    "\n",
    "# Function to calculate overall rating from filtered data\n",
    "def calculate_rating(filtered_df):\n",
    "    \"\"\"Classify every non-empty cell of ``filtered_df`` and average the predicted ratings.\n",
    "\n",
    "    Args:\n",
    "        filtered_df: DataFrame whose cells are treated as individual reviews.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(average, ratings)``: the mean rating rounded to two decimals\n",
    "        (NaN when there is nothing to rate) and the list of per-review integer\n",
    "        ratings in flattened cell order.\n",
    "    \"\"\"\n",
    "    ratings = []\n",
    "    for review in filtered_df.to_numpy().flatten():\n",
    "        if pd.isna(review):\n",
    "            continue\n",
    "        # str(): cells may hold numbers or dates, which the pipeline cannot tokenize as-is.\n",
    "        # truncation=True: clip reviews longer than the classifier's maximum input length instead of failing.\n",
    "        label = classifier(str(review), truncation=True)[0]['label']\n",
    "        # Labels look like 'LABEL_<n>'; <n> is used directly as the rating.\n",
    "        ratings.append(int(label.split('_')[1]))\n",
    "\n",
    "    if not ratings:\n",
    "        # Same value numpy's mean([]) would give, without its RuntimeWarning.\n",
    "        return float(\"nan\"), ratings\n",
    "\n",
    "    return round(mean(ratings), 2), ratings\n",
    "\n",
    "# Function to calculate results including summary, keywords, and sentiment\n",
    "def calculate_results(file, keywords):\n",
    "    \"\"\"Filter the workbook, then rate, summarize, keyword-tag and sentiment-score its reviews.\n",
    "\n",
    "    Args:\n",
    "        file: Workbook path or file object, forwarded to ``filter_xl``.\n",
    "        keywords: Comma-separated filter terms, forwarded to ``filter_xl``.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(overall_rating, summary, keywords, overall_sentiment, ratings, sentiments)``.\n",
    "        ``ratings`` and ``sentiments`` hold one entry per non-empty cell. When the\n",
    "        filter leaves no reviews, summary and keywords are empty strings and the\n",
    "        overall sentiment is \"Neutral\".\n",
    "    \"\"\"\n",
    "    import re  # local import: only the pronoun rewrite below needs it\n",
    "\n",
    "    filtered_df = filter_xl(file, keywords)\n",
    "    overall_rating, ratings = calculate_rating(filtered_df)\n",
    "\n",
    "    # Skip empty cells and stringify the rest: \" \".join raises TypeError on None or numeric cells.\n",
    "    reviews = [str(cell) for cell in filtered_df.to_numpy().flatten() if pd.notna(cell)]\n",
    "    if not reviews:\n",
    "        # Nothing to analyze; do not run the generators on an empty prompt.\n",
    "        return overall_rating, \"\", \"\", \"Neutral\", ratings, []\n",
    "\n",
    "    # Summarize and extract keywords from the filtered reviews\n",
    "    text = \" \".join(reviews)\n",
    "    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids = model.generate(inputs[\"input_ids\"], num_beams=2, min_length=10, max_length=50)\n",
    "    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "    # Whole-word substitution only: plain str.replace also rewrote the inside of words (\"It\" -> \"Theyt\", \"time\" -> \"tithem\").\n",
    "    summary = re.sub(r\"\\bI\\b\", \"They\", summary)\n",
    "    summary = re.sub(r\"\\bmy\\b\", \"their\", summary)\n",
    "    summary = re.sub(r\"\\bme\\b\", \"them\", summary)\n",
    "\n",
    "    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids_keywords = model_keywords.generate(inputs_keywords[\"input_ids\"], num_beams=2, min_length=0, max_length=100)\n",
    "    # Separate name so the `keywords` filter argument is not overwritten.\n",
    "    extracted_keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "\n",
    "    # Determine overall sentiment\n",
    "    # calculate_rating already classified every review; reuse its ratings instead of running the classifier a second time.\n",
    "    # Rating n corresponds to label 'LABEL_<n>': 4-5 positive, 1-2 negative, anything else neutral.\n",
    "    sentiments = [\n",
    "        \"Positive\" if rating in (4, 5) else \"Negative\" if rating in (1, 2) else \"Neutral\"\n",
    "        for rating in ratings\n",
    "    ]\n",
    "\n",
    "    positive_total = sentiments.count(\"Positive\")\n",
    "    negative_total = sentiments.count(\"Negative\")\n",
    "    if positive_total > negative_total:\n",
    "        overall_sentiment = \"Positive\"\n",
    "    elif negative_total > positive_total:\n",
    "        overall_sentiment = \"Negative\"\n",
    "    else:\n",
    "        overall_sentiment = \"Neutral\"\n",
    "\n",
    "    return overall_rating, summary, extracted_keywords, overall_sentiment, ratings, sentiments\n",
    "\n",
    "# Function to analyze a single review\n",
    "def analyze_review(review):\n",
    "    \"\"\"Rate, summarize, keyword-tag and sentiment-score a single free-text review.\n",
    "\n",
    "    Args:\n",
    "        review: The review text. ``None``, empty or whitespace-only input is rejected.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(rating, summary, keywords, sentiment_label)``; for blank input,\n",
    "        four copies of the message \"Error: No text provided\".\n",
    "    \"\"\"\n",
    "    import re  # local import: only the pronoun rewrite below needs it\n",
    "\n",
    "    if not review or not review.strip():\n",
    "        return \"Error: No text provided\", \"Error: No text provided\", \"Error: No text provided\", \"Error: No text provided\"\n",
    "\n",
    "    # Classify once and derive both the rating and the sentiment from the same label.\n",
    "    # truncation=True clips text longer than the classifier's maximum input length instead of failing.\n",
    "    label = classifier(review, truncation=True)[0]['label']\n",
    "\n",
    "    # Calculate rating\n",
    "    rating = int(label.split('_')[1])\n",
    "\n",
    "    # Summarize review\n",
    "    inputs = tokenizer([review], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids = model.generate(inputs[\"input_ids\"], num_beams=2, min_length=10, max_length=50)\n",
    "    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "    # Whole-word substitution only: plain str.replace also rewrote the inside of words (\"It\" -> \"he/shet\", \"time\" -> \"tihim/her\").\n",
    "    summary = re.sub(r\"\\bI\\b\", \"he/she\", summary)\n",
    "    summary = re.sub(r\"\\bmy\\b\", \"his/her\", summary)\n",
    "    summary = re.sub(r\"\\bme\\b\", \"him/her\", summary)\n",
    "\n",
    "    # Extract keywords\n",
    "    inputs_keywords = tokenizer_keywords([review], max_length=1024, truncation=True, return_tensors=\"pt\")\n",
    "    summary_ids_keywords = model_keywords.generate(inputs_keywords[\"input_ids\"], num_beams=2, min_length=0, max_length=100)\n",
    "    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]\n",
    "\n",
    "    # Determine sentiment\n",
    "    if label in (\"LABEL_4\", \"LABEL_5\"):\n",
    "        sentiment_label = \"Positive\"\n",
    "    elif label in (\"LABEL_1\", \"LABEL_2\"):\n",
    "        sentiment_label = \"Negative\"\n",
    "    else:\n",
    "        sentiment_label = \"Neutral\"\n",
    "\n",
    "    return rating, summary, keywords, sentiment_label\n",
    "\n",
    "# Function to count rows in the filtered DataFrame\n",
    "def count_rows(filtered_df):\n",
    "    \"\"\"Return how many rows ``filtered_df`` holds, i.e. its ``len``.\"\"\"\n",
    "    row_total = len(filtered_df)\n",
    "    return row_total\n",
    "\n",
    "# Function to plot ratings\n",
    "def plot_ratings(ratings):\n",
    "    \"\"\"Draw a histogram of the 1-5 ratings and save it as a PNG.\n",
    "\n",
    "    Args:\n",
    "        ratings: Iterable of integer ratings.\n",
    "\n",
    "    Returns:\n",
    "        The path of the written image, 'ratings_distribution.png' (relative to the working directory).\n",
    "    \"\"\"\n",
    "    output_path = 'ratings_distribution.png'\n",
    "    # Explicit figure/axes instead of pyplot's implicit current figure, so the handle can be closed afterwards.\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    try:\n",
    "        # Bin edges 1..6 with align='left' centre one bar on each integer rating 1..5.\n",
    "        ax.hist(ratings, bins=range(1, 7), edgecolor='black', align='left')\n",
    "        ax.set_xlabel('Rating')\n",
    "        ax.set_ylabel('Frequency')\n",
    "        ax.set_title('Distribution of Ratings')\n",
    "        ax.set_xticks(range(1, 6))\n",
    "        ax.grid(True)\n",
    "        fig.savefig(output_path)\n",
    "    finally:\n",
    "        # Without this every button click leaves another open figure in memory.\n",
    "        plt.close(fig)\n",
    "    return output_path\n",
    "\n",
    "# Function to plot sentiments\n",
    "def plot_sentiments(sentiments):\n",
    "    \"\"\"Draw a bar chart of sentiment label counts and save it as a PNG.\n",
    "\n",
    "    Args:\n",
    "        sentiments: Iterable of 'Positive' / 'Negative' / 'Neutral' labels; may be empty.\n",
    "\n",
    "    Returns:\n",
    "        The path of the written image, 'sentiments_distribution.png' (relative to the working directory).\n",
    "    \"\"\"\n",
    "    output_path = 'sentiments_distribution.png'\n",
    "    # Fixed label -> colour mapping. value_counts() orders by frequency, so a positional colour list\n",
    "    # would paint whichever label is most common green.\n",
    "    palette = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}\n",
    "    sentiment_counts = (\n",
    "        pd.Series(list(sentiments), dtype=object)\n",
    "        .value_counts()\n",
    "        .reindex(list(palette), fill_value=0)  # stable bar order; absent labels show as 0, empty input still plots\n",
    "    )\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    try:\n",
    "        sentiment_counts.plot(kind='bar', color=list(palette.values()), ax=ax)\n",
    "        ax.set_xlabel('Sentiment')\n",
    "        ax.set_ylabel('Frequency')\n",
    "        ax.set_title('Distribution of Sentiments')\n",
    "        ax.grid(True)\n",
    "        fig.savefig(output_path)\n",
    "    finally:\n",
    "        # Without this every button click leaves another open figure in memory.\n",
    "        plt.close(fig)\n",
    "    return output_path\n",
    "\n",
    "# Gradio interface\n",
    "def _on_display(file, keywords):\n",
    "    \"\"\"Filter the uploaded workbook once and return the table together with its row count.\"\"\"\n",
    "    filtered = filter_xl(file, keywords)\n",
    "    return filtered, count_rows(filtered)\n",
    "\n",
    "\n",
    "def _on_calculate(file, keywords):\n",
    "    \"\"\"Return only the four headline results (rating, summary, keywords, sentiment).\"\"\"\n",
    "    return calculate_results(file, keywords)[:4]\n",
    "\n",
    "\n",
    "def _on_calculate_with_graphs(file, keywords):\n",
    "    \"\"\"Return the four headline results plus the paths of both distribution charts.\"\"\"\n",
    "    # One analysis pass feeds every output; the models are not re-run per output field.\n",
    "    *headline, ratings, sentiments = calculate_results(file, keywords)\n",
    "    return (*headline, plot_ratings(ratings), plot_sentiments(sentiments))\n",
    "\n",
    "\n",
    "with gr.Blocks(theme=theme) as demo:\n",
    "    gr.Markdown(\"<h1 style='text-align: center;'>Feedback and Auditing Survey AI Analyzer</h1><br>\")\n",
    "    with gr.Tabs():\n",
    "        with gr.TabItem(\"Upload and Filter\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column(scale=1):\n",
    "                    excel_file = gr.File(label=\"Upload Excel File\")\n",
    "                    #excel_file = gr.File(label=\"Upload Excel File\", file_types=[\".xlsx\", \".xlsm\", \".xltx\", \".xltm\"])\n",
    "                    keywords_input = gr.Textbox(label=\"Filter by Keywords (comma-separated)\")\n",
    "                    display_button = gr.Button(\"Display and Filter Excel Data\")\n",
    "                    clear_button_upload = gr.Button(\"Clear\")\n",
    "                    row_count = gr.Textbox(label=\"Number of Rows\", interactive=False)\n",
    "                with gr.Column(scale=3):\n",
    "                    filtered_data = gr.Dataframe(label=\"Filtered Excel Contents\")\n",
    "\n",
    "        with gr.TabItem(\"Calculate Results\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column():\n",
    "                    overall_rating = gr.Textbox(label=\"Overall Rating\")\n",
    "                    summary = gr.Textbox(label=\"Summary\")\n",
    "                    keywords_output = gr.Textbox(label=\"Keywords\")\n",
    "                    overall_sentiment = gr.Textbox(label=\"Overall Sentiment\")\n",
    "                    calculate_button = gr.Button(\"Calculate Results\")\n",
    "                with gr.Column():\n",
    "                    ratings_graph = gr.Image(label=\"Ratings Distribution\")\n",
    "                    sentiments_graph = gr.Image(label=\"Sentiments Distribution\")\n",
    "                    calculate_graph_button = gr.Button(\"Calculate Graph Results\")\n",
    "\n",
    "        with gr.TabItem(\"Testing Area / Write a Review\"):\n",
    "            with gr.Row():\n",
    "                with gr.Column(scale=2):\n",
    "                    review_input = gr.Textbox(label=\"Write your review here\")\n",
    "                    analyze_button = gr.Button(\"Analyze Review\")\n",
    "                    clear_button_review = gr.Button(\"Clear\")\n",
    "                with gr.Column(scale=2):\n",
    "                    review_rating = gr.Textbox(label=\"Rating\")\n",
    "                    review_summary = gr.Textbox(label=\"Summary\")\n",
    "                    review_keywords = gr.Textbox(label=\"Keywords\")\n",
    "                    review_sentiment = gr.Textbox(label=\"Sentiment\")\n",
    "\n",
    "    # Event wiring. Each handler returns exactly as many values as its `outputs` list has components.\n",
    "    display_button.click(_on_display, inputs=[excel_file, keywords_input], outputs=[filtered_data, row_count])\n",
    "    calculate_graph_button.click(_on_calculate_with_graphs, inputs=[excel_file, keywords_input], outputs=[overall_rating, summary, keywords_output, overall_sentiment, ratings_graph, sentiments_graph])\n",
    "    # Previously this handler returned a fifth value (a chart path) that had no matching output component.\n",
    "    calculate_button.click(_on_calculate, inputs=[excel_file, keywords_input], outputs=[overall_rating, summary, keywords_output, overall_sentiment])\n",
    "    analyze_button.click(analyze_review, inputs=review_input, outputs=[review_rating, review_summary, review_keywords, review_sentiment])\n",
    "    # Clears the keyword filter only; the uploaded file and the table stay as they are.\n",
    "    clear_button_upload.click(lambda: (\"\"), outputs=[keywords_input])\n",
    "    clear_button_review.click(lambda: (\"\", \"\", \"\", \"\", \"\"), outputs=[review_input, review_rating, review_summary, review_keywords, review_sentiment])\n",
    "\n",
    "# NOTE(review): share=True also publishes the app on a public *.gradio.live URL; drop it if uploads must stay local.\n",
    "demo.launch(share=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SolutionsInPR",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}