Spaces:

RAHULJUNEJA33
/

Financial_Report_Sentiment_Analyzer

Sleeping

App Files Files Community

RAHULJUNEJA33 committed on Mar 17

Commit

0fdee13

verified ·

1 Parent(s): b880bb4

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -13

app.py CHANGED Viewed

@@ -3,21 +3,26 @@ import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from nltk.tokenize import sent_tokenize
 from collections import defaultdict
-import nltk
 import fitz  # PyMuPDF
 import re
-# Download NLTK tokenizer
-nltk.download('punkt')
-st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
 st.title("📊 Financial Report Sentiment Analyzer")
 st.markdown("""
 ### What is FinBERT?
 **FinBERT** is a language model fine-tuned specifically for financial text. It helps in detecting sentiment (Positive, Neutral, Negative) in financial reports.
 We analyze three key financial aspects:
 1. **Assets** – What the company owns
 2. **Liabilities** – What the company owes
@@ -25,8 +30,10 @@ We analyze three key financial aspects:
 ---
 """)
 uploaded_file = st.file_uploader("📂 Upload Financial Report (.pdf or .txt)", type=["pdf", "txt"])
 st.markdown("""
 <style>
 .report-preview {
@@ -44,13 +51,14 @@ st.markdown("""
 """, unsafe_allow_html=True)
 if uploaded_file:
-    # Extract text from uploaded file
     if uploaded_file.name.endswith('.pdf'):
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
-            report_text = "\n".join([page.get_text() for page in doc])
     else:
         report_text = uploaded_file.read().decode('utf-8')
     st.write("### 📄 Uploaded Report Preview:")
     st.markdown(f'''
     <div class="report-preview">
@@ -58,7 +66,7 @@ if uploaded_file:
     </div>
     ''', unsafe_allow_html=True)
-    # Load FinBERT Model
     @st.cache_resource
     def load_model():
         tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
@@ -78,19 +86,20 @@ if uploaded_file:
         label = label_mapping[label_idx]
         return label, probs.tolist()[0]
-    # Extract sentences based on financial keywords
     def extract_sentences(text, keywords):
         """Return the sentences of ``text`` that mention any of ``keywords``.
 
         ``text`` is split into sentences with NLTK's ``sent_tokenize``; a
         sentence is kept when at least one keyword occurs in it as a whole
         word or phrase, compared case-insensitively.
 
         Args:
             text: Raw report text to scan.
             keywords: Iterable of keyword strings; they are matched literally
                 (regex metacharacters are escaped).
 
         Returns:
             A list of matching sentences in document order. The list is empty
             when ``keywords`` contains no non-empty entry.
         """
         # Skip empty keywords: an empty alternative would match at every word
         # boundary and therefore select practically every sentence.
         escaped_terms = [re.escape(keyword) for keyword in keywords if keyword]
         if not escaped_terms:
             return []
         # IGNORECASE already makes matching case-insensitive, so the keywords
         # need no lowercasing; the group is non-capturing because only the
         # presence of a match is used.
         pattern = re.compile(r'\b(?:' + '|'.join(escaped_terms) + r')\b', re.IGNORECASE)
         return [sentence for sentence in sent_tokenize(text) if pattern.search(sentence)]
     def analyze_category(text, category_name, keywords):
         sentences = extract_sentences(text, keywords)
         if not sentences:
             st.warning(f"⚠️ No relevant sentences found for {category_name}")
             return None, []
         sentiment_scores = defaultdict(int)
         negative_sentences = []
@@ -100,7 +109,7 @@ if uploaded_file:
             if label == 'Negative':
                 negative_sentences.append((sentence, probs))
-        total = sum(sentiment_scores.values()) or 1  # Avoid division by zero
         sentiment_percentages = {
             'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
             'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
@@ -108,7 +117,7 @@ if uploaded_file:
         }
         return sentiment_percentages, negative_sentences
-    # Expanded financial categories
     categories = {
         'Assets': [
             'asset', 'assets', 'current assets', 'fixed assets', 'cash equivalents',
@@ -129,12 +138,13 @@ if uploaded_file:
     st.write("## 📝 Sentiment Analysis Results:")
     for category, keywords in categories.items():
         st.write(f"### 🔍 {category}")
         result = analyze_category(report_text, category, keywords)
         if result[0] is None:
             continue
         sentiment_percentages, negative_sentences = result
         cols = st.columns(3)

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from nltk.tokenize import sent_tokenize
 from collections import defaultdict
 import fitz  # PyMuPDF
 import re
+import nltk
+import os
+# Ensure NLTK data is stored in the correct directory
+NLTK_DATA_PATH = "/root/nltk_data"
+os.makedirs(NLTK_DATA_PATH, exist_ok=True)
+nltk.data.path.append(NLTK_DATA_PATH)
+# Download required resources
+nltk.download('punkt', download_dir=NLTK_DATA_PATH)
+# Streamlit app configuration
+st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
 st.title("📊 Financial Report Sentiment Analyzer")
 st.markdown("""
 ### What is FinBERT?
 **FinBERT** is a language model fine-tuned specifically for financial text. It helps in detecting sentiment (Positive, Neutral, Negative) in financial reports.
 We analyze three key financial aspects:
 1. **Assets** – What the company owns
 2. **Liabilities** – What the company owes
 ---
 """)
+# File uploader
 uploaded_file = st.file_uploader("📂 Upload Financial Report (.pdf or .txt)", type=["pdf", "txt"])
+# CSS Styling for Report Preview
 st.markdown("""
 <style>
 .report-preview {
 """, unsafe_allow_html=True)
 if uploaded_file:
+    # Extract text from the uploaded file
     if uploaded_file.name.endswith('.pdf'):
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
+            report_text = "".join([page.get_text() for page in doc])
     else:
         report_text = uploaded_file.read().decode('utf-8')
+    # Display the uploaded report preview
     st.write("### 📄 Uploaded Report Preview:")
     st.markdown(f'''
     <div class="report-preview">
     </div>
     ''', unsafe_allow_html=True)
+    # Load FinBERT Model (cached for performance)
     @st.cache_resource
     def load_model():
         tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
         label = label_mapping[label_idx]
         return label, probs.tolist()[0]
+    # Extract sentences using regex and NLTK tokenizer
     def extract_sentences(text, keywords):
         """Return the sentences of ``text`` that mention any of ``keywords``.
 
         ``text`` is split into sentences with NLTK's ``sent_tokenize``; a
         sentence is kept when at least one keyword occurs in it as a whole
         word or phrase, compared case-insensitively.
 
         Args:
             text: Raw report text to scan.
             keywords: Iterable of keyword strings; they are matched literally
                 (regex metacharacters are escaped).
 
         Returns:
             A list of matching sentences in document order. The list is empty
             when ``keywords`` contains no non-empty entry.
         """
         # Skip empty keywords: an empty alternative would match at every word
         # boundary and therefore select practically every sentence.
         escaped_terms = [re.escape(keyword) for keyword in keywords if keyword]
         if not escaped_terms:
             return []
         # IGNORECASE already makes matching case-insensitive, so the keywords
         # need no lowercasing; the group is non-capturing because only the
         # presence of a match is used.
         pattern = re.compile(r'\b(?:' + '|'.join(escaped_terms) + r')\b', re.IGNORECASE)
         return [sentence for sentence in sent_tokenize(text) if pattern.search(sentence)]
+    # Analyze financial sentiment category-wise
     def analyze_category(text, category_name, keywords):
         sentences = extract_sentences(text, keywords)
         if not sentences:
             st.warning(f"⚠️ No relevant sentences found for {category_name}")
             return None, []
         sentiment_scores = defaultdict(int)
         negative_sentences = []
             if label == 'Negative':
                 negative_sentences.append((sentence, probs))
+        total = sum(sentiment_scores.values())
         sentiment_percentages = {
             'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
             'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
         }
         return sentiment_percentages, negative_sentences
+    # Define financial categories and keywords
     categories = {
         'Assets': [
             'asset', 'assets', 'current assets', 'fixed assets', 'cash equivalents',
     st.write("## 📝 Sentiment Analysis Results:")
+    # Perform sentiment analysis for each financial category
     for category, keywords in categories.items():
         st.write(f"### 🔍 {category}")
         result = analyze_category(report_text, category, keywords)
         if result[0] is None:
             continue
         sentiment_percentages, negative_sentences = result
         cols = st.columns(3)