Spaces:

RAHULJUNEJA33
/

Financial_Report_Sentiment_Analyzer

Sleeping

App Files Community

RAHULJUNEJA33 committed on Mar 17

Commit

b880bb4

verified ·

1 Parent(s): 2230e3e

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -11

app.py CHANGED Viewed

@@ -4,10 +4,10 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from nltk.tokenize import sent_tokenize
 from collections import defaultdict
 import nltk
-nltk.download('punkt')  # Download the correct tokenizer
 import fitz  # PyMuPDF
 import re
 nltk.download('punkt')
 st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
@@ -22,7 +22,6 @@ We analyze three key financial aspects:
 1. **Assets** – What the company owns
 2. **Liabilities** – What the company owes
 3. **Equity** – Net worth (Assets - Liabilities)
 ---
 """)
@@ -45,18 +44,16 @@ st.markdown("""
 """, unsafe_allow_html=True)
 if uploaded_file:
-    # Text extraction
     if uploaded_file.name.endswith('.pdf'):
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
-            report_text = ""
-            for page in doc:
-                report_text += page.get_text()
     else:
         report_text = uploaded_file.read().decode('utf-8')
     st.write("### 📄 Uploaded Report Preview:")
     st.markdown(f'''
-    <div class="report-pview">
         {report_text[:5000]}
     </div>
     ''', unsafe_allow_html=True)
@@ -81,7 +78,7 @@ if uploaded_file:
         label = label_mapping[label_idx]
         return label, probs.tolist()[0]
-    # Enhanced sentence extraction with regex
     def extract_sentences(text, keywords):
         sentences = sent_tokenize(text)
         keywords_lower = [k.lower() for k in keywords]
@@ -93,7 +90,7 @@ if uploaded_file:
         if not sentences:
             st.warning(f"⚠️ No relevant sentences found for {category_name}")
             return None, []
         sentiment_scores = defaultdict(int)
         negative_sentences = []
@@ -103,7 +100,7 @@ if uploaded_file:
             if label == 'Negative':
                 negative_sentences.append((sentence, probs))
-        total = sum(sentiment_scores.values())
         sentiment_percentages = {
             'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
             'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
@@ -137,7 +134,7 @@ if uploaded_file:
         result = analyze_category(report_text, category, keywords)
         if result[0] is None:
             continue
         sentiment_percentages, negative_sentences = result
         cols = st.columns(3)

 from nltk.tokenize import sent_tokenize
 from collections import defaultdict
 import nltk
 import fitz  # PyMuPDF
 import re
+# Download NLTK tokenizer
 nltk.download('punkt')
 st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
 1. **Assets** – What the company owns
 2. **Liabilities** – What the company owes
 3. **Equity** – Net worth (Assets - Liabilities)
 ---
 """)
 """, unsafe_allow_html=True)
 if uploaded_file:
+    # Extract text from uploaded file
     if uploaded_file.name.endswith('.pdf'):
         with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
+            report_text = "\n".join([page.get_text() for page in doc])
     else:
         report_text = uploaded_file.read().decode('utf-8')
     st.write("### 📄 Uploaded Report Preview:")
     st.markdown(f'''
+    <div class="report-preview">
         {report_text[:5000]}
     </div>
     ''', unsafe_allow_html=True)
         label = label_mapping[label_idx]
         return label, probs.tolist()[0]
+    # Extract sentences based on financial keywords
     def extract_sentences(text, keywords):
         sentences = sent_tokenize(text)
         keywords_lower = [k.lower() for k in keywords]
         if not sentences:
             st.warning(f"⚠️ No relevant sentences found for {category_name}")
             return None, []
         sentiment_scores = defaultdict(int)
         negative_sentences = []
             if label == 'Negative':
                 negative_sentences.append((sentence, probs))
+        total = sum(sentiment_scores.values()) or 1  # Avoid division by zero
         sentiment_percentages = {
             'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
             'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
         result = analyze_category(report_text, category, keywords)
         if result[0] is None:
             continue
         sentiment_percentages, negative_sentences = result
         cols = st.columns(3)