RAHULJUNEJA33 committed on
Commit
b880bb4
·
verified ·
1 Parent(s): 2230e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -4,10 +4,10 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
  from nltk.tokenize import sent_tokenize
5
  from collections import defaultdict
6
  import nltk
7
- nltk.download('punkt') # Download the correct tokenizer
8
  import fitz # PyMuPDF
9
  import re
10
 
 
11
  nltk.download('punkt')
12
 
13
  st.set_page_config(page_title="πŸ“Š Financial Report Sentiment Analyzer", layout="wide")
@@ -22,7 +22,6 @@ We analyze three key financial aspects:
22
  1. **Assets** – What the company owns
23
  2. **Liabilities** – What the company owes
24
  3. **Equity** – Net worth (Assets - Liabilities)
25
-
26
  ---
27
  """)
28
 
@@ -45,18 +44,16 @@ st.markdown("""
45
  """, unsafe_allow_html=True)
46
 
47
  if uploaded_file:
48
- # Text extraction
49
  if uploaded_file.name.endswith('.pdf'):
50
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
51
- report_text = ""
52
- for page in doc:
53
- report_text += page.get_text()
54
  else:
55
  report_text = uploaded_file.read().decode('utf-8')
56
 
57
  st.write("### πŸ“„ Uploaded Report Preview:")
58
  st.markdown(f'''
59
- <div class="report-pview">
60
  {report_text[:5000]}
61
  </div>
62
  ''', unsafe_allow_html=True)
@@ -81,7 +78,7 @@ if uploaded_file:
81
  label = label_mapping[label_idx]
82
  return label, probs.tolist()[0]
83
 
84
- # Enhanced sentence extraction with regex
85
  def extract_sentences(text, keywords):
86
  sentences = sent_tokenize(text)
87
  keywords_lower = [k.lower() for k in keywords]
@@ -93,7 +90,7 @@ if uploaded_file:
93
  if not sentences:
94
  st.warning(f"⚠️ No relevant sentences found for {category_name}")
95
  return None, []
96
-
97
  sentiment_scores = defaultdict(int)
98
  negative_sentences = []
99
 
@@ -103,7 +100,7 @@ if uploaded_file:
103
  if label == 'Negative':
104
  negative_sentences.append((sentence, probs))
105
 
106
- total = sum(sentiment_scores.values())
107
  sentiment_percentages = {
108
  'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
109
  'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
@@ -137,7 +134,7 @@ if uploaded_file:
137
  result = analyze_category(report_text, category, keywords)
138
  if result[0] is None:
139
  continue
140
-
141
  sentiment_percentages, negative_sentences = result
142
 
143
  cols = st.columns(3)
 
4
  from nltk.tokenize import sent_tokenize
5
  from collections import defaultdict
6
  import nltk
 
7
  import fitz # PyMuPDF
8
  import re
9
 
10
+ # Download NLTK tokenizer
11
  nltk.download('punkt')
12
 
13
  st.set_page_config(page_title="πŸ“Š Financial Report Sentiment Analyzer", layout="wide")
 
22
  1. **Assets** – What the company owns
23
  2. **Liabilities** – What the company owes
24
  3. **Equity** – Net worth (Assets - Liabilities)
 
25
  ---
26
  """)
27
 
 
44
  """, unsafe_allow_html=True)
45
 
46
  if uploaded_file:
47
+ # Extract text from uploaded file
48
  if uploaded_file.name.endswith('.pdf'):
49
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
50
+ report_text = "\n".join([page.get_text() for page in doc])
 
 
51
  else:
52
  report_text = uploaded_file.read().decode('utf-8')
53
 
54
  st.write("### πŸ“„ Uploaded Report Preview:")
55
  st.markdown(f'''
56
+ <div class="report-preview">
57
  {report_text[:5000]}
58
  </div>
59
  ''', unsafe_allow_html=True)
 
78
  label = label_mapping[label_idx]
79
  return label, probs.tolist()[0]
80
 
81
+ # Extract sentences based on financial keywords
82
  def extract_sentences(text, keywords):
83
  sentences = sent_tokenize(text)
84
  keywords_lower = [k.lower() for k in keywords]
 
90
  if not sentences:
91
  st.warning(f"⚠️ No relevant sentences found for {category_name}")
92
  return None, []
93
+
94
  sentiment_scores = defaultdict(int)
95
  negative_sentences = []
96
 
 
100
  if label == 'Negative':
101
  negative_sentences.append((sentence, probs))
102
 
103
+ total = sum(sentiment_scores.values()) or 1 # Avoid division by zero
104
  sentiment_percentages = {
105
  'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
106
  'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
 
134
  result = analyze_category(report_text, category, keywords)
135
  if result[0] is None:
136
  continue
137
+
138
  sentiment_percentages, negative_sentences = result
139
 
140
  cols = st.columns(3)