Update app.py
Browse files
app.py
CHANGED
@@ -4,10 +4,10 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
4 |
from nltk.tokenize import sent_tokenize
|
5 |
from collections import defaultdict
|
6 |
import nltk
|
7 |
-
nltk.download('punkt') # Download the correct tokenizer
|
8 |
import fitz # PyMuPDF
|
9 |
import re
|
10 |
|
|
|
11 |
nltk.download('punkt')
|
12 |
|
13 |
st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
|
@@ -22,7 +22,6 @@ We analyze three key financial aspects:
|
|
22 |
1. **Assets** – What the company owns
|
23 |
2. **Liabilities** – What the company owes
|
24 |
3. **Equity** – Net worth (Assets - Liabilities)
|
25 |
-
|
26 |
---
|
27 |
""")
|
28 |
|
@@ -45,18 +44,16 @@ st.markdown("""
|
|
45 |
""", unsafe_allow_html=True)
|
46 |
|
47 |
if uploaded_file:
|
48 |
-
#
|
49 |
if uploaded_file.name.endswith('.pdf'):
|
50 |
with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
|
51 |
-
report_text = ""
|
52 |
-
for page in doc:
|
53 |
-
report_text += page.get_text()
|
54 |
else:
|
55 |
report_text = uploaded_file.read().decode('utf-8')
|
56 |
|
57 |
st.write("### 📄 Uploaded Report Preview:")
|
58 |
st.markdown(f'''
|
59 |
-
<div class="report-
|
60 |
{report_text[:5000]}
|
61 |
</div>
|
62 |
''', unsafe_allow_html=True)
|
@@ -81,7 +78,7 @@ if uploaded_file:
|
|
81 |
label = label_mapping[label_idx]
|
82 |
return label, probs.tolist()[0]
|
83 |
|
84 |
-
#
|
85 |
def extract_sentences(text, keywords):
|
86 |
sentences = sent_tokenize(text)
|
87 |
keywords_lower = [k.lower() for k in keywords]
|
@@ -93,7 +90,7 @@ if uploaded_file:
|
|
93 |
if not sentences:
|
94 |
st.warning(f"⚠️ No relevant sentences found for {category_name}")
|
95 |
return None, []
|
96 |
-
|
97 |
sentiment_scores = defaultdict(int)
|
98 |
negative_sentences = []
|
99 |
|
@@ -103,7 +100,7 @@ if uploaded_file:
|
|
103 |
if label == 'Negative':
|
104 |
negative_sentences.append((sentence, probs))
|
105 |
|
106 |
-
total = sum(sentiment_scores.values())
|
107 |
sentiment_percentages = {
|
108 |
'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
|
109 |
'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
|
@@ -137,7 +134,7 @@ if uploaded_file:
|
|
137 |
result = analyze_category(report_text, category, keywords)
|
138 |
if result[0] is None:
|
139 |
continue
|
140 |
-
|
141 |
sentiment_percentages, negative_sentences = result
|
142 |
|
143 |
cols = st.columns(3)
|
|
|
4 |
from nltk.tokenize import sent_tokenize
|
5 |
from collections import defaultdict
|
6 |
import nltk
|
|
|
7 |
import fitz # PyMuPDF
|
8 |
import re
|
9 |
|
10 |
+
# Download NLTK tokenizer
|
11 |
nltk.download('punkt')
|
12 |
|
13 |
st.set_page_config(page_title="📊 Financial Report Sentiment Analyzer", layout="wide")
|
|
|
22 |
1. **Assets** – What the company owns
|
23 |
2. **Liabilities** – What the company owes
|
24 |
3. **Equity** – Net worth (Assets - Liabilities)
|
|
|
25 |
---
|
26 |
""")
|
27 |
|
|
|
44 |
""", unsafe_allow_html=True)
|
45 |
|
46 |
if uploaded_file:
|
47 |
+
# Extract text from uploaded file
|
48 |
if uploaded_file.name.endswith('.pdf'):
|
49 |
with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
|
50 |
+
report_text = "\n".join([page.get_text() for page in doc])
|
|
|
|
|
51 |
else:
|
52 |
report_text = uploaded_file.read().decode('utf-8')
|
53 |
|
54 |
st.write("### 📄 Uploaded Report Preview:")
|
55 |
st.markdown(f'''
|
56 |
+
<div class="report-preview">
|
57 |
{report_text[:5000]}
|
58 |
</div>
|
59 |
''', unsafe_allow_html=True)
|
|
|
78 |
label = label_mapping[label_idx]
|
79 |
return label, probs.tolist()[0]
|
80 |
|
81 |
+
# Extract sentences based on financial keywords
|
82 |
def extract_sentences(text, keywords):
|
83 |
sentences = sent_tokenize(text)
|
84 |
keywords_lower = [k.lower() for k in keywords]
|
|
|
90 |
if not sentences:
|
91 |
st.warning(f"⚠️ No relevant sentences found for {category_name}")
|
92 |
return None, []
|
93 |
+
|
94 |
sentiment_scores = defaultdict(int)
|
95 |
negative_sentences = []
|
96 |
|
|
|
100 |
if label == 'Negative':
|
101 |
negative_sentences.append((sentence, probs))
|
102 |
|
103 |
+
total = sum(sentiment_scores.values()) or 1 # Avoid division by zero
|
104 |
sentiment_percentages = {
|
105 |
'Positive': (sentiment_scores.get('Positive', 0) / total) * 100,
|
106 |
'Negative': (sentiment_scores.get('Negative', 0) / total) * 100,
|
|
|
134 |
result = analyze_category(report_text, category, keywords)
|
135 |
if result[0] is None:
|
136 |
continue
|
137 |
+
|
138 |
sentiment_percentages, negative_sentences = result
|
139 |
|
140 |
cols = st.columns(3)
|