ai validation
Browse files
- app.py +54 -44
- pii.py +11 -2
- requirements.txt +7 -1
app.py
CHANGED
@@ -8,6 +8,8 @@ from knowledge import graph
|
|
8 |
from pii import derisk
|
9 |
from rag import rbc_product
|
10 |
from tool import rival_product
|
|
|
|
|
11 |
|
12 |
# Define the Google Analytics script
|
13 |
head = """
|
@@ -296,44 +298,6 @@ Surfaces hidden patterns and relationships for better analytics and insights.
|
|
296 |
Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
|
297 |
""")
|
298 |
|
299 |
-
with gr.Tab("PII Audit"):
|
300 |
-
gr.Markdown("""
|
301 |
-
Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
|
302 |
-
================================================
|
303 |
-
""")
|
304 |
-
in_verbatim = gr.Textbox(label="Peronal Info")
|
305 |
-
out_product = gr.Textbox(label="PII")
|
306 |
-
|
307 |
-
gr.Examples(
|
308 |
-
[
|
309 |
-
[
|
310 |
-
"""
|
311 |
-
He Hua (Hua Hua) Director
|
312 |
-
hehua@chengdu.com
|
313 |
-
+86-28-83505513
|
314 |
-
|
315 |
-
Alternative Address Format:
|
316 |
-
Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
|
317 |
-
"""
|
318 |
-
]
|
319 |
-
],
|
320 |
-
[in_verbatim]
|
321 |
-
)
|
322 |
-
btn_recommend = gr.Button("Mask PII")
|
323 |
-
btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
|
324 |
-
gr.Markdown("""
|
325 |
-
Benefits of Entity Removal
|
326 |
-
==================
|
327 |
-
- Data Privacy & Compliance
|
328 |
-
Ensures sensitive information (names, emails, phone numbers, etc.) is anonymized to comply with GDPR, HIPAA, or other regulations.
|
329 |
-
|
330 |
-
- Improved Data Quality
|
331 |
-
Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and more usable for modeling or analysis.
|
332 |
-
|
333 |
-
- Enhanced Focus for NLP Models
|
334 |
-
Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
|
335 |
-
""")
|
336 |
-
|
337 |
with gr.Tab("Segmentation"):
|
338 |
gr.Markdown("""
|
339 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
@@ -575,18 +539,64 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
|
|
575 |
|
576 |
with gr.Tab("Eval"):
|
577 |
gr.Markdown("""
|
578 |
-
🏦
|
579 |
====================
|
580 |
What happens when your generative AI exposes customer data before you even launch?
|
581 |
|
582 |
-
|
583 |
|
584 |
-
|
|
|
|
|
|
|
585 |
|
586 |
-
|
587 |
|
588 |
-
|
589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
""")
|
591 |
|
592 |
demo.launch(allowed_paths=["."])
|
|
|
8 |
from pii import derisk
|
9 |
from rag import rbc_product
|
10 |
from tool import rival_product
|
11 |
+
import os
|
12 |
+
os.system("pip freeze | tee requirements_log.txt")
|
13 |
|
14 |
# Define the Google Analytics script
|
15 |
head = """
|
|
|
298 |
Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
|
299 |
""")
|
300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
with gr.Tab("Segmentation"):
|
302 |
gr.Markdown("""
|
303 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
|
|
539 |
|
540 |
with gr.Tab("Eval"):
|
541 |
gr.Markdown("""
|
542 |
+
🏦 LLM Eval for Application Security in Personal Banking
|
543 |
====================
|
544 |
What happens when your generative AI exposes customer data before you even launch?
|
545 |
|
546 |
+
The push toward personalized digital experiences is powered by generative AI. But with that innovation comes a hidden risk: insecure prompt logic, hallucinated outputs, and untested model behavior can expose sensitive financial information—undermining customer trust and inviting regulatory scrutiny.
|
547 |
|
548 |
+
### 🔒 LLM evaluations aren’t just a technical safeguard—they’re a business enabler. By proactively identifying vulnerabilities in AI-generated content and application logic, LLM evals help teams launch secure, compliant personal banking apps.
|
549 |
+
|
550 |
+
#### ✅ Outcome: At one Tier-1 Canadian bank, I built a cross-functional model risk pipeline that automated LLM testing—catching data leaks and unsafe outputs before deployment. The result: fewer post-launch incidents, smoother audits, and accelerated go-to-market timelines.
|
551 |
+
https://postimg.cc/3WtG4ZK2
|
552 |
|
553 |
+
### Whether the goal is to reduce fraud, streamline compliance reviews, or optimize AI-generated product recommendations—LLM evals give business, risk, and engineering teams a shared lens on safety, explainability, and performance.
|
554 |
|
555 |
+
How confident are you that your generative AI app won’t hallucinate its way into a privacy breach?
|
556 |
+
""")
|
557 |
+
|
558 |
+
with gr.Tab("PII Guardrail"):
|
559 |
+
gr.Markdown("""
|
560 |
+
Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
|
561 |
+
================================================
|
562 |
+
""")
|
563 |
+
in_verbatim = gr.Textbox(label="Peronal Info")
|
564 |
+
out_product = gr.Textbox(label="PII")
|
565 |
+
|
566 |
+
gr.Examples(
|
567 |
+
[
|
568 |
+
[
|
569 |
+
"""
|
570 |
+
He Hua (Hua Hua) Director
|
571 |
+
hehua@chengdu.com
|
572 |
+
+86-28-83505513
|
573 |
+
|
574 |
+
Alternative Address Format:
|
575 |
+
Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
|
576 |
+
"""
|
577 |
+
]
|
578 |
+
],
|
579 |
+
[in_verbatim]
|
580 |
+
)
|
581 |
+
btn_recommend = gr.Button("Mask PII")
|
582 |
+
btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
|
583 |
+
gr.Markdown("""
|
584 |
+
"Is your personal banking AI trained on customer conversations—or customer identities?"
|
585 |
+
===========
|
586 |
+
At RBC, generative AI is transforming personal banking—from real-time support to automated financial advice. But the same customer data powering these insights can expose the bank to regulatory violations, data breaches, and biased models—especially when names, emails, and phone numbers slip through into training or inference pipelines.
|
587 |
+
|
588 |
+
### 🔍 Entity removal solves this by automatically detecting and anonymizing personal identifiers before data is used for training, fine-tuning, or analysis.
|
589 |
+
|
590 |
+
#### ✅ Outcome: In one internal audit scenario, a Tier-1 bank uncovered PII exposure in over 12% of its chatbot training data. By integrating entity removal, the bank reduced privacy risk, aligned with GDPR and CPPA requirements, and increased the usable volume of safe, structured text by 35%.
|
591 |
+
|
592 |
+
### The benefits go beyond compliance:
|
593 |
+
- Reduce financial crime by safely leveraging more chat logs and support tickets to train fraud detection systems without compromising privacy.
|
594 |
+
|
595 |
+
- Optimize internal processes by focusing NLP models on customer intent rather than noisy identifiers.
|
596 |
+
|
597 |
+
- Boost revenue by extracting clean, actionable signals from anonymized product inquiries and complaints—fueling more accurate product recommendations.
|
598 |
+
|
599 |
+
Are your AI systems learning from customer insights—or leaking customer identities in the process?
|
600 |
""")
|
601 |
|
602 |
demo.launch(allowed_paths=["."])
|
pii.py
CHANGED
@@ -2,8 +2,10 @@ import os
|
|
2 |
|
3 |
import instructor
|
4 |
from pydantic import BaseModel
|
5 |
-
|
|
|
6 |
"""
|
|
|
7 |
client = instructor.from_openai(
|
8 |
OpenAI(
|
9 |
base_url="http://localhost:11434/v1",
|
@@ -44,6 +46,13 @@ class PIIExtraction(BaseModel):
|
|
44 |
for i, data in enumerate(self.private_data):
|
45 |
content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
return content
|
48 |
|
49 |
|
@@ -61,7 +70,7 @@ def derisk(content) -> PIIExtraction:
|
|
61 |
"role": "user",
|
62 |
"content": {content},
|
63 |
}
|
64 |
-
]).model_dump_json(indent=2)
|
65 |
|
66 |
|
67 |
if __name__ == '__main__':
|
|
|
2 |
|
3 |
import instructor
|
4 |
from pydantic import BaseModel
|
5 |
+
from presidio_analyzer import AnalyzerEngine
|
6 |
+
from presidio_anonymizer import AnonymizerEngine
|
7 |
"""
|
8 |
+
from openai import OpenAI
|
9 |
client = instructor.from_openai(
|
10 |
OpenAI(
|
11 |
base_url="http://localhost:11434/v1",
|
|
|
46 |
for i, data in enumerate(self.private_data):
|
47 |
content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
|
48 |
|
49 |
+
presidio_analyzer = AnalyzerEngine()
|
50 |
+
presidio_anonymizer = AnonymizerEngine()
|
51 |
+
analysis = presidio_analyzer.analyze(content, language='en',
|
52 |
+
entities=["PERSON", "PHONE_NUMBER"])
|
53 |
+
|
54 |
+
if [entity.entity_type for entity in analysis]:
|
55 |
+
content = "GUARDRAILED: " + presidio_anonymizer.anonymize(text=content, analyzer_results=analysis).text
|
56 |
return content
|
57 |
|
58 |
|
|
|
70 |
"role": "user",
|
71 |
"content": {content},
|
72 |
}
|
73 |
+
]).sanitize(content) #.model_dump_json(indent=2)
|
74 |
|
75 |
|
76 |
if __name__ == '__main__':
|
requirements.txt
CHANGED
@@ -35,4 +35,10 @@ pydantic
|
|
35 |
instructor[groq]==1.7.9
|
36 |
|
37 |
#LLM evals
|
38 |
-
#inspect_ai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
instructor[groq]==1.7.9
|
36 |
|
37 |
#LLM evals
|
38 |
+
#inspect_ai
|
39 |
+
|
40 |
+
#PII
|
41 |
+
presidio_analyzer
|
42 |
+
presidio_anonymizer
|
43 |
+
spacy
|
44 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl
|