kevinhug committed on
Commit
b06e07d
·
1 Parent(s): f3b1317

ai validation

Browse files
Files changed (3) hide show
  1. app.py +54 -44
  2. pii.py +11 -2
  3. requirements.txt +7 -1
app.py CHANGED
@@ -8,6 +8,8 @@ from knowledge import graph
8
  from pii import derisk
9
  from rag import rbc_product
10
  from tool import rival_product
 
 
11
 
12
  # Define the Google Analytics script
13
  head = """
@@ -296,44 +298,6 @@ Surfaces hidden patterns and relationships for better analytics and insights.
296
  Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
297
  """)
298
 
299
- with gr.Tab("PII Audit"):
300
- gr.Markdown("""
301
- Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
302
- ================================================
303
- """)
304
- in_verbatim = gr.Textbox(label="Peronal Info")
305
- out_product = gr.Textbox(label="PII")
306
-
307
- gr.Examples(
308
- [
309
- [
310
- """
311
- He Hua (Hua Hua) Director
312
- hehua@chengdu.com
313
- +86-28-83505513
314
-
315
- Alternative Address Format:
316
- Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
317
- """
318
- ]
319
- ],
320
- [in_verbatim]
321
- )
322
- btn_recommend = gr.Button("Mask PII")
323
- btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
324
- gr.Markdown("""
325
- Benefits of Entity Removal
326
- ==================
327
- - Data Privacy & Compliance
328
- Ensures sensitive information (names, emails, phone numbers, etc.) is anonymized to comply with GDPR, HIPAA, or other regulations.
329
-
330
- - Improved Data Quality
331
- Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and more usable for modeling or analysis.
332
-
333
- - Enhanced Focus for NLP Models
334
- Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
335
- """)
336
-
337
  with gr.Tab("Segmentation"):
338
  gr.Markdown("""
339
  Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
@@ -575,18 +539,64 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
575
 
576
  with gr.Tab("Eval"):
577
  gr.Markdown("""
578
- 🏦 LLMs for Application Security in Personal Banking
579
  ====================
580
  What happens when your generative AI exposes customer data before you even launch?
581
 
582
- LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
583
 
584
- Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
 
 
 
585
 
586
- I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
587
 
588
- Using open-source frameworks, I identify flaws in LLM prompt and translate risks into explainable insights for business, risk, and engineering stakeholders.
589
- https://postimg.cc/3WtG4ZK2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  """)
591
 
592
  demo.launch(allowed_paths=["."])
 
8
  from pii import derisk
9
  from rag import rbc_product
10
  from tool import rival_product
11
+ import os
12
+ os.system("pip freeze | tee requirements_log.txt")
13
 
14
  # Define the Google Analytics script
15
  head = """
 
298
  Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
299
  """)
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  with gr.Tab("Segmentation"):
302
  gr.Markdown("""
303
  Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
 
539
 
540
  with gr.Tab("Eval"):
541
  gr.Markdown("""
542
+ 🏦 LLM Eval for Application Security in Personal Banking
543
  ====================
544
  What happens when your generative AI exposes customer data before you even launch?
545
 
546
+ The push toward personalized digital experiences is powered by generative AI. But with that innovation comes a hidden risk: insecure prompt logic, hallucinated outputs, and untested model behavior can expose sensitive financial information—undermining customer trust and inviting regulatory scrutiny.
547
 
548
+ ### 🔒 LLM evaluations aren’t just a technical safeguard—they're a business enabler. By proactively identifying vulnerabilities in AI-generated content and application logic, LLM evals help teams launch secure, compliant personal banking apps.
549
+
550
+ #### ✅ Outcome: At one Tier-1 Canadian bank, I built a cross-functional model risk pipeline that automated LLM testing—catching data leaks and unsafe outputs before deployment. The result: fewer post-launch incidents, smoother audits, and accelerated go-to-market timelines.
551
+ https://postimg.cc/3WtG4ZK2
552
 
553
+ ### Whether the goal is to reduce fraud, streamline compliance reviews, or optimize AI-generated product recommendations—LLM evals give business, risk, and engineering teams a shared lens on safety, explainability, and performance.
554
 
555
+ How confident are you that your generative AI app won’t hallucinate its way into a privacy breach?
556
+ """)
557
+
558
+ with gr.Tab("PII Guardrail"):
559
+ gr.Markdown("""
560
+ Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
561
+ ================================================
562
+ """)
563
+ in_verbatim = gr.Textbox(label="Peronal Info")
564
+ out_product = gr.Textbox(label="PII")
565
+
566
+ gr.Examples(
567
+ [
568
+ [
569
+ """
570
+ He Hua (Hua Hua) Director
571
+ hehua@chengdu.com
572
+ +86-28-83505513
573
+
574
+ Alternative Address Format:
575
+ Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
576
+ """
577
+ ]
578
+ ],
579
+ [in_verbatim]
580
+ )
581
+ btn_recommend = gr.Button("Mask PII")
582
+ btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
583
+ gr.Markdown("""
584
+ "Is your personal banking AI trained on customer conversations—or customer identities?"
585
+ ===========
586
+ At RBC, generative AI is transforming personal banking—from real-time support to automated financial advice. But the same customer data powering these insights can expose the bank to regulatory violations, data breaches, and biased models—especially when names, emails, and phone numbers slip through into training or inference pipelines.
587
+
588
+ ### 🔍 Entity removal solves this by automatically detecting and anonymizing personal identifiers before data is used for training, fine-tuning, or analysis.
589
+
590
+ #### ✅ Outcome: In one internal audit scenario, a Tier-1 bank uncovered PII exposure in over 12% of its chatbot training data. By integrating entity removal, the bank reduced privacy risk, aligned with GDPR and CPPA compliance—and increased the usable volume of safe, structured text by 35%.
591
+
592
+ ### The benefits go beyond compliance:
593
+ - Reduce financial crime by safely leveraging more chat logs and support tickets to train fraud detection systems without compromising privacy.
594
+
595
+ - Optimize internal processes by focusing NLP models on customer intent rather than noisy identifiers.
596
+
597
+ - Boost revenue by extracting clean, actionable signals from anonymized product inquiries and complaints—fueling more accurate product recommendations.
598
+
599
+ Are your AI systems learning from customer insights—or leaking customer identities in the process?
600
  """)
601
 
602
  demo.launch(allowed_paths=["."])
pii.py CHANGED
@@ -2,8 +2,10 @@ import os
2
 
3
  import instructor
4
  from pydantic import BaseModel
5
-
 
6
  """
 
7
  client = instructor.from_openai(
8
  OpenAI(
9
  base_url="http://localhost:11434/v1",
@@ -44,6 +46,13 @@ class PIIExtraction(BaseModel):
44
  for i, data in enumerate(self.private_data):
45
  content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
46
 
 
 
 
 
 
 
 
47
  return content
48
 
49
 
@@ -61,7 +70,7 @@ def derisk(content) -> PIIExtraction:
61
  "role": "user",
62
  "content": {content},
63
  }
64
- ]).model_dump_json(indent=2)
65
 
66
 
67
  if __name__ == '__main__':
 
2
 
3
  import instructor
4
  from pydantic import BaseModel
5
+ from presidio_analyzer import AnalyzerEngine
6
+ from presidio_anonymizer import AnonymizerEngine
7
  """
8
+ from openai import OpenAI
9
  client = instructor.from_openai(
10
  OpenAI(
11
  base_url="http://localhost:11434/v1",
 
46
  for i, data in enumerate(self.private_data):
47
  content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
48
 
49
+ presidio_analyzer = AnalyzerEngine()
50
+ presidio_anonymizer = AnonymizerEngine()
51
+ analysis = presidio_analyzer.analyze(content, language='en',
52
+ entities=["PERSON", "PHONE_NUMBER"])
53
+
54
+ if [entity.entity_type for entity in analysis]:
55
+ content = "GUARDRAILED: " + presidio_anonymizer.anonymize(text=content, analyzer_results=analysis).text
56
  return content
57
 
58
 
 
70
  "role": "user",
71
  "content": {content},
72
  }
73
+ ]).sanitize(content) #.model_dump_json(indent=2)
74
 
75
 
76
  if __name__ == '__main__':
requirements.txt CHANGED
@@ -35,4 +35,10 @@ pydantic
35
  instructor[groq]==1.7.9
36
 
37
  #LLM evals
38
- #inspect_ai
 
 
 
 
 
 
 
35
  instructor[groq]==1.7.9
36
 
37
  #LLM evals
38
+ #inspect_ai
39
+
40
+ #PII
41
+ presidio_analyzer
42
+ presidio_anonymizer
43
+ spacy
44
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl