Spaces:

kevinhug
/

ai

Running

App Files Community

kevinhug committed 10 days ago

Commit

b06e07d

1 Parent(s): f3b1317

ai validation

Browse files

Files changed (3) hide show

app.py +54 -44
pii.py +11 -2
requirements.txt +7 -1

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from knowledge import graph
 from pii import derisk
 from rag import rbc_product
 from tool import rival_product
 # Define the Google Analytics script
 head = """
@@ -296,44 +298,6 @@ Surfaces hidden patterns and relationships for better analytics and insights.
 Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
           """)
-  with gr.Tab("PII Audit"):
-    gr.Markdown("""
-    Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
-    ================================================
-    """)
-    in_verbatim = gr.Textbox(label="Peronal Info")
-    out_product = gr.Textbox(label="PII")
-    gr.Examples(
-      [
-        [
-          """
-          He Hua (Hua Hua) Director
-          hehua@chengdu.com
-          +86-28-83505513
-          Alternative Address Format:
-          Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
-          """
-        ]
-      ],
-      [in_verbatim]
-    )
-    btn_recommend = gr.Button("Mask PII")
-    btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
-    gr.Markdown("""
-Benefits of Entity Removal
-==================
-- Data Privacy & Compliance
-Ensures sensitive information (names, emails, phone numbers, etc.) is anonymized to comply with GDPR, HIPAA, or other regulations.
-- Improved Data Quality
-Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and more usable for modeling or analysis.
-- Enhanced Focus for NLP Models
-Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
-    """)
   with gr.Tab("Segmentation"):
     gr.Markdown("""
     Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
@@ -575,18 +539,64 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
   with gr.Tab("Eval"):
     gr.Markdown("""
-    🏦 LLMs for Application Security in Personal Banking
     ====================
     What happens when your generative AI exposes customer data before you even launch?
-    LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
-    Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
-    I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
-    Using open-source frameworks, I identify flaws in LLM prompt and translate risks into explainable insights for business, risk, and engineering stakeholders.
-    https://postimg.cc/3WtG4ZK2
     """)
 demo.launch(allowed_paths=["."])

 from pii import derisk
 from rag import rbc_product
 from tool import rival_product
+import os
+os.system("pip freeze | tee requirements_log.txt")
 # Define the Google Analytics script
 head = """
 Once created, knowledge graphs can be repurposed across multiple use cases (e.g., search, recommendation, fraud detection).
           """)
   with gr.Tab("Segmentation"):
     gr.Markdown("""
     Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
   with gr.Tab("Eval"):
     gr.Markdown("""
+    🏦 LLM Eval for Application Security in Personal Banking
     ====================
     What happens when your generative AI exposes customer data before you even launch?
+    The push toward personalized digital experiences is powered by generative AI. But with that innovation comes a hidden risk: insecure prompt logic, hallucinated outputs, and untested model behavior can expose sensitive financial information—undermining customer trust and inviting regulatory scrutiny.
+    ### 🔒 LLM evaluations aren’t just a technical safeguard—they're a business enabler. By proactively identifying vulnerabilities in AI-generated content and application logic, LLM evals help teams launch secure, compliant personal banking apps.
+    #### ✅ Outcome: At one Tier-1 Canadian bank, I built a cross-functional model risk pipeline that automated LLM testing—catching data leaks and unsafe outputs before deployment. The result: fewer post-launch incidents, smoother audits, and accelerated go-to-market timelines.
+      https://postimg.cc/3WtG4ZK2
+    ### Whether the goal is to reduce fraud, streamline compliance reviews, or optimize AI-generated product recommendations—LLM evals give business, risk, and engineering teams a shared lens on safety, explainability, and performance.
+    How confident are you that your generative AI app won’t hallucinate its way into a privacy breach?
+    """)
+  with gr.Tab("PII Guardrail"):
+    gr.Markdown("""
+    Objective: Automated PII Data Removal: Proactive Compliance & Risk Mitigation
+    ================================================
+    """)
+    in_verbatim = gr.Textbox(label="Peronal Info")
+    out_product = gr.Textbox(label="PII")
+    gr.Examples(
+      [
+        [
+          """
+          He Hua (Hua Hua) Director
+          hehua@chengdu.com
+          +86-28-83505513
+          Alternative Address Format:
+          Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
+          """
+        ]
+      ],
+      [in_verbatim]
+    )
+    btn_recommend = gr.Button("Mask PII")
+    btn_recommend.click(fn=derisk, inputs=in_verbatim, outputs=out_product)
+    gr.Markdown("""
+"Is your personal banking AI trained on customer conversations—or customer identities?"
+===========
+At RBC, generative AI is transforming personal banking—from real-time support to automated financial advice. But the same customer data powering these insights can expose the bank to regulatory violations, data breaches, and biased models—especially when names, emails, and phone numbers slip through into training or inference pipelines.
+### 🔍 Entity removal solves this by automatically detecting and anonymizing personal identifiers before data is used for training, fine-tuning, or analysis.
+#### ✅ Outcome: In one internal audit scenario, a Tier-1 bank uncovered PII exposure in over 12% of its chatbot training data. By integrating entity removal, the bank reduced privacy risk, aligned with GDPR and CPPA compliance—and increased the usable volume of safe, structured text by 35%.
+### The benefits go beyond compliance:
+- Reduce financial crime by safely leveraging more chat logs and support tickets to train fraud detection systems without compromising privacy.
+- Optimize internal processes by focusing NLP models on customer intent rather than noisy identifiers.
+- Boost revenue by extracting clean, actionable signals from anonymized product inquiries and complaints—fueling more accurate product recommendations.
+Are your AI systems learning from customer insights—or leaking customer identities in the process?
     """)
 demo.launch(allowed_paths=["."])

pii.py CHANGED Viewed

@@ -2,8 +2,10 @@ import os
 import instructor
 from pydantic import BaseModel
 """
 client = instructor.from_openai(
     OpenAI(
         base_url="http://localhost:11434/v1",
@@ -44,6 +46,13 @@ class PIIExtraction(BaseModel):
     for i, data in enumerate(self.private_data):
       content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
     return content
@@ -61,7 +70,7 @@ def derisk(content) -> PIIExtraction:
         "role": "user",
         "content": {content},
       }
-    ]).model_dump_json(indent=2)
 if __name__ == '__main__':

 import instructor
 from pydantic import BaseModel
+from presidio_analyzer import AnalyzerEngine
+from presidio_anonymizer import AnonymizerEngine
 """
+from openai import OpenAI
 client = instructor.from_openai(
     OpenAI(
         base_url="http://localhost:11434/v1",
     for i, data in enumerate(self.private_data):
       content = content.replace(data.pii_value, f"<{data.data_type}_{i}>")
+    presidio_analyzer = AnalyzerEngine()
+    presidio_anonymizer = AnonymizerEngine()
+    analysis = presidio_analyzer.analyze(content, language='en',
+          entities=["PERSON", "PHONE_NUMBER"])
+    if [entity.entity_type for entity in analysis]:
+      content = "GUARDRAILED: " + presidio_anonymizer.anonymize(text=content, analyzer_results=analysis).text
     return content
         "role": "user",
         "content": {content},
       }
+    ]).sanitize(content) #.model_dump_json(indent=2)
 if __name__ == '__main__':

requirements.txt CHANGED Viewed

@@ -35,4 +35,10 @@ pydantic
 instructor[groq]==1.7.9
 #LLM evals
-#inspect_ai

 instructor[groq]==1.7.9
 #LLM evals
+#inspect_ai
+#PII
+presidio_analyzer
+presidio_anonymizer
+spacy
+https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl