llm evals
Browse files
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
-
from tool import rival_product
|
4 |
-
from graphrag import marketingPlan
|
5 |
-
from knowledge import graph
|
6 |
-
from pii import derisk
|
7 |
from classify import judge
|
8 |
from entity import resolve
|
|
|
9 |
from human import email, feedback
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Define the Google Analytics script
|
12 |
head = """
|
@@ -94,11 +95,12 @@ Other Links:
|
|
94 |
|
95 |
gr.Examples(
|
96 |
[
|
97 |
-
[
|
|
|
98 |
],
|
99 |
[in_verbatim]
|
100 |
)
|
101 |
-
btn_recommend=gr.Button("Recommend")
|
102 |
btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)
|
103 |
|
104 |
gr.Markdown("""
|
@@ -254,7 +256,7 @@ Representative: "Confirmed. Your next payment of $200 will process May 1st. A co
|
|
254 |
|
255 |
Customer: "No, thank you."
|
256 |
"""
|
257 |
-
|
258 |
],
|
259 |
[in_verbatim]
|
260 |
)
|
@@ -262,7 +264,6 @@ Customer: "No, thank you."
|
|
262 |
btn_clear = gr.ClearButton(components=[out_product])
|
263 |
btn_recommend.click(fn=graph, inputs=[in_verbatim, out_product], outputs=out_product)
|
264 |
|
265 |
-
|
266 |
gr.Markdown("""
|
267 |
Example of Customer Profile in Graph
|
268 |
=================
|
@@ -306,15 +307,15 @@ Once created, knowledge graphs can be repurposed across multiple use cases (e.g.
|
|
306 |
gr.Examples(
|
307 |
[
|
308 |
[
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
],
|
319 |
[in_verbatim]
|
320 |
)
|
@@ -333,7 +334,6 @@ Removes noise (e.g., irrelevant names or addresses) to make datasets cleaner and
|
|
333 |
Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
|
334 |
""")
|
335 |
|
336 |
-
|
337 |
with gr.Tab("Segmentation"):
|
338 |
gr.Markdown("""
|
339 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
@@ -353,14 +353,14 @@ Allows downstream tasks (like sentiment analysis or topic modeling) to focus on
|
|
353 |
gr.Examples(
|
354 |
[
|
355 |
[
|
356 |
-
|
357 |
-
"The online portal makes managing my mortgage payments so convenient.";
|
358 |
-
"RBC offer great mortgage for my home with competitive rate thank you";
|
359 |
-
"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
|
360 |
-
"The mobile check deposit feature saves me so much time. Banking made easy!";
|
361 |
-
"Affordable premiums with great coverage. Switched from my old provider and saved!"
|
362 |
-
|
363 |
-
|
364 |
],
|
365 |
[in_verbatim]
|
366 |
)
|
@@ -444,7 +444,7 @@ Customer: "No, thank you."
|
|
444 |
],
|
445 |
[in_verbatim]
|
446 |
)
|
447 |
-
btn_recommend=gr.Button("Resolve")
|
448 |
btn_recommend.click(fn=resolve, inputs=in_verbatim, outputs=out_product)
|
449 |
|
450 |
gr.Markdown("""
|
@@ -483,7 +483,9 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
|
|
483 |
|
484 |
gr.Examples(
|
485 |
[
|
486 |
-
[
|
|
|
|
|
487 |
["my credit card limit is too low, I need a card with bigger limit and low fee", "CARD"]
|
488 |
],
|
489 |
[in_verbatim, in_campaign]
|
@@ -541,50 +543,50 @@ For example, Comcast reduced repeat service calls by 17% after deploying entity
|
|
541 |
btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)
|
542 |
|
543 |
gr.Markdown("""
|
544 |
-
Companies pour millions into product catalogs, marketing funnels, and user acquisition—yet many still face the same challenge:
|
545 |
-
==================
|
546 |
-
### 📉 Pain points:
|
547 |
-
- High bounce rates and low conversion despite heavy traffic
|
548 |
-
- Customers struggle to find relevant products on their own
|
549 |
-
- One-size-fits-all promotions result in wasted ad spend and poor ROI
|
550 |
-
|
551 |
-
### 🧩 The real question:
|
552 |
-
What if your product catalog could *adapt itself* to each user in real time—just like your best salesperson would?
|
553 |
-
|
554 |
-
### 🎯 The customer need:
|
555 |
-
Businesses need a way to dynamically personalize product discovery, so every customer sees the most relevant items—without manually configuring hundreds of rules.
|
556 |
-
|
557 |
-
## ✅ Enter: Product Recommender Systems
|
558 |
-
|
559 |
-
By analyzing behavioral data, preferences, and historical purchases, a recommender engine surfaces what each user is most likely to want—boosting engagement and revenue.
|
560 |
-
|
561 |
-
### 📌 Real-world use cases:
|
562 |
-
- **Amazon** attributes up to 35% of its revenue to its recommender system, which tailors the home page, emails, and checkout cross-sells per user.
|
563 |
-
- **Netflix** leverages personalized content recommendations to reduce churn and increase watch time—saving the company over $1B annually in retention value.
|
564 |
-
- **Stitch Fix** uses machine learning-powered recommendations to curate clothing boxes tailored to individual style profiles—scaling personal styling.
|
565 |
-
|
566 |
-
### 💡 Business benefits:
|
567 |
-
- Higher conversion rates through relevant discovery
|
568 |
-
- Increased average order value (AOV) via cross-sell and upsell
|
569 |
-
- Improved retention and lower customer acquisition cost (CAC)
|
570 |
-
|
571 |
-
If your product discovery experience isn’t working as hard as your marketing budget, it’s time to make your catalog intelligent—with recommendations that convert.
|
572 |
""")
|
573 |
|
574 |
-
with gr.Tab("
|
575 |
gr.Markdown("""
|
576 |
-
🏦 LLMs for Application Security in Personal Banking
|
577 |
-
====================
|
578 |
-
What happens when your generative AI exposes customer data before you even launch?
|
579 |
-
|
580 |
-
LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
|
581 |
-
|
582 |
-
Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
|
583 |
-
|
584 |
-
I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
|
585 |
-
|
586 |
-
Using open-source frameworks, I identify flaws in LLM prompts and translate risks into explainable insights for business, risk, and engineering stakeholders.
|
587 |
-
https://postimg.cc/3WtG4ZK2
|
588 |
""")
|
589 |
|
590 |
-
demo.launch(allowed_paths=["."])
|
|
|
1 |
import gradio as gr
|
2 |
+
|
|
|
|
|
|
|
|
|
3 |
from classify import judge
|
4 |
from entity import resolve
|
5 |
+
from graphrag import marketingPlan
|
6 |
from human import email, feedback
|
7 |
+
from knowledge import graph
|
8 |
+
from pii import derisk
|
9 |
+
from rag import rbc_product
|
10 |
+
from tool import rival_product
|
11 |
|
12 |
# Define the Google Analytics script
|
13 |
head = """
|
|
|
95 |
|
96 |
gr.Examples(
|
97 |
[
|
98 |
+
[
|
99 |
+
"Low APR and great customer service. I would highly recommend if you’re looking for a great credit card company and looking to rebuild your credit. I have had my credit limit increased annually and the annual fee is very low."]
|
100 |
],
|
101 |
[in_verbatim]
|
102 |
)
|
103 |
+
btn_recommend = gr.Button("Recommend")
|
104 |
btn_recommend.click(fn=rival_product, inputs=in_verbatim, outputs=out_product)
|
105 |
|
106 |
gr.Markdown("""
|
|
|
256 |
|
257 |
Customer: "No, thank you."
|
258 |
"""
|
259 |
+
]
|
260 |
],
|
261 |
[in_verbatim]
|
262 |
)
|
|
|
264 |
btn_clear = gr.ClearButton(components=[out_product])
|
265 |
btn_recommend.click(fn=graph, inputs=[in_verbatim, out_product], outputs=out_product)
|
266 |
|
|
|
267 |
gr.Markdown("""
|
268 |
Example of Customer Profile in Graph
|
269 |
=================
|
|
|
307 |
gr.Examples(
|
308 |
[
|
309 |
[
|
310 |
+
"""
|
311 |
+
He Hua (Hua Hua) Director
|
312 |
+
hehua@chengdu.com
|
313 |
+
+86-28-83505513
|
314 |
+
|
315 |
+
Alternative Address Format:
|
316 |
+
Xiongmao Ave West Section, Jinniu District (listed in some records as 610016 postcode)
|
317 |
+
"""
|
318 |
+
]
|
319 |
],
|
320 |
[in_verbatim]
|
321 |
)
|
|
|
334 |
Allows downstream tasks (like sentiment analysis or topic modeling) to focus on content rather than personal identifiers.
|
335 |
""")
|
336 |
|
|
|
337 |
with gr.Tab("Segmentation"):
|
338 |
gr.Markdown("""
|
339 |
Objective: Streamline Customer Insights: Auto-Classify Feedback for Product Optimization
|
|
|
353 |
gr.Examples(
|
354 |
[
|
355 |
[
|
356 |
+
"""
|
357 |
+
"The online portal makes managing my mortgage payments so convenient.";
|
358 |
+
"RBC offer great mortgage for my home with competitive rate thank you";
|
359 |
+
"Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
|
360 |
+
"The mobile check deposit feature saves me so much time. Banking made easy!";
|
361 |
+
"Affordable premiums with great coverage. Switched from my old provider and saved!"
|
362 |
+
"""
|
363 |
+
]
|
364 |
],
|
365 |
[in_verbatim]
|
366 |
)
|
|
|
444 |
],
|
445 |
[in_verbatim]
|
446 |
)
|
447 |
+
btn_recommend = gr.Button("Resolve")
|
448 |
btn_recommend.click(fn=resolve, inputs=in_verbatim, outputs=out_product)
|
449 |
|
450 |
gr.Markdown("""
|
|
|
483 |
|
484 |
gr.Examples(
|
485 |
[
|
486 |
+
[
|
487 |
+
"""My mortgage was assumed by Bank of America when Countrywide mortgages ceased to do business. My mortgage increased without any explanation. When I inquired, they stumbled and gave me the run around. I’d NEVER do business with Bank of America again""",
|
488 |
+
"MORT"],
|
489 |
["my credit card limit is too low, I need a card with bigger limit and low fee", "CARD"]
|
490 |
],
|
491 |
[in_verbatim, in_campaign]
|
|
|
543 |
btn_recommend.click(fn=rbc_product, inputs=in_verbatim, outputs=out_product)
|
544 |
|
545 |
gr.Markdown("""
|
546 |
+
Companies pour millions into product catalogs, marketing funnels, and user acquisition—yet many still face the same challenge:
|
547 |
+
==================
|
548 |
+
### 📉 Pain points:
|
549 |
+
- High bounce rates and low conversion despite heavy traffic
|
550 |
+
- Customers struggle to find relevant products on their own
|
551 |
+
- One-size-fits-all promotions result in wasted ad spend and poor ROI
|
552 |
+
|
553 |
+
### 🧩 The real question:
|
554 |
+
What if your product catalog could *adapt itself* to each user in real time—just like your best salesperson would?
|
555 |
+
|
556 |
+
### 🎯 The customer need:
|
557 |
+
Businesses need a way to dynamically personalize product discovery, so every customer sees the most relevant items—without manually configuring hundreds of rules.
|
558 |
+
|
559 |
+
## ✅ Enter: Product Recommender Systems
|
560 |
+
|
561 |
+
By analyzing behavioral data, preferences, and historical purchases, a recommender engine surfaces what each user is most likely to want—boosting engagement and revenue.
|
562 |
+
|
563 |
+
### 📌 Real-world use cases:
|
564 |
+
- **Amazon** attributes up to 35% of its revenue to its recommender system, which tailors the home page, emails, and checkout cross-sells per user.
|
565 |
+
- **Netflix** leverages personalized content recommendations to reduce churn and increase watch time—saving the company over $1B annually in retention value.
|
566 |
+
- **Stitch Fix** uses machine learning-powered recommendations to curate clothing boxes tailored to individual style profiles—scaling personal styling.
|
567 |
+
|
568 |
+
### 💡 Business benefits:
|
569 |
+
- Higher conversion rates through relevant discovery
|
570 |
+
- Increased average order value (AOV) via cross-sell and upsell
|
571 |
+
- Improved retention and lower customer acquisition cost (CAC)
|
572 |
+
|
573 |
+
If your product discovery experience isn’t working as hard as your marketing budget, it’s time to make your catalog intelligent—with recommendations that convert.
|
574 |
""")
|
575 |
|
576 |
+
with gr.Tab("Eval"):
|
577 |
gr.Markdown("""
|
578 |
+
🏦 LLMs for Application Security in Personal Banking
|
579 |
+
====================
|
580 |
+
What happens when your generative AI exposes customer data before you even launch?
|
581 |
+
|
582 |
+
LLM evals reduce security risks in generative AI banking apps by identifying vulnerabilities and guiding secure fixes.
|
583 |
+
|
584 |
+
Personal banking apps increasingly rely on generative AI—but insecure logic and hallucinations expose sensitive customer data. LLM evals help assess code and AI-generated responses for correctness, task completion, hallucination risk, and safety—enabling proactive guardrails against vulnerabilities before deployment.
|
585 |
+
|
586 |
+
I’ve led cross-functional model risk initiatives, building pipelines that transform LLM evaluations into automated alerts and remediation workflows—strengthening regulatory compliance and protecting customer trust.
|
587 |
+
|
588 |
+
Using open-source frameworks, I identify flaws in LLM prompts and translate risks into explainable insights for business, risk, and engineering stakeholders.
|
589 |
+
https://postimg.cc/3WtG4ZK2
|
590 |
""")
|
591 |
|
592 |
+
demo.launch(allowed_paths=["."])
|