# PyRIT_demo / app.py
# faizaaska517's picture
# Create app.py
# b831d3f verified
# raw
# history blame
# 961 Bytes
import gradio as gr
from pyrit.core.pyrit import PyRIT
from pyrit.core.config import LLMProvider
from pyrit.llm_providers.huggingface_local import HuggingFaceLocal
# Model backend: a local Hugging Face model wrapped by HuggingFaceLocal.
# Edit `model_name` to point the demo at a different checkpoint.
provider = HuggingFaceLocal(
    model_name="HuggingFaceH4/zephyr-7b-beta",  # Change model here
    max_tokens=512,
)

# Single module-level PyRIT instance; attack_prompt() calls its run() method.
pyrit = PyRIT(provider=provider)
def attack_prompt(prompt: str) -> str:
    """Run the jailbreak attack on *prompt* and format the outcome for display.

    Args:
        prompt: Text submitted through the Gradio input textbox.

    Returns:
        A display string. On a completed run this is the value returned by
        ``pyrit.run`` under a success banner; if the prompt is blank or the
        run raises, it is an ``❌ Error: ...`` message instead.
    """
    # Reject blank input up front so no model call is made for an empty prompt.
    # Non-string values are passed through untouched, as before.
    if prompt is None or (isinstance(prompt, str) and not prompt.strip()):
        return "❌ Error: prompt is empty"

    try:
        result = pyrit.run(prompt, attack="jailbreak", max_tokens=512)
        # NOTE(review): this banner is shown whenever run() returns without
        # raising; whether `result` actually indicates a successful jailbreak
        # is not checked here -- confirm against the PyRIT return value.
        return f"✅ Successful Attack:\n{result}"
    except Exception as e:
        # UI boundary: show the failure in the output box instead of letting
        # the exception propagate into Gradio.
        return f"❌ Error: {e}"
# Input and output widgets for the single-function UI.
prompt_input = gr.Textbox(
    label="Prompt to Attack",
    placeholder="Enter a benign-looking prompt...",
)
result_output = gr.Textbox(label="Attack Result")

# Wire attack_prompt() to the widgets and start the Gradio app.
demo = gr.Interface(
    fn=attack_prompt,
    inputs=prompt_input,
    outputs=result_output,
    title="🧪 PyRIT - Red Teaming Hugging Face LLMs",
    description="This tool uses PyRIT to test Hugging Face models for jailbreak-style adversarial prompts.",
)
demo.launch()