VicidiLochi's picture
Add anonymization app 3
fdd8710
raw
history blame
2.92 kB
import ast
import json

import gradio as gr
from transformers import pipeline
# Build the text2text-generation pipeline once at import time; anonymize_text
# calls this module-level instance for every request.
# NOTE(review): facebook/bart-large looks like a base (not instruction-tuned)
# checkpoint — confirm it can actually follow the anonymization prompt.
MODEL_NAME = "facebook/bart-large"
generator = pipeline("text2text-generation", model=MODEL_NAME, tokenizer=MODEL_NAME)
# Function to construct the anonymization prompt
def construct_prompt(input_text):
    """Return the instruction prompt that wraps ``input_text`` for the model.

    The prompt lists the anonymization rules, shows the expected JSON layout
    (keys ``anonymized_text`` and ``replacements``) and ends with the input
    text enclosed in double quotes. The text is inserted verbatim; no
    escaping is applied.
    """
    prompt_lines = (
        "You are an advanced text anonymizer. Your task is to process the given text, identify all personal names, company names, addresses, and other identifying information, and replace them with pseudonyms or placeholders.",
        "Please ensure that:",
        '1. Personal names (e.g., "John Doe") are replaced with realistic pseudonyms (e.g., "Michael Brown").',
        '2. Company names (e.g., "SIA Partners") are replaced with generic placeholders (e.g., "CompanyPlaceholder1").',
        '3. Addresses (e.g., "14 rue Championnet") are replaced with "AddressPlaceholder".',
        '4. All other identifying information like registration numbers and codes are replaced with generic placeholders (e.g., "NumberPlaceholder").',
        "5. Any other part of the text should remain unchanged.",
        "The output should strictly follow this JSON format:",
        "{",
        '"anonymized_text": "The text with all identifying information replaced.",',
        '"replacements": {',
        '"Original1": "Replacement1",',
        '"Original2": "Replacement2"',
        "}",
        "}",
        "Here is the input text:",
        f'"{input_text}"',
    )
    # The first and last characters are never whitespace, so joining the lines
    # directly yields the same text as a stripped triple-quoted template.
    return "\n".join(prompt_lines)
# Function to process the input text and generate the anonymized output
def _parse_model_output(generated_text):
    """Decode the model's raw text into a Python object without executing it.

    Strict JSON is tried first because that is the format the prompt requests.
    ``ast.literal_eval`` is a fallback for Python-literal output (for example
    single-quoted keys), which the previous ``eval``-based parsing accepted.

    Returns:
        The decoded object, or ``None`` when neither parser accepts the text.
    """
    try:
        return json.loads(generated_text)
    except (ValueError, TypeError, RecursionError):
        pass
    try:
        return ast.literal_eval(generated_text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return None


def anonymize_text(input_text):
    """Run the model on ``input_text`` and return the anonymized result.

    Args:
        input_text: The raw text to anonymize.

    Returns:
        A ``(anonymized_text, name_mapping)`` tuple. When the model output
        cannot be parsed into a dictionary, the function falls back to
        ``(input_text, {})`` — i.e. the text is returned unchanged.
    """
    # Construct the instruction prompt
    prompt = construct_prompt(input_text)
    # Generate the output using the model
    response = generator(prompt, max_length=512, num_return_sequences=1)
    generated_text = response[0]['generated_text']

    # SECURITY: the generated text is derived from user-supplied input, so it
    # must never be passed to eval(); it is parsed as data only.
    result = _parse_model_output(generated_text)
    if not isinstance(result, dict):
        return input_text, {}

    anonymized_text = result.get("anonymized_text", input_text)
    if not isinstance(anonymized_text, str):
        anonymized_text = input_text

    # The prompt asks the model for a "replacements" key; "name_mapping" is
    # still honoured for outputs that use the older key name.
    name_mapping = result.get("replacements", result.get("name_mapping", {}))
    if not isinstance(name_mapping, dict):
        name_mapping = {}
    return anonymized_text, name_mapping
# Gradio interface
def process_text(input_text):
    """Click handler: pass the textbox content straight to ``anonymize_text``."""
    return anonymize_text(input_text)


with gr.Blocks() as demo:
    gr.Markdown("## Text Anonymizer")
    gr.Markdown(
        "Enter text containing personal names, and the model will anonymize it "
        "by replacing the names with pseudonyms. The app will also return a "
        "dictionary linking original names to their replacements."
    )
    # Components are created in display order: input, then the two outputs,
    # then the button that triggers processing.
    input_text = gr.Textbox(label="Input Text", placeholder="Enter text here...")
    anonymized_text = gr.Textbox(label="Anonymized Text", interactive=False)
    name_mapping = gr.JSON(label="Name Mapping")
    submit_button = gr.Button("Anonymize")
    submit_button.click(
        fn=process_text,
        inputs=[input_text],
        outputs=[anonymized_text, name_mapping],
    )

# Launch the app
demo.launch()