# Spaces: Sleeping  (page-header residue captured with the source; not part of the program)
import ast
import json

import gradio as gr
from transformers import pipeline
# Load the text2text-generation pipeline once at import time so that every
# request reuses the same model and tokenizer instead of reloading them.
# NOTE(review): the original comment called this model "lightweight";
# facebook/bart-large is a large checkpoint and is not instruction-tuned, so it
# may not follow the JSON-output prompt built below -- confirm the model choice.
generator = pipeline(
    "text2text-generation",
    model="facebook/bart-large",
    tokenizer="facebook/bart-large",
)
# Function to construct the anonymization prompt
def construct_prompt(input_text):
    """Build the instruction prompt sent to the model for one input text.

    The prompt lists the anonymization rules, shows the JSON shape the model
    is asked to return (keys ``anonymized_text`` and ``replacements``), and
    ends with ``input_text`` wrapped in double quotes.

    Args:
        input_text: The raw text to anonymize. It is interpolated verbatim;
            no quoting or escaping is applied.

    Returns:
        The prompt string with leading and trailing whitespace stripped.
    """
    # The literal's lines are deliberately flush-left: any indentation placed
    # here would become part of the prompt text sent to the model.
    # Doubled braces are f-string escapes and render as single { and }.
    prompt = f"""
You are an advanced text anonymizer. Your task is to process the given text, identify all personal names, company names, addresses, and other identifying information, and replace them with pseudonyms or placeholders.
Please ensure that:
1. Personal names (e.g., "John Doe") are replaced with realistic pseudonyms (e.g., "Michael Brown").
2. Company names (e.g., "SIA Partners") are replaced with generic placeholders (e.g., "CompanyPlaceholder1").
3. Addresses (e.g., "14 rue Championnet") are replaced with "AddressPlaceholder".
4. All other identifying information like registration numbers and codes are replaced with generic placeholders (e.g., "NumberPlaceholder").
5. Any other part of the text should remain unchanged.
The output should strictly follow this JSON format:
{{
"anonymized_text": "The text with all identifying information replaced.",
"replacements": {{
"Original1": "Replacement1",
"Original2": "Replacement2"
}}
}}
Here is the input text:
"{input_text}"
"""
    return prompt.strip()
def _parse_model_output(generated_text):
    """Parse the model's raw output into a dict, or return None if it is not one."""
    try:
        parsed = json.loads(generated_text)
    except (TypeError, ValueError):
        # SECURITY: this used to be eval(generated_text). The generated text is
        # derived from user-supplied input, so eval() could execute arbitrary
        # code. ast.literal_eval only accepts Python literals; it is kept as a
        # fallback so dict output written with single quotes still parses.
        try:
            parsed = ast.literal_eval(generated_text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return None
    return parsed if isinstance(parsed, dict) else None


# Function to process the input text and generate the anonymized output
def anonymize_text(input_text):
    """Anonymize ``input_text`` with the model and return the result.

    Args:
        input_text: The text to anonymize.

    Returns:
        A ``(anonymized_text, name_mapping)`` tuple. When the model output
        cannot be parsed into a dict, the original text and an empty mapping
        are returned (best-effort fallback, as before). Empty or
        whitespace-only input is returned unchanged with an empty mapping,
        without calling the model.
    """
    # Nothing to anonymize: skip the model call entirely.
    if not input_text or not input_text.strip():
        return (input_text or ""), {}

    # Construct the instruction prompt
    prompt = construct_prompt(input_text)

    # Generate the output using the model
    response = generator(prompt, max_length=512, num_return_sequences=1)
    generated_text = response[0]['generated_text']

    # Attempt to parse the JSON output; fall back to the unmodified input.
    result = _parse_model_output(generated_text)
    if result is None:
        return input_text, {}

    anonymized_text = result.get("anonymized_text", input_text)
    # The prompt asks the model for a "replacements" key; "name_mapping" is
    # still accepted because that is the key this function used to read.
    name_mapping = result.get("replacements", result.get("name_mapping", {}))
    return anonymized_text, name_mapping
# Gradio interface
# All components and the click handler are created inside the Blocks context
# so that they are attached to `demo`.
with gr.Blocks() as demo:
    gr.Markdown("## Text Anonymizer")
    gr.Markdown("Enter text containing personal names, and the model will anonymize it by replacing the names with pseudonyms. The app will also return a dictionary linking original names to their replacements.")

    input_text = gr.Textbox(label="Input Text", placeholder="Enter text here...")
    anonymized_text = gr.Textbox(label="Anonymized Text", interactive=False)
    name_mapping = gr.JSON(label="Name Mapping")

    def process_text(input_text):
        """Click handler: return the (anonymized text, mapping) pair for the two outputs."""
        # Kept as a named wrapper rather than wiring anonymize_text directly so
        # the handler's function name stays the same.
        # NOTE(review): Gradio may derive an endpoint name from it -- confirm
        # before inlining.
        return anonymize_text(input_text)

    submit_button = gr.Button("Anonymize")
    submit_button.click(process_text, inputs=[input_text], outputs=[anonymized_text, name_mapping])

# Launch the app only when run as a script, so importing this module (e.g. for
# tests) does not start a server.
if __name__ == "__main__":
    demo.launch()