Spaces:

VicidiLochi
/

SIA_Partners_Hackathon

Sleeping

App Files Files Community

SIA_Partners_Hackathon / app.py

VicidiLochi

Add anonymization app 3

fdd8710 7 months ago

raw

history blame

2.92 kB

	import gradio as gr
	from transformers import pipeline

	# Build the BART text2text-generation pipeline once at import time; the
	# module-level `generator` is what anonymize_text() calls on each request.
	generator = pipeline(
	    task="text2text-generation",
	    model="facebook/bart-large",
	    tokenizer="facebook/bart-large",
	)

	# Function to construct the anonymization prompt
	def construct_prompt(input_text: str) -> str:
	    """Build the anonymization instruction prompt for *input_text*.

	    The prompt lists the replacement rules (personal names, company names,
	    addresses, other identifiers), shows the JSON shape the model is asked
	    to emit (keys ``"anonymized_text"`` and ``"replacements"``), and ends
	    with the input text wrapped in double quotes.

	    Args:
	        input_text: The raw text to be anonymized. It is interpolated
	            verbatim; braces or quotes inside it are not escaped.

	    Returns:
	        The prompt with leading and trailing whitespace removed.
	    """
	    # Doubled braces ({{ / }}) are f-string escapes that render as the
	    # literal { / } of the JSON example shown to the model.
	    prompt = f"""
	You are an advanced text anonymizer. Your task is to process the given text, identify all personal names, company names, addresses, and other identifying information, and replace them with pseudonyms or placeholders.

	Please ensure that:
	1. Personal names (e.g., "John Doe") are replaced with realistic pseudonyms (e.g., "Michael Brown").
	2. Company names (e.g., "SIA Partners") are replaced with generic placeholders (e.g., "CompanyPlaceholder1").
	3. Addresses (e.g., "14 rue Championnet") are replaced with "AddressPlaceholder".
	4. All other identifying information like registration numbers and codes are replaced with generic placeholders (e.g., "NumberPlaceholder").
	5. Any other part of the text should remain unchanged.

	The output should strictly follow this JSON format:
	{{
	"anonymized_text": "The text with all identifying information replaced.",
	"replacements": {{
	"Original1": "Replacement1",
	"Original2": "Replacement2"
	}}
	}}

	Here is the input text:

	"{input_text}"
	"""
	    return prompt.strip()

	# Function to process the input text and generate the anonymized output
	def anonymize_text(input_text):
	    """Anonymize *input_text* with the model and return the parsed result.

	    Builds the instruction prompt with ``construct_prompt``, runs the
	    module-level ``generator`` pipeline on it, and tries to interpret the
	    generated text as the JSON object described in the prompt.

	    Args:
	        input_text: The raw text to anonymize.

	    Returns:
	        A ``(anonymized_text, name_mapping)`` tuple. When the model output
	        cannot be parsed into a dict, the original ``input_text`` and an
	        empty dict are returned (best-effort fallback, never raises on a
	        malformed model response).
	    """
	    prompt = construct_prompt(input_text)

	    # Ask for a single candidate and take its text.
	    response = generator(prompt, max_length=512, num_return_sequences=1)
	    generated_text = response[0]['generated_text']

	    result = _parse_model_output(generated_text)
	    if not isinstance(result, dict):
	        # Unparseable or non-object output: fall back to the untouched input.
	        return input_text, {}

	    anonymized_text = result.get("anonymized_text", input_text)
	    # The prompt asks the model for a "replacements" object; the legacy
	    # "name_mapping" key is still honored for backward compatibility.
	    name_mapping = result.get("replacements", result.get("name_mapping", {}))
	    return anonymized_text, name_mapping


	def _parse_model_output(generated_text):
	    """Parse model output as JSON (or a Python literal); return None on failure."""
	    # Function-scope imports keep this block self-contained.
	    import ast
	    import json

	    # SECURITY: this previously used eval() on the model output, which is
	    # derived from user-supplied text and could execute arbitrary code.
	    # json.loads / ast.literal_eval only ever build plain data.
	    try:
	        return json.loads(generated_text)
	    except (ValueError, TypeError, RecursionError):
	        pass

	    # Models sometimes emit Python-style dicts (single quotes); accept those
	    # too, as eval() used to, but without evaluating expressions.
	    try:
	        return ast.literal_eval(generated_text)
	    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	        return None

	# Gradio interface
	with gr.Blocks() as demo:
	    # Page title and a short description of what the app does.
	    gr.Markdown("## Text Anonymizer")
	    gr.Markdown("Enter text containing personal names, and the model will anonymize it by replacing the names with pseudonyms. The app will also return a dictionary linking original names to their replacements.")

	    # One input box, plus a read-only text box and a JSON viewer for the
	    # two values returned by anonymize_text().
	    input_text = gr.Textbox(label="Input Text", placeholder="Enter text here...")
	    anonymized_text = gr.Textbox(label="Anonymized Text", interactive=False)
	    name_mapping = gr.JSON(label="Name Mapping")

	    def process_text(input_text):
	        """Click handler: forward the textbox value to anonymize_text()."""
	        return anonymize_text(input_text)

	    # Event wiring must happen inside the Blocks context so the handler is
	    # registered on `demo`.
	    submit_button = gr.Button("Anonymize")
	    submit_button.click(process_text, inputs=[input_text], outputs=[anonymized_text, name_mapping])

	# Launch the app
	demo.launch()