VicidiLochi committed on
Commit
fdd8710
·
1 Parent(s): 631dead

Add anonymization app 3

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -7,28 +7,27 @@ generator = pipeline("text2text-generation", model="facebook/bart-large", tokeni
7
  # Function to construct the anonymization prompt
8
  def construct_prompt(input_text):
9
  prompt = f"""
10
- You are a text anonymizer. Your task is to process a given text, identify all entity or personnal names, replace them with unique and realistic pseudonyms, and return both the anonymized text and a dictionary linking the original names to the replacement names.
11
 
12
- The input text will be provided, and your output should follow this JSON structure:
 
 
 
 
 
 
 
13
  {{
14
- "anonymized_text": "The text with all names replaced by pseudonyms.",
15
- "name_mapping": {{
16
- "OriginalName1": "ReplacementName1",
17
- "OriginalName2": "ReplacementName2"
18
  }}
19
  }}
20
 
21
- ### Guidelines:
22
- 1. Only replace entity or personnal names.
23
- 2. Ensure the pseudonyms are realistic and appropriate for the type of name being replaced.
24
- 3. Keep all other parts of the text unchanged.
25
- 4. If no names are found, return the original text with an empty dictionary.
26
-
27
  Here is the input text:
28
 
29
  "{input_text}"
30
-
31
- Please process the input and return your output in the specified JSON format.
32
  """
33
  return prompt.strip()
34
 
 
7
  # Function to construct the anonymization prompt
8
  def construct_prompt(input_text):
9
  prompt = f"""
10
+ You are an advanced text anonymizer. Your task is to process the given text, identify all personal names, company names, addresses, and other identifying information, and replace them with pseudonyms or placeholders.
11
 
12
+ Please ensure that:
13
+ 1. Personal names (e.g., "John Doe") are replaced with realistic pseudonyms (e.g., "Michael Brown").
14
+ 2. Company names (e.g., "SIA Partners") are replaced with generic placeholders (e.g., "CompanyPlaceholder1").
15
+ 3. Addresses (e.g., "14 rue Championnet") are replaced with "AddressPlaceholder".
16
+ 4. All other identifying information like registration numbers and codes are replaced with generic placeholders (e.g., "NumberPlaceholder").
17
+ 5. Any other part of the text should remain unchanged.
18
+
19
+ The output should strictly follow this JSON format:
20
  {{
21
+ "anonymized_text": "The text with all identifying information replaced.",
22
+ "replacements": {{
23
+ "Original1": "Replacement1",
24
+ "Original2": "Replacement2"
25
  }}
26
  }}
27
 
 
 
 
 
 
 
28
  Here is the input text:
29
 
30
  "{input_text}"
 
 
31
  """
32
  return prompt.strip()
33