Spaces:
Sleeping
Sleeping
Commit
·
fdd8710
1
Parent(s):
631dead
Add anonymization app 3
Browse files
app.py
CHANGED
@@ -7,28 +7,27 @@ generator = pipeline("text2text-generation", model="facebook/bart-large", tokeni
|
|
7 |
# Function to construct the anonymization prompt
|
8 |
def construct_prompt(input_text):
|
9 |
prompt = f"""
|
10 |
-
You are
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
{{
|
14 |
-
"anonymized_text": "The text with all
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
}}
|
19 |
}}
|
20 |
|
21 |
-
### Guidelines:
|
22 |
-
1. Only replace entity or personnal names.
|
23 |
-
2. Ensure the pseudonyms are realistic and appropriate for the type of name being replaced.
|
24 |
-
3. Keep all other parts of the text unchanged.
|
25 |
-
4. If no names are found, return the original text with an empty dictionary.
|
26 |
-
|
27 |
Here is the input text:
|
28 |
|
29 |
"{input_text}"
|
30 |
-
|
31 |
-
Please process the input and return your output in the specified JSON format.
|
32 |
"""
|
33 |
return prompt.strip()
|
34 |
|
|
|
7 |
# Function to construct the anonymization prompt
|
8 |
def construct_prompt(input_text):
    """Build the anonymization prompt sent to the text-generation model.

    The instructions ask the model to replace personal names, company
    names, addresses and other identifying information, and to answer in a
    fixed JSON shape with ``anonymized_text`` and ``replacements`` keys.

    Args:
        input_text: The raw text to anonymize. It is interpolated verbatim
            between double quotes at the end of the prompt; it is not
            escaped.

    Returns:
        The prompt with leading and trailing whitespace stripped.
    """
    # NOTE(review): this block was reconstructed from a rendered diff that
    # dropped leading whitespace. The wording below is verbatim, but the
    # indentation inside the template is assumed -- confirm against app.py.
    # Doubled braces are f-string escapes and render as single "{" / "}".
    prompt = f"""
    You are an advanced text anonymizer. Your task is to process the given text, identify all personal names, company names, addresses, and other identifying information, and replace them with pseudonyms or placeholders.

    Please ensure that:
    1. Personal names (e.g., "John Doe") are replaced with realistic pseudonyms (e.g., "Michael Brown").
    2. Company names (e.g., "SIA Partners") are replaced with generic placeholders (e.g., "CompanyPlaceholder1").
    3. Addresses (e.g., "14 rue Championnet") are replaced with "AddressPlaceholder".
    4. All other identifying information like registration numbers and codes are replaced with generic placeholders (e.g., "NumberPlaceholder").
    5. Any other part of the text should remain unchanged.

    The output should strictly follow this JSON format:
    {{
        "anonymized_text": "The text with all identifying information replaced.",
        "replacements": {{
            "Original1": "Replacement1",
            "Original2": "Replacement2"
        }}
    }}

    Here is the input text:

    "{input_text}"
    """
    return prompt.strip()
|
33 |
|