vargha committed on
Commit
1000353
Β·
1 Parent(s): 6a70048

script for database management

Browse files
scripts/list_phase2_rejected_unreviewed.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phase 2 Rejected/Unreviewed Items Report Script
4
+
5
+ This script lists all rejected or unreviewed items from Phase 2 review process,
6
+ showing TTS data indices, rejection reasons, and detailed information.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from datetime import datetime
13
+ from sqlalchemy import and_, or_
14
+ from sqlalchemy.orm import joinedload
15
+
16
+ # Add project root to Python path
17
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
18
+ if project_root not in sys.path:
19
+ sys.path.insert(0, project_root)
20
+
21
+ from utils.database import get_db
22
+ from data.models import Annotator, Annotation, Validation, TTSData
23
+ from utils.logger import Logger
24
+ from config import conf
25
+
26
+ log = Logger()
27
+
28
def list_rejected_unreviewed_items(status_filter="all", reviewer_filter=None, annotator_filter=None, export_csv=False):
    """
    Lists rejected or unreviewed items from Phase 2 review process.

    Iterates over the annotator -> reviewer pairs declared in
    conf.REVIEW_MAPPING, prints a per-pair breakdown of rejected and
    unreviewed annotations, and finally an overall summary. Only items
    matching ``status_filter`` are collected into the overall summary
    and the optional CSV export.

    Args:
        status_filter (str): Filter by status - "rejected", "unreviewed", or "all"
        reviewer_filter (str): Filter by specific reviewer name
        annotator_filter (str): Filter by specific annotator whose work is being reviewed
        export_csv (bool): Export results to CSV file
    """
    with get_db() as db:
        try:
            print("=" * 80)
            print(" PHASE 2 REJECTED/UNREVIEWED ITEMS REPORT")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Status filter: {status_filter.upper()}")
            if reviewer_filter:
                print(f"Reviewer filter: {reviewer_filter}")
            if annotator_filter:
                print(f"Annotator filter: {annotator_filter}")
            print()

            # Resolve the configured (annotator, reviewer) name pairs to ORM objects.
            review_pairs = []
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
                # Apply name filters before hitting the database.
                if reviewer_filter and reviewer_name != reviewer_filter:
                    continue
                if annotator_filter and annotator_name != annotator_filter:
                    continue

                annotator = db.query(Annotator).filter_by(name=annotator_name).first()
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()

                if annotator and reviewer:
                    review_pairs.append((annotator, reviewer))
                else:
                    print(f"⚠️ Warning: Missing annotator ({annotator_name}) or reviewer ({reviewer_name}) in database")

            if not review_pairs:
                print("No valid review pairs found with current filters.")
                return

            all_items = []
            total_rejected = 0
            total_unreviewed = 0

            # Process each review pair
            for annotator, reviewer in review_pairs:
                print(f"\n📋 REVIEWER: {reviewer.name} → ANNOTATOR: {annotator.name}")
                print("-" * 60)

                # All non-empty annotations produced by this annotator,
                # ordered by TTS data id for stable output.
                annotations_query = db.query(Annotation).join(TTSData).filter(
                    Annotation.annotator_id == annotator.id,
                    # Only include annotations that have actual content
                    Annotation.annotated_sentence.isnot(None),
                    Annotation.annotated_sentence != ""
                ).options(
                    joinedload(Annotation.tts_data)
                ).order_by(TTSData.id)

                annotations = annotations_query.all()

                if not annotations:
                    print(" No annotations found for this annotator.")
                    continue

                print(f" Total annotations to review: {len(annotations)}")

                rejected_items = []
                unreviewed_items = []

                for annotation in annotations:
                    # Has the assigned reviewer validated this annotation?
                    validation = db.query(Validation).filter_by(
                        annotation_id=annotation.id,
                        validator_id=reviewer.id
                    ).first()

                    item_data = {
                        "tts_id": annotation.tts_data.id,
                        "filename": annotation.tts_data.filename,
                        "original_sentence": annotation.tts_data.sentence,
                        "annotated_sentence": annotation.annotated_sentence,
                        "annotator": annotator.name,
                        "reviewer": reviewer.name,
                        "annotated_at": annotation.annotated_at.strftime('%Y-%m-%d %H:%M:%S') if annotation.annotated_at else "N/A"
                    }

                    if not validation:
                        # Never reviewed by the assigned reviewer.
                        item_data["status"] = "Unreviewed"
                        item_data["rejection_reason"] = ""
                        unreviewed_items.append(item_data)
                    elif not validation.validated:
                        # Reviewed and explicitly rejected.
                        item_data["status"] = "Rejected"
                        item_data["rejection_reason"] = validation.description or "No reason provided"
                        rejected_items.append(item_data)

                # Per-pair counts (always reported, regardless of filter).
                pair_rejected = len(rejected_items)
                pair_unreviewed = len(unreviewed_items)
                total_rejected += pair_rejected
                total_unreviewed += pair_unreviewed

                print(f" ❌ Rejected: {pair_rejected}")
                print(f" ⏳ Unreviewed: {pair_unreviewed}")

                # Select detailed items according to the status filter.
                items_to_show = []
                if status_filter == "rejected" or status_filter == "all":
                    items_to_show.extend(rejected_items)
                if status_filter == "unreviewed" or status_filter == "all":
                    items_to_show.extend(unreviewed_items)

                # Bug fix: previously every item was appended to all_items
                # regardless of status_filter, so the overall count and CSV
                # export ignored --status. Collect only matching items.
                all_items.extend(items_to_show)

                if items_to_show:
                    print(f"\n 📝 Detailed Items ({len(items_to_show)}):")
                    for item in sorted(items_to_show, key=lambda x: x["tts_id"]):
                        status_icon = "❌" if item["status"] == "Rejected" else "⏳"
                        print(f" {status_icon} ID: {item['tts_id']} | Status: {item['status']}")
                        if item["status"] == "Rejected" and item["rejection_reason"]:
                            print(f" Reason: {item['rejection_reason']}")

                        # Show truncated sentences for context (80-char preview).
                        orig_preview = item["original_sentence"][:80] + "..." if len(item["original_sentence"]) > 80 else item["original_sentence"]
                        ann_preview = item["annotated_sentence"][:80] + "..." if len(item["annotated_sentence"]) > 80 else item["annotated_sentence"]

                        print(f" Original: {orig_preview}")
                        print(f" Annotated: {ann_preview}")
                        print(f" Annotated at: {item['annotated_at']}")
                        print()

            # Overall summary
            print("\n" + "=" * 80)
            print(" OVERALL SUMMARY")
            print("=" * 80)
            print(f"📊 Total items found: {len(all_items)}")
            print(f"❌ Total rejected: {total_rejected}")
            print(f"⏳ Total unreviewed: {total_unreviewed}")

            # Export to CSV if requested
            if export_csv and all_items:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                csv_filename = f"phase2_rejected_unreviewed_{timestamp}.csv"

                import csv
                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
                    fieldnames = ['tts_id', 'status', 'rejection_reason', 'annotator', 'reviewer',
                                  'filename', 'original_sentence', 'annotated_sentence', 'annotated_at']
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    writer.writeheader()
                    for item in sorted(all_items, key=lambda x: x["tts_id"]):
                        writer.writerow(item)

                print(f"\n💾 Results exported to: {csv_filename}")

        except Exception as e:
            log.error(f"Error generating rejected/unreviewed items report: {e}")
            print(f"❌ Error: {e}")
194
+
195
def list_by_ids(ids_list, export_csv=False):
    """
    Lists specific TTS data items by their IDs and shows their Phase 2 review status.

    For each requested id the chain TTSData -> Annotation -> Annotator ->
    assigned reviewer (conf.REVIEW_MAPPING) -> Validation is resolved; any
    break in that chain prints a warning and the id is skipped.

    Args:
        ids_list (list): List of TTS data IDs to look up
        export_csv (bool): Export results to CSV file
    """
    with get_db() as db:
        try:
            print("=" * 80)
            print(" PHASE 2 STATUS FOR SPECIFIC IDS")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Requested IDs: {', '.join(map(str, ids_list))}")
            print()

            found_items = []  # fully-resolved items; also the CSV export payload
            not_found = []    # ids with no TTSData row at all

            for tts_id in ids_list:
                # Find the TTS data
                tts_data = db.query(TTSData).filter_by(id=tts_id).first()
                if not tts_data:
                    not_found.append(tts_id)
                    continue

                # Find the annotation for this TTS data
                annotation = db.query(Annotation).filter_by(tts_data_id=tts_id).first()
                if not annotation:
                    print(f"⚠️ ID {tts_id}: No annotation found")
                    continue

                # Find the assigned reviewer for this annotator
                annotator = db.query(Annotator).filter_by(id=annotation.annotator_id).first()
                if not annotator:
                    print(f"⚠️ ID {tts_id}: Annotator not found")
                    continue

                reviewer_name = conf.REVIEW_MAPPING.get(annotator.name)
                if not reviewer_name:
                    print(f"⚠️ ID {tts_id}: No reviewer assigned for annotator {annotator.name}")
                    continue

                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                if not reviewer:
                    print(f"⚠️ ID {tts_id}: Reviewer {reviewer_name} not found in database")
                    continue

                # Check validation status (only the assigned reviewer's verdict counts)
                validation = db.query(Validation).filter_by(
                    annotation_id=annotation.id,
                    validator_id=reviewer.id
                ).first()

                # Default to "Unreviewed" when no validation record exists.
                status = "Unreviewed"
                rejection_reason = ""

                if validation:
                    if validation.validated:
                        status = "Approved"
                    else:
                        status = "Rejected"
                        rejection_reason = validation.description or "No reason provided"

                item_data = {
                    "tts_id": tts_id,
                    "status": status,
                    "rejection_reason": rejection_reason,
                    "annotator": annotator.name,
                    "reviewer": reviewer.name,
                    "filename": tts_data.filename,
                    "original_sentence": tts_data.sentence,
                    "annotated_sentence": annotation.annotated_sentence or "[No annotation]",
                    "annotated_at": annotation.annotated_at.strftime('%Y-%m-%d %H:%M:%S') if annotation.annotated_at else "N/A"
                }

                found_items.append(item_data)

                # Display the item
                status_icon = "✅" if status == "Approved" else "❌" if status == "Rejected" else "⏳"
                print(f"{status_icon} ID: {tts_id} | Status: {status} | Annotator: {annotator.name} | Reviewer: {reviewer.name}")

                if status == "Rejected" and rejection_reason:
                    print(f" Rejection Reason: {rejection_reason}")

                # Previews truncated to 100 chars for console readability.
                orig_preview = tts_data.sentence[:100] + "..." if len(tts_data.sentence) > 100 else tts_data.sentence
                ann_preview = (annotation.annotated_sentence[:100] + "..." if annotation.annotated_sentence and len(annotation.annotated_sentence) > 100
                               else annotation.annotated_sentence or "[No annotation]")

                print(f" Original: {orig_preview}")
                print(f" Annotated: {ann_preview}")
                print(f" Annotated at: {item_data['annotated_at']}")
                print()

            if not_found:
                print(f"⚠️ IDs not found: {', '.join(map(str, not_found))}")

            # Export to CSV if requested
            if export_csv and found_items:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                csv_filename = f"phase2_specific_ids_{timestamp}.csv"

                import csv
                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
                    fieldnames = ['tts_id', 'status', 'rejection_reason', 'annotator', 'reviewer',
                                  'filename', 'original_sentence', 'annotated_sentence', 'annotated_at']
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    writer.writeheader()
                    for item in found_items:
                        writer.writerow(item)

                print(f"💾 Results exported to: {csv_filename}")

        except Exception as e:
            log.error(f"Error looking up specific IDs: {e}")
            print(f"❌ Error: {e}")
313
+
314
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="List rejected or unreviewed items from Phase 2 review process.")
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # 'list' sub-command: full report with optional status/name filters.
    list_parser = subparsers.add_parser('list', help='List rejected/unreviewed items')
    list_parser.add_argument("--status", choices=["rejected", "unreviewed", "all"],
                             default="all", help="Filter by status (default: all)")
    list_parser.add_argument("--reviewer", type=str,
                             help="Filter by specific reviewer name")
    list_parser.add_argument("--annotator", type=str,
                             help="Filter by specific annotator whose work is being reviewed")
    list_parser.add_argument("--csv", action="store_true",
                             help="Export results to CSV file")

    # 'ids' sub-command: status lookup for explicitly given TTS data ids.
    ids_parser = subparsers.add_parser('ids', help='Check status of specific TTS data IDs')
    ids_parser.add_argument("ids", nargs='+', type=int, help="TTS data IDs to check")
    ids_parser.add_argument("--csv", action="store_true",
                            help="Export results to CSV file")

    args = parser.parse_args()

    if args.command == 'list':
        list_rejected_unreviewed_items(status_filter=args.status,
                                       reviewer_filter=args.reviewer,
                                       annotator_filter=args.annotator,
                                       export_csv=args.csv)
    elif args.command == 'ids':
        list_by_ids(args.ids, export_csv=args.csv)
    else:
        # No sub-command given: show usage instead of failing.
        parser.print_help()
scripts/report_annotator_progress.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import os
4
+ from sqlalchemy import func, and_
5
+
6
+ # Add project root to Python path
7
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
8
+ if project_root not in sys.path:
9
+ sys.path.insert(0, project_root)
10
+
11
+ from utils.database import get_db
12
+ from data.models import Annotator, Annotation, AnnotationInterval, TTSData
13
+ from utils.logger import Logger
14
+
15
+ log = Logger()
16
+
17
def generate_annotator_progress_report():
    """
    Generates and prints a progress report for each annotator.
    Progress is defined as the percentage of assigned TTSData items
    that have a non-empty annotation.
    """
    with get_db() as db:
        try:
            annotators = db.query(Annotator).filter(Annotator.is_active == True).all()

            if not annotators:
                log.info("No active annotators found.")
                return

            log.info("--- Annotator Progress Report ---")

            for annotator in annotators:
                # Calculate total assigned items for the annotator
                # (interval bounds are inclusive, hence the +1).
                total_assigned_query = db.query(func.sum(AnnotationInterval.end_index - AnnotationInterval.start_index + 1))\
                    .filter(AnnotationInterval.annotator_id == annotator.id)
                total_assigned_result = total_assigned_query.scalar()
                # SUM over zero rows yields NULL/None, so coalesce to 0.
                total_assigned = total_assigned_result if total_assigned_result is not None else 0

                # Calculate completed items by this annotator within their assigned intervals
                # An item is considered completed if annotated_sentence is not None and not an empty string.
                # NOTE(review): the join treats interval start/end indices as TTSData
                # ids — confirm that assumption against how intervals are created.
                completed_count_query = db.query(func.count(Annotation.id))\
                    .join(TTSData, Annotation.tts_data_id == TTSData.id)\
                    .join(AnnotationInterval,
                          and_(AnnotationInterval.annotator_id == annotator.id,
                               TTSData.id >= AnnotationInterval.start_index,
                               TTSData.id <= AnnotationInterval.end_index))\
                    .filter(Annotation.annotator_id == annotator.id,
                            Annotation.annotated_sentence != None,
                            Annotation.annotated_sentence != "")

                completed_count_result = completed_count_query.scalar()
                completed_count = completed_count_result if completed_count_result is not None else 0

                # Guard against division by zero for annotators with no intervals.
                percentage_completed = 0
                if total_assigned > 0:
                    percentage_completed = (completed_count / total_assigned) * 100

                log.info(f"Annotator: {annotator.name} (ID: {annotator.id})")
                log.info(f" Total Assigned Items: {total_assigned}")
                log.info(f" Completed Items: {completed_count}")
                log.info(f" Progress: {percentage_completed:.2f}%")
                log.info("-" * 30)

        except Exception as e:
            # For errors, we might still want the full log details
            log.error(f"Failed to generate annotator progress report: {e}")
68
+
69
if __name__ == "__main__":
    # No CLI arguments: the report always covers all active annotators.
    # (Removed dead commented-out argparse scaffolding.)
    generate_annotator_progress_report()
scripts/report_review_results.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phase 2 Review Results Report Script
4
+
5
+ This script generates a comprehensive report of Phase 2 review results,
6
+ showing approval and rejection statistics for each reviewer and overall totals.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from datetime import datetime
13
+ from collections import defaultdict
14
+ from sqlalchemy import func, and_
15
+
16
+ # Add project root to Python path
17
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
18
+ if project_root not in sys.path:
19
+ sys.path.insert(0, project_root)
20
+
21
+ from utils.database import get_db
22
+ from data.models import Annotator, Annotation, Validation, TTSData
23
+ from utils.logger import Logger
24
+ from config import conf
25
+
26
+ log = Logger()
27
+
28
def generate_review_results_report(detailed=False, export_csv=False):
    """
    Generates and prints a review results report for Phase 2 validation.

    For every reviewer in conf.REVIEW_MAPPING, counts approved/rejected
    validations of the assigned annotator's work, prints per-reviewer and
    overall statistics, and optionally exports the per-reviewer rows to CSV.

    Args:
        detailed (bool): If True, shows detailed breakdown by annotator being reviewed.
        export_csv (bool): If True, exports results to CSV file.
    """
    with get_db() as db:
        try:
            # Get all reviewers (users who appear in REVIEW_MAPPING values)
            reviewers = list(conf.REVIEW_MAPPING.values())

            if not reviewers:
                print("No reviewers found in REVIEW_MAPPING configuration.")
                return

            print("=" * 80)
            print(" PHASE 2 REVIEW RESULTS REPORT")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print()

            overall_approved = 0
            overall_rejected = 0
            overall_total = 0
            csv_data = []

            for reviewer_name in reviewers:
                # Get reviewer object
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                if not reviewer:
                    print(f"⚠️ Reviewer '{reviewer_name}' not found in database")
                    continue

                # Find which annotator this reviewer is assigned to review
                # (reverse lookup of the annotator -> reviewer mapping).
                assigned_annotator = None
                for annotator_name, assigned_reviewer in conf.REVIEW_MAPPING.items():
                    if assigned_reviewer == reviewer_name:
                        assigned_annotator = annotator_name
                        break

                if not assigned_annotator:
                    print(f"⚠️ No annotator assignment found for reviewer '{reviewer_name}'")
                    continue

                # Get annotator being reviewed
                annotator = db.query(Annotator).filter_by(name=assigned_annotator).first()
                if not annotator:
                    print(f"⚠️ Assigned annotator '{assigned_annotator}' not found in database")
                    continue

                print(f"\n📋 REVIEWER: {reviewer_name}")
                print(f" Reviewing work by: {assigned_annotator}")
                print("-" * 60)

                # All validations this reviewer made on the assigned annotator's work.
                validations_query = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Validation.validator_id == reviewer.id,
                        Annotation.annotator_id == annotator.id
                    )

                total_validations = validations_query.count()
                approved_validations = validations_query.filter(Validation.validated == True).count()
                rejected_validations = validations_query.filter(Validation.validated == False).count()

                # Calculate percentages (guarding against division by zero)
                approved_percentage = (approved_validations / total_validations * 100) if total_validations > 0 else 0
                rejected_percentage = (rejected_validations / total_validations * 100) if total_validations > 0 else 0

                print(f" 📊 Total Reviews: {total_validations:,}")
                print(f" ✅ Approved: {approved_validations:,} ({approved_percentage:.1f}%)")
                print(f" ❌ Rejected: {rejected_validations:,} ({rejected_percentage:.1f}%)")

                # Update overall totals
                overall_total += total_validations
                overall_approved += approved_validations
                overall_rejected += rejected_validations

                # Collect CSV data
                if export_csv:
                    csv_data.append({
                        'reviewer': reviewer_name,
                        'reviewed_annotator': assigned_annotator,
                        'total_reviews': total_validations,
                        'approved': approved_validations,
                        'rejected': rejected_validations,
                        'approval_rate': approved_percentage
                    })

                # Show detailed rejection reasons if requested
                if detailed and rejected_validations > 0:
                    print("\n 📝 Rejection Reasons:")
                    rejection_reasons = db.query(Validation.description)\
                        .join(Annotation, Validation.annotation_id == Annotation.id)\
                        .filter(
                            Validation.validator_id == reviewer.id,
                            Annotation.annotator_id == annotator.id,
                            Validation.validated == False,
                            Validation.description.isnot(None),
                            Validation.description != ""
                        ).all()

                    # Aggregate identical reason strings into counts.
                    reason_counts = {}
                    for (reason,) in rejection_reasons:
                        if reason:
                            reason_counts[reason] = reason_counts.get(reason, 0) + 1

                    for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True):
                        print(f" • {reason}: {count} times")

                    if not reason_counts:
                        print(" (No reasons provided)")

                # Show annotation coverage (how much of assigned work has been reviewed)
                total_annotations_query = db.query(Annotation)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Annotation.annotated_sentence.isnot(None),
                        Annotation.annotated_sentence != ""
                    )
                total_annotations = total_annotations_query.count()

                coverage_percentage = (total_validations / total_annotations * 100) if total_annotations > 0 else 0
                print(f" 📈 Review Coverage: {total_validations:,}/{total_annotations:,} ({coverage_percentage:.1f}%)")

            # Overall summary
            print("\n" + "=" * 80)
            print(" OVERALL SUMMARY")
            print("=" * 80)

            overall_approved_percentage = (overall_approved / overall_total * 100) if overall_total > 0 else 0
            overall_rejected_percentage = (overall_rejected / overall_total * 100) if overall_total > 0 else 0

            print(f"📊 Total Reviews Across All Reviewers: {overall_total:,}")
            print(f"✅ Total Approved: {overall_approved:,} ({overall_approved_percentage:.1f}%)")
            print(f"❌ Total Rejected: {overall_rejected:,} ({overall_rejected_percentage:.1f}%)")

            # Quality score (approval rate)
            if overall_total > 0:
                print(f"🎯 Overall Quality Score: {overall_approved_percentage:.1f}% approval rate")

                # Quality assessment buckets by approval rate.
                if overall_approved_percentage >= 95:
                    quality_rating = "🌟 Excellent"
                elif overall_approved_percentage >= 85:
                    quality_rating = "👍 Good"
                elif overall_approved_percentage >= 75:
                    quality_rating = "⚠️ Fair"
                else:
                    quality_rating = "🔴 Needs Improvement"

                print(f"📊 Quality Rating: {quality_rating}")

            print("=" * 80)

            # Export to CSV if requested
            if export_csv and csv_data:
                try:
                    import pandas as pd
                    df = pd.DataFrame(csv_data)
                    filename = f"review_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                    df.to_csv(filename, index=False)
                    # Bug fix: previously printed a literal "(unknown)"
                    # placeholder instead of the actual output filename.
                    print(f"\n📄 Results exported to: {filename}")
                except ImportError:
                    print("\n⚠️ CSV export requires pandas. Install with: pip install pandas")

        except Exception as e:
            log.error(f"Failed to generate review results report: {e}")
            print(f"❌ Error generating report: {e}")
200
+
201
def generate_annotator_breakdown_report():
    """
    Generates a report showing how each annotator's work was reviewed.

    Walks conf.REVIEW_MAPPING (annotator -> reviewer), loads every
    Validation the reviewer made on that annotator's annotations, and
    prints approval/rejection counts, a performance rating, and the
    top rejection reasons.
    """
    with get_db() as db:
        try:
            print("\n" + "=" * 80)
            print(" ANNOTATOR PERFORMANCE BREAKDOWN")
            print("=" * 80)

            # Get all annotators who have been reviewed
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
                annotator = db.query(Annotator).filter_by(name=annotator_name).first()
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()

                # Silently skip pairs missing from the database.
                if not annotator or not reviewer:
                    continue

                print(f"\n👤 ANNOTATOR: {annotator_name}")
                print(f" Reviewed by: {reviewer_name}")
                print("-" * 60)

                # Get validation stats for this annotator's work
                validations = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Validation.validator_id == reviewer.id
                    ).all()

                if not validations:
                    print(" 📊 No reviews completed yet")
                    continue

                total = len(validations)
                approved = sum(1 for v in validations if v.validated)
                rejected = total - approved

                # total > 0 is guaranteed here, but keep the guard for safety.
                approved_percentage = (approved / total * 100) if total > 0 else 0
                rejected_percentage = (rejected / total * 100) if total > 0 else 0

                print(f" 📊 Total Reviewed: {total:,}")
                print(f" ✅ Approved: {approved:,} ({approved_percentage:.1f}%)")
                print(f" ❌ Rejected: {rejected:,} ({rejected_percentage:.1f}%)")

                # Performance rating buckets based on approval rate.
                if approved_percentage >= 95:
                    rating = "🌟 Excellent"
                elif approved_percentage >= 85:
                    rating = "👍 Good"
                elif approved_percentage >= 75:
                    rating = "⚠️ Fair"
                elif approved_percentage >= 60:
                    rating = "🔴 Needs Improvement"
                else:
                    rating = "💥 Poor"

                print(f" 📈 Performance: {rating}")

                # Show most common rejection reasons if any
                if rejected > 0:
                    rejected_validations = [v for v in validations if not v.validated and v.description]
                    if rejected_validations:
                        print(" 📝 Top Rejection Reasons:")
                        reason_counts = defaultdict(int)
                        for v in rejected_validations:
                            if v.description:
                                reason_counts[v.description.strip()] += 1

                        # Only the three most frequent reasons are shown.
                        for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True)[:3]:
                            print(f" • {reason}: {count} times")

        except Exception as e:
            log.error(f"Failed to generate annotator breakdown report: {e}")
            print(f"❌ Error generating annotator breakdown: {e}")
276
+
277
+
278
def generate_quick_summary():
    """Generate a quick one-line summary of review results."""
    with get_db() as db:
        try:
            # Count every validation record, then split into approved/rejected.
            total = db.query(Validation).count()
            if not total:
                print("No review data found.")
                return

            approved = db.query(Validation).filter(Validation.validated == True).count()
            rejected = total - approved
            rate = approved / total * 100

            print(f"📊 QUICK SUMMARY: {total:,} total reviews | {approved:,} approved ({rate:.1f}%) | {rejected:,} rejected ({100-rate:.1f}%)")

        except Exception as e:
            print(f"❌ Error generating summary: {e}")
296
+
297
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate Phase 2 review results report.")
    parser.add_argument("--detailed", action="store_true",
                        help="Show detailed breakdown including rejection reasons")
    parser.add_argument("--annotator-breakdown", action="store_true",
                        help="Show performance breakdown by annotator")
    parser.add_argument("--csv", action="store_true",
                        help="Export results to CSV file")
    parser.add_argument("--quick", action="store_true",
                        help="Show only a quick summary line")

    args = parser.parse_args()

    # --quick replaces the full report; --annotator-breakdown appends to either.
    if args.quick:
        generate_quick_summary()
    else:
        generate_review_results_report(detailed=args.detailed, export_csv=args.csv)

    if args.annotator_breakdown:
        generate_annotator_breakdown_report()
scripts/update_annotator_name.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import os
4
+
5
+ # Add project root to Python path
6
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
7
+ if project_root not in sys.path:
8
+ sys.path.insert(0, project_root)
9
+
10
+ from utils.database import get_db
11
+ from data.models import Annotator
12
+ from utils.logger import Logger
13
+
14
+ log = Logger()
15
+
16
def update_annotator_name(old_name: str, new_name: str):
    """
    Rename an existing annotator.

    Only the name changes; the password and annotation intervals are left
    untouched. Logs an error and aborts if the new name is already taken
    or no annotator with the old name exists.
    """
    with get_db() as db:
        try:
            # Guard clause: the target name must be free.
            name_taken = db.query(Annotator).filter(Annotator.name == new_name).first()
            if name_taken:
                log.error(f"Error: An annotator with the name '{new_name}' already exists.")
                return

            record = db.query(Annotator).filter(Annotator.name == old_name).first()
            if not record:
                log.error(f"Error: Annotator with name '{old_name}' not found.")
                return

            record.name = new_name
            db.commit()

            log.info(f"Successfully updated annotator name from '{old_name}' to '{new_name}'.")
            log.info(f"ID: {record.id}, New Name: {record.name}")
            log.info("Password and annotation intervals remain unchanged.")

        except Exception as e:
            # Undo any partial change before surfacing the failure.
            db.rollback()
            log.error(f"Failed to update annotator name: {e}")
44
+
45
if __name__ == "__main__":
    # CLI entry point: python update_annotator_name.py <old_name> <new_name>
    cli = argparse.ArgumentParser(description="Update an annotator's name.")
    cli.add_argument("old_name", type=str, help="The current name of the annotator.")
    cli.add_argument("new_name", type=str, help="The new name for the annotator.")
    opts = cli.parse_args()

    update_annotator_name(opts.old_name, opts.new_name)