vargha committed on
Commit
1000353
Β·
1 Parent(s): 6a70048

script for database management

Browse files
scripts/list_phase2_rejected_unreviewed.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phase 2 Rejected/Unreviewed Items Report Script
4
+
5
+ This script lists all rejected or unreviewed items from Phase 2 review process,
6
+ showing TTS data indices, rejection reasons, and detailed information.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from datetime import datetime
13
+ from sqlalchemy import and_, or_
14
+ from sqlalchemy.orm import joinedload
15
+
16
+ # Add project root to Python path
17
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
18
+ if project_root not in sys.path:
19
+ sys.path.insert(0, project_root)
20
+
21
+ from utils.database import get_db
22
+ from data.models import Annotator, Annotation, Validation, TTSData
23
+ from utils.logger import Logger
24
+ from config import conf
25
+
26
+ log = Logger()
27
+
28
def list_rejected_unreviewed_items(status_filter="all", reviewer_filter=None, annotator_filter=None, export_csv=False):
    """
    Lists rejected or unreviewed items from Phase 2 review process.

    Iterates over the annotator -> reviewer pairs declared in
    conf.REVIEW_MAPPING, prints a per-pair breakdown of rejected and
    unreviewed annotations, and finally an overall summary. Only items
    matching ``status_filter`` are collected into the overall summary
    and the optional CSV export.

    Args:
        status_filter (str): Filter by status - "rejected", "unreviewed", or "all"
        reviewer_filter (str): Filter by specific reviewer name
        annotator_filter (str): Filter by specific annotator whose work is being reviewed
        export_csv (bool): Export results to CSV file
    """
    with get_db() as db:
        try:
            print("=" * 80)
            print(" PHASE 2 REJECTED/UNREVIEWED ITEMS REPORT")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Status filter: {status_filter.upper()}")
            if reviewer_filter:
                print(f"Reviewer filter: {reviewer_filter}")
            if annotator_filter:
                print(f"Annotator filter: {annotator_filter}")
            print()

            # Resolve the configured (annotator, reviewer) name pairs to ORM objects.
            review_pairs = []
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
                # Apply name filters before hitting the database.
                if reviewer_filter and reviewer_name != reviewer_filter:
                    continue
                if annotator_filter and annotator_name != annotator_filter:
                    continue

                annotator = db.query(Annotator).filter_by(name=annotator_name).first()
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()

                if annotator and reviewer:
                    review_pairs.append((annotator, reviewer))
                else:
                    print(f"⚠️ Warning: Missing annotator ({annotator_name}) or reviewer ({reviewer_name}) in database")

            if not review_pairs:
                print("No valid review pairs found with current filters.")
                return

            all_items = []
            total_rejected = 0
            total_unreviewed = 0

            # Process each review pair
            for annotator, reviewer in review_pairs:
                print(f"\n📋 REVIEWER: {reviewer.name} → ANNOTATOR: {annotator.name}")
                print("-" * 60)

                # All non-empty annotations produced by this annotator,
                # ordered by TTS data id for stable output.
                annotations_query = db.query(Annotation).join(TTSData).filter(
                    Annotation.annotator_id == annotator.id,
                    # Only include annotations that have actual content
                    Annotation.annotated_sentence.isnot(None),
                    Annotation.annotated_sentence != ""
                ).options(
                    joinedload(Annotation.tts_data)
                ).order_by(TTSData.id)

                annotations = annotations_query.all()

                if not annotations:
                    print(" No annotations found for this annotator.")
                    continue

                print(f" Total annotations to review: {len(annotations)}")

                rejected_items = []
                unreviewed_items = []

                for annotation in annotations:
                    # Has the assigned reviewer validated this annotation?
                    validation = db.query(Validation).filter_by(
                        annotation_id=annotation.id,
                        validator_id=reviewer.id
                    ).first()

                    item_data = {
                        "tts_id": annotation.tts_data.id,
                        "filename": annotation.tts_data.filename,
                        "original_sentence": annotation.tts_data.sentence,
                        "annotated_sentence": annotation.annotated_sentence,
                        "annotator": annotator.name,
                        "reviewer": reviewer.name,
                        "annotated_at": annotation.annotated_at.strftime('%Y-%m-%d %H:%M:%S') if annotation.annotated_at else "N/A"
                    }

                    if not validation:
                        # Never reviewed by the assigned reviewer.
                        item_data["status"] = "Unreviewed"
                        item_data["rejection_reason"] = ""
                        unreviewed_items.append(item_data)
                    elif not validation.validated:
                        # Reviewed and explicitly rejected.
                        item_data["status"] = "Rejected"
                        item_data["rejection_reason"] = validation.description or "No reason provided"
                        rejected_items.append(item_data)

                # Per-pair counts (always reported, regardless of filter).
                pair_rejected = len(rejected_items)
                pair_unreviewed = len(unreviewed_items)
                total_rejected += pair_rejected
                total_unreviewed += pair_unreviewed

                print(f" ❌ Rejected: {pair_rejected}")
                print(f" ⏳ Unreviewed: {pair_unreviewed}")

                # Select detailed items according to the status filter.
                items_to_show = []
                if status_filter == "rejected" or status_filter == "all":
                    items_to_show.extend(rejected_items)
                if status_filter == "unreviewed" or status_filter == "all":
                    items_to_show.extend(unreviewed_items)

                # Bug fix: previously every item was appended to all_items
                # regardless of status_filter, so the overall count and CSV
                # export ignored --status. Collect only matching items.
                all_items.extend(items_to_show)

                if items_to_show:
                    print(f"\n 📝 Detailed Items ({len(items_to_show)}):")
                    for item in sorted(items_to_show, key=lambda x: x["tts_id"]):
                        status_icon = "❌" if item["status"] == "Rejected" else "⏳"
                        print(f" {status_icon} ID: {item['tts_id']} | Status: {item['status']}")
                        if item["status"] == "Rejected" and item["rejection_reason"]:
                            print(f" Reason: {item['rejection_reason']}")

                        # Show truncated sentences for context (80-char preview).
                        orig_preview = item["original_sentence"][:80] + "..." if len(item["original_sentence"]) > 80 else item["original_sentence"]
                        ann_preview = item["annotated_sentence"][:80] + "..." if len(item["annotated_sentence"]) > 80 else item["annotated_sentence"]

                        print(f" Original: {orig_preview}")
                        print(f" Annotated: {ann_preview}")
                        print(f" Annotated at: {item['annotated_at']}")
                        print()

            # Overall summary
            print("\n" + "=" * 80)
            print(" OVERALL SUMMARY")
            print("=" * 80)
            print(f"📊 Total items found: {len(all_items)}")
            print(f"❌ Total rejected: {total_rejected}")
            print(f"⏳ Total unreviewed: {total_unreviewed}")

            # Export to CSV if requested
            if export_csv and all_items:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                csv_filename = f"phase2_rejected_unreviewed_{timestamp}.csv"

                import csv
                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
                    fieldnames = ['tts_id', 'status', 'rejection_reason', 'annotator', 'reviewer',
                                  'filename', 'original_sentence', 'annotated_sentence', 'annotated_at']
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    writer.writeheader()
                    for item in sorted(all_items, key=lambda x: x["tts_id"]):
                        writer.writerow(item)

                print(f"\n💾 Results exported to: {csv_filename}")

        except Exception as e:
            log.error(f"Error generating rejected/unreviewed items report: {e}")
            print(f"❌ Error: {e}")
194
+
195
def list_by_ids(ids_list, export_csv=False):
    """
    Lists specific TTS data items by their IDs and shows their Phase 2 review status.

    For each requested id the chain TTSData -> Annotation -> Annotator ->
    assigned reviewer (conf.REVIEW_MAPPING) -> Validation is resolved; any
    break in that chain prints a warning and the id is skipped.

    Args:
        ids_list (list): List of TTS data IDs to look up
        export_csv (bool): Export results to CSV file
    """
    with get_db() as db:
        try:
            print("=" * 80)
            print(" PHASE 2 STATUS FOR SPECIFIC IDS")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print(f"Requested IDs: {', '.join(map(str, ids_list))}")
            print()

            found_items = []  # fully-resolved items; also the CSV export payload
            not_found = []    # ids with no TTSData row at all

            for tts_id in ids_list:
                # Find the TTS data
                tts_data = db.query(TTSData).filter_by(id=tts_id).first()
                if not tts_data:
                    not_found.append(tts_id)
                    continue

                # Find the annotation for this TTS data
                annotation = db.query(Annotation).filter_by(tts_data_id=tts_id).first()
                if not annotation:
                    print(f"⚠️ ID {tts_id}: No annotation found")
                    continue

                # Find the assigned reviewer for this annotator
                annotator = db.query(Annotator).filter_by(id=annotation.annotator_id).first()
                if not annotator:
                    print(f"⚠️ ID {tts_id}: Annotator not found")
                    continue

                reviewer_name = conf.REVIEW_MAPPING.get(annotator.name)
                if not reviewer_name:
                    print(f"⚠️ ID {tts_id}: No reviewer assigned for annotator {annotator.name}")
                    continue

                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                if not reviewer:
                    print(f"⚠️ ID {tts_id}: Reviewer {reviewer_name} not found in database")
                    continue

                # Check validation status (only the assigned reviewer's verdict counts)
                validation = db.query(Validation).filter_by(
                    annotation_id=annotation.id,
                    validator_id=reviewer.id
                ).first()

                # Default to "Unreviewed" when no validation record exists.
                status = "Unreviewed"
                rejection_reason = ""

                if validation:
                    if validation.validated:
                        status = "Approved"
                    else:
                        status = "Rejected"
                        rejection_reason = validation.description or "No reason provided"

                item_data = {
                    "tts_id": tts_id,
                    "status": status,
                    "rejection_reason": rejection_reason,
                    "annotator": annotator.name,
                    "reviewer": reviewer.name,
                    "filename": tts_data.filename,
                    "original_sentence": tts_data.sentence,
                    "annotated_sentence": annotation.annotated_sentence or "[No annotation]",
                    "annotated_at": annotation.annotated_at.strftime('%Y-%m-%d %H:%M:%S') if annotation.annotated_at else "N/A"
                }

                found_items.append(item_data)

                # Display the item
                status_icon = "✅" if status == "Approved" else "❌" if status == "Rejected" else "⏳"
                print(f"{status_icon} ID: {tts_id} | Status: {status} | Annotator: {annotator.name} | Reviewer: {reviewer.name}")

                if status == "Rejected" and rejection_reason:
                    print(f" Rejection Reason: {rejection_reason}")

                # Previews truncated to 100 chars for console readability.
                orig_preview = tts_data.sentence[:100] + "..." if len(tts_data.sentence) > 100 else tts_data.sentence
                ann_preview = (annotation.annotated_sentence[:100] + "..." if annotation.annotated_sentence and len(annotation.annotated_sentence) > 100
                               else annotation.annotated_sentence or "[No annotation]")

                print(f" Original: {orig_preview}")
                print(f" Annotated: {ann_preview}")
                print(f" Annotated at: {item_data['annotated_at']}")
                print()

            if not_found:
                print(f"⚠️ IDs not found: {', '.join(map(str, not_found))}")

            # Export to CSV if requested
            if export_csv and found_items:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                csv_filename = f"phase2_specific_ids_{timestamp}.csv"

                import csv
                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
                    fieldnames = ['tts_id', 'status', 'rejection_reason', 'annotator', 'reviewer',
                                  'filename', 'original_sentence', 'annotated_sentence', 'annotated_at']
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    writer.writeheader()
                    for item in found_items:
                        writer.writerow(item)

                print(f"💾 Results exported to: {csv_filename}")

        except Exception as e:
            log.error(f"Error looking up specific IDs: {e}")
            print(f"❌ Error: {e}")
313
+
314
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="List rejected or unreviewed items from Phase 2 review process.")
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # 'list' sub-command: full report with optional status/name filters.
    list_parser = subparsers.add_parser('list', help='List rejected/unreviewed items')
    list_parser.add_argument("--status", choices=["rejected", "unreviewed", "all"],
                             default="all", help="Filter by status (default: all)")
    list_parser.add_argument("--reviewer", type=str,
                             help="Filter by specific reviewer name")
    list_parser.add_argument("--annotator", type=str,
                             help="Filter by specific annotator whose work is being reviewed")
    list_parser.add_argument("--csv", action="store_true",
                             help="Export results to CSV file")

    # 'ids' sub-command: status lookup for explicitly given TTS data ids.
    ids_parser = subparsers.add_parser('ids', help='Check status of specific TTS data IDs')
    ids_parser.add_argument("ids", nargs='+', type=int, help="TTS data IDs to check")
    ids_parser.add_argument("--csv", action="store_true",
                            help="Export results to CSV file")

    args = parser.parse_args()

    if args.command == 'list':
        list_rejected_unreviewed_items(status_filter=args.status,
                                       reviewer_filter=args.reviewer,
                                       annotator_filter=args.annotator,
                                       export_csv=args.csv)
    elif args.command == 'ids':
        list_by_ids(args.ids, export_csv=args.csv)
    else:
        # No sub-command given: show usage instead of failing.
        parser.print_help()
scripts/report_annotator_progress.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import os
4
+ from sqlalchemy import func, and_
5
+
6
+ # Add project root to Python path
7
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
8
+ if project_root not in sys.path:
9
+ sys.path.insert(0, project_root)
10
+
11
+ from utils.database import get_db
12
+ from data.models import Annotator, Annotation, AnnotationInterval, TTSData
13
+ from utils.logger import Logger
14
+
15
+ log = Logger()
16
+
17
def generate_annotator_progress_report():
    """
    Generates and prints a progress report for each annotator.
    Progress is defined as the percentage of assigned TTSData items
    that have a non-empty annotation.
    """
    with get_db() as db:
        try:
            annotators = db.query(Annotator).filter(Annotator.is_active == True).all()

            if not annotators:
                log.info("No active annotators found.")
                return

            log.info("--- Annotator Progress Report ---")

            for annotator in annotators:
                # Calculate total assigned items for the annotator
                # (interval bounds are inclusive, hence the +1).
                total_assigned_query = db.query(func.sum(AnnotationInterval.end_index - AnnotationInterval.start_index + 1))\
                    .filter(AnnotationInterval.annotator_id == annotator.id)
                total_assigned_result = total_assigned_query.scalar()
                # SUM over zero rows yields NULL/None, so coalesce to 0.
                total_assigned = total_assigned_result if total_assigned_result is not None else 0

                # Calculate completed items by this annotator within their assigned intervals
                # An item is considered completed if annotated_sentence is not None and not an empty string.
                # NOTE(review): the join treats interval start/end indices as TTSData
                # ids — confirm that assumption against how intervals are created.
                completed_count_query = db.query(func.count(Annotation.id))\
                    .join(TTSData, Annotation.tts_data_id == TTSData.id)\
                    .join(AnnotationInterval,
                          and_(AnnotationInterval.annotator_id == annotator.id,
                               TTSData.id >= AnnotationInterval.start_index,
                               TTSData.id <= AnnotationInterval.end_index))\
                    .filter(Annotation.annotator_id == annotator.id,
                            Annotation.annotated_sentence != None,
                            Annotation.annotated_sentence != "")

                completed_count_result = completed_count_query.scalar()
                completed_count = completed_count_result if completed_count_result is not None else 0

                # Guard against division by zero for annotators with no intervals.
                percentage_completed = 0
                if total_assigned > 0:
                    percentage_completed = (completed_count / total_assigned) * 100

                log.info(f"Annotator: {annotator.name} (ID: {annotator.id})")
                log.info(f" Total Assigned Items: {total_assigned}")
                log.info(f" Completed Items: {completed_count}")
                log.info(f" Progress: {percentage_completed:.2f}%")
                log.info("-" * 30)

        except Exception as e:
            # For errors, we might still want the full log details
            log.error(f"Failed to generate annotator progress report: {e}")
68
+
69
if __name__ == "__main__":
    # No CLI arguments: the report always covers all active annotators.
    # (Removed dead commented-out argparse scaffolding.)
    generate_annotator_progress_report()
scripts/report_review_results.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Phase 2 Review Results Report Script
4
+
5
+ This script generates a comprehensive report of Phase 2 review results,
6
+ showing approval and rejection statistics for each reviewer and overall totals.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from datetime import datetime
13
+ from collections import defaultdict
14
+ from sqlalchemy import func, and_
15
+
16
+ # Add project root to Python path
17
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
18
+ if project_root not in sys.path:
19
+ sys.path.insert(0, project_root)
20
+
21
+ from utils.database import get_db
22
+ from data.models import Annotator, Annotation, Validation, TTSData
23
+ from utils.logger import Logger
24
+ from config import conf
25
+
26
+ log = Logger()
27
+
28
def generate_review_results_report(detailed=False, export_csv=False):
    """
    Generates and prints a review results report for Phase 2 validation.

    For every reviewer in conf.REVIEW_MAPPING, counts approved/rejected
    validations of the assigned annotator's work, prints per-reviewer and
    overall statistics, and optionally exports the per-reviewer rows to CSV.

    Args:
        detailed (bool): If True, shows detailed breakdown by annotator being reviewed.
        export_csv (bool): If True, exports results to CSV file.
    """
    with get_db() as db:
        try:
            # Get all reviewers (users who appear in REVIEW_MAPPING values)
            reviewers = list(conf.REVIEW_MAPPING.values())

            if not reviewers:
                print("No reviewers found in REVIEW_MAPPING configuration.")
                return

            print("=" * 80)
            print(" PHASE 2 REVIEW RESULTS REPORT")
            print("=" * 80)
            print(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print()

            overall_approved = 0
            overall_rejected = 0
            overall_total = 0
            csv_data = []

            for reviewer_name in reviewers:
                # Get reviewer object
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()
                if not reviewer:
                    print(f"⚠️ Reviewer '{reviewer_name}' not found in database")
                    continue

                # Find which annotator this reviewer is assigned to review
                # (reverse lookup of the annotator -> reviewer mapping).
                assigned_annotator = None
                for annotator_name, assigned_reviewer in conf.REVIEW_MAPPING.items():
                    if assigned_reviewer == reviewer_name:
                        assigned_annotator = annotator_name
                        break

                if not assigned_annotator:
                    print(f"⚠️ No annotator assignment found for reviewer '{reviewer_name}'")
                    continue

                # Get annotator being reviewed
                annotator = db.query(Annotator).filter_by(name=assigned_annotator).first()
                if not annotator:
                    print(f"⚠️ Assigned annotator '{assigned_annotator}' not found in database")
                    continue

                print(f"\n📋 REVIEWER: {reviewer_name}")
                print(f" Reviewing work by: {assigned_annotator}")
                print("-" * 60)

                # All validations this reviewer made on the assigned annotator's work.
                validations_query = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Validation.validator_id == reviewer.id,
                        Annotation.annotator_id == annotator.id
                    )

                total_validations = validations_query.count()
                approved_validations = validations_query.filter(Validation.validated == True).count()
                rejected_validations = validations_query.filter(Validation.validated == False).count()

                # Calculate percentages (guarding against division by zero)
                approved_percentage = (approved_validations / total_validations * 100) if total_validations > 0 else 0
                rejected_percentage = (rejected_validations / total_validations * 100) if total_validations > 0 else 0

                print(f" 📊 Total Reviews: {total_validations:,}")
                print(f" ✅ Approved: {approved_validations:,} ({approved_percentage:.1f}%)")
                print(f" ❌ Rejected: {rejected_validations:,} ({rejected_percentage:.1f}%)")

                # Update overall totals
                overall_total += total_validations
                overall_approved += approved_validations
                overall_rejected += rejected_validations

                # Collect CSV data
                if export_csv:
                    csv_data.append({
                        'reviewer': reviewer_name,
                        'reviewed_annotator': assigned_annotator,
                        'total_reviews': total_validations,
                        'approved': approved_validations,
                        'rejected': rejected_validations,
                        'approval_rate': approved_percentage
                    })

                # Show detailed rejection reasons if requested
                if detailed and rejected_validations > 0:
                    print("\n 📝 Rejection Reasons:")
                    rejection_reasons = db.query(Validation.description)\
                        .join(Annotation, Validation.annotation_id == Annotation.id)\
                        .filter(
                            Validation.validator_id == reviewer.id,
                            Annotation.annotator_id == annotator.id,
                            Validation.validated == False,
                            Validation.description.isnot(None),
                            Validation.description != ""
                        ).all()

                    # Aggregate identical reason strings into counts.
                    reason_counts = {}
                    for (reason,) in rejection_reasons:
                        if reason:
                            reason_counts[reason] = reason_counts.get(reason, 0) + 1

                    for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True):
                        print(f" • {reason}: {count} times")

                    if not reason_counts:
                        print(" (No reasons provided)")

                # Show annotation coverage (how much of assigned work has been reviewed)
                total_annotations_query = db.query(Annotation)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Annotation.annotated_sentence.isnot(None),
                        Annotation.annotated_sentence != ""
                    )
                total_annotations = total_annotations_query.count()

                coverage_percentage = (total_validations / total_annotations * 100) if total_annotations > 0 else 0
                print(f" 📈 Review Coverage: {total_validations:,}/{total_annotations:,} ({coverage_percentage:.1f}%)")

            # Overall summary
            print("\n" + "=" * 80)
            print(" OVERALL SUMMARY")
            print("=" * 80)

            overall_approved_percentage = (overall_approved / overall_total * 100) if overall_total > 0 else 0
            overall_rejected_percentage = (overall_rejected / overall_total * 100) if overall_total > 0 else 0

            print(f"📊 Total Reviews Across All Reviewers: {overall_total:,}")
            print(f"✅ Total Approved: {overall_approved:,} ({overall_approved_percentage:.1f}%)")
            print(f"❌ Total Rejected: {overall_rejected:,} ({overall_rejected_percentage:.1f}%)")

            # Quality score (approval rate)
            if overall_total > 0:
                print(f"🎯 Overall Quality Score: {overall_approved_percentage:.1f}% approval rate")

                # Quality assessment buckets by approval rate.
                if overall_approved_percentage >= 95:
                    quality_rating = "🌟 Excellent"
                elif overall_approved_percentage >= 85:
                    quality_rating = "👍 Good"
                elif overall_approved_percentage >= 75:
                    quality_rating = "⚠️ Fair"
                else:
                    quality_rating = "🔴 Needs Improvement"

                print(f"📊 Quality Rating: {quality_rating}")

            print("=" * 80)

            # Export to CSV if requested
            if export_csv and csv_data:
                try:
                    import pandas as pd
                    df = pd.DataFrame(csv_data)
                    filename = f"review_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
                    df.to_csv(filename, index=False)
                    # Bug fix: previously printed a literal "(unknown)"
                    # placeholder instead of the actual output filename.
                    print(f"\n📄 Results exported to: {filename}")
                except ImportError:
                    print("\n⚠️ CSV export requires pandas. Install with: pip install pandas")

        except Exception as e:
            log.error(f"Failed to generate review results report: {e}")
            print(f"❌ Error generating report: {e}")
200
+
201
def generate_annotator_breakdown_report():
    """
    Generates a report showing how each annotator's work was reviewed.

    Walks conf.REVIEW_MAPPING (annotator -> reviewer), loads every
    Validation the reviewer made on that annotator's annotations, and
    prints approval/rejection counts, a performance rating, and the
    top rejection reasons.
    """
    with get_db() as db:
        try:
            print("\n" + "=" * 80)
            print(" ANNOTATOR PERFORMANCE BREAKDOWN")
            print("=" * 80)

            # Get all annotators who have been reviewed
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
                annotator = db.query(Annotator).filter_by(name=annotator_name).first()
                reviewer = db.query(Annotator).filter_by(name=reviewer_name).first()

                # Silently skip pairs missing from the database.
                if not annotator or not reviewer:
                    continue

                print(f"\n👤 ANNOTATOR: {annotator_name}")
                print(f" Reviewed by: {reviewer_name}")
                print("-" * 60)

                # Get validation stats for this annotator's work
                validations = db.query(Validation)\
                    .join(Annotation, Validation.annotation_id == Annotation.id)\
                    .filter(
                        Annotation.annotator_id == annotator.id,
                        Validation.validator_id == reviewer.id
                    ).all()

                if not validations:
                    print(" 📊 No reviews completed yet")
                    continue

                total = len(validations)
                approved = sum(1 for v in validations if v.validated)
                rejected = total - approved

                # total > 0 is guaranteed here, but keep the guard for safety.
                approved_percentage = (approved / total * 100) if total > 0 else 0
                rejected_percentage = (rejected / total * 100) if total > 0 else 0

                print(f" 📊 Total Reviewed: {total:,}")
                print(f" ✅ Approved: {approved:,} ({approved_percentage:.1f}%)")
                print(f" ❌ Rejected: {rejected:,} ({rejected_percentage:.1f}%)")

                # Performance rating buckets based on approval rate.
                if approved_percentage >= 95:
                    rating = "🌟 Excellent"
                elif approved_percentage >= 85:
                    rating = "👍 Good"
                elif approved_percentage >= 75:
                    rating = "⚠️ Fair"
                elif approved_percentage >= 60:
                    rating = "🔴 Needs Improvement"
                else:
                    rating = "💥 Poor"

                print(f" 📈 Performance: {rating}")

                # Show most common rejection reasons if any
                if rejected > 0:
                    rejected_validations = [v for v in validations if not v.validated and v.description]
                    if rejected_validations:
                        print(" 📝 Top Rejection Reasons:")
                        reason_counts = defaultdict(int)
                        for v in rejected_validations:
                            if v.description:
                                reason_counts[v.description.strip()] += 1

                        # Only the three most frequent reasons are shown.
                        for reason, count in sorted(reason_counts.items(), key=lambda x: x[1], reverse=True)[:3]:
                            print(f" • {reason}: {count} times")

        except Exception as e:
            log.error(f"Failed to generate annotator breakdown report: {e}")
            print(f"❌ Error generating annotator breakdown: {e}")
276
+
277
+
278
def generate_quick_summary():
    """Generate a quick one-line summary of review results."""
    with get_db() as db:
        try:
            # Count every validation record, then split into approved/rejected.
            total = db.query(Validation).count()
            if not total:
                print("No review data found.")
                return

            approved = db.query(Validation).filter(Validation.validated == True).count()
            rejected = total - approved
            rate = approved / total * 100

            print(f"📊 QUICK SUMMARY: {total:,} total reviews | {approved:,} approved ({rate:.1f}%) | {rejected:,} rejected ({100-rate:.1f}%)")

        except Exception as e:
            print(f"❌ Error generating summary: {e}")
296
+
297
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate Phase 2 review results report.")
    parser.add_argument("--detailed", action="store_true",
                        help="Show detailed breakdown including rejection reasons")
    parser.add_argument("--annotator-breakdown", action="store_true",
                        help="Show performance breakdown by annotator")
    parser.add_argument("--csv", action="store_true",
                        help="Export results to CSV file")
    parser.add_argument("--quick", action="store_true",
                        help="Show only a quick summary line")

    args = parser.parse_args()

    # --quick replaces the full report; --annotator-breakdown appends to either.
    if args.quick:
        generate_quick_summary()
    else:
        generate_review_results_report(detailed=args.detailed, export_csv=args.csv)

    if args.annotator_breakdown:
        generate_annotator_breakdown_report()
scripts/update_annotator_name.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import sys
3
+ import os
4
+
5
+ # Add project root to Python path
6
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
7
+ if project_root not in sys.path:
8
+ sys.path.insert(0, project_root)
9
+
10
+ from utils.database import get_db
11
+ from data.models import Annotator
12
+ from utils.logger import Logger
13
+
14
+ log = Logger()
15
+
16
def update_annotator_name(old_name: str, new_name: str):
    """
    Rename an existing annotator.

    Only the name changes; the password and annotation intervals are left
    untouched. Logs an error and aborts if the new name is already taken
    or no annotator with the old name exists.
    """
    with get_db() as db:
        try:
            # Guard clause: the target name must be free.
            name_taken = db.query(Annotator).filter(Annotator.name == new_name).first()
            if name_taken:
                log.error(f"Error: An annotator with the name '{new_name}' already exists.")
                return

            record = db.query(Annotator).filter(Annotator.name == old_name).first()
            if not record:
                log.error(f"Error: Annotator with name '{old_name}' not found.")
                return

            record.name = new_name
            db.commit()

            log.info(f"Successfully updated annotator name from '{old_name}' to '{new_name}'.")
            log.info(f"ID: {record.id}, New Name: {record.name}")
            log.info("Password and annotation intervals remain unchanged.")

        except Exception as e:
            # Undo any partial change before surfacing the failure.
            db.rollback()
            log.error(f"Failed to update annotator name: {e}")
44
+
45
if __name__ == "__main__":
    # CLI entry point: python update_annotator_name.py <old_name> <new_name>
    cli = argparse.ArgumentParser(description="Update an annotator's name.")
    cli.add_argument("old_name", type=str, help="The current name of the annotator.")
    cli.add_argument("new_name", type=str, help="The new name for the annotator.")
    opts = cli.parse_args()

    update_annotator_name(opts.old_name, opts.new_name)