Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed 7 days ago

Commit

5dc5cb8

1 Parent(s): 6cf928e

progress tracking

Browse files

Files changed (2) hide show

components/review_dashboard_page.py +54 -69
data/models.py +1 -20

components/review_dashboard_page.py CHANGED Viewed

@@ -12,11 +12,6 @@ from config import conf
 from utils.database import get_db
 from data.models import Annotation, TTSData, Annotator, Validation
 from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
-from utils.user_progress import (
-    get_next_unreviewed_annotation,
-    update_user_progress,
-    get_annotations_from_position
-)
 log = Logger()
 LOADER = CloudServerAudioLoader(conf.FTP_URL)
@@ -307,7 +302,7 @@ class ReviewDashboardPage:
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
-            # Load annotations with PROGRESS TRACKING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
@@ -317,28 +312,33 @@ class ReviewDashboardPage:
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
-                # 🎯 PROGRESS TRACKING: Find next unreviewed annotation position
-                next_annotation_id, next_position = get_next_unreviewed_annotation(db, user_id, target_annotator_obj.id)
-                # Load batch size for responsive loading
-                INITIAL_BATCH_SIZE = 10  # Increased from 5 to 10 for better UX
-                # Load annotations starting from the next unreviewed position
-                annotations_data = get_annotations_from_position(db, target_annotator_obj.id, next_position, INITIAL_BATCH_SIZE)
                 # Get total count for progress info (this is fast)
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
-                log.info(f"Progress-aware load: Starting from position {next_position}, loaded {len(annotations_data)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
-                for annotation in annotations_data:
-                    # Get TTS data
-                    tts_data = annotation.tts_data
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
@@ -346,8 +346,8 @@ class ReviewDashboardPage:
                     items.append({
                         "annotation_id": annotation.id,
                         "tts_id": annotation.tts_data_id,
-                        "filename": tts_data.filename,
-                        "sentence": tts_data.sentence,
                         "annotated_sentence": annotated_sentence_display,
                         "is_deleted": is_deleted,
                         "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
@@ -355,28 +355,36 @@ class ReviewDashboardPage:
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
-                # 🎯 PROGRESS TRACKING: Start from first item (index 0) since we loaded from the correct position
                 initial_idx = 0
                 # Set initial display
                 if items:
                     initial_item = items[initial_idx]
-                    review_info_text = f"🔍 **Phase 2 Review Mode** - Continuing from position {next_position + 1}/{total_count}. Loaded {len(items)} items."
-                    # Load validation status for the first item immediately
-                    try:
-                        annotation_obj = db.query(Annotation).filter_by(id=initial_item["annotation_id"]).first()
-                        if annotation_obj:
-                            validation_status, is_deleted = get_validation_status_for_item(db, initial_item["annotation_id"], user_id, annotation_obj)
-                            initial_item["validation_status"] = validation_status
-                            initial_item["is_deleted"] = is_deleted
-                            initial_item["validation_loaded"] = True
-                            if is_deleted:
-                                initial_item["annotated_sentence"] = "[DELETED ANNOTATION]"
-                    except Exception as e:
-                        log.warning(f"Failed to load initial validation status: {e}")
                     rejection_reason_val = ""
                     rejection_visible_val = False
                     if initial_item["validation_status"].startswith("Rejected"):
@@ -386,8 +394,6 @@ class ReviewDashboardPage:
                             rejection_reason_val = initial_item["validation_status"][start_paren+1:end_paren]
                         rejection_visible_val = True
-                    log.info(f"🎯 User {username} resuming review from position {next_position}, annotation ID {initial_item['annotation_id']}")
                     return (
                         items,
                         initial_idx,
@@ -405,9 +411,15 @@ class ReviewDashboardPage:
                         gr.update(value="❌ Reject")  # Reset reject button
                     )
                 else:
-                    # All items have been reviewed
-                    review_info_text = f"🎉 **Review Complete!** - All {total_count} annotations have been reviewed for {target_annotator}."
-                    return [], 0, review_info_text, "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
@@ -537,33 +549,6 @@ class ReviewDashboardPage:
                 db.commit()
                 log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
-                # 🎯 UPDATE USER PROGRESS TRACKING
-                try:
-                    username = session.get("username")
-                    if username:
-                        # Find target annotator for this user
-                        target_annotator = None
-                        for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
-                            if reviewer_name == username:
-                                target_annotator = annotator_name
-                                break
-                        if target_annotator:
-                            target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
-                            if target_annotator_obj:
-                                # Calculate the current position in the review list
-                                current_position = db.query(Annotation).filter(
-                                    Annotation.annotator_id == target_annotator_obj.id,
-                                    Annotation.id <= annotation_id
-                                ).count() - 1  # Convert to 0-based index
-                                # Update user progress
-                                update_user_progress(db, user_id, target_annotator_obj.id, annotation_id, current_position)
-                                log.info(f"🎯 Updated progress for user {user_id}: annotation {annotation_id} at position {current_position}")
-                except Exception as e:
-                    log.warning(f"Failed to update user progress: {e}")
-                    # Don't fail the validation save if progress tracking fails
                 items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
                 # Show rejection reason input only if rejected, otherwise hide and clear

 from utils.database import get_db
 from data.models import Annotation, TTSData, Annotator, Validation
 from data.repository.annotator_workload_repo import AnnotatorWorkloadRepo
 log = Logger()
 LOADER = CloudServerAudioLoader(conf.FTP_URL)
                 log.warning(f"No target annotator found for reviewer {username}")
                 return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+            # Load annotations from target annotator with FAST INITIAL LOADING
             with get_db() as db:
                 # Get target annotator's ID
                 target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
                 log.info(f"Found target annotator with ID: {target_annotator_obj.id}")
+                # FAST INITIAL QUERY: Load only essential data without complex validation processing
+                # Reduced batch size for instant loading in HuggingFace spaces
+                INITIAL_BATCH_SIZE = 5  # Load only 5 items initially for instant response
+                # Simple query to get basic annotation data quickly
+                initial_query = db.query(
+                    Annotation,
+                    TTSData.filename,
+                    TTSData.sentence
+                ).join(
+                    TTSData, Annotation.tts_data_id == TTSData.id
+                ).filter(
+                    Annotation.annotator_id == target_annotator_obj.id
+                ).order_by(Annotation.id).limit(INITIAL_BATCH_SIZE)
+                initial_results = initial_query.all()
                 # Get total count for progress info (this is fast)
                 total_count = db.query(Annotation).filter(
                     Annotation.annotator_id == target_annotator_obj.id
                 ).count()
+                log.info(f"Fast initial load: {len(initial_results)} annotations out of {total_count} total for target annotator ID {target_annotator_obj.id}")
                 # Process items with minimal data - validation status will be loaded on-demand
                 items = []
+                for annotation, filename, sentence in initial_results:
                     # Check if annotation is deleted (minimal processing)
                     is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                     annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
                     items.append({
                         "annotation_id": annotation.id,
                         "tts_id": annotation.tts_data_id,
+                        "filename": filename,
+                        "sentence": sentence,
                         "annotated_sentence": annotated_sentence_display,
                         "is_deleted": is_deleted,
                         "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                         "validation_loaded": False  # Track if validation status has been loaded
                     })
+                # Find the first item that is not reviewed (prioritize non-deleted annotations)
                 initial_idx = 0
+                if items:
+                    found_unreviewed = False
+                    # First, try to find unreviewed non-deleted annotations
+                    for i, item_data in enumerate(items):
+                        if (item_data["validation_status"] == "Not Reviewed" and
+                            not item_data.get("is_deleted", False)):
+                            initial_idx = i
+                            found_unreviewed = True
+                            break
+                    # If no unreviewed non-deleted items, look for any unreviewed items
+                    if not found_unreviewed:
+                        for i, item_data in enumerate(items):
+                            if item_data["validation_status"].startswith("Not Reviewed"):
+                                initial_idx = i
+                                found_unreviewed = True
+                                break
+                    # If no unreviewed items at all, use the last item
+                    if not found_unreviewed:
+                        initial_idx = len(items) - 1 if items else 0
                 # Set initial display
                 if items:
                     initial_item = items[initial_idx]
+                    review_info_text = f"🔍 **Phase 2 Review Mode** - Reviewing assigned annotations. Loaded {len(items)} of {total_count} total items."
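+                    # Ensure correct order of return values for 14 outputs (same count as the empty-items return below)
+                    # items, idx, review_info, tts_id, filename, sentence, ann_sentence, annotated_at, validation_status, annotator_placeholder, audio_update, rejection_reason_update, plus two trailing values (presumably the rejection-visible flag and the reject-button update; confirm against the return tuple)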
                     rejection_reason_val = ""
                     rejection_visible_val = False
                     if initial_item["validation_status"].startswith("Rejected"):
                             rejection_reason_val = initial_item["validation_status"][start_paren+1:end_paren]
                         rejection_visible_val = True
                     return (
                         items,
                         initial_idx,
                         gr.update(value="❌ Reject")  # Reset reject button
                     )
                 else:
+                    # Ensure correct order and number of return values for empty items (14 outputs)
+                    return [], 0, f"🔍 **Phase 2 Review Mode** - No annotations found for review.", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
+                # except Exception as e:
+                #     log.error(f"Error loading review items: {e}")
+                #     sentry_sdk.capture_exception(e)
+                #     gr.Error(f"Failed to load review data: {e}")
+                #     # Ensure correct order and number of return values for error case (14 outputs)
+                #     return [], 0, "", "", "", "", "", "", "", "", gr.update(value=None, autoplay=False), gr.update(visible=False, value=""), False, gr.update(value="❌ Reject")
         def show_current_review_item_fn(items, idx, session):
             if not items or idx >= len(items) or idx < 0:
                 db.commit()
                 log.info(f"Validation saved successfully for annotation_id: {annotation_id}")
                 items[idx]["validation_status"] = "Approved" if approved else f"Rejected ({rejection_reason})" if rejection_reason else "Rejected"
                 # Show rejection reason input only if rejected, otherwise hide and clear

data/models.py CHANGED Viewed

@@ -158,23 +158,4 @@ class Validation(Base):
     validated_at = Column(DateTime, nullable=False)
     annotation = relationship("Annotation")
-    validator = relationship("Annotator", foreign_keys=[validator_id])  # Fixed: should reference Annotator
-# --------------------------------------------------------------------------- #
-#                                UserProgress                                 #
-# --------------------------------------------------------------------------- #
-class UserProgress(Base):
-    __tablename__ = "user_progress"
-    id = Column(Integer, primary_key=True)
-    user_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
-    target_annotator_id = Column(Integer, ForeignKey("annotators.id"), nullable=False)
-    last_reviewed_annotation_id = Column(Integer, ForeignKey("annotations.id"), nullable=True)
-    last_position = Column(Integer, default=0)  # Position in the review list
-    updated_at = Column(DateTime, nullable=False)
-    # Relationships
-    user = relationship("Annotator", foreign_keys=[user_id])
-    target_annotator = relationship("Annotator", foreign_keys=[target_annotator_id])
-    last_reviewed_annotation = relationship("Annotation", foreign_keys=[last_reviewed_annotation_id])

     validated_at = Column(DateTime, nullable=False)
     annotation = relationship("Annotation")
+    validator = relationship("Annotator", foreign_keys=[validator_id])  # Fixed: should reference Annotator