# src/implementations/document_service.py
import gc
import os
import shutil
from pathlib import Path
from typing import Dict, List, Tuple
from uuid import uuid4

from fastapi import BackgroundTasks, HTTPException, UploadFile

from config.config import settings
from src.db.mongodb_store import MongoDBStore
from src.models import BatchUploadResponse, DocumentInfo, DocumentResponse
from src.utils.document_processor import DocumentProcessor
from src.utils.logger import logger
from src.vectorstores.chroma_vectorstore import ChromaVectorStore


class DocumentService:
    """Manage the full lifecycle of uploaded documents.

    Responsibilities:
        * save uploaded files to a permanent uploads directory
          (``settings.UPLOADS_DIR``) under a UUID-prefixed filename,
        * reject duplicates and unsupported formats,
        * persist document metadata in MongoDB,
        * schedule background chunk-embedding into a Chroma vector store,
        * delete documents (file + metadata) on request.
    """

    def __init__(
        self,
        doc_processor: DocumentProcessor,
        mongodb: MongoDBStore
    ):
        """Store collaborators and ensure the uploads directory exists.

        Args:
            doc_processor: Parses files into chunks; also exposes
                ``supported_formats`` (iterable of extensions).
            mongodb: Metadata store for document records.
        """
        self.doc_processor = doc_processor
        self.mongodb = mongodb
        # Permanent on-disk home for uploaded files.
        self.permanent_dir = Path(settings.UPLOADS_DIR)
        self.permanent_dir.mkdir(parents=True, exist_ok=True)

    async def check_duplicate_filename(self, filename: str) -> bool:
        """Check if a document with the same original filename exists.

        Args:
            filename: Original (client-supplied) filename to check.

        Returns:
            True if a duplicate exists, False otherwise.

        NOTE(review): this fetches every document; a filtered MongoDB
        query with an index on ``filename`` would scale better.
        """
        documents = await self.mongodb.get_all_documents()
        return any(doc.get('filename') == filename for doc in documents)

    async def process_documents(
        self,
        files: List[UploadFile],
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> BatchUploadResponse:
        """Process multiple document uploads and summarize the outcome.

        Args:
            files: Uploaded files to ingest.
            vector_store: Destination for chunk embeddings.
            background_tasks: FastAPI task queue for deferred embedding.

        Returns:
            A BatchUploadResponse listing processed and failed files.
        """
        processed_files, failed_files = await self._handle_file_uploads(
            files,
            vector_store,
            background_tasks
        )

        return BatchUploadResponse(
            message=f"Processed {len(processed_files)} documents with {len(failed_files)} failures",
            processed_files=processed_files,
            failed_files=failed_files
        )

    async def _handle_file_uploads(
        self,
        files: List[UploadFile],
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> Tuple[List[DocumentResponse], List[dict]]:
        """Validate and process each upload, collecting successes/failures.

        A failure of one file never aborts the batch: validation failures
        and processing exceptions are recorded per-file and the loop
        continues.

        Returns:
            (processed responses, failed-file entries of ``{filename, error}``).
        """
        processed_files: List[DocumentResponse] = []
        failed_files: List[dict] = []

        for file in files:
            try:
                # Reject re-uploads of a name that is already stored.
                if await self.check_duplicate_filename(file.filename):
                    failed_files.append(self._create_failed_file_entry(
                        file.filename,
                        "A document with this name already exists. Please upload another document."
                    ))
                    continue

                # Reject extensions the processor cannot parse.
                if not self._is_supported_format(file.filename):
                    failed_files.append(self._create_failed_file_entry(
                        file.filename,
                        "Unsupported file format"
                    ))
                    continue

                document_response = await self._process_single_file(
                    file,
                    vector_store,
                    background_tasks
                )
                processed_files.append(document_response)

            except Exception as e:
                logger.error(
                    f"Error processing file {file.filename}: {str(e)}")
                failed_files.append(self._create_failed_file_entry(
                    file.filename,
                    str(e)
                ))

        return processed_files, failed_files

    async def _process_single_file(
        self,
        file: UploadFile,
        vector_store: ChromaVectorStore,
        background_tasks: BackgroundTasks
    ) -> DocumentResponse:
        """Persist one upload, record it, and queue vector-store indexing.

        On any failure, best-effort cleanup removes the saved file and the
        MongoDB record before re-raising (the caller records the failure).

        Returns:
            DocumentResponse with status "processing" (embedding happens in
            a background task).
        """
        document_id = str(uuid4())
        # UUID prefix keeps stored names unique even across equal uploads.
        filename = f"{document_id}_{file.filename}"
        file_path = self.permanent_dir / filename
        # BUGFIX: was the literal string "/docs/(unknown)". The URL must
        # carry the stored (UUID-prefixed) name because delete_document()
        # recovers the on-disk filename from url_path.
        url_path = f"/docs/{filename}"

        try:
            # Save file to its permanent location; the context manager
            # guarantees the destination handle is closed.
            with open(file_path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)

            # Release the upload's temporary file handle explicitly.
            await file.close()

            # Parse the document into chunks, with extra cleanup for Excel.
            try:
                processed_doc = await self.doc_processor.process_document(file_path)

                # For Excel files, nudge the GC so pandas releases any
                # lingering file handles (Windows cannot delete open files).
                if file_path.suffix.lower() in ['.xlsx', '.xls']:
                    gc.collect()

            except Exception as proc_error:
                logger.error(f"Error processing document: {str(proc_error)}")
                raise

            # Size the saved file once; reused in the response below.
            file_size = os.path.getsize(file_path)

            # Store metadata in MongoDB, keyed by document_id.
            await self.mongodb.store_document(
                document_id=document_id,
                filename=file.filename,
                content_type=file.content_type,
                file_size=file_size,
                url_path=url_path,
                source="user_upload"
            )

            # Embed and index the chunks after the response is returned.
            background_tasks.add_task(
                self._process_for_vector_store,
                processed_doc['chunks'],
                vector_store,
                document_id,
                file.filename
            )

            return DocumentResponse(
                message="Document uploaded successfully",
                document_id=document_id,
                status="processing",
                document_info=DocumentInfo(
                    original_filename=file.filename,
                    size=file_size,
                    content_type=file.content_type,
                    url_path=url_path
                )
            )

        except Exception as e:
            # Clean up the saved file if it was created.
            if file_path.exists():
                try:
                    file_path.unlink()
                except Exception as cleanup_error:
                    logger.error(
                        f"Error cleaning up file {file_path}: {str(cleanup_error)}")

            # Clean up the MongoDB record if it was created (delete of a
            # never-stored id is expected to be a harmless no-op).
            try:
                await self.mongodb.delete_document(document_id)
            except Exception as db_cleanup_error:
                logger.error(
                    f"Error cleaning up MongoDB document {document_id}: {str(db_cleanup_error)}")

            logger.error(f"Error processing file {file.filename}: {str(e)}")
            raise

    async def _process_for_vector_store(
        self,
        chunks: List[str],
        vector_store: ChromaVectorStore,
        document_id: str,
        filename: str
    ):
        """Embed document chunks and store them in the vector store.

        Runs as a FastAPI background task; chunk ids are derived from
        ``document_id`` so a document's chunks can be located later.

        Args:
            chunks: Text chunks produced by the document processor.
            vector_store: Chroma store receiving the embeddings.
            document_id: Owning document's id (used in chunk ids/metadata).
            filename: Original filename, recorded in chunk metadata.
        """
        try:
            # Deterministic per-chunk ids tied to the owning document.
            chunk_ids = [
                f"{document_id}-chunk-{i}" for i in range(len(chunks))]

            # Embed all chunks in one call.
            embeddings = vector_store.embedding_function(chunks)

            # Per-chunk metadata for filtering and provenance.
            metadatas = [{
                'document_id': document_id,
                'source_file': filename,
                'chunk_index': i,
                'total_chunks': len(chunks)
            } for i in range(len(chunks))]

            vector_store.add_documents(
                documents=chunks,
                embeddings=embeddings,
                metadatas=metadatas,
                ids=chunk_ids
            )

            # BUGFIX: log message previously contained the literal
            # placeholder "(unknown)" instead of the filename.
            logger.info(
                f"Successfully processed document {filename} (ID: {document_id}) into {len(chunks)} chunks")

        except Exception as e:
            logger.error(
                f"Error processing document {filename} (ID: {document_id}) for vector store: {str(e)}")
            raise

    def _is_supported_format(self, filename: str) -> bool:
        """Return True if *filename* ends with a supported extension."""
        lowered = filename.lower()
        # startswith/endswith accept a tuple: one call, no Python-level loop.
        return lowered.endswith(tuple(self.doc_processor.supported_formats))

    def _create_failed_file_entry(self, filename: str, error: str) -> dict:
        """Build the ``{filename, error}`` entry reported for a failed file."""
        return {
            "filename": filename,
            "error": error
        }

    async def delete_document(self, document_id: str) -> bool:
        """Delete a document's file and its MongoDB record.

        Args:
            document_id: Id of the document to remove.

        Returns:
            True if the MongoDB record was deleted, False if no such
            document exists.

        Raises:
            Exception: re-raised after logging if any step fails.
        """
        try:
            doc = await self.mongodb.get_document(document_id)
            if not doc:
                return False

            # The stored filename is the last path segment of url_path
            # (written as "/docs/<uuid>_<original>" at upload time).
            filename = doc['url_path'].split('/')[-1]
            file_path = self.permanent_dir / filename

            # Delete the physical file if it exists.
            if file_path.exists():
                file_path.unlink()

            # Delete the metadata record last.
            return await self.mongodb.delete_document(document_id)

        except Exception as e:
            logger.error(f"Error deleting document: {str(e)}")
            raise

    def cleanup(self):
        """Remove the uploads directory, but only if it is empty."""
        if self.permanent_dir.exists() and not any(self.permanent_dir.iterdir()):
            self.permanent_dir.rmdir()