import os
import pymupdf4llm

def process_pdf(pdf_path: str) -> str:
    """
    Extracts the content of a PDF file as Markdown text using pymupdf4llm.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        The document content as a Markdown-formatted string.

    Raises:
        ValueError: If the PDF cannot be opened or converted. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        # pymupdf4llm's conversion entry point is to_markdown(); the package
        # does not provide an extract_text() function.
        return pymupdf4llm.to_markdown(pdf_path)
    except Exception as e:
        # Keep the ValueError callers rely on, but chain the cause so the
        # original traceback is not lost.
        raise ValueError(f"Error processing PDF: {str(e)}") from e

def read_text_file(file_path: str) -> str:
    """
    Reads the full contents of a UTF-8 text file (e.g. .txt or .md).

    Args:
        file_path: Path to the text file on disk.

    Returns:
        The decoded file contents, without a leading byte-order mark.

    Raises:
        ValueError: If the file cannot be opened or decoded. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading
        # BOM, which some editors write and which would otherwise show up as
        # a stray '\ufeff' at the start of the text.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return f.read()
    except Exception as e:
        # Keep the ValueError callers rely on, but chain the cause so the
        # original traceback is not lost.
        raise ValueError(f"Error reading text file: {str(e)}") from e

def format_prompt(output_format: str) -> str:
    """
    Returns the prompt fragment that tells the model which output format to use.

    Args:
        output_format: Either "json" or "csv"; matched case-insensitively.

    Returns:
        An instruction sentence followed by a two-card example written in the
        requested format.

    Raises:
        ValueError: If output_format is neither "json" nor "csv".
    """
    # Normalize once rather than lower-casing in every branch.
    normalized = output_format.lower()

    if normalized == "json":
        return """You only respond in JSON format. Follow the example below.

    EXAMPLE:
    [
        {"question": "What is AI?", "answer": "Artificial Intelligence."},
        {"question": "What is ML?", "answer": "Machine Learning."}
    ]
    """

    if normalized == "csv":
        return """You only respond with cards in CSV format. Follow the example below.

    EXAMPLE:
        "What is AI?", "Artificial Intelligence."
        "What is ML?", "Machine Learning."
    """

    # Fail loudly: falling off the end would return None, which violates the
    # declared `-> str` return type.
    raise ValueError(f"Unsupported output format: {output_format!r}")

# def extract_flashcards(text: str, output_format: str, pipeline: str) -> str:
#     """
#     Extracts flashcards from the input text using the LLM and formats them in CSV or JSON.
#     """
#     prompt = f"""You are an expert flashcard creator. You always include a single knowledge item per flashcard.

#     {format_prompt(output_format)}


#     Extract flashcards from the user's text:

#     {text}

#     Do not include the prompt or any other unnecessary information in the flashcards.
#     Do not include triple ticks (```) or any other code blocks in the flashcards.
#     """
#     # TODO:
#     response = pipeline.generate_flashcards("json", prompt)
#     return response

def process_file(file_obj, output_format: str, pipeline) -> str:
    """
    Turns an uploaded file into flashcards.

    The file's path is read from ``file_obj.name`` and its extension
    (case-insensitive) selects the loader: ``.pdf`` goes through
    ``process_pdf``; ``.txt`` and ``.md`` go through ``read_text_file``.
    The loaded text is then passed to ``pipeline.generate_flashcards``
    together with ``output_format``.

    Raises:
        ValueError: If the extension is not .pdf, .txt or .md.
    """
    source_path = file_obj.name
    _, extension = os.path.splitext(source_path)
    extension = extension.lower()

    # Reject unknown extensions before touching the file.
    if extension not in ('.pdf', '.txt', '.md'):
        raise ValueError("Unsupported file type.")

    loader = process_pdf if extension == '.pdf' else read_text_file
    content = loader(source_path)

    return pipeline.generate_flashcards(output_format, content)

def process_text_input(output_format: str, input_text: str, pipeline=None) -> str:
    """
    Generates flashcards from text supplied directly by the user.

    Args:
        output_format: Output format forwarded to the pipeline.
        input_text: Text to extract flashcards from.
        pipeline: Object providing ``generate_flashcards(output_format, text)``.
            When omitted, a module-level ``pipeline`` is used if one exists.

    Returns:
        Whatever ``pipeline.generate_flashcards`` returns for the given text.

    Raises:
        ValueError: If input_text is None, empty or whitespace-only, or if no
            pipeline is available.
    """
    # `not input_text` also covers None, which has no .strip().
    if not input_text or not input_text.strip():
        raise ValueError("No text provided.")

    if pipeline is None:
        # Stay compatible with two-argument callers that rely on a
        # module-level `pipeline`; the parameter shadows that name here, so
        # it has to be looked up through globals().
        pipeline = globals().get("pipeline")
    if pipeline is None:
        raise ValueError("No pipeline provided.")

    return pipeline.generate_flashcards(output_format, input_text)