File size: 2,389 Bytes
4d17caa
 
 
82915e5
b8d2f65
82915e5
4d17caa
b8d2f65
4d17caa
f5b41ec
4d17caa
 
 
 
 
b8d2f65
4d17caa
 
 
 
 
 
 
2f264ab
b8d2f65
4d17caa
 
 
 
 
 
 
 
b8d2f65
4d17caa
 
b8d2f65
 
 
 
 
 
 
4d17caa
b8d2f65
4d17caa
b8d2f65
 
 
 
 
 
4d17caa
 
b8d2f65
 
4d17caa
8428312
82915e5
b8d2f65
8428312
 
 
82915e5
8428312
 
82915e5
8428312
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import re

import pymupdf4llm

from .models import parse_message
from .pipeline import Pipeline

def process_pdf(pdf_path: str) -> str:
    """Extract the text of a PDF file as Markdown using pymupdf4llm.

    Args:
        pdf_path: Path of the PDF, handed to ``pymupdf4llm.to_markdown``.

    Returns:
        Whatever ``pymupdf4llm.to_markdown`` returns for the file.

    Raises:
        ValueError: If extraction fails for any reason. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        return pymupdf4llm.to_markdown(pdf_path)
    except Exception as e:
        # Chain explicitly so the real failure stays visible in tracebacks.
        raise ValueError(f"Error processing PDF: {e}") from e

def read_text_file(file_path: str) -> str:
    """Read the full contents of a UTF-8 text file (e.g. .txt or .md).

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file contents.

    Raises:
        ValueError: If the file cannot be opened or decoded. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        # Chain explicitly so the real failure stays visible in tracebacks.
        raise ValueError(f"Error reading text file: {e}") from e

def process_file(file_obj, output_format: str, pipeline) -> str:
    """Extract flashcards from an uploaded file, dispatching on its extension.

    Args:
        file_obj: Object whose ``name`` attribute holds the file's path.
        output_format: Forwarded to ``generate_flashcards``.
        pipeline: Accepted but not used anywhere in this function body.

    Returns:
        The string produced by ``generate_flashcards`` for the file's text.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.txt`` or ``.md``
            (compared case-insensitively).
    """
    source_path = file_obj.name
    _, extension = os.path.splitext(source_path)
    extension = extension.lower()

    # Reject unknown extensions before attempting any extraction.
    if extension not in ('.pdf', '.txt', '.md'):
        raise ValueError("Unsupported file type.")

    if extension == '.pdf':
        extracted = process_pdf(source_path)
    else:
        extracted = read_text_file(source_path)
    return generate_flashcards(output_format, extracted)

def reduce_newlines(text: str) -> str:
    """Collapse every run of three or more newlines down to exactly two.

    Runs of one or two newlines are left untouched.

    Args:
        text: The text to normalise.

    Returns:
        ``text`` with each run of 3+ consecutive ``"\\n"`` replaced by ``"\\n\\n"``.
    """
    # One regex pass replaces the repeated whole-string rescans.
    return re.sub(r"\n{3,}", "\n\n", text)

def generate_flashcards(output_format: str, content: str, pipeline=None) -> str:
    """Generate flashcards from the content.

    The content is first passed through ``reduce_newlines``, then handed to
    the pipeline's ``extract_flashcards``; the response is rendered with
    ``format_flashcards``.

    Args:
        output_format: Forwarded to ``format_flashcards``.
        content: Source text to extract flashcards from.
        pipeline: Optional object exposing ``extract_flashcards(content)``.
            When omitted, a new ``Pipeline()`` is created, as before.

    Returns:
        The string returned by ``format_flashcards``.
    """
    content = reduce_newlines(content)
    # Let callers reuse an existing pipeline instead of always building one.
    if pipeline is None:
        pipeline = Pipeline()
    response = pipeline.extract_flashcards(content)
    return format_flashcards(output_format, response)

def process_text_input(input_text: str, output_format: str = "csv") -> str:
    """Extract flashcards from raw text input.

    Args:
        input_text: The text to process.
        output_format: Forwarded to ``generate_flashcards``; defaults to "csv".

    Returns:
        The string returned by ``generate_flashcards``.

    Raises:
        ValueError: If ``input_text`` is ``None``, empty, or whitespace only.
    """
    # Treat None like empty input so callers get the documented ValueError
    # instead of an AttributeError from ``None.strip()``.
    if not input_text or not input_text.strip():
        raise ValueError("No text provided.")
    # generate_flashcards builds its own Pipeline, so none is created here.
    return generate_flashcards(output_format, input_text)

def format_flashcards(output_format: str, response: str) -> str:
    """Format the response into the desired output format.

    Args:
        output_format: "json" or "csv", compared case-insensitively.
        response: Raw response, parsed with ``parse_message``.

    Returns:
        ``message.content_to_json()`` for "json", ``message.content_to_csv()``
        for "csv", and an empty string for any other format.

    Raises:
        Any exception raised by ``parse_message`` propagates unchanged.
    """
    # Parse first so a malformed response fails whatever format was requested.
    message = parse_message(response)
    fmt = output_format.lower()
    if fmt == "json":
        return message.content_to_json()
    if fmt == "csv":
        return message.content_to_csv()
    # Unknown formats keep the existing behaviour of returning an empty string.
    return ""