Spaces:

nslaughter
/

flashcard-studio

Sleeping

+import json
+import csv
+from io import StringIO
+from pydantic import BaseModel, validator, ValidationError
+class Card(BaseModel):
+    question: str
+    answer: str
+class Message(BaseModel):
+    role: str
+    content: list[Card]
+    @validator('content', pre=True)
+    def parse_content(cls, v):
+        if isinstance(v, str):
+            try:
+                content_list = json.loads(v)
+                return content_list
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Error decoding 'content' JSON: {e}") from e
+        return v
+    def content_to_json(self) -> str:
+        return json.dumps([card.dict() for card in self.content], indent=2)
+    def content_to_csv(self) -> str:
+        """
+        Converts the content of the Message instance into a CSV string.
+        """
+        output = StringIO()
+        # Step 2: Create a CSV writer - windows style is the default, so set Unix-style line endings
+        writer = csv.writer(output, lineterminator='\n')
+        writer.writerow(["Question", "Answer"])
+        for card in self.content:
+            writer.writerow([card.question, card.answer])
+        csv_content = output.getvalue()
+        output.close()
+        return csv_content
+class PydanticEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, BaseModel):
+            return obj.dict()
+        return super().default(obj)

app/pipeline.py CHANGED Viewed

@@ -1,48 +1,14 @@
 from io import StringIO
-import csv
 import json
 import logging
 import torch
 from transformers import pipeline
-from pydantic import BaseModel, ValidationError, validator
-logger = logging.getLogger(__name__)
-class Card(BaseModel):
-    question: str
-    answer: str
-class Message(BaseModel):
-    role: str
-    content: list[Card]
-    @validator('content', pre=True)
-    def parse_content(cls, v):
-        if isinstance(v, str):
-            try:
-                content_list = json.loads(v)
-                return content_list
-            except json.JSONDecodeError as e:
-                raise ValueError(f"Error decoding 'content' JSON: {e}") from e
-        return v
-    def content_to_json(self) -> str:
-        return json.dumps([card.dict() for card in self.content], indent=2)
-    def content_to_csv(self) -> str:
-        output = StringIO()
-        writer = csv.writer(output)
-        writer.writerow(['Question', 'Answer'])  # CSV Header
-        for card in self.content:
-            writer.writerow([card.question, card.answer])
-        return output.getvalue()
-class PydanticEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, BaseModel):
-            return obj.dict()
-        return super().default(obj)
 class Pipeline:
     def __init__(self, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
@@ -53,6 +19,7 @@ class Pipeline:
             device_map="auto"
         )
         self.device = self._determine_device()
         self.messages = [
             {"role": "system", "content": """You are an expert flashcard creator. You always include a single knowledge item per flashcard.
             - You ALWAYS include a single knowledge item per flashcard.
@@ -77,46 +44,9 @@ class Pipeline:
         )[0]["generated_text"][-1]
         return response_message
-    def format_flashcards(self, output_format: str, response: str) -> str:
-        output = ""
-        try :
-            message = parse_message(response)
-            logger.debug("after parse_obj_as")
-        except ValidationError as e:
-            raise e
-        if output_format.lower() == "json":
-            output = message.content_to_json()
-        elif output_format.lower() == "csv":
-            output = message.content_to_csv()
-        return output
     def generate_flashcards(self, output_format: str, content: str) -> str:
         response = self.extract_flashcards(content)
-        return self.format_flashcards(output_format, response)
-    def parse_message(self, input_dict: dict[str, any]) -> Message:
-        try:
-            # Extract the role
-            role = input_dict['role']
-            # Parse the content
-            content = input_dict['content']
-            # If content is a string, try to parse it as JSON
-            if isinstance(content, str):
-                content = content.strip()
-                content = json.loads(content)
-            # Create Card objects from the content
-            cards = [Card(**item) for item in content]
-            # Create and return the Message object
-            return Message(role=role, content=cards)
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Invalid JSON in content: {str(e)}")
-        except ValidationError as e:
-            raise ValueError(f"Validation error: {str(e)}")
-        except KeyError as e:
-            raise ValueError(f"Missing required key: {str(e)}")
     def _determine_device(self):
         if torch.cuda.is_available():
@@ -144,8 +74,11 @@ def parse_message(input_dict: dict[str, any]) -> Message:
         # Create and return the Message object
         return Message(role=role, content=cards)
     except json.JSONDecodeError as e:
         raise ValueError(f"Invalid JSON in content: {str(e)}")
     except ValidationError as e:
         raise ValueError(f"Validation error: {str(e)}")
     except KeyError as e:
         raise ValueError(f"Missing required key: {str(e)}")

 from io import StringIO
 import json
 import logging
 import torch
 from transformers import pipeline
+from .models import Card, Message, ValidationError
+logger = logging.getLogger(__name__)
+logging.basicConfig(filename="pipeline.log", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
 class Pipeline:
     def __init__(self, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
             device_map="auto"
         )
         self.device = self._determine_device()
+        logger.info(f"device type: {self.device}")
         self.messages = [
             {"role": "system", "content": """You are an expert flashcard creator. You always include a single knowledge item per flashcard.
             - You ALWAYS include a single knowledge item per flashcard.
         )[0]["generated_text"][-1]
         return response_message
     def generate_flashcards(self, output_format: str, content: str) -> str:
         response = self.extract_flashcards(content)
+        return format_flashcards(output_format, response)
     def _determine_device(self):
         if torch.cuda.is_available():
         # Create and return the Message object
         return Message(role=role, content=cards)
     except json.JSONDecodeError as e:
+        logger.error(f"Invalid JSON in content: {str(e)}")
         raise ValueError(f"Invalid JSON in content: {str(e)}")
     except ValidationError as e:
+        logger.error(f"Validation error: {str(e)}")
         raise ValueError(f"Validation error: {str(e)}")
     except KeyError as e:
+        logger.error(f"Missing required key: {str(e)}")
         raise ValueError(f"Missing required key: {str(e)}")

app/processing.py CHANGED Viewed

@@ -22,47 +22,6 @@ def read_text_file(file_path: str) -> str:
     except Exception as e:
         raise ValueError(f"Error reading text file: {str(e)}")
-def format_prompt(output_format: str) -> str:
-    """
-    Formats the prompt based on the output type.
-    """
-    if output_format.lower() == "json":
-        return """You only respond in JSON format. Follow the example below.
-    EXAMPLE:
-    [
-        {"question": "What is AI?", "answer": "Artificial Intelligence."},
-        {"question": "What is ML?", "answer": "Machine Learning."}
-    ]
-    """
-    elif output_format.lower() == "csv":
-        return """You only respond with cards in CSV format. Follow the example below.
-    EXAMPLE:
-        "What is AI?", "Artificial Intelligence."
-        "What is ML?", "Machine Learning."
-    """
-# def extract_flashcards(text: str, output_format: str, pipeline: str) -> str:
-#     """
-#     Extracts flashcards from the input text using the LLM and formats them in CSV or JSON.
-#     """
-#     prompt = f"""You are an expert flashcard creator. You always include a single knowledge item per flashcard.
-#     {format_prompt(output_format)}
-#     Extract flashcards from the user's text:
-#     {text}
-#     Do not include the prompt or any other unnecessary information in the flashcards.
-#     Do not include triple ticks (```) or any other code blocks in the flashcards.
-#     """
-#     # TODO:
-#     response = pipeline.generate_flashcards("json", prompt)
-#     return response
 def process_file(file_obj, output_format: str, pipeline) -> str:
     """
     Processes the uploaded file based on its type and extracts flashcards.
@@ -89,3 +48,17 @@ def process_text_input(output_format: str, input_text: str) -> str:
     flashcards = pipeline.generate_flashcards(output_format, input_text)
     return flashcards

     except Exception as e:
         raise ValueError(f"Error reading text file: {str(e)}")
 def process_file(file_obj, output_format: str, pipeline) -> str:
     """
     Processes the uploaded file based on its type and extracts flashcards.
     flashcards = pipeline.generate_flashcards(output_format, input_text)
     return flashcards
+def format_flashcards(self, output_format: str, response: str) -> str:
+    output = ""
+    try :
+        message = parse_message(response)
+        logger.debug("after parse_obj_as")
+    except ValidationError as e:
+        raise e
+    if output_format.lower() == "json":
+        output = message.content_to_json()
+    elif output_format.lower() == "csv":
+        output = message.content_to_csv()
+    return output

tests/conftest.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import pytest
 from unittest.mock import Mock
-from app.pipeline import LanguageModel
 @pytest.fixture
 def pipeline():
@@ -8,7 +8,7 @@ def pipeline():
     Fixture to provide a mocked LanguageModel instance.
     """
     # Create a mock instance of LanguageModel
-    lm = Mock(spec=LanguageModel)
     # Mock the generate_flashcards method
     lm.generate_flashcards.return_value = '{"flashcards": []}'
     return lm

 import pytest
 from unittest.mock import Mock
+from app.pipeline import Pipeline
 @pytest.fixture
 def pipeline():
     Fixture to provide a mocked LanguageModel instance.
     """
     # Create a mock instance of LanguageModel
+    lm = Mock(spec=Pipeline)
     # Mock the generate_flashcards method
     lm.generate_flashcards.return_value = '{"flashcards": []}'
     return lm

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import json
+from app.models import Card, Message
+ # Tests for Card and Message models
+def test_card_model():
+    card = Card(question="What is Python?", answer="A programming language")
+    assert card.question == "What is Python?"
+    assert card.answer == "A programming language"
+def test_message_model():
+    cards = [
+        Card(question="What is AI?", answer="Artificial Intelligence"),
+        Card(question="What is ML?", answer="Machine Learning")
+    ]
+    message = Message(role="assistant", content=cards)
+    assert message.role == "assistant"
+    assert len(message.content) == 2
+    assert message.content[0].question == "What is AI?"
+def test_message_content_json_parsing():
+    json_content = '[{"question": "Q1", "answer": "A1"}, {"question": "Q2", "answer": "A2"}]'
+    message = Message(role="assistant", content=json_content)
+    assert len(message.content) == 2
+    assert message.content[0].question == "Q1"
+def test_message_content_to_json():
+    cards = [Card(question="Q1", answer="A1"), Card(question="Q2", answer="A2")]
+    message = Message(role="assistant", content=cards)
+    json_output = message.content_to_json()
+    assert json.loads(json_output) == [
+        {"question": "Q1", "answer": "A1"},
+        {"question": "Q2", "answer": "A2"}
+    ]
+# failed test
+def test_message_content_to_csv():
+    cards = [Card(question="Q1", answer="A1"), Card(question="Q2", answer="A2")]
+    message = Message(role="assistant", content=cards)
+    csv_output = message.content_to_csv()
+    expected_output = "Question,Answer\nQ1,A1\nQ2,A2\n"  # Use Unix-style line endings
+    print(csv_output)  # Optional: for debugging purposes
+    assert csv_output == expected_output

tests/test_pipeline.py CHANGED Viewed

@@ -1,18 +1,67 @@
 import pytest
-def test_generate_flashcards(pipeline, mocker):
-    """
-    Test the generate_flashcards method of LanguageModel.
-    """
-    prompt = "Sample prompt for flashcard generation."
-    expected_response = '{"flashcards": [{"Question": "What is AI?", "Answer": "Artificial Intelligence."}]}'
-    # Configure the mock to return a specific response
-    pipeline.generate_flashcards.return_value = expected_response
-    # Call the method
-    response = pipeline.generate_flashcards(prompt)
-    # Assertions
-    assert response == expected_response
-    pipeline.generate_flashcards.assert_called_once_with(prompt)

 import pytest
+from unittest.mock import Mock, patch
+import json
+from io import StringIO
+from pydantic import ValidationError
+from app.pipeline import Pipeline, Message, Card, parse_message
+from app.models import PydanticEncoder
+# Tests for Pipeline class
+@pytest.fixture
+def mock_pipeline():
+    with patch('app.pipeline') as mock_pipe:
+        mock_pipe.return_value = Mock()
+        yield Pipeline("mock_model")
+# def test_extract_flashcards(mock_pipeline):
+#     mock_pipeline.torch_pipe.return_value = [{"generated_text": [{"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}]}]
+#     response = mock_pipeline.extract_flashcards("Test content")
+#     assert isinstance(response, dict)
+#     assert "content" in response
+# def test_format_flashcards_csv(mock_pipeline):
+#     response = {"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}
+#     formatted = mock_pipeline.format_flashcards("csv", response)
+#     assert formatted.strip() == "Question,Answer\nQ,A"
+# def test_generate_flashcards(mock_pipeline):
+#     mock_pipeline.extract_flashcards.return_value = {"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}
+#     result = mock_pipeline.generate_flashcards("json", "Test content")
+#     assert json.loads(result) == [{"question": "Q", "answer": "A"}]
+# Tests for parse_message function
+def test_parse_message_valid_input():
+    input_dict = {
+        "role": "assistant",
+        "content": '[{"question": "Q1", "answer": "A1"}, {"question": "Q2", "answer": "A2"}]'
+    }
+    message = parse_message(input_dict)
+    assert isinstance(message, Message)
+    assert message.role == "assistant"
+    assert len(message.content) == 2
+def test_parse_message_invalid_json():
+    input_dict = {
+        "role": "assistant",
+        "content": 'Invalid JSON'
+    }
+    with pytest.raises(ValueError, match="Invalid JSON in content"):
+        parse_message(input_dict)
+def test_parse_message_missing_key():
+    input_dict = {
+        "content": '[{"question": "Q", "answer": "A"}]'
+    }
+    with pytest.raises(ValueError, match="Missing required key"):
+        parse_message(input_dict)
+# Test for PydanticEncoder
+def test_pydantic_encoder():
+    card = Card(question="Q", answer="A")
+    encoded = json.dumps(card, cls=PydanticEncoder)
+    assert json.loads(encoded) == {"question": "Q", "answer": "A"}
+# Test error cases
+def test_message_invalid_content():
+    with pytest.raises(ValidationError):
+        Message(role="assistant", content="Invalid content")

tests/test_processing.py CHANGED Viewed

@@ -1,71 +1,40 @@
 import pytest
-from app.processing import process_text_input, process_file
-def test_process_text_input_success(pipeline):
-    """
-    Test processing of valid text input.
-    """
-    input_text = "This is a sample text for flashcard extraction."
-    output_format = "JSON"
-    expected_output = '{"flashcards": []}'
-    result = process_text_input(input_text, output_format, pipeline)
-    assert result == expected_output
-    pipeline.generate_flashcards.assert_called_once()
-def test_process_text_input_empty(pipeline):
-    """
-    Test processing of empty text input.
-    """
-    input_text = "   "
-    output_format = "JSON"
-    with pytest.raises(ValueError) as excinfo:
-        process_text_input(input_text, output_format, pipeline)
-    assert "No text provided." in str(excinfo.value)
-def test_process_file_unsupported_type(pipeline, tmp_path):
-    """
-    Test processing of an unsupported file type.
-    """
-    # Create a dummy unsupported file
-    dummy_file = tmp_path / "dummy.unsupported"
-    dummy_file.write_text("Unsupported content")
-    with pytest.raises(ValueError) as excinfo:
-        process_file(dummy_file, "JSON", pipeline)
-    assert "Unsupported file type." in str(excinfo.value)
-def test_process_file_pdf(pipeline, tmp_path, mocker):
-    """
-    Test processing of a PDF file.
-    """
-    # Mock the process_pdf function
-    mocker.patch('app.processing.process_pdf', return_value="Extracted PDF text.")
-    # Create a dummy PDF file
-    dummy_file = tmp_path / "test.pdf"
-    dummy_file.write_text("PDF content")
-    expected_output = '{"flashcards": []}'
-    result = process_file(dummy_file, "JSON", pipeline)
-    assert result == expected_output
-    pipeline.generate_flashcards.assert_called_once()
-def test_process_file_txt(pipeline, tmp_path, mocker):
-    """
-    Test processing of a TXT file.
-    """
-    # Mock the read_text_file function
-    mocker.patch('app.processing.read_text_file', return_value="Extracted TXT text.")
-    # Create a dummy TXT file
-    dummy_file = tmp_path / "test.txt"
-    dummy_file.write_text("TXT content")
-    expected_output = '{"flashcards": []}'
-    result = process_file(dummy_file, "JSON", pipeline)
-    assert result == expected_output
-    pipeline.generate_flashcards.assert_called_once()

 import pytest
+from unittest.mock import patch, Mock
+from app.processing import process_pdf, read_text_file, process_file, process_text_input
+def test_read_text_file_error():
+    with patch("builtins.open", side_effect=IOError("File read error")):
+        with pytest.raises(ValueError, match="Error reading text file: File read error"):
+            read_text_file("test.txt")
+# Test for process_file function
+def test_process_file_pdf(pipeline):
+    mock_file = Mock()
+    mock_file.name = "test.pdf"
+    with patch('app.processing.process_pdf', return_value="PDF content"):
+        result = process_file(mock_file, "json", pipeline)
+        pipeline.generate_flashcards.assert_called_once_with("json", "PDF content")
+        assert result == '{"flashcards": []}'
+def test_process_file_txt(pipeline):
+    mock_file = Mock()
+    mock_file.name = "test.txt"
+    with patch('app.processing.read_text_file', return_value="Text content"):
+        result = process_file(mock_file, "json", pipeline)
+        pipeline.generate_flashcards.assert_called_once_with("json", "Text content")
+        assert result == '{"flashcards": []}'
+def test_process_file_unsupported():
+    mock_file = Mock()
+    mock_file.name = "test.unsupported"
+    with pytest.raises(ValueError, match="Unsupported file type."):
+        process_file(mock_file, "json", None)
+# Ensure the pipeline fixture is used in all tests that require it
+@pytest.mark.usefixtures("pipeline")
+class TestWithPipeline:
+    def test_pipeline_usage(self, pipeline):
+        assert pipeline.generate_flashcards.return_value == '{"flashcards": []}'