tokenizers / tests /conftest.py
bartar's picture
Upload 26 files
d66ab65 verified
raw
history blame
1.96 kB
"""
pytest configuration file
"""
import pytest
import os
import tempfile
from unittest.mock import Mock, patch
from flask import Flask
# Add the parent directory to Python path so we can import the app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app import create_app
from config import TestingConfig
@pytest.fixture
def app():
    """Yield an application built from ``TestingConfig`` for one test.

    The application is created with ``create_app(TestingConfig())``. Its
    ``UPLOAD_FOLDER`` setting is pointed at a fresh temporary directory and
    ``TESTING`` is switched on before the application is handed to the test.

    Yields:
        The configured application object returned by ``create_app``.
    """
    flask_app = create_app(TestingConfig())

    # The directory only exists while the ``with`` block is open, so the
    # yield has to happen inside it; leaving the block removes the directory.
    with tempfile.TemporaryDirectory() as upload_dir:
        flask_app.config['TESTING'] = True
        flask_app.config['UPLOAD_FOLDER'] = upload_dir
        yield flask_app
@pytest.fixture
def client(app):
    """Return a test client obtained from the ``app`` fixture.

    Args:
        app: The application provided by the ``app`` fixture.

    Returns:
        Whatever ``app.test_client()`` returns.
    """
    test_client = app.test_client()
    return test_client
@pytest.fixture
def mock_tokenizer():
    """Return a ``Mock`` configured to look like a tokenizer.

    The mock exposes:

    * ``tokenize(...)`` returning ``['Hello', 'world', '!']`` for any input,
    * ``vocab_size`` of 50257 and ``model_max_length`` of 1024,
    * the special tokens ``pad_token``, ``eos_token``, ``unk_token`` and
      ``bos_token``,
    * a class name of ``'MockTokenizer'``.
    """
    fake = Mock()

    # Plain attributes, including the special tokens, set in one call.
    fake.configure_mock(
        vocab_size=50257,
        model_max_length=1024,
        pad_token='<pad>',
        eos_token='</s>',
        unk_token='<unk>',
        bos_token='<s>',
    )

    # Every call to tokenize() yields the same fixed token list.
    fake.tokenize.return_value = ['Hello', 'world', '!']

    # Presumably for code under test that reports the tokenizer's class
    # name -- confirm against the callers.
    fake.__class__.__name__ = 'MockTokenizer'

    return fake
@pytest.fixture
def sample_text():
    """Return a short fixed sentence for tests to use as input text."""
    text = "Hello world! This is a test."
    return text
@pytest.fixture
def sample_tokens():
    """Return a fixed list of token strings for tests.

    Tokens after the first carry their leading space, so concatenating the
    list gives ``"Hello world! This is a test."``.
    """
    tokens = [
        'Hello',
        ' world',
        '!',
        ' This',
        ' is',
        ' a',
        ' test',
        '.',
    ]
    return tokens
@pytest.fixture
def temp_file():
    """Yield the path of a temporary ``.txt`` file with known contents.

    The file is created with ``delete=False`` so that it still exists after
    being closed, which lets a test reopen it by path. It is removed once
    the test using the fixture has finished.

    Yields:
        str: Path of a file containing
        ``"Hello world! This is a test file."``.
    """
    # Pin the encoding so the bytes on disk never depend on the host locale.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    ) as f:
        f.write("Hello world! This is a test file.")
        temp_path = f.name

    # The file is closed (and therefore flushed) before the test sees it.
    yield temp_path

    # Cleanup. Attempt the removal directly instead of checking for
    # existence first: a test may legitimately have deleted the file already,
    # and a separate exists() check would be racy.
    try:
        os.unlink(temp_path)
    except FileNotFoundError:
        pass