YuITC's picture
feat: initial project upload after testing
0063d17
raw
history blame
1.03 kB
import os
import torch
import random
import numpy as np
# Data settings
# Ensure the data directory tree exists (relative to the current working
# directory). Existing directories are left untouched.
for _data_dir in (
    'data',
    'data/original',
    'data/processed',
    'data/retrieval',
):
    os.makedirs(_data_dir, exist_ok=True)
# Model settings
# MODEL_ID is the base model identifier; MODEL_NAME names this project's
# model and is used to derive the cache and output directory paths.
MODEL_ID = 'google-bert/bert-base-multilingual-cased'
MODEL_NAME = 'VN-legalDocs-SBERT'

CACHE_DIR = f"cache/{MODEL_NAME}"
OUTPUT_DIR = f"models/{MODEL_NAME}"

# Create both directories up front; no error is raised if they already exist.
for _model_dir in (CACHE_DIR, OUTPUT_DIR):
    os.makedirs(_model_dir, exist_ok=True)
# Reproducibility
# Seed every random number generator this module imports with one value:
# Python's `random`, NumPy's global RNG, and PyTorch (default + all CUDA devices).
SEED = 42
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

# cuDNN trade-off between reproducibility and speed:
#   - Reproducibility:                     deterministic=True,  benchmark=False
#   - Optimize inference/training speed:   deterministic=False, benchmark=True
# The speed-oriented pair is selected below.
# NOTE(review): with these flags, GPU runs may not be exactly repeatable even
# though the seeds above are fixed — confirm this is the intended trade-off.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
# Hyperparameters
BATCH_SIZE = 128
EPOCHS = 5
LR = 3e-5
MAX_SEQ_LEN = 512

# Select the compute device: CUDA when PyTorch reports it available, else CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Using device: {DEVICE}")