# YuITC
# refactor: refactor all files
# 4d6a130
import os
import torch
import random
import numpy as np
# ===== Data settings =====
# Make sure the data root and its original/processed/retrieval sub-folders
# exist at import time; directories that are already present are left as-is.
for _data_dir in (
    'data',
    'data/original',
    'data/processed',
    'data/retrieval',
):
    os.makedirs(_data_dir, exist_ok=True)
del _data_dir  # keep the helper name out of the module namespace
# ===== Model settings =====
# NOTE(review): MODEL_ID looks like a Hugging Face Hub repo id -- confirm
# against whatever code loads the model.
MODEL_ID = 'google-bert/bert-base-multilingual-cased'
MODEL_NAME = 'VN-legalDocs-SBERT'

# Cache and output folders are both keyed by MODEL_NAME and are created
# at import time if they do not exist yet.
CACHE_DIR = "cache/" + MODEL_NAME
OUTPUT_DIR = "models/" + MODEL_NAME
for _model_dir in (CACHE_DIR, OUTPUT_DIR):
    os.makedirs(_model_dir, exist_ok=True)
del _model_dir  # keep the helper name out of the module namespace
# ===== Reproducibility =====
# A single seed is pushed into every RNG this module imports: Python's
# `random`, NumPy, and torch (CPU generator plus all CUDA devices).
SEED = 42
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)
del _seed_fn  # keep the helper name out of the module namespace

# cuDNN trade-off:
#   reproducible runs            -> deterministic=True,  benchmark=False
#   faster inference / training  -> deterministic=False, benchmark=True
# The speed-oriented pair is the one active below, so the fixed seed alone
# does not put cuDNN into its reproducible configuration.
_cudnn = torch.backends.cudnn
_cudnn.deterministic = False
_cudnn.benchmark = True
del _cudnn
# ===== Hyperparameters =====
# Sequence length and batch size, then training schedule (epochs, learning rate).
MAX_SEQ_LEN, BATCH_SIZE = 512, 128
EPOCHS, LR = 5, 3e-5

# Pick CUDA when torch reports an available device, otherwise fall back to
# the CPU; the choice is printed once at import time.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Using device: {DEVICE}")