File size: 1,026 Bytes
0063d17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import os
import torch
import random
import numpy as np
# Data settings
# Create the data directory tree at import time. exist_ok=True turns the
# call into a no-op for any folder that is already present.
for _data_dir in ('data', 'data/original', 'data/processed', 'data/retrieval'):
    os.makedirs(_data_dir, exist_ok=True)
# Model settings
# MODEL_ID: identifier of the base checkpoint (presumably a Hugging Face Hub
# repo id -- confirm at the loading site).
MODEL_ID = 'google-bert/bert-base-multilingual-cased'
# MODEL_NAME: name used to derive this project's per-model directories.
MODEL_NAME = 'VN-legalDocs-SBERT'
CACHE_DIR = f"cache/{MODEL_NAME}"
OUTPUT_DIR = f"models/{MODEL_NAME}"
# Make sure both per-model directories exist (no error if already present).
for _model_dir in (CACHE_DIR, OUTPUT_DIR):
    os.makedirs(_model_dir, exist_ok=True)
# Reproducibility
SEED = 42
# Feed the same seed to every RNG this module imports: Python's `random`,
# NumPy's global generator, torch's default generator and all CUDA devices.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)
# cuDNN trade-off:
#   - reproducible runs -> deterministic=True,  benchmark=False
#   - faster train/infer -> deterministic=False, benchmark=True
# The speed-oriented combination is the one selected here.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
# Hyperparameters
# NOTE(review): the meanings below are inferred from the constant names only;
# confirm against the training code that consumes them.
MAX_SEQ_LEN = 512   # presumably the maximum input sequence length
EPOCHS = 5          # presumably the number of training epochs
LR = 3e-5           # presumably the optimizer learning rate
BATCH_SIZE = 128    # presumably the number of samples per batch
# Use the GPU when torch reports CUDA as available, otherwise fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Report the selected device once, at import time.
print(f"Using device: {DEVICE}")