# Text2Sparql with BART
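#
# A sketch of a typical launch (the script path is an assumption based on
# NeMo's examples layout, not taken from this file). Any key below can be
# overridden from the command line through Hydra's dotted syntax:
#
#   python examples/nlp/text2sparql/text2sparql.py \
#     model.data_dir=/path/to/data \
#     model.nemo_path=./text2sparql.nemo \
#     trainer.max_epochs=2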
name: &name Text2Sparql

trainer:
  devices: 1 # number of GPUs; 0 for CPU, or a list of GPU indices
  num_nodes: 1
  max_epochs: 2 # number of training epochs
  max_steps: -1 # -1 means no limit; a positive value takes precedence over max_epochs
  accumulate_grad_batches: 1 # accumulate gradients over k batches before each optimizer step
  accelerator: gpu
  strategy: ddp
  gradient_clip_val: 0.0
  log_every_n_steps: 1
  val_check_interval: 1.0 # 1.0 checks once per epoch; 0.25 checks 4 times per epoch
  enable_checkpointing: False # provided by exp_manager
  logger: false # provided by exp_manager
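
# With the trainer settings above and model.batch_size below, the effective
# global batch size is batch_size * devices * num_nodes * accumulate_grad_batches
# = 16 * 1 * 1 * 1 = 16 sequences per optimizer step.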

model:
  nemo_path: null # path for the exported .nemo file
  max_seq_length: 150
  batch_size: 16
  convert_labels: true # true for BART, false otherwise (replaces pad_id in labels with -100 so padding is masked out of the loss)
  data_dir: null

  language_model:
    pretrained_model_name: facebook/bart-base # HuggingFace end-to-end model name
    pretrained_encoder_model_name: null # HuggingFace encoder model name
    pretrained_decoder_model_name: null # HuggingFace decoder model name
    lm_checkpoint: null
    config: null
    config_file: null # path to a JSON config file; takes precedence over config
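
  # To build the model from separate encoder and decoder checkpoints instead
  # of an end-to-end BART, leave pretrained_model_name null and set both of
  # the names above, e.g. (a hypothetical pairing, not a tested one):
  #   pretrained_encoder_model_name: bert-base-uncased
  #   pretrained_decoder_model_name: gpt2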

  encoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # tokenizer model for sentencepiece
    special_tokens: null
    add_special_tokens: true

  decoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # tokenizer model for sentencepiece
    special_tokens: null
    add_special_tokens: true
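
  # ${model.language_model.pretrained_model_name} is OmegaConf interpolation:
  # both tokenizers resolve to facebook/bart-base unless overridden, so a
  # single CLI override of the model name changes the tokenizers as well.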

  train_ds:
    filepath: ${model.data_dir}/train.tsv # path to data file
    shuffle: true
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false

  validation_ds:
    filepath: ${model.data_dir}/test_easy.tsv # path to data file
    shuffle: false
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false

  test_ds:
    filepath: ${model.data_dir}/test_hard.tsv # path to data file
    shuffle: false
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false
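
  # The .tsv files are assumed to hold one example per line as a tab-separated
  # pair of natural-language question and target SPARQL query (an assumption
  # based on the file extension; check the dataset class for the exact format):
  #   who directed Inception?<TAB>SELECT ?d WHERE { ... }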

  optim:
    name: adamw
    lr: 4e-5
    weight_decay: 0.0

    sched:
      name: CosineAnnealing
      warmup_steps: null
      warmup_ratio: 0.06
      min_lr: 0.0
      last_epoch: -1
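
  # With warmup_steps null, warmup spans warmup_ratio * total training steps.
  # A worked example with hypothetical numbers: 10,000 training examples at
  # batch size 16 is 625 steps per epoch, or 1,250 steps over 2 epochs, so
  # warmup covers 0.06 * 1250 = 75 steps before cosine annealing decays the
  # learning rate toward min_lr.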

  generate:
    max_length: ${model.max_seq_length}
    num_beams: 1
    length_penalty: 2.0
    early_stopping: true
    repetition_penalty: 1.0
    do_sample: false
    top_k: null
    top_p: null
    num_return_sequences: 1
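
  # num_beams: 1 with do_sample: false means plain greedy decoding, so
  # length_penalty and early_stopping only take effect once beam search is
  # enabled, e.g. with model.generate.num_beams=4 (an illustrative value,
  # not a tuned recommendation).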

exp_manager:
  exp_dir: null # where to store logs and checkpoints
  name: *name # name of experiment
  create_tensorboard_logger: True
  create_checkpoint_callback: True
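
# With exp_dir null, NeMo's exp_manager writes to ./nemo_experiments under a
# folder named after the experiment, i.e. ./nemo_experiments/Text2Sparql/.
# *name is a YAML alias that reuses the &name anchor defined at the top.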

hydra:
  run:
    dir: .
  job_logging:
    root:
      handlers: null