# Text2Sparql with BART
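#
# A sketch of a typical launch (the script path is an assumption based on
# NeMo's examples layout, not taken from this file). Any key below can be
# overridden from the command line through Hydra's dotted syntax:
#
#   python examples/nlp/text2sparql/text2sparql.py \
#     model.data_dir=/path/to/data \
#     model.nemo_path=./text2sparql.nemo \
#     trainer.max_epochs=2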
name: &name Text2Sparql

trainer:
  devices: 1 # number of GPUs; 0 for CPU, or a list of GPU indices
  num_nodes: 1
  max_epochs: 2 # number of training epochs
  max_steps: -1 # -1 means no limit; a positive value takes precedence over max_epochs
  accumulate_grad_batches: 1 # accumulate gradients over k batches before each optimizer step
  accelerator: gpu
  strategy: ddp
  gradient_clip_val: 0.0
  log_every_n_steps: 1
  val_check_interval: 1.0 # 1.0 checks once per epoch; 0.25 checks 4 times per epoch
  enable_checkpointing: False # provided by exp_manager
  logger: false # provided by exp_manager
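
# With the trainer settings above and model.batch_size below, the effective
# global batch size is batch_size * devices * num_nodes * accumulate_grad_batches
# = 16 * 1 * 1 * 1 = 16 sequences per optimizer step.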

model:
  nemo_path: null # path for the exported .nemo file
  max_seq_length: 150
  batch_size: 16
  convert_labels: true # true for BART, false otherwise (replaces pad_id in labels with -100 so padding is masked out of the loss)
  data_dir: null

  language_model:
    pretrained_model_name: facebook/bart-base # HuggingFace end-to-end model name
    pretrained_encoder_model_name: null # HuggingFace encoder model name
    pretrained_decoder_model_name: null # HuggingFace decoder model name
    lm_checkpoint: null
    config: null
    config_file: null # path to a JSON config file; takes precedence over config
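
  # To build the model from separate encoder and decoder checkpoints instead
  # of an end-to-end BART, leave pretrained_model_name null and set both of
  # the names above, e.g. (a hypothetical pairing, not a tested one):
  #   pretrained_encoder_model_name: bert-base-uncased
  #   pretrained_decoder_model_name: gpt2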

  encoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # tokenizer model for sentencepiece
    special_tokens: null
    add_special_tokens: true

  decoder_tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # tokenizer that inherits from TokenizerSpec
    vocab_file: null # path to vocab file
    tokenizer_model: null # tokenizer model for sentencepiece
    special_tokens: null
    add_special_tokens: true
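
  # ${model.language_model.pretrained_model_name} is OmegaConf interpolation:
  # both tokenizers resolve to facebook/bart-base unless overridden, so a
  # single CLI override of the model name changes the tokenizers as well.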

  train_ds:
    filepath: ${model.data_dir}/train.tsv # path to data file
    shuffle: true
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false

  validation_ds:
    filepath: ${model.data_dir}/test_easy.tsv # path to data file
    shuffle: false
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false

  test_ds:
    filepath: ${model.data_dir}/test_hard.tsv # path to data file
    shuffle: false
    num_samples: -1
    num_workers: 2
    drop_last: false
    pin_memory: false
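
  # The .tsv files are assumed to hold one example per line as a tab-separated
  # pair of natural-language question and target SPARQL query (an assumption
  # based on the file extension; check the dataset class for the exact format):
  #   who directed Inception?<TAB>SELECT ?d WHERE { ... }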

  optim:
    name: adamw
    lr: 4e-5
    weight_decay: 0.0

    sched:
      name: CosineAnnealing
      warmup_steps: null
      warmup_ratio: 0.06
      min_lr: 0.0
      last_epoch: -1
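
  # With warmup_steps null, warmup spans warmup_ratio * total training steps.
  # A worked example with hypothetical numbers: 10,000 training examples at
  # batch size 16 is 625 steps per epoch, or 1,250 steps over 2 epochs, so
  # warmup covers 0.06 * 1250 = 75 steps before cosine annealing decays the
  # learning rate toward min_lr.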

  generate:
    max_length: ${model.max_seq_length}
    num_beams: 1
    length_penalty: 2.0
    early_stopping: true
    repetition_penalty: 1.0
    do_sample: false
    top_k: null
    top_p: null
    num_return_sequences: 1
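
  # num_beams: 1 with do_sample: false means plain greedy decoding, so
  # length_penalty and early_stopping only take effect once beam search is
  # enabled, e.g. with model.generate.num_beams=4 (an illustrative value,
  # not a tuned recommendation).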

exp_manager:
  exp_dir: null # where to store logs and checkpoints
  name: *name # name of experiment
  create_tensorboard_logger: True
  create_checkpoint_callback: True
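
# With exp_dir null, NeMo's exp_manager writes to ./nemo_experiments under a
# folder named after the experiment, i.e. ./nemo_experiments/Text2Sparql/.
# *name is a YAML alias that reuses the &name anchor defined at the top.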

hydra:
  run:
    dir: .
  job_logging:
    root:
      handlers: null