# NeMo: examples/nlp/language_modeling/conf/transformer_lm_config.yaml
name: TransformerLanguageModel
do_training: True # set to False if only preprocessing data

model:
  label_smoothing: 0.0
  preproc_out_dir: null # path to store data preprocessing outputs

  train_ds:
    file_name: ??? # path to file with training data
    tokens_in_batch: 4096
    clean: true
    shuffle: true
    num_workers: 8

    # tarred dataset specific config
    # use_tarred_dataset: true
    # tar_files: ??? # path to tarred files
    # metadata_file: ??? # metadata for tarred dataset
    # shard_strategy: scatter
    # tar_shuffle_n: 256
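
    # Note: tokens_in_batch is a per-batch token budget rather than a fixed
    # sentence count, so the effective batch size adapts to sequence length.
    # A sketch of overriding it from the Hydra command line (the value is
    # illustrative, not a recommendation):
    #   model.train_ds.tokens_in_batch=8192
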
  validation_ds:
    file_name: ??? # path to file with validation data
    tokens_in_batch: 512
    clean: false
    shuffle: false
    num_samples: -1
    drop_last: false
    pin_memory: false
    num_workers: 8

  test_ds:
    file_name: ??? # path to file with test data
    tokens_in_batch: 512
    clean: false
    shuffle: false
    num_samples: -1
    drop_last: false
    pin_memory: false
    num_workers: 8
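    # Validation and test keep clean/shuffle off and use a smaller token budget
    # so evaluation runs over the untouched corpora in a fixed order.
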
  optim:
    name: adam
    lr: 0.001
    betas:
      - 0.9
      - 0.98
    weight_decay: 0.0

    sched:
      name: InverseSquareRootAnnealing
      min_lr: 0.0
      last_epoch: -1
      warmup_ratio: 0.1
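      # InverseSquareRootAnnealing warms the LR up over the first warmup_ratio
      # (here 10%) of training steps, then decays it in proportion to
      # 1/sqrt(step), i.e. the schedule popularized by "Attention Is All You
      # Need".
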
  tokenizer:
    tokenizer_name: yttm
    tokenizer_model: ???
    vocab_file: null
    special_tokens: null
    training_sample_size: null # valid for sentencepiece tokenizer
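    # tokenizer_name: yttm selects a YouTokenToMe BPE tokenizer, in which case
    # tokenizer_model must point to a trained BPE model file. A sketch with a
    # placeholder path:
    #   model.tokenizer.tokenizer_model=/path/to/yttm_bpe.model
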
encoder:
library: nemo
model_name: null
pretrained: false
max_sequence_length: 512
num_token_types: 0
embedding_dropout: 0.1
learn_positional_encodings: false
hidden_size: 512
num_layers: 6
inner_size: 2048
num_attention_heads: 8
ffn_dropout: 0.1
attn_score_dropout: 0.1
attn_layer_dropout: 0.1
hidden_act: relu
mask_future: true
pre_ln: false
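    # These dimensions (512 hidden, 2048 FFN, 8 heads, 6 layers) match the
    # original "base" Transformer stack; mask_future: true applies a causal
    # attention mask so each position attends only to earlier positions, as
    # required for left-to-right language modeling.
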
  head:
    num_layers: 1
    activation: relu
    log_softmax: true
    dropout: 0.0
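    # The head projects encoder hidden states to vocabulary logits;
    # log_softmax: true makes it emit log-probabilities for the NLL loss.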

trainer:
  devices: 4
  num_nodes: 1
  max_epochs: 200
  precision: 16 # Should be set to 16 for O1 and O2; default is 16, as PT ignores it when amp_level is O0
  accelerator: gpu
  strategy: ddp
  enable_checkpointing: False # Provided by exp_manager
  logger: False # Provided by exp_manager
  log_every_n_steps: 50 # Interval of logging.
  check_val_every_n_epoch: 1
  benchmark: False
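
# exp_manager creates the experiment directory and supplies the logger and
# checkpoint callback that were disabled in the trainer above.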
exp_manager:
  name: TransformerLM
  files_to_copy: []
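
# A minimal launch sketch, assuming the transformer_lm.py example script that
# ships alongside this config (all paths are placeholders):
#   python examples/nlp/language_modeling/transformer_lm.py \
#     model.train_ds.file_name=/path/to/train.txt \
#     model.validation_ds.file_name=/path/to/valid.txt \
#     model.test_ds.file_name=/path/to/test.txt \
#     model.tokenizer.tokenizer_model=/path/to/yttm.model \
#     trainer.devices=4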