# NeMo/examples/nlp/language_modeling/conf/transformer_lm_config.yaml
#
# thanks to NVIDIA ❤
#
# 7934b29 about 2 years ago
#
# 2.27 kB

	name: TransformerLanguageModel
	do_training: True # set to False if only preprocessing data

	model:
	label_smoothing: 0.0
	preproc_out_dir: null # path to store data preprocessing outputs

	train_ds:
	file_name: ??? # path to file with training data
	tokens_in_batch: 4096
	clean: true
	shuffle: true
	num_workers: 8

	# tarred dataset specific config
	# use_tarred_dataset: true
	# tar_files: ??? # path to tarred files
	# metadata_file: ??? # metadata for tarred dataset
	# shard_strategy: scatter
	# tar_shuffle_n: 256

	validation_ds:
	file_name: ??? # path to file with validation data
	tokens_in_batch: 512
	clean: false
	shuffle: false
	num_samples: -1
	drop_last: false
	pin_memory: false
	num_workers: 8

	test_ds:
	file_name: ??? # path to file with test data
	tokens_in_batch: 512
	clean: false
	shuffle: false
	num_samples: -1
	drop_last: false
	pin_memory: false
	num_workers: 8

	optim:
	name: adam
	lr: 0.001
	betas:
	- 0.9
	- 0.98
	weight_decay: 0.0
	sched:
	name: InverseSquareRootAnnealing
	min_lr: 0.0
	last_epoch: -1
	warmup_ratio: 0.1

	tokenizer:
	tokenizer_name: yttm
	tokenizer_model: ???
	vocab_file: null
	special_tokens: null
	training_sample_size: null # valid for sentencepiece tokenizer

	encoder:
	library: nemo
	model_name: null
	pretrained: false
	max_sequence_length: 512
	num_token_types: 0
	embedding_dropout: 0.1
	learn_positional_encodings: false
	hidden_size: 512
	num_layers: 6
	inner_size: 2048
	num_attention_heads: 8
	ffn_dropout: 0.1
	attn_score_dropout: 0.1
	attn_layer_dropout: 0.1
	hidden_act: relu
	mask_future: true
	pre_ln: false

	head:
	num_layers: 1
	activation: relu
	log_softmax: true
	dropout: 0.0

	trainer:
	devices: 4
	num_nodes: 1
	max_epochs: 200
	precision: 16 # Should be set to 16 for O1 and O2, default is 16 as PT ignores it when am_level is O0
	accelerator: gpu
	strategy: ddp
	enable_checkpointing: False
	logger: False
	log_every_n_steps: 50 # Interval of logging.
	check_val_every_n_epoch: 1
	benchmark: False

	exp_manager:
	name: TransformerLM
	files_to_copy: []