# GLUE Benchmark with pre-trained BERT models
supported_tasks: &supported_tasks ['cola', 'sst-2', 'mrpc', 'sts-b', 'qqp', 'mnli', 'qnli', 'rte', 'wnli']
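
# A typical launch (a sketch: it assumes the companion glue_benchmark.py script
# that lives alongside this config, plus standard Hydra dot-notation overrides;
# the data path is a placeholder):
#   python glue_benchmark.py \
#     model.dataset.data_dir=/path/to/glue_data/MRPC \
#     model.task_name=mrpc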
trainer:
  devices: 1 # the number of gpus, 0 for CPU
  num_nodes: 1
  max_epochs: 3
  max_steps: -1 # precedence over max_epochs
  accumulate_grad_batches: 1 # accumulates grads every k batches
  precision: 16
  accelerator: gpu
  strategy: ddp
  enable_checkpointing: False # Provided by exp_manager
  logger: False # Provided by exp_manager
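
# Any trainer field can be overridden from the command line in the same way
# (Hydra dot notation; the values below are purely illustrative):
#   python glue_benchmark.py trainer.max_epochs=5 trainer.precision=32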

model:
  task_name: &task_name mrpc # choose from: ["cola", "sst-2", "mrpc", "sts-b", "qqp", "mnli", "qnli", "rte", "wnli"]; MNLI includes both matched and mismatched dev sets
  supported_tasks: *supported_tasks
  output_dir: null # dir to write predictions to
  nemo_path: null # filename for saving the model and associated artifacts to a .nemo file

  dataset:
    data_dir: ??? # /path/to/data
    max_seq_length: 128
    use_cache: true

    # shared across dataloaders:
    num_workers: 2
    pin_memory: false
    drop_last: false
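
  # Assumption for illustration: data_dir holds the chosen task's TSV files
  # (as produced by the usual GLUE download scripts), so the ds_item values
  # below ('train.tsv', 'dev.tsv') resolve relative to it.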

  train_ds:
    ds_item: 'train.tsv'
    shuffle: true
    num_samples: -1
    batch_size: 32

  validation_ds:
    ds_item: 'dev.tsv' # for MNLI, 'dev_matched.tsv' and 'dev_mismatched.tsv' will be used
    shuffle: false
    num_samples: -1
    batch_size: 32

  tokenizer:
    tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece
    vocab_file: null # path to vocab file
    tokenizer_model: null # only used if tokenizer is sentencepiece
    special_tokens: null # only needed to add transformer/BERT-specific special tokens when the tokenizer does not already provide them
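
  # A minimal sentencepiece variant, sketched with placeholder paths (the
  # .model file is your own; it is not shipped with this config):
  # tokenizer:
  #   tokenizer_name: sentencepiece
  #   tokenizer_model: /path/to/tokenizer.model
  #   vocab_file: null
  #   special_tokens: null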

  language_model:
    pretrained_model_name: bert-base-uncased
    lm_checkpoint: null
    config_file: null # json file, takes precedence over config
    config: null
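
  # Swapping encoders is a one-line change, assuming the name is among the
  # pretrained language models NeMo recognizes, e.g.:
  # pretrained_model_name: bert-large-uncased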

  optim:
    name: adam
    lr: 5e-5
    weight_decay: 0.00

    sched:
      name: WarmupAnnealing

      # Scheduler params
      warmup_steps: null
      warmup_ratio: 0.1
      last_epoch: -1

      # pytorch lightning args
      monitor: val_loss
      reduce_on_plateau: false
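
      # warmup_ratio: 0.1 warms the LR up over the first 10% of total steps.
      # Rough arithmetic for the default MRPC run (illustrative; exact counts
      # depend on your data): ~3.7k train examples / batch_size 32 ≈ 115 steps
      # per epoch, x 3 epochs ≈ 345 steps, so ≈ 35 warmup steps.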

exp_manager:
  exp_dir: null # exp_dir for your experiment; if None, defaults to "./nemo_experiments"
  name: *task_name # the name of your model
  create_tensorboard_logger: True # whether you want exp_manager to create a TensorBoard logger
  create_checkpoint_callback: True # whether you want exp_manager to create a ModelCheckpoint callback
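
# With the defaults above, logs and checkpoints should land under
# ./nemo_experiments/mrpc/... (the run name comes from the *task_name anchor;
# the exact subfolder layout is an assumption based on exp_manager's usual
# <exp_dir>/<name>/<version> convention).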

hydra:
  run:
    dir: .
  job_logging:
    root:
      handlers: null
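
# The hydra block keeps the working directory unchanged (run.dir: .) and clears
# hydra's root logging handlers, leaving logging to exp_manager.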