---
# Model configuration: dataset/mel settings, gpt, vqvae and bigvgan
# hyper-parameters, followed by the checkpoint file names.
#
# NOTE(review): the nesting below (`mel` under `dataset`, `condition_module`
# under `gpt`, `dist_config` under `bigvgan`) is reconstructed from the key
# grouping of the flattened original - confirm against the code that loads
# this file.

dataset:
  bpe_model: checkpoints/bpe.model
  sample_rate: 24000
  squeeze: false
  mel:
    # Same value as dataset.sample_rate and bigvgan.sampling_rate.
    sample_rate: 24000
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 100
    mel_fmin: 0
    normalize: false

gpt:
  model_dim: 1024
  max_mel_tokens: 605
  max_text_tokens: 402
  heads: 16
  use_mel_codes_as_input: true
  mel_length_compression: 1024
  layers: 20
  number_text_tokens: 12000
  # NOTE(review): 8194 = vqvae.num_tokens (8192) + 2; the start/stop mel
  # tokens below take ids 8192 and 8193 - presumably these must stay in sync.
  number_mel_codes: 8194
  start_mel_token: 8192
  stop_mel_token: 8193
  start_text_token: 0
  stop_text_token: 1
  train_solo_embeddings: false
  condition_type: "conformer_perceiver"
  condition_module:
    output_size: 512
    linear_units: 2048
    attention_heads: 8
    num_blocks: 6
    input_layer: "conv2d2"
    perceiver_mult: 2

vqvae:
  # NOTE(review): equals dataset.mel.n_mels (100) - presumably the mel
  # channel count; verify before changing either.
  channels: 100
  num_tokens: 8192
  hidden_dim: 512
  num_resnet_blocks: 3
  codebook_dim: 512
  num_layers: 2
  positional_dims: 1
  kernel_size: 3
  smooth_l1_loss: true
  use_transposed_convs: false

bigvgan:
  adam_b1: 0.8
  adam_b2: 0.99
  lr_decay: 0.999998
  seed: 1234

  # Quoted on purpose: the value is the string "1", not the integer 1.
  resblock: "1"
  # The product of these rates is 1024 (same number as
  # gpt.mel_length_compression).
  upsample_rates: [4, 4, 4, 4, 2, 2]
  upsample_kernel_sizes: [8, 8, 4, 4, 4, 4]
  upsample_initial_channel: 1536
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes:
    - [1, 3, 5]
    - [1, 3, 5]
    - [1, 3, 5]
  feat_upsample: false
  speaker_embedding_dim: 512
  cond_d_vector_in_each_upsampling_layer: true

  # Same value as gpt.model_dim (1024).
  gpt_dim: 1024

  activation: "snakebeta"
  snake_logscale: true

  use_cqtd_instead_of_mrd: true
  cqtd_filters: 128
  cqtd_max_filters: 1024
  cqtd_filters_scale: 1
  cqtd_dilations: [1, 2, 4]
  cqtd_hop_lengths: [512, 256, 256]
  cqtd_n_octaves: [9, 9, 9]
  cqtd_bins_per_octaves: [24, 36, 48]

  resolutions:
    - [1024, 120, 600]
    - [2048, 240, 1200]
    - [512, 50, 240]
  mpd_reshapes: [2, 3, 5, 7, 11]
  use_spectral_norm: false
  discriminator_channel_mult: 1

  use_multiscale_melloss: true
  lambda_melloss: 15

  clip_grad_norm: 1000

  segment_size: 16384
  # The values below repeat dataset.mel (100 mels, n_fft 1024, hop 256,
  # win 1024, 24000 Hz) under this section's own key names.
  num_mels: 100
  num_freq: 1025
  n_fft: 1024
  hop_size: 256
  win_size: 1024

  sampling_rate: 24000

  fmin: 0
  fmax: null
  fmax_for_loss: null
  mel_type: "pytorch"

  num_workers: 2
  dist_config:
    dist_backend: "nccl"
    dist_url: "tcp://localhost:54321"
    world_size: 1

# Checkpoint file names, one per model section above.
dvae_checkpoint: dvae.pth
gpt_checkpoint: gpt.pth
bigvgan_checkpoint: bigvgan_generator.pth