yujiepan committed on
Commit
a45549b
·
verified ·
1 Parent(s): 849fafd

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +4 -2
  2. config.json +4 -5
  3. model.safetensors +2 -2
README.md CHANGED
@@ -140,8 +140,8 @@ config.token2wav_config.bigvgan_config.upsample_initial_channel = 32
140
  config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
141
  config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
142
 
143
- config.token2wav_config.dit_config.depth = 1
144
- config.token2wav_config.dit_config.num_hidden_layers = 1
145
  config.token2wav_config.dit_config.hidden_size = 16
146
  config.token2wav_config.dit_config.dim = 16
147
  config.token2wav_config.dit_config.emb_dim = 16
@@ -155,6 +155,8 @@ config.token2wav_config.dit_config.enc_lin_neurons = 16
155
  config.token2wav_config.dit_config.head_dim = 16
156
  config.token2wav_config.dit_config.num_attention_heads = 1
157
  config.token2wav_config.dit_config.heads = 1
 
 
158
  # avoid mismatch in vocab size because this is random model!
159
  config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
160
  print(config)
 
140
  config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
141
  config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
142
 
143
+ config.token2wav_config.dit_config.depth = 2
144
+ config.token2wav_config.dit_config.num_hidden_layers = 2
145
  config.token2wav_config.dit_config.hidden_size = 16
146
  config.token2wav_config.dit_config.dim = 16
147
  config.token2wav_config.dit_config.emb_dim = 16
 
155
  config.token2wav_config.dit_config.head_dim = 16
156
  config.token2wav_config.dit_config.num_attention_heads = 1
157
  config.token2wav_config.dit_config.heads = 1
158
+ config.token2wav_config.dit_config.look_ahead_layers = [1]
159
+ config.token2wav_config.dit_config.look_backward_layers = [0]
160
  # avoid mismatch in vocab size because this is random model!
161
  config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
162
  print(config)
config.json CHANGED
@@ -437,7 +437,7 @@
437
  "chunk_size_feed_forward": 0,
438
  "cross_attention_hidden_size": null,
439
  "decoder_start_token_id": null,
440
- "depth": 1,
441
  "dim": 16,
442
  "diversity_penalty": 0.0,
443
  "do_sample": false,
@@ -488,11 +488,10 @@
488
  },
489
  "length_penalty": 1.0,
490
  "look_ahead_layers": [
491
- 10
492
  ],
493
  "look_backward_layers": [
494
- 0,
495
- 20
496
  ],
497
  "max_length": 20,
498
  "max_position_embeddings": 32768,
@@ -504,7 +503,7 @@
504
  "num_beam_groups": 1,
505
  "num_beams": 1,
506
  "num_embeds": 8448,
507
- "num_hidden_layers": 1,
508
  "num_return_sequences": 1,
509
  "output_attentions": false,
510
  "output_hidden_states": false,
 
437
  "chunk_size_feed_forward": 0,
438
  "cross_attention_hidden_size": null,
439
  "decoder_start_token_id": null,
440
+ "depth": 2,
441
  "dim": 16,
442
  "diversity_penalty": 0.0,
443
  "do_sample": false,
 
488
  },
489
  "length_penalty": 1.0,
490
  "look_ahead_layers": [
491
+ 1
492
  ],
493
  "look_backward_layers": [
494
+ 0
 
495
  ],
496
  "max_length": 20,
497
  "max_position_embeddings": 32768,
 
503
  "num_beam_groups": 1,
504
  "num_beams": 1,
505
  "num_embeds": 8448,
506
+ "num_hidden_layers": 2,
507
  "num_return_sequences": 1,
508
  "output_attentions": false,
509
  "output_hidden_states": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5856e6a6c48b25459916c6233cedb53ad30943caf32674d030e780e8228f022
3
- size 11223328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:872869d5ab1b80233192acb88483009b3588d82ca7419ec5ff35d7e44673540b
3
+ size 11240320