Upload folder using huggingface_hub
Browse files
- README.md +4 -2
- config.json +4 -5
- model.safetensors +2 -2
README.md
CHANGED
@@ -140,8 +140,8 @@ config.token2wav_config.bigvgan_config.upsample_initial_channel = 32
|
|
140 |
config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
|
141 |
config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
|
142 |
|
143 |
-
config.token2wav_config.dit_config.depth =
|
144 |
-
config.token2wav_config.dit_config.num_hidden_layers =
|
145 |
config.token2wav_config.dit_config.hidden_size = 16
|
146 |
config.token2wav_config.dit_config.dim = 16
|
147 |
config.token2wav_config.dit_config.emb_dim = 16
|
@@ -155,6 +155,8 @@ config.token2wav_config.dit_config.enc_lin_neurons = 16
|
|
155 |
config.token2wav_config.dit_config.head_dim = 16
|
156 |
config.token2wav_config.dit_config.num_attention_heads = 1
|
157 |
config.token2wav_config.dit_config.heads = 1
|
|
|
|
|
158 |
# avoid mismatch in vocab size because this is random model!
|
159 |
config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
|
160 |
print(config)
|
|
|
140 |
config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
|
141 |
config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
|
142 |
|
143 |
+
config.token2wav_config.dit_config.depth = 2
|
144 |
+
config.token2wav_config.dit_config.num_hidden_layers = 2
|
145 |
config.token2wav_config.dit_config.hidden_size = 16
|
146 |
config.token2wav_config.dit_config.dim = 16
|
147 |
config.token2wav_config.dit_config.emb_dim = 16
|
|
|
155 |
config.token2wav_config.dit_config.head_dim = 16
|
156 |
config.token2wav_config.dit_config.num_attention_heads = 1
|
157 |
config.token2wav_config.dit_config.heads = 1
|
158 |
+
config.token2wav_config.dit_config.look_ahead_layers = [1]
|
159 |
+
config.token2wav_config.dit_config.look_backward_layers = [0]
|
160 |
# avoid mismatch in vocab size because this is random model!
|
161 |
config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
|
162 |
print(config)
|
config.json
CHANGED
@@ -437,7 +437,7 @@
|
|
437 |
"chunk_size_feed_forward": 0,
|
438 |
"cross_attention_hidden_size": null,
|
439 |
"decoder_start_token_id": null,
|
440 |
-
"depth":
|
441 |
"dim": 16,
|
442 |
"diversity_penalty": 0.0,
|
443 |
"do_sample": false,
|
@@ -488,11 +488,10 @@
|
|
488 |
},
|
489 |
"length_penalty": 1.0,
|
490 |
"look_ahead_layers": [
|
491 |
-
|
492 |
],
|
493 |
"look_backward_layers": [
|
494 |
-
0
|
495 |
-
20
|
496 |
],
|
497 |
"max_length": 20,
|
498 |
"max_position_embeddings": 32768,
|
@@ -504,7 +503,7 @@
|
|
504 |
"num_beam_groups": 1,
|
505 |
"num_beams": 1,
|
506 |
"num_embeds": 8448,
|
507 |
-
"num_hidden_layers":
|
508 |
"num_return_sequences": 1,
|
509 |
"output_attentions": false,
|
510 |
"output_hidden_states": false,
|
|
|
437 |
"chunk_size_feed_forward": 0,
|
438 |
"cross_attention_hidden_size": null,
|
439 |
"decoder_start_token_id": null,
|
440 |
+
"depth": 2,
|
441 |
"dim": 16,
|
442 |
"diversity_penalty": 0.0,
|
443 |
"do_sample": false,
|
|
|
488 |
},
|
489 |
"length_penalty": 1.0,
|
490 |
"look_ahead_layers": [
|
491 |
+
1
|
492 |
],
|
493 |
"look_backward_layers": [
|
494 |
+
0
|
|
|
495 |
],
|
496 |
"max_length": 20,
|
497 |
"max_position_embeddings": 32768,
|
|
|
503 |
"num_beam_groups": 1,
|
504 |
"num_beams": 1,
|
505 |
"num_embeds": 8448,
|
506 |
+
"num_hidden_layers": 2,
|
507 |
"num_return_sequences": 1,
|
508 |
"output_attentions": false,
|
509 |
"output_hidden_states": false,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:872869d5ab1b80233192acb88483009b3588d82ca7419ec5ff35d7e44673540b
|
3 |
+
size 11240320
|