manaestras committed on
Commit 2128afa · verified · 1 Parent(s): 1a5df51

Upload config.json with huggingface_hub
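For context, this is roughly how such a commit is produced with the huggingface_hub Python client; a minimal sketch, assuming a placeholder repo id rather than the actual repository path:

# Minimal sketch of the upload described by this commit message.
# The repo id below is a placeholder, not the actual repository.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_file(
    path_or_fileobj="config.json",   # local file to push
    path_in_repo="config.json",      # destination path inside the repo
    repo_id="your-org/your-model",   # placeholder: replace with the real repo id
    repo_type="model",
    commit_message="Upload config.json with huggingface_hub",
)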

Files changed (1)
  config.json +25 -129
config.json CHANGED
@@ -1,144 +1,41 @@
 {
+  "add_classification_head": false,
   "architectures": [
     "HunYuanDenseV1ForCausalLM"
   ],
   "attention_bias": false,
-  "attention_dropout": 0.0,
+  "attention_dropout": 0.1,
   "attention_head_dim": 128,
-  "head_dim": 128,
-  "auto_map": {
-    "AutoConfig": "configuration_hunyuan.HunYuanConfig",
-    "AutoModel": "modeling_hunyuan.HunyuanModel",
-    "AutoModelForCausalLM": "modeling_hunyuan.HunYuanForCausalLM"
-  },
   "bos_token_id": 1,
   "cla_share_factor": 2,
-  "eos_token_id": 2,
-  "group_limited_greedy": false,
+  "class_num": 0,
+  "dense_list": [
+    4096,
+    0
+  ],
+  "eod_token_id": 127967,
+  "eos_token_id": 127960,
+  "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
+  "im_end_id": 5,
+  "im_newline_id": 11,
+  "im_start_id": 4,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "kv_lora_rank": null,
-  "max_position_embeddings": 4096,
+  "mask_init_id": 12,
+  "max_position_embeddings": 32768,
   "mlp_bias": false,
   "model_type": "hunyuan_v1_dense",
-  "moe_drop_tokens": false,
-  "moe_intermediate_size": [
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336,
-    14336
-  ],
-  "moe_layer_num_skipped": 0,
-  "moe_random_routing_dropped_token": false,
-  "moe_topk": [
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1
-  ],
-  "n_group": false,
-  "norm_topk_prob": false,
+  "norm_type": "rms",
   "num_attention_heads": 32,
-  "num_experts": 1,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "num_shared_expert": [
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1,
-    1
-  ],
-  "pad_token_id": 0,
+  "org_vocab_size": 128167,
+  "pad_id": 127961,
+  "pad_token_id": 127961,
+  "pool_type": "last",
   "pretraining_tp": 1,
-  "q_lora_rank": null,
-  "qk_nope_head_dim": null,
-  "qk_rope_head_dim": null,
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "alpha": 1000.0,
@@ -150,16 +47,15 @@
     "type": "dynamic"
   },
   "rope_theta": 10000.0,
-  "routed_scaling_factor": false,
+  "sep_token_id": 127962,
+  "text_end_id": 7,
+  "text_start_id": 6,
   "tie_word_embeddings": true,
-  "topk_group": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.41.2",
   "use_cache": true,
   "use_cla": false,
-  "use_mixed_mlp_moe": false,
-  "use_mla": false,
   "use_qk_norm": true,
-  "v_head_dim": null,
-  "vocab_size": 129024
+  "use_rotary_pos_emb": true,
+  "vocab_size": 128167
 }
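As a quick sanity check of the new file, the updated values visible in this diff can be verified after downloading config.json; a minimal sketch, assuming the file is available locally:

# Verify a few fields changed by this commit (expected values taken from the diff above).
import json

with open("config.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "hunyuan_v1_dense"
assert cfg["max_position_embeddings"] == 32768  # was 4096
assert cfg["vocab_size"] == 128167              # was 129024
assert cfg["eos_token_id"] == 127960            # was 2
assert cfg["pad_token_id"] == 127961            # was 0
print("config.json matches the values in this commit")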