dacorvo HF Staff committed on
Commit
a6c37fe
·
verified ·
1 Parent(s): b4a21ce

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +255 -0
  2. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd0c6ed2a9764168cb6.json +73 -0
  3. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/58bb3ae501d4d87ab565.json +73 -0
  4. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f5771202abb1a7ae2611.json +73 -0
  5. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/af1749224d7631fdb38a.json +73 -0
  6. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/38357c754f2cbd1d382e.json +77 -0
  7. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/89acac1543511de63934.json +77 -0
  8. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a91139c3aee28b0f6c94.json +77 -0
  9. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/3ed9abae6f7f67d07da0.json +77 -0
  10. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/42b299016a0208fd74b0.json +77 -0
  11. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/7b749096ec9bec32a9c5.json +77 -0
  12. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/c36bfaee43f6f840c055.json +77 -0
  13. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/29f3c7ef0e6a87f35788.json +78 -0
  14. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/6d3aa041c08b8396ee41.json +78 -0
  15. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/7523661354aea5f66d6c.json +78 -0
  16. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/7c2b4b0f296eeda524b6.json +78 -0
  17. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/9932c1d05bd91a781c3a.json +78 -0
  18. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/ad521b5b119cf355612b.json +78 -0
  19. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/6517315f1bb86d99f525.json +78 -0
  20. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ec762024d178ab5e922b.json +78 -0
  21. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ee2a399906ca25519a40.json +78 -0
  22. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/350aa0c9a5de191ee7c7.json +73 -0
  23. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/85ab3636ce10c112729d.json +73 -0
  24. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/e6b337232e4796edc487.json +73 -0
  25. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/aaf845721275df37b728.json +77 -0
  26. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/16874c09c9bed580256d.json +74 -0
  27. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/e38863225742ea4adc6d.json +74 -0
  28. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/fecca0469b6993e5d3c5.json +74 -0
  29. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/373976326fffd2754567.json +71 -0
  30. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/84548c49ed5ba851eeb3.json +71 -0
  31. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/92ed3c6dd358107ccc6f.json +71 -0
  32. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/34b1d02021287cfd46b2.json +75 -0
  33. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/7ee9b03bb7a8b01f359e.json +75 -0
  34. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/e709ce08492ca8a65007.json +75 -0
  35. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/4a3a9b902e21f2cd7582.json +72 -0
  36. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/2ccaa328023740986e21.json +73 -0
  37. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0984b1f74e21db9e1ffb.json +78 -0
  38. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/1e2d4e1d3b95dfb315e1.json +77 -0
  39. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/a40dd4e2c2b33ddea710.json +71 -0
  40. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/qwen3/Qwen/Qwen3-0.6B/79fe6d5dc76e8ab41df4.json +72 -0
  41. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/81fac095f3d6ebe884c6.json +73 -0
  42. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c665cd695ad271120e6d.json +73 -0
  43. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ec221ea8393e9e9fd62f.json +73 -0
  44. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/d891b7116252e4de6e44.json +73 -0
  45. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/834a675df2e91e91bb4f.json +77 -0
  46. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/836e81ae1c74a144e099.json +77 -0
  47. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/f070d170462fdefcc7cb.json +77 -0
  48. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/9e060dd6a7994acb47cf.json +78 -0
  49. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d8583f8c5b487ebc1043.json +78 -0
  50. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ddc7f7d79500c5808b41.json +78 -0
.gitattributes CHANGED
@@ -9165,3 +9165,258 @@ neuronxcc-2.17.194.0+d312836f/MODULE_14c06ce926029fd4a799+bfe5714b/model.neff fi
9165
  neuronxcc-2.17.194.0+d312836f/MODULE_a1d0dc30908164d5670a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
9166
  neuronxcc-2.17.194.0+d312836f/MODULE_a1d0dc30908164d5670a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9167
  neuronxcc-2.17.194.0+d312836f/MODULE_cf5f61d60b0a013ebeb0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9165
  neuronxcc-2.17.194.0+d312836f/MODULE_a1d0dc30908164d5670a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
9166
  neuronxcc-2.17.194.0+d312836f/MODULE_a1d0dc30908164d5670a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9167
  neuronxcc-2.17.194.0+d312836f/MODULE_cf5f61d60b0a013ebeb0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9168
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_00a086963636c3805778+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9169
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0173a1d42ddbeaeb8a09+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9170
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0173a1d42ddbeaeb8a09+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9171
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_025b98fcff252cba485d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9172
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9173
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0589868b7472c4335f2c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9174
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_061f22ddc7af0a5d29f9+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9175
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_097b7e476646dbe13102+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9176
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0aacfff4eaf4af570582+97bb129f/model.neff filter=lfs diff=lfs merge=lfs -text
9177
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0aacfff4eaf4af570582+97bb129f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9178
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b8b4fa5620a4855f332+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9179
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0b8b4fa5620a4855f332+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9180
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9181
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bbe60dde8eaacbc8218+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9182
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f259e869c3d6531c373+c2cadba6/model.neff filter=lfs diff=lfs merge=lfs -text
9183
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9184
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0f82fe03a3af29dff8f6+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9185
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_0faba5d4e9a33c984547+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9186
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1845adad88a4d5ae3381+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9187
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_184d4ed11977011ce5e7+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9188
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1bce819dd943e86ebacb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9189
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1cb238717180fb6073dd+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9190
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1cb238717180fb6073dd+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9191
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1d687728a717f87883ca+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9192
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9193
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1e6502638c15da1920c2+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9194
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea6ee6758cc98aa0e89+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9195
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ea7c53c16e767166083+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9196
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9197
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2180fcda61d340fd5708+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9198
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_225aa072d587f98b83e9+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9199
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_225aa072d587f98b83e9+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9200
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9201
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2371c90829406a620831+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9202
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9203
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_23a2b8e835dd2021c6b3+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9204
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_251684745b0a47fb333e+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9205
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_25b514ebfb24f7862617+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9206
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_25b514ebfb24f7862617+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9207
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2716a18eea591aaf4f20+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9208
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_27b73ba03dc7e3a65afc+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9209
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2a9a2536c0daf08da8a9+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9210
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2aa08aa6793c444a88ea+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9211
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2af3b88e8bfa7573c510+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9212
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9213
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bed80089cc0d5def73a+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9214
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9215
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2c3f8d60681c3a37a82f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9216
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2e56c815cb8a782e4314+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9217
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2e56c815cb8a782e4314+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9218
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2e79c5faf457fd022d25+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9219
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9220
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_30c8e5dffb371f5a2fc0+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9221
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_31652417a84067caf6b0+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9222
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_31652417a84067caf6b0+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9223
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_32d7c444ecc189aa17a8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9224
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_32d7c444ecc189aa17a8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9225
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9226
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3422142ebef7a787c200+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9227
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3596a0566dd0e4bfadf9+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9228
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3596a0566dd0e4bfadf9+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9229
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3816aa1fdb03c5072bf0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9230
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3829e8e6510db07ee7eb+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
9231
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_38a491bb249cb0a65a66+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9232
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a1def4d4bbb23b557b4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9233
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3ae44108fc0ad72d2ae5+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9234
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_3fcde42e05c364259ae4+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9235
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9236
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4298565bf741752d31de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9237
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9238
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_445ff816d483a8457b1f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9239
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_44b3dad90bfddb867dbb+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9240
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9241
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_46df7985de24ff1ac32d+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9242
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4aad761989e914a680ea+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9243
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_4cc296fe1b7abcee7c5b+8afcc287/model.neff filter=lfs diff=lfs merge=lfs -text
9244
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5011cce3a1b686f14b46+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9245
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5011cce3a1b686f14b46+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9246
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9247
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5062c09b51ff9154f184+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9248
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_51d7ba87477561ddf31e+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9249
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_569380bce0f73e129472+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9250
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_57e3cfa23bd65e42d62f+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9251
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9252
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a10198534c5f2725fd7+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9253
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9254
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5a53296578f317d37283+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9255
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5ba89bbb07ae57870f9d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9256
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_5fefca4728e29af5f535+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9257
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_63c2c34c7243bf28abca+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9258
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_65cc17d5daa2b60921f9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9259
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9260
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6642d35addb60ec085b1+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9261
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9262
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_67377aec01dd303d63ef+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9263
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_67660d190a903f2f351f+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9264
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_679852c8ccd2cff92dd1+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9265
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e4efecaeb594ce776fc+651dae50/model.neff filter=lfs diff=lfs merge=lfs -text
9266
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e4efecaeb594ce776fc+651dae50/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9267
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ec54d1cb60ff01fc2a1+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9268
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ec54d1cb60ff01fc2a1+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9269
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6eee1d2655983d3e332f+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9270
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_735269a5fa23fa6ad110+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9271
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_735269a5fa23fa6ad110+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9272
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7361e4b296923179e6ad+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9273
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_73ec31085ec199e0bf87+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9274
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_73ec31085ec199e0bf87+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9275
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_761b3c734083fe56af68+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9276
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_76b0c22f7fc8c9e2ff54+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9277
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_781e8d2ba7d7773d4447+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9278
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7850f8c16172e0c763cf+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9279
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9280
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_791f80da2052a6cfa05d+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9281
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_796c64ab3e80099de463+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9282
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7b3c7ebc2d0673f07a89+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9283
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7b3c7ebc2d0673f07a89+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9284
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c80fa99c1314097c5ca+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9285
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c80fa99c1314097c5ca+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9286
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_7d84a4cdb76e9616c2a7+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
9287
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_81e0fb8dfd58e42d4ea8+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9288
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_835052e8e110b842f692+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9289
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_85a3b00cb47dd0b8aebf+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9290
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9291
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8bbedb082fb6a65e221e+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9292
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8c3869a20faa15ac2c30+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9293
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8c3869a20faa15ac2c30+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9294
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8cad8a60097b60afe9bb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9295
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e06e2fcd26e59afdf55+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9296
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e61477d57dfc23ed04f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9297
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8e61477d57dfc23ed04f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9298
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8ef8846d75fa5761c030+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9299
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8f8f2bb409247485b54f+408096d6/model.neff filter=lfs diff=lfs merge=lfs -text
9300
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_8fb80ba5396ef0e2e659+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9301
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_915f412a8437431eb2b2+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9302
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_915f412a8437431eb2b2+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9303
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_92cdc4fd3b2c266a1f01+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9304
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_92cdc4fd3b2c266a1f01+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9305
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9430ff6a35fc239154d1+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9306
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9461a71f0b134dc8d40a+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9307
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9461a71f0b134dc8d40a+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9308
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_94e031bc9e9d532910fa+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9309
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_94e031bc9e9d532910fa+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9310
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_951c8a5cf15504464697+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9311
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_951c8a5cf15504464697+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9312
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba292be7e7e441b703f+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9313
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bce7228204bd2d4577b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9314
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9bef1303fb9e21c6cb66+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
9315
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9d325789026e6ce273ae+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9316
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9317
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_9e2025b598171192a3d4+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9318
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9319
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a063942b4ff1b0d8f4dc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9320
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a105e663124eac5d157b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9321
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a1746e7481045e1ccc37+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9322
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9323
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a20343aa93b3e4cbf95f+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9324
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a3410e1989f6ba8dd0bb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9325
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a49b12b082ce2bdd4394+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9326
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a49b12b082ce2bdd4394+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9327
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9328
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a6e9c9e9fe59e3f8911d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9329
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a963e9aa5a33fdf44b01+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9330
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9331
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_a9b761f317f27ca2c845+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9332
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_abffbaee6c01c4e3d2c4+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9333
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9334
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b092b55c6af9d765923b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9335
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b19bd1cae910012a5fa6+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9336
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b19bd1cae910012a5fa6+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9337
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b254587b1ec9eec17e0e+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9338
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b254587b1ec9eec17e0e+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9339
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b3922fb6a70df3e2474d+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9340
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b3972df3e33231af7683+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9341
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9342
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b48fab2b0df2acb937c8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9343
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b75a1633e11cc79ab173+677eeb9d/model.neff filter=lfs diff=lfs merge=lfs -text
9344
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b77e0e7f43a3603bfe9b+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9345
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_b8997c221d74ec4a7842+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9346
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9347
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_bc0dc6318052d18d4f59+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9348
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcadb50ab85e2fba8d71+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9349
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcadb50ab85e2fba8d71+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9350
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_bf2b84c02a9cc5486e3e+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9351
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c0f9814a59c53c4073d6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9352
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9353
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c2d3f01e87cac25e1312+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9354
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c4db0300d76c7be4ad13+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9355
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c4db0300d76c7be4ad13+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9356
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c58c3b1c9745cdc8b7c7+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9357
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_c58c3b1c9745cdc8b7c7+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9358
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ca3a3e74bfb61306abb5+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9359
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_cb6ef7b1205d1d19bf46+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9360
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccbf8bb7df972e5cced8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9361
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ccbf8bb7df972e5cced8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9362
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_cdb1257943a078485626+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9363
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d265ad478c2b4276dbcc+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9364
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d265ad478c2b4276dbcc+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9365
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d366fd0dd6583a46a379+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9366
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d366fd0dd6583a46a379+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9367
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d3a9fa4c26a43f9f624a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9368
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9369
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d43edcfa396bb5f7fa37+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9370
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d45a62468e9816c897a9+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9371
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d51b8cc19b9784687709+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9372
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d51b8cc19b9784687709+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9373
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d52a1641563510c8105f+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9374
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9375
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d691ea3e06f5d7d31edf+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9376
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d6c19a447da67dfa686e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9377
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d757f0d340c8bb449f21+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9378
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d757f0d340c8bb449f21+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9379
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9380
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7e2548756fae2419754+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9381
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d84f59502564753faaf1+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9382
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d88489730602114d4d35+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9383
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9384
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_da644c4824015946da20+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9385
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_db708956da7a3174471d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9386
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_dc2d595d0dd94ca21077+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9387
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_dcf9688803fac8be3bc6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9388
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_de463290b0cc81f3e50a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9389
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e09b62be1d6d8ad18632+617f6939/model.neff filter=lfs diff=lfs merge=lfs -text
9390
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e09b62be1d6d8ad18632+617f6939/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9391
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e39dff68d617900c6e31+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9392
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4125c11885b90ff94c9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9393
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e48df5a1ca8b2b49e13f+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9394
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e4ae034a0e6bde8dd152+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9395
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e6cf0fcd9a16a4306e4c+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9396
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e716cefc11279f5393c2+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9397
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e752b6d369dd13e651a9+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9398
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9c7a317b8ff33889716+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9399
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9f9f8371558255bcb45+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9400
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9f9f8371558255bcb45+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9401
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9402
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ea75f74c78af49a980d6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9403
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_eb35938cbfc781930141+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9404
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_eb35938cbfc781930141+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9405
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec3616cec65f8deb284b+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9406
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ef8771b8447d48519ae4+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9407
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9408
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f407a744f5f40980a494+84f3e719/model.neff filter=lfs diff=lfs merge=lfs -text
9409
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f559b4b89708afc7e809+5be477de/model.neff filter=lfs diff=lfs merge=lfs -text
9410
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f559b4b89708afc7e809+5be477de/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9411
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f6fe1f7719e8a4b503de+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9412
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f80b15eb84a71eda0809+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9413
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9414
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f8cade1575b9d31c15da+ad9e832d/model.neff filter=lfs diff=lfs merge=lfs -text
9415
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
9416
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f95e3cf0271f5c661734+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9417
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f96d048ced77ea0c8420+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9418
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_fc1c3fb8409b31c4cc3d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
9419
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_fcbb78f6f5b1416b493a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
9420
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
9421
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_fdd3c2470676e1b82f01+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
9422
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_fe5cc26344fc85d3582c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/4bd0c6ed2a9764168cb6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/58bb3ae501d4d87ab565.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/f5771202abb1a7ae2611.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/af1749224d7631fdb38a.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev2",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/38357c754f2cbd1d382e.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 8,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 8,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/89acac1543511de63934.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a91139c3aee28b0f6c94.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/3ed9abae6f7f67d07da0.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/42b299016a0208fd74b0.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/7b749096ec9bec32a9c5.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 131072,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 131072,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 131072,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/c36bfaee43f6f840c055.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/29f3c7ef0e6a87f35788.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 48,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 48,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/6d3aa041c08b8396ee41.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 16,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 16,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/7523661354aea5f66d6c.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/7c2b4b0f296eeda524b6.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/9932c1d05bd91a781c3a.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 8,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 8,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.1-8B-Instruct/ad521b5b119cf355612b.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.1-8B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 32,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct",
26
+ "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 32,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/6517315f1bb86d99f525.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ec762024d178ab5e922b.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/ee2a399906ca25519a40.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/350aa0c9a5de191ee7c7.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/85ab3636ce10c112729d.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/e6b337232e4796edc487.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/microsoft/Phi-3-mini-4k-instruct/aaf845721275df37b728.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 4,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
29
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
30
+ "continuous_batching": true,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": true,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 4,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.0.dev2",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qk_layernorm": false,
53
+ "qkv_kernel_enabled": false,
54
+ "rpl_reduce_dtype": "bfloat16",
55
+ "sequence_length": 4096,
56
+ "sequence_parallel_enabled": false,
57
+ "speculation_length": 0,
58
+ "start_rank_id": 0,
59
+ "target": null,
60
+ "torch_dtype": "bfloat16",
61
+ "tp_degree": 2,
62
+ "vocab_parallel": false
63
+ },
64
+ "num_attention_heads": 32,
65
+ "num_hidden_layers": 32,
66
+ "num_key_value_heads": 32,
67
+ "original_max_position_embeddings": 4096,
68
+ "partial_rotary_factor": 1.0,
69
+ "resid_pdrop": 0.0,
70
+ "rms_norm_eps": 1e-05,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000.0,
73
+ "sliding_window": 2047,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 32064
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/16874c09c9bed580256d.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/e38863225742ea4adc6d.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/fecca0469b6993e5d3c5.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
26
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 2,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 1,
64
+ "original_max_position_embeddings": 16384,
65
+ "partial_rotary_factor": 1.0,
66
+ "resid_pdrop": 0.0,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 250000,
70
+ "sliding_window": null,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "vocab_size": 100352
74
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/373976326fffd2754567.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 4,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 4,
35
+ "max_context_length": 4096,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 4096,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 4096,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/84548c49ed5ba851eeb3.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 4096,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 4096,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 4096,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/92ed3c6dd358107ccc6f.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 128,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 128,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 128,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/34b1d02021287cfd46b2.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/7ee9b03bb7a8b01f359e.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/e709ce08492ca8a65007.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
24
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": true,
43
+ "optimum_neuron_version": "0.3.0.dev2",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 4,
60
+ "num_hidden_layers": 2,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": {
64
+ "factor": 4.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn",
67
+ "type": "yarn"
68
+ },
69
+ "rope_theta": 1000000.0,
70
+ "sliding_window": 131072,
71
+ "tie_word_embeddings": false,
72
+ "use_cache": true,
73
+ "use_sliding_window": false,
74
+ "vocab_size": 152064
75
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/4a3a9b902e21f2cd7582.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-1.7B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 28,
17
+ "model_type": "qwen3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "Qwen/Qwen3-1.7B",
26
+ "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev2",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 28,
63
+ "num_key_value_heads": 8,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 1000000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": true,
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/granite/ibm-granite/granite-3.1-2b-instruct/2ccaa328023740986e21.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": false,
47
+ "optimum_neuron_version": "0.3.0.dev3",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/llama/unsloth/Llama-3.2-1B-Instruct/0984b1f74e21db9e1ffb.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/phi3/microsoft/Phi-3-mini-4k-instruct/1e2d4e1d3b95dfb315e1.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "NxDNeuronConfig",
23
+ "async_mode": false,
24
+ "attn_kernel_enabled": false,
25
+ "batch_size": 4,
26
+ "capacity_factor": null,
27
+ "cc_pipeline_tiling_factor": 2,
28
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
29
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
30
+ "continuous_batching": true,
31
+ "enable_bucketing": false,
32
+ "ep_degree": 1,
33
+ "flash_decoding_enabled": false,
34
+ "fused_qkv": true,
35
+ "glu_mlp": true,
36
+ "is_chunked_prefill": false,
37
+ "local_ranks_size": 2,
38
+ "logical_nc_config": 1,
39
+ "max_batch_size": 4,
40
+ "max_context_length": 4096,
41
+ "max_topk": 256,
42
+ "mlp_kernel_enabled": false,
43
+ "mlp_kernel_fuse_residual_add": false,
44
+ "n_active_tokens": 4096,
45
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
46
+ "num_cores_per_group": 1,
47
+ "on_device_sampling": false,
48
+ "optimum_neuron_version": "0.3.0.dev3",
49
+ "output_logits": false,
50
+ "padding_side": "right",
51
+ "pp_degree": 1,
52
+ "qk_layernorm": false,
53
+ "qkv_kernel_enabled": false,
54
+ "rpl_reduce_dtype": "bfloat16",
55
+ "sequence_length": 4096,
56
+ "sequence_parallel_enabled": false,
57
+ "speculation_length": 0,
58
+ "start_rank_id": 0,
59
+ "target": null,
60
+ "torch_dtype": "bfloat16",
61
+ "tp_degree": 2,
62
+ "vocab_parallel": false
63
+ },
64
+ "num_attention_heads": 32,
65
+ "num_hidden_layers": 32,
66
+ "num_key_value_heads": 32,
67
+ "original_max_position_embeddings": 4096,
68
+ "partial_rotary_factor": 1.0,
69
+ "resid_pdrop": 0.0,
70
+ "rms_norm_eps": 1e-05,
71
+ "rope_scaling": null,
72
+ "rope_theta": 10000.0,
73
+ "sliding_window": 2047,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 32064
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/qwen2/Qwen/Qwen2.5-0.5B/a40dd4e2c2b33ddea710.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 4,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
24
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
25
+ "continuous_batching": true,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 4,
35
+ "max_context_length": 4096,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 4096,
40
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.3.0.dev3",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 4096,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 14,
60
+ "num_hidden_layers": 24,
61
+ "num_key_value_heads": 2,
62
+ "rms_norm_eps": 1e-06,
63
+ "rope_scaling": null,
64
+ "rope_theta": 1000000.0,
65
+ "sliding_window": 32768,
66
+ "tie_word_embeddings": true,
67
+ "use_cache": true,
68
+ "use_mrope": false,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151936
71
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev3/qwen3/Qwen/Qwen3-0.6B/79fe6d5dc76e8ab41df4.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen3-0.6B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "max_position_embeddings": 40960,
16
+ "max_window_layers": 28,
17
+ "model_type": "qwen3",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "Qwen/Qwen3-0.6B",
26
+ "checkpoint_revision": "e6de91484c29aa9480d55605af694f39b081c455",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev3",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 16,
62
+ "num_hidden_layers": 28,
63
+ "num_key_value_heads": 8,
64
+ "rms_norm_eps": 1e-06,
65
+ "rope_scaling": null,
66
+ "rope_theta": 1000000,
67
+ "sliding_window": null,
68
+ "tie_word_embeddings": true,
69
+ "use_cache": true,
70
+ "use_sliding_window": false,
71
+ "vocab_size": 151936
72
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/81fac095f3d6ebe884c6.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev5",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c665cd695ad271120e6d.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 1,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": false,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 1,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev5",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ec221ea8393e9e9fd62f.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 2,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
28
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 2,
39
+ "max_context_length": 100,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 100,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev5",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "float16",
54
+ "sequence_length": 100,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "residual_multiplier": 1.0,
67
+ "rms_norm_eps": 1e-06,
68
+ "rope_scaling": null,
69
+ "rope_theta": 10000.0,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 49152
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/d891b7116252e4de6e44.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "NxDNeuronConfig",
22
+ "async_mode": false,
23
+ "attn_kernel_enabled": false,
24
+ "batch_size": 4,
25
+ "capacity_factor": null,
26
+ "cc_pipeline_tiling_factor": 2,
27
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
28
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
29
+ "continuous_batching": true,
30
+ "enable_bucketing": false,
31
+ "ep_degree": 1,
32
+ "flash_decoding_enabled": false,
33
+ "fused_qkv": true,
34
+ "glu_mlp": true,
35
+ "is_chunked_prefill": false,
36
+ "local_ranks_size": 2,
37
+ "logical_nc_config": 1,
38
+ "max_batch_size": 4,
39
+ "max_context_length": 4096,
40
+ "max_topk": 256,
41
+ "mlp_kernel_enabled": false,
42
+ "mlp_kernel_fuse_residual_add": false,
43
+ "n_active_tokens": 4096,
44
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
45
+ "num_cores_per_group": 1,
46
+ "on_device_sampling": true,
47
+ "optimum_neuron_version": "0.3.0.dev5",
48
+ "output_logits": false,
49
+ "padding_side": "right",
50
+ "pp_degree": 1,
51
+ "qk_layernorm": false,
52
+ "qkv_kernel_enabled": false,
53
+ "rpl_reduce_dtype": "bfloat16",
54
+ "sequence_length": 4096,
55
+ "sequence_parallel_enabled": false,
56
+ "speculation_length": 0,
57
+ "start_rank_id": 0,
58
+ "target": null,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "residual_multiplier": 0.22,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": null,
69
+ "rope_theta": 5000000.0,
70
+ "tie_word_embeddings": true,
71
+ "use_cache": true,
72
+ "vocab_size": 49155
73
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/834a675df2e91e91bb4f.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/836e81ae1c74a144e099.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/llamafactory/tiny-random-Llama-3/f070d170462fdefcc7cb.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/9e060dd6a7994acb47cf.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d8583f8c5b487ebc1043.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ddc7f7d79500c5808b41.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.3.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }