diff --git a/.gitattributes b/.gitattributes index 82ec30d9915ebf89be1a59d18c7dcf97d63fb73e..04667736dc887792210f479fd1961d83d9b3acc0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8380,3 +8380,59 @@ neuronxcc-2.17.194.0+d312836f/MODULE_27fcd35d4a813c2d6df6+431f5505/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_7475515d803e573fa0b2+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_7475515d803e573fa0b2+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_cf20aba2fed840608d02+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json new file mode 100644 index 0000000000000000000000000000000000000000..8446ce90e2d9ad4f5ba75da7afe2ac084850f856 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/16eb66e6f195b2f2f3b4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json new file mode 100644 index 0000000000000000000000000000000000000000..76ae49d01bebe6516cd8afbf3a20ea25af47e7e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/88744e5aa5d753d1f538.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json new file mode 100644 index 0000000000000000000000000000000000000000..46fcd0ba4590aa5c04e1aee016fb94538a7ac151 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/021f11fdefa8c3b516bd.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json new file mode 100644 index 0000000000000000000000000000000000000000..13908f95369b0a43ba1245d72030264e26eac9eb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/1913cf3041a0fe975f3c.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 512, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 512, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json new file mode 100644 index 0000000000000000000000000000000000000000..f3554ea054eabc9b92261f56bb8563a7a2d3b033 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/50411def20a2b703209e.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 512, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 512, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 512, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json new file mode 100644 index 0000000000000000000000000000000000000000..20123ccdd84691c6826582391f2c6629aa106085 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/7bca5f2b6f1034c1fa71.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json new file mode 100644 index 0000000000000000000000000000000000000000..208b41849b5dbc24cf502a9a430a656f10fed302 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/34beaf9835b09fe53395.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json new file mode 100644 index 0000000000000000000000000000000000000000..8ef036b0f08d405f2fa040a87f17b1ec8a908a15 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/d1ea2689244397f649b7.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json new file mode 100644 index 0000000000000000000000000000000000000000..b2d619ec41f2d494df34a9fc6022c46d8f4b865e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/321051129e499a9d100e.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json new file mode 100644 index 0000000000000000000000000000000000000000..a00e9f9fd54626706dccca740d785856b32fa366 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/59e56e036f276aac27ec.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json new file mode 100644 index 0000000000000000000000000000000000000000..0b2822e3d9c6b74b411de13080dff12b22502d96 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/aed3ac4481c88779a26c.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 48, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 48, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json new file mode 100644 index 0000000000000000000000000000000000000000..cfa0dea1208bad41e59a4ea0864e9c2831e711cb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/bcd80b4d12e05bc045ce.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json new file mode 100644 index 0000000000000000000000000000000000000000..807e19641aed624b05b86dc3a7a1bc6f89060b95 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/c28ab0c7d33e28708b3c.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json new file mode 100644 index 0000000000000000000000000000000000000000..1a155680f9bd7719a9694e3bd562aaad5142a0ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/f0e18f873ce42aaa0b9d.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json new file mode 100644 index 0000000000000000000000000000000000000000..62d3175ebbb29bf91ccd2471098e079b28f637f0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/unsloth/Llama-3.1-8B-Instruct/fb1938af2d9e7e083207.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.1-8B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.1-8B-Instruct", + "checkpoint_revision": "4699cc75b550f9c6f3173fb80f4703b62d946aa5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json new file mode 100644 index 0000000000000000000000000000000000000000..04b841f91f144d6baf7289f8bebb4105856fb74d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/226b34f74113e0809145.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 131072, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json new file mode 100644 index 0000000000000000000000000000000000000000..a966328b9b6e7df3d1d560c1d8d6a5b37c087930 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/6fd6ed7696673c25e3bc.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json new file mode 100644 index 0000000000000000000000000000000000000000..9a095b9b4c396db8c9c28871b2c919f7ca525c32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/92505f1398020ba9caca.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json new file mode 100644 index 0000000000000000000000000000000000000000..e92a5e79e5bc6e787869669ed30ddf237ca07613 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/llamafactory/tiny-random-Llama-3/f9d69ed6dad30058c3f6.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3c2d4152ed1ea02327fc52a12893202e441eda --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/31f62ec0eaab290b4ae5.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json new file mode 100644 index 0000000000000000000000000000000000000000..f38e7453723601092cd245634eaa887650a29105 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/meta-llama/Llama-3.2-1B-Instruct/3da196f521260e769ca6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json new file mode 100644 index 0000000000000000000000000000000000000000..bd16e8e9aec3529faadc06da6985f4c212d1bd7e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/5e3d4766f8d5bbfad660.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json new file mode 100644 index 0000000000000000000000000000000000000000..02711815524046d82f266d875dc2a732f03cd302 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/73529828aa9ce630a6ca.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json new file mode 100644 index 0000000000000000000000000000000000000000..2df82512c87ac793b3df5abbaa5f6017c3c0b5e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f4d3009ec7e739e5ca73.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev8", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json new file mode 100644 index 0000000000000000000000000000000000000000..004318bc72347ff6f3f976d1c98189cb0c817873 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/llama/unsloth/Llama-3.2-1B-Instruct/f8f7e906c6b8549f7310.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json new file mode 100644 index 0000000000000000000000000000000000000000..95d9d5279ccdff340b15b836ce0b4d5ea8ef6973 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev8/qwen2/Qwen/Qwen2.5-0.5B/73595ac6243cfb832200.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev8", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json new file mode 100644 index 0000000000000000000000000000000000000000..a05dc5c89377210d858153cbc96b1a5f0919ccb3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/unsloth/Llama-3.2-1B-Instruct/4c9bbde8fe9d338394c9.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json new file mode 100644 index 0000000000000000000000000000000000000000..9234fbb600cccb587825d0939b6d5a760ac1d55b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/1bd96334e39ef6b9e94d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json new file mode 100644 index 0000000000000000000000000000000000000000..ff463501d621a2a15cc53d413da3339948ad2cb5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/a20fec503dbeb1c3cc2c.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json new file mode 100644 index 0000000000000000000000000000000000000000..db42b2fc64190bce1980a9e983072d5706e97ebf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/ad78e6467fba676e8a3a.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json new file mode 100644 index 0000000000000000000000000000000000000000..a7fa50b82233e87631a088d5b6f03c9a71b2de79 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/2a5585a9b282ac0f03cb.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json new file mode 100644 index 0000000000000000000000000000000000000000..f0adc5f89462bf5c9456edee56c130191923341b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/31b5ed5507a49ff23e88.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json new file mode 100644 index 0000000000000000000000000000000000000000..b7311de7b540e660fea05525e4757263a50ff52e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/439224868ff2d187153d.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 131072, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json new file mode 100644 index 0000000000000000000000000000000000000000..ca6186b6a919a7ea252eb7cd87bea0a46bfcae2b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/llamafactory/tiny-random-Llama-3/abebee697588b013b2de.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json new file mode 100644 index 0000000000000000000000000000000000000000..484805f4bbf64db4174cacc4639d57e37d049973 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/74c0497cba20383c2965.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json new file mode 100644 index 0000000000000000000000000000000000000000..0707f3d835835f2157afca9793bb1bda2850245b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/7726b0c1841e33ac8fb4.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c4f98b4c59492cb0de4cc0acd62a98b36d93a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/be88ce8756e95baff44b.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json new file mode 100644 index 0000000000000000000000000000000000000000..212f30de874a8e3d982aaf5937a2bf3def86571f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/llama/unsloth/Llama-3.2-1B-Instruct/ea6acd2a079e69be7049.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json new file mode 100644 index 0000000000000000000000000000000000000000..161b42eb8d7dd180a9a5b1b0f87113d16f248e1d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/2b10d94944cdae0f19a8.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json new file mode 100644 index 0000000000000000000000000000000000000000..870334d473a137bb05259700d813a3bad057bff8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/3f985dd5ca05fd50ed07.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json new file mode 100644 index 0000000000000000000000000000000000000000..f6c1fb3591e46d22ddb7c7accd892732b6e1672c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/450ed19be2b2d47ef8ee.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev0", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json new file mode 100644 index 0000000000000000000000000000000000000000..779562dc5dfaf0f681f3809d0d5334314268cc0d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/5d4e246892cde0a54f20.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json new file mode 100644 index 0000000000000000000000000000000000000000..ce0d7b464ab78207a29bbe17527312cba0325d43 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen2/Qwen/Qwen2.5-0.5B/79a57e92c5d28fbf2c7c.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fuse_qkv": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9bed5099dc9483150fe2424d67ca7efe04b4b819 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4e39ff7264047e564f306270e3be1e754a5c0edc58e50acbff1bb553452b53 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c2f3e72cff96dc89ddc8c057e2e9bf93a9b58a97 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_07e4a454b533bd5b9f6f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c6db91e68c2de260b3d051da59a240cf5afc202104e4fcf45f4ba09cc6bd59 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..72961b4cf5274b100fb3ba04cf7b196bd0afff31 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23333e0184c701665b77443dff9e0e7826cde3df7b3a8610dfd7a6cb3bf1916 +size 79913 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ac22dc315fe88dbd543348193a35c19a0e083e2d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932fc04b510efe964eaa7ba1983cedcd49d5bc2fb595b9b914ab4a3f69185d6c +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9d5b7992e6b8646cdd50a3e3ffcb5a20de0e60fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08eb5012506859e6b129+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb326a41ddb5d6d9473b240905d4d1e8cb2900bada8756712c4a03aea93ee03 +size 244351 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..13d9dd97ba57fb8a1de4ade4ed7a604a70f7b9d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff130cb67d5e702953d0ea95abe428fe1b519fd1745d9edd88354ebfa907cf0 +size 138008 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bf7ce46f3ddc09a278b861b0326c8f337f8c20c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_11e381f25d31953ed15d+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555451b7c6671b1cb2bb32cb435d6e9c99d3e890e9b1c037dbdd901ff652853a +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..32157f229be17480afc9e02a7ed48ca0cbc8f22a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb283cbcbda082b0343e3c355450dbc40f28dbde --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d59d6b460fa2d6257d38d2acccbe5f3ddcf20c3dbcee41ac9e5f9becf0069a4 +size 7574 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7057cd954b817a708f122ccaba58d43a4e3f706c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14332524047809853118+e30acd3a/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edde92f35705ea10331e2ec901503e77efcbe57b3c918119ffb2775f802f7fb2 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c4dccbe1d2bb154396c4f2e11e90a4ef6f5623b3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5928e1e3d71165915a25d2d9778aedfc58cd57ef9ba8621279558be58e16fb7 +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92b8dc4c0d9c639a32b308212f85380ea42012bd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_184d4ed11977011ce5e7+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b15ec188343dcedabc48667e269d3cafe56b8fcb199b8eed4cabf83ab5851d6 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bd4ac6a8c0d4323ce964930d6ae98a535948761 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82480b63e69fde09f4e77537a22aed3dea465afb13ef04e814662744ab15993b +size 76491 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb67dee384d5de47983b1ed4eb2a232f6f571341 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_374002f5e545876dbed9+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9b7366fe903881c52a003d874f50af91d5c94c4323aab4803b0718ad2ce874 +size 287744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83721f4cbf5bb97c427b47c3035753e33f7aa287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd4bcf2094c905f1d9198a2fa36e2a067f7ae8fbf9c63e63b52365a14b5106e +size 628200 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6513f7a5856783eda26944f2e08e539423b30f91 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6444b8e73d32d5bf587c6fa0e6caafc5ba0d245ace20208adc0de261b1115b78 +size 3748864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2cea014e3f3e8aa9a534ba332b713d861b49bc67 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3d7f88b0a10f3671efc8+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d878f479413f8ba944184c42b178ab5bf47641c26bd93e7880210602e8e4f34 +size 3862538 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3577ce3d2eaa1fae733b071da26fd70c022266fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a1023e1695522602409828ff9af4d776589ed5f150f52e01493955f204b04c +size 739176 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6ded20ab38f170c1280334d081ca6b073ab87671 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_410ab2339c0623a8ea99+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8145f5f50c3569815d235cdab7fa87ca9e1efcf3bceec379141c6a8d90d57d5e +size 19928064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..720ae028658683d1c9bd62350db5a2304efaee47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e52820e19bbf945d45b897bc3900f508886cdb8efdff25515450a7f8d02f8c +size 777821 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7fc9f82ead868f8cf68115f31ac779cbefb5213e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f54a4af65ac42af3f85054285f1db2841f87c479752d76d7a20dac051e03a6 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..25551823fbb632b0e87b424574bcb16462e47e62 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4617ff1c720a11b46bd3+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ea37c17f95777faafab50142f5108b134aa732136033c29ffb86aec876bc8e +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..03ef86da968ef3310bc88c3519592ffa217973a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1461a2ed509ab92da33988fdc193eb987c507eff9dd6758b21b339856f2a3dd +size 72141 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5b568a0d71075087a322708cf884498087a8b9cb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bb18150a194c32e820+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2268158d1e455f7dcca19bf11dd211da9bd0e4673ae5493cd2b443817272fd7e +size 308224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c9017e7e23d13737f79dc5ddd07c423862a5433 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149f133518214d64a0e2e50e57c0893d9da927d6b187b1c21ed9a629422cbaca +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..72caa39353c809fabb45731a770a40c2bf367780 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d8202c650b98a0b9d0f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e5362dd4933cea9c4726b7655a33993e0c9a9c9ebcd093edd5b4b7e9b1bae5 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3caf63fc3c751b2b4e928e50f9ffa5fb140ca59a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dcdea3312b01ffb145fc8f515f2936e088920c874284213e9451eb86c6d8ee8 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..409af56d1a157e8c03946a1d592fe5e1e1e57efe Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_5c4cdb0d007842d9a3dd+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9d4d89fb5927614c39f6f2fe56792b1da3ed8cd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bb5b4e60e6137e47352e1f6acf2c0e514d97969ad07538833c86b54af2730f +size 2086176 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0b3907e4e1270ee4e01d6c5efb9995086f5af45c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5d6569b6be8918b2148b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b3a65eb2452f301e932f38f4d28bd0185d7994e1f1640575cecb182b8b28d4 +size 3349504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94604d78d07ab4fb4cb1db1179e1e911e59d3159 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4037e6b92e1e02bcdb1e2252e395b8498e4d7a4948994163e19a803a6c60af78 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c57dd1d12acd369e05dfa89c900a301506ff209e Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_5e1f0cf421c6ea224d79+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..daa0b4fd24ab253f6ee24810ad4c2751d11e444b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849b663cadc043301c07883915b2b03298c5cbae9a2d80f930b480a2de149491 +size 649147 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0744e1ce946075e3a1159fa0fdebffda06e56461 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f2dd83fb3cd968b7503e4b5a90ca2c9dff0e86d0e0995eef1fbe5b74e30cc5 +size 3769344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3a8d2c9b17b08586b2d1fb96901700a12134c68b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_60adeac154227edc5134+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120f79ef74a1cba7ef44196f9818a9b86cc217d3698763b9692913a9fc42601a +size 3914385 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afdb8449d0ede4de36484484b3074b148b111ad2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d0b94c552f60c526deb2934135cef63dcd3a4630d400ef2a4fc75f68de21762 +size 777805 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..980ef53ccf32c10cbc3d310902ec262eca7502e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049b3fd347bf9f3e7ee596ca51646a5d8340c4003af50cd550f6897a91680ee0 +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7352ca14d4f780248a261490f20b46cd3cd6cef6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_669cd724b366299c9441+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddfb68ce8bb7a3c751b6e5a4c756dbaf8ce8e38c6733beaaa2131575b2f9bf6 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..521eafb619f06e02d9a55b73adb8a5b61dc9ed11 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b40feb88c18f5c98cd5a8fa3c300fdf6d2907cb64d7e032688fcc3caee5f6e +size 48142 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e00ad8b63244ad1a15f515d976c4ddfe50efad32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80943ed76c6b3a0c8a4312a8cd9edd1033f96938321c8f823896283f6c7b3aba +size 2356224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..08ab01680ad93af2843fd4984dd569303d499073 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_66fce16545f84447c1f9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffb0553219112833cbad182f747ba567a3cd540d871208c487906c0fc6959a3 +size 2364046 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..caaae7d4c6b57d6b79887fa945c42561b3c0def9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3db0257f40d5f315148f4daf5724c2d1e9a135a7734e104d2c422fed5a1c52 +size 1916098 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb010b6508e3ded988b82fe023788670d94d6c5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be62c133067884d30f68a41cee3b2d2c0e865c354de3366f700025a0e462e8b +size 10343424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..29613dd7412a495f28eb3cce38cbaf2b3055fdb0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_682b390c1c73a9a3bd11+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4173cdd894844064ecb73b5771a30bcfb44fb98fe2da29ea71534bdfbc76a817 +size 10687122 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef8c0b133e3ec006721ebf9d3256ab663be93460 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ca28165a5701f3548d67d9dd04124223691557db1d29c30b7f1504a7045f19 +size 593372 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..14df45cdce2dae2741946c2bb8ea658f54aa15e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_716aad5e7da409a95352+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eaad672df66b4982dd689e072572163ba3e82fe1c70b08aad3b11358208e583 +size 646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..54cc41250380b1b6487a5ad3740a0100fe8442ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0fd95e5a0f01ca281d53e211063e767cbdb566278422714801e867a1ec79da +size 767826 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9808b23034977e38354ffcff65742d95bae57da5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71cd1daaaacf2fea4c2a+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0d1df21e06ca84590a9039b023e85ad65ff2659a049a7841cf781b0f0951b32 +size 19948544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab4bb349521213e4549f00c8a6c9428e4b9f639b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bfdc653294fc5b5230a64e865c0867574faebbdf3cea7d5eb0645df40148b7e +size 1910431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e483ae77d84da91ff839232b0bf343d5f15de72c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9091e88a15bbc42044e459b73ff81dc53387f5dc3a11e1e2d2a653df899da719 +size 7138304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..097fca3d4a2ce58f7616790ea685e4602bf22c1f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b9581483dd1ab45e802+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee733fe51ed123e8a68ef16c967ba7ca7f7903ed5f64be725c97768965b4d32d +size 7481879 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8650c19b613bb2a1a94f60014e05f11f6c266f7c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7e85c7d1c8b3724c328336229e86eefd2929d69c03c76392f16f071e2a7fd0 +size 79391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..70acbbcb3fa1d0284c9cb3684bfd4d45145fd825 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d5e63db6db4994c4291+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7cdbf4ecf517cf5678495f975eb43a3a3bd95845b6a6012a237b3b7480a184 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..52d648f40ff5c91cf373217125d910cb51f1c3a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5c097cd892a9e7fd33052c96133f17665e6013ab607743ae337d7975109d99 +size 149919 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81e35bbfcd8fe46afe4a79a74ed2fd253ef5a509 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7f3f34b9263269526fa9+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85362ab4ff4dfd2a081e12ffd94c06e9ddb02b01c6971bbcd3e0de3ce5e6975 +size 594944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..660e7c55d426e5bb9c5f36d7ed34802f4e6410ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3a96e94e40e65a65aee5dae462c8ac82f267705e4a1129a508a6fba700e654 +size 78246 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89330bb2963a9af707d290f34f87993a3df72703 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8158ef6e31774adb8b6d2d35981c6ff7d7a3627a0a531c38699e7034e36c1819 +size 2376704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8045fdb7fb463c815a93b721fedb60a7dacbf02d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_818dd5b285ecc79f51b6+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fe8c7db631d867109dc0371f660b1552837d1c9adfc84b021f28da2c5e4fe9 +size 2384631 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ce20824f3aaeb974c62e09f3ca110aa5e87027c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d1716333edff8710cdfbba294d37e199b0b6393f9f28abde37017b11b3a33f +size 777805 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cf86dafa154864c4bb9899690503fbdbdfcd0b7b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9fc885c4e11d4a1f3a4f0cdab770c9c2014af157f00cf973e284063524fd8e +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0039ac7267d2eff37481129f7174bae3ded0dcf3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88825b33884e22bf2350+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7746c3764620653509fdefed28af0028784c0fa183052d5d8609e4556db39c3f +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2288a18f1312139defef51c7c8fc56678c718fbf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e0a9bac3f4a45c41f063cb7c340ed9f6b1815bce4662d6c96e463ad74509d8 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1216b9a83dee92ddc72631512710ea4744ce39dc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_88c0c20acb62b9015ac5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9969ddb8518badbbf4ea734d136f2722ecdd6bcef743cfd5d7b8840b5c7bba9 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d37fc8c52103e7a032fdc154879b488d0c1405e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fc15a4c473ec9658ace42b6645014b0e29bd1569a5931d1e7b95123b68424a +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c7e9de3bb13a2da01088be7c7d40de681d0d00bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8bb6b9b7fb63c8407d1b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cc1284e36060e464a4aed507eaa1369628acc20a2d407ece78280276a232a9 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f21d5b6d40bbc519ebe2552a5bbd984c1ec1d136 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617ffa8f001e32ee0196c13b0f23ae06dbdb8de720d647aaba4753171ab16b84 +size 46742 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..91dcfe213d6455484bbeddc1caafee21058a7169 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94a6e940063af75dc1b5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c9f37eb322306682ac2b6f70982c41deb46ba2a8871ed59b43601e91593b65 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1b4c249e05dcc3fb89fe6324e725f92f199ebe2c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9113dc0e9762cb9331ceecc475920436e309643b52d11693b1647afebeaeb496 +size 73017 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3caf03861bb9bbd0dc97f54d6583bf7e147e8252 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4035f02301558ea04c11606f61cf290049d7ec3387f5c0438859358bf6cdeaeb +size 308224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5c6199b4dfd37c87f104e670a3713c9c48f3d5a0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e7d5afcefd712aef192+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89c8035ef93977f62e08cad284fadebaab1ab6ca25157cb7858e016c75b3e2f +size 316031 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..01dcc4577fba2cfb5ac8e14cc790ae56703d9f68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c733cb6b8ae8f7dedd32e00a03b9d77bd89dc2f455c2c991ab8e5a736a635885 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..904abca1eda03d66d1add321cabb8d682a25bc2d Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9f112902682e3b314a33+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0035c03b96cdfe4522437d7b3cfe18a04e45886f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20fe486ff812e2f05404f114bcb45e7743d0d06236eeaedf7e1f4b3dc5346b57 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0aeb64d937e74def5ebe62e0aa6e13c3f404b944 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a6b9d38a6340a9ba7207+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4eceb90c2d382e732a915233f419b5edd7a75e29f684b8f26655116bf71dce6 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..101c97e3230e614f804592b5304a5c26c5499084 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072ff5d810620e7a9a84b16fef3223b86a92cf8c634d2328aff6230b1c7e50fa +size 7109 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d21392b764c5e5b7c2186216e626448bc7b30581 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_a8597b28bae6770eacac+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ca875248a3593ad63488da57ed8e7eec2ee0f3bd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4e5d9a94b12f00a88c9fe5a8361dbf26f5641f9ad46ffdee5d5243ed31b78e +size 113548 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fafd02a6d373149bef747593567e05ba6e3a6c19 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b02d75de24d831d530b4+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f11216420014d8ea987810401d8d317f4d6699532b3d7b77d75081e1849463 +size 564224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cd91c4cf79dc2cf068748116e76eae78c8317e9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2691f2383b7b223391dbd7984075c95b969a75d2ce0911a54c10e074ca78bea9 +size 2270072 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1ba5954e9bc09680c36ef73d248ae55ca50afdef --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0a68b4d4ba9281a9229+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e2f6f39c3be2a0667873d31d9107c396b415d965502c408ee31e241be43f19 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcc3494228a20f8b7998e468b7418084862a8006 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411cfaac8a3e7ccf827f19fedf00727036fc15dc8d472b7ecb24236b727778d8 +size 778205 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bee518d412cd0aaf92481011aa04d6240ec94b78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b861c844e563e6b80a0ec64bd104ec3672f392baaa0742e89aabcae098358d +size 9247744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2660130b51b2721e433496a380cfcd9a3dd5f9f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0e52e55b5c34e6b0178+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e307ef947b34fca66549c91f96b57d569289be2256903eff3b1a220bbfb6ac7 +size 9385688 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7af22d06dc69bf6d1975c2674500475899c169e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a607185ac8e1c9eb201959b1cd6e15e43d8678894f54a331dd268de52495bf +size 772276 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c14640263604bee4cff58183cd1f735bb78c8c4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dab6e97937d459068207d265be26660df17d9cb2a803327b91b888dd557cd42 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..feb97e210b76c899d73c357b5b74f1acae9a08f8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c35c27d074e574f52183+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edf94ccc57de4922c5718137c281a6404eac0bef6ca5e9a78ad63f130fba276 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..151d018cb385a2c66474a7b08d9cf11dd024070a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4cd3cb136cc0bf987f16dfb7409ca294275564fabe191c1e05a0ad061a6cd0 +size 1916098 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..713be4e9b7e6ee8ccb83711047927ea340d2a461 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d1e4367d1bc117624e8ac2f95e65fc43b5e6a6202abf68217b2d0780f2945 +size 11787264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1b9e96f86c4d466e54c3b45c62b4749dc55da842 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbb69a706480698b9414+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf69fcb5750632201b661935888a311dce0c694b052fe263e4b3c04224de9e74 +size 12130962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..babab798d113e425e9c7854bd8edcf310dd77534 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200d251291134ed265a4f3699f200becd08012f14f43322038dce8fcf5698090 +size 851463 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1834105fccd6bbfbc855c65bc5ce5fcf9f302450 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0af0d798d24f975f633+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f422e5d1c382e6ddcbb9643552740134d5d5903e397052425b220d25ce4703f0 +size 32277504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1da1fbbb4673aa6f587f97eabb54cbb1484fa370 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2696f336d4ed03adcc76dc652258d9da2fd0c9ce15b90476f92b52ead1f61aa5 +size 609043 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd02c31b205fd32e1b9e124c959b93d96e176a5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85127473fd068bc099044c1164ca518b09934a3907972a8f52280b5a10ee5bad +size 543744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d33e45c241c2754f2d5767dc258e05530a3f34f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d9e30f7fe75dbb843a13+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5f2fdcbb3c05e51831aabb0371024b65fec2a4b821337b6ccb21c43b94b820 +size 677911 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..382db3cec1b88161d47155fd5611802c02f1582d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82473f655e940f34378b426f6260339b353288bdbdd0a17c905b5b2934c56ed4 +size 2270072 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..831eef97b23c26aac1e851a92aef8e6d677e1a8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_db2676d850a0b0c3f502+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032e02ccf18fb38a221032d169b23d8199b824b5d9fdc4d310c95fac02f06993 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8a554e1b77b9a7df908fa74bdc87045aa82e3fae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff9d6f42eae55702201bb0e47eb34ec59feb160a18d9cd679c5a34266c5256d +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a7a70ec57b7a12609826a08367ad551f796f0dfb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4125c11885b90ff94c9+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c411f72cfea09d1fd2e949afebcbd79d1b4bdcd1fb49a3cdc073c21b6e3972c +size 8059904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5657789062feb0f4c892180983d927b8de1a5b56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e4d157e7d1f33e1d3fdb+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a94db50c586dda0c6e6ff5b77a9b3fd73a337d193a8b37c5614e790db7f638f +size 375876 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df3ee63e6b4a3c60f06ead2e8e1c5225ad083cc6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a519bfd64a19778e7e07e3bdb6db884a2c9e5d9012d2755db17deb84e86ba91 +size 920875 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..eeaa154db8f569971c3bd834d9cc8704eba8d0a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb6d7bd59bec7840dd72+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b579ac82cd273c18439532dc38822c29a405a08938f9b47b3b56e6a1216b757d +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..80cf0e7d916f54d6b81059eedf32e3b33660f9e1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce44c3da60a141c45bf52aba45ee0284557f2a2333feb82bb153bcde724d4e5 +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f229c4e23aff36d346b3e804897b3ee0cac445c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f1f84223fca9c333bb2f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7835b966c4346d1c0d379f864e4868b0b770f67a8c74f18d00a28d7c5f8c5d41 +size 7742464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6b510d4be87ec49bf3c76155e191c68ae173e7c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3408a18e64c05a03ec39c2b12533fbbe6bc69c67712e90697aa89abd20b18eee +size 778205 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ec3d719d1dc378ad3212155e623872c68ca2770f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ea9ca3fb605e724ac71ec24231b1601a808c0e93f6c2d45df4abe5f53369be +size 8285184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..98290131dfbd77806f18f9c07872124ae42d81b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f37bc22c32c557b09691+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c996060cec1f449494dd6ae3ac9d7e4f057de890b0ad1a640677e80e35f0a2 +size 8423128