diff --git a/.gitattributes b/.gitattributes index cbe63a53ee554d2a3fe8946e7d4aba243ff2e7d8..30c36f1ec7adf7d3a755fbac343f85e3484f5605 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8253,3 +8253,38 @@ neuronxcc-2.17.194.0+d312836f/MODULE_a1d42723f4b151570208+bfe5714b/model.neff fi neuronxcc-2.17.194.0+d312836f/MODULE_c2f195a5af02904a4378+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fdd1d8e95911bdcc8dec+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json new file mode 100644 index 0000000000000000000000000000000000000000..eae69da7ad4256a65ed10d9aa0cf6705c2cb1b00 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/46592604b62f7ce89082.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json new file mode 100644 index 0000000000000000000000000000000000000000..2ba8d4d3e1492c85162a91cf6b7ebcff0c5e9bd2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6517782804be0be5e2a4.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json new file mode 100644 index 0000000000000000000000000000000000000000..36bcccb268f07104571d19cc3200d4535b06bf76 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/bb0a40e5cbe4d1c25285.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json new file mode 100644 index 0000000000000000000000000000000000000000..ddcde7407b1eb93cd03e934f8dfa68a45b5205f7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/granite/ibm-granite/granite-3.1-2b-instruct/0c55cea89712c277017d.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json new file mode 100644 index 0000000000000000000000000000000000000000..ab0080bb057c590e885279cc1a02810dee516109 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/bfc3e6553a3d02bc5c75.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json new file mode 100644 index 0000000000000000000000000000000000000000..e6668f0b9ff19c3195df1f34c5e84579a9a44f49 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/llamafactory/tiny-random-Llama-3/ce0df42e903c9a49fa72.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 1024, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 1024, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 1024, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json new file mode 100644 index 0000000000000000000000000000000000000000..5d49fd922a23b75aaa3fb8a5471d5baa9db722b8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/meta-llama/Llama-3.2-1B-Instruct/ab418e732245677e7cd5.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json new file mode 100644 index 0000000000000000000000000000000000000000..55266c252a4449a1d7d5427c6e4d949f507caab0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/291ed174890a84141720.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json new file mode 100644 index 0000000000000000000000000000000000000000..8be472e58e9f7e7e1b98bf9f520925eec2455df2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/41afd2fb7f6326db0c2c.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json new file mode 100644 index 0000000000000000000000000000000000000000..74d7c6c2ac38c2ea9868b970b2de7c5c314fcd88 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/mixtral/dacorvo/Mixtral-tiny/650d381736581bd669c9.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev7", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json new file mode 100644 index 0000000000000000000000000000000000000000..50604097869663df0030290ec866005e901fa863 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/microsoft/Phi-3-mini-4k-instruct/5cd389b911fd41517716.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json new file mode 100644 index 0000000000000000000000000000000000000000..81a0158c06fbd297c66a85d87645058c69c0035b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/93956a41cd1203f773ff.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json new file mode 100644 index 0000000000000000000000000000000000000000..845543a70ddef9411724c14cd900011a71fb1d5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/b6837f9627ec2b9693be.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json new file mode 100644 index 0000000000000000000000000000000000000000..f948edf0b481bf62102ffed432c74180077f7184 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/phi3/yujiepan/phi-4-tiny-random/dcd70056fa539895bb43.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json new file mode 100644 index 0000000000000000000000000000000000000000..cb3eadc5911a214fe5086d17e74b9ffa6624117d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/Qwen/Qwen2.5-0.5B/fae9f820a75603366cca.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json new file mode 100644 index 0000000000000000000000000000000000000000..72cb5de1ae78e32428fe8339aad8f7be520de1f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/3ffe53335773a02926b2.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json new file mode 100644 index 0000000000000000000000000000000000000000..8d6ac36b691efecd4591e14a09deb5a073a8cd71 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/af69549e13162a34952b.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json new file mode 100644 index 0000000000000000000000000000000000000000..5e30b9b4c586401ff72ddf79ba17f3df6d9afd50 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/qwen2/yujiepan/qwen2.5-128k-tiny-random/be422824e6f4d14a5909.json @@ -0,0 +1,53 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev7", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0137bc9ba0bb8adf071ca54f7c316c2fe64fd047 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206a7ad89370cff8d2b2322c1ac670c89f032f09dacf78c1919f6862a5f988f8 +size 20204 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2cbfb7707feb2a889c59e59d8217d63491448567 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2443e5ed87cd2321559cfc61edbc0d0b9aed79ea53c6020815a6e66e449c9823 +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d26d776615893c63f9487ce93dab0bf854a7ebad --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54bcdec06d89d654845d062183036a3a4b545c119ab7f8bfcfcf806a54b9b35d +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a0ed747dc514f7b1c4f29a67bc3686e49303c411 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a0de5187c4d21a70fc592e7463fa702440eaf3733fefda7365e1e676adcd34 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a6e893a25329ac961b2d4fa2caf8cafe1d96623c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3978ae0dae4598ee8e9bbe8392fc2e7e13757206db58da945eeacc59ca5ff3de +size 21402 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..898526438801e0d8f85eefbd931aa0a0f5f29575 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec26c8671cbe08f14ab6cd99fba65b639956bf7fc6958535f68e2795b92937a5 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..64b088be808a82d8c95848c4dcf500d4dc8de3ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89450bb664c81cdf16fe49daafa237c12c1cd71f5d8ddae84827db4a2eee340a +size 17475 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9c25f42ef679cb30bb37d6a1f94abcaf2e1ac520 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ddbfd696a0ef85d93c9d3198ab829bdf6ccf964781bb6a275a60414915f2790 +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b491b19b467c4149e11536659bb757f969d67813 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bffb7434bff71e7ba712292397b2ad4ee6448cc362b3669d6140ae7eda9fed6f +size 21402 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48b8f0d8c772ddf720b2225a61e47a3c92f1ba8f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3ca6387784cd649abf3757bb3844b1eb9736e252e40b1a4968c1ece1b1fb16 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..11c60813aa45c4e0520e1943514d4184d27ade75 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bf2f66151be606218b2925a0449dd7916f07d36581a3b3b5bbd31c71807ecb +size 68277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d8ca5eea78f8b4aaa740c3f7567c4997b08267c9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c182ea56f8adaf519edd44ac24b026ae1b85469a1f83eacc00cf20faabcaf4c +size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..348562510616b4913bb3f081df2af57390e59670 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a62d214cf48465f4a0b90420bc898a8fd7f5f9a8dcb6370edcdcf87b44b719f0 +size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..375fc11fb6090b63bb3785c64be26ec8ee5a4553 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d12b378647e94605b87aaf73f6a9bae4ac22ceb71594fd8b7203d4c6129fb6 +size 302392 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1723f8ad00260fd1319a4fee8a8ba52404e67814 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e229618015e416964c5+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1bdace20d7ddedd1cd4a8ff16b363b6def498b939b9559a5dcdd5e554b694b +size 1414144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e6e3e749231167973be1e74d4fac2cd7ad2ee71a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f3f25ac84183e3e0fbaea932e3c43b3d7df2d165a2f3fd8d17bbb0d9b67100 +size 17475 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d07d9582ddb7d560c54abb423fdead4a855f306 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4aa452b0cdbab618fc46a764cba28672fece03e81d085eccbda181a1b57b1ab +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..336dd20dbbfca606c5506b057eb0c7f9827de046 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38184c632616d2476fe6e7794270c8a51ed09eccce54dfc80c4ee171068da566 +size 20501 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..29afaef2bbe1e51037cdeac1f7a0bbf9ee20703d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d7ca0b813faff5c0762966a281a46cde181daebae6b2e6da86be772e748dac6 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..235d59ffb22eac4d38831202f2394cf20f120cfa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcc61769d8fb9bea5e15c42d960c69b9df6f97061385d0bd0a21d0ce6906af6 +size 26028 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d13b467a2f9bcc1fa99b9ed354de63b24f0fe49 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8166c37217252f0baf51dbf17ac221fad45563e85dd6ba5929fc8a55b3dd8e +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6dcfa64daf274b7949b2fef1fa41ac04dd50d3af --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15a46bf48ef719945d1be5c931da1ae753a2a0511e68e8bc5d12755ee6c1d66 +size 22098 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff557dd2109a43d9c8b368c63d9b37bccd558ef7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e8943204962ee0818af6e9711dcb0ead3e19d18121061ed3bf339fda9db608 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8b1f7d7aab0bc821983bd1d38df4faf672d933bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2fb1e13c30d14bdf2a36c1c0b2efeca926d064a4e7ee7e668c6708a0d0e0ee +size 16631 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e93b8ea0906e4d2b40c1786fe0dc7fdb3d2d7dfb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e769f891bcdf52ba48bdf4d9cdc171f7ed5b3c7fa8e80d8b5142f02b0d9a39fb +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2f06e19836e6ceef1d8fc718dcffd5f6962937b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a92994ede93768a306aba61f9ff65cc12fd693d6855af9e79912cacd24c53e8 +size 81347 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9ca0f655f8891bf3b1097ab1281e2013050b5c7a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6645541ff96113d8e1bb+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21c560d53bd91367776a6093faa716f98f4a46654b4e26f21d3f03aaf58b6f5 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..672f837d60308b6f194264f9372d1360eaf83702 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635a56ee4821e472900e2f45aff06fca7b6436d644a90095b9c830f6deae7d18 +size 19832 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80db98f924e7b72a3f42399f56cd6f495cf77213 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_83cb40c0c38bacf5b8fd+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89bae87420edb640dcdaf0058d4d072acc4dee3f5a7a0bc3e4bcb1e842cea2b1 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..561d229e401204468f750c517bd8496a6ab1ea00 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e7b848fd50ed1cc32978f64e4698cd1f10eaa9221bc9dc60316760a651d212 +size 24382 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b7d7fc591092d2b48a9925ce93aae50403e1b18 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7da23595b827adb9d8b7a49fa9d5045523fe83ed5b0ea04059e93bac81a6388 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bcfde6e20c66b07f24c853a5f28354fe76bc7f1d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c401c20ee313a4fda311fdf8ebd910b84f627ded8f6f3a17abb78ec208ffda +size 24382 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1b9f837d171a2bbdf38c46a49c7907b796574d9d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_913f4e1e2b4632438fe9+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612bab8ae99ee0d23a9d65a92642ea53faedc02eac5be589301f340ecc12947e +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0dce35de5c057defef417457f90df48d8e778ac4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d8908f9c41121c1355085a90259da1519666fd64cab6bc119930a495d28013 +size 80893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7061c41442bf44df4d26426b0f36ce90d606be7a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfab6c5ec7ab6dafc649abcd5f9dff3c79bf3a2e3141853a22ed2759aac7f12a +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8eebfed3dd8f7811ccd94180025637a604c252c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a76dbeab0f6653fa220b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb963bbc7241580114f5297fd62fc3e8cb19377eb953deb673fb5999c37e8c3 +size 254591 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ddedf8047e3cc2dfea1add455687ec2e59136ffd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a18136a0c5ee2b3f80ccfefce5e2065c2a75c972cf3c7ab7a73d27f9f378cf2 +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..62606222d91c3e1f4a361c2b769e36230765d8f3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47e234954a33ef3ab081870796384f1aeff31d539c4ffc08d234f774eab49a2 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d5aa203998848d0eb9923d059d823b6906ebbb90 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:971a4bbe3cc9bf790ca9b7467babba28c98773b8fad53667a4bbbc46de7c138c +size 379604 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d37e0df1ed91f216365a3025398c186d95120d23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9cbc3c9d2c2ba603243+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ef8c33c163f0a4c0c52f5ea47f35573c2dc6f38e3923ffb089e20a7547a48f +size 18433024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..37ffc6b6f7d787417f6b1c1452725a98299258dd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331132547003b9874793b3c551a55417efc4c95974c7795210b56ac6bf49150a +size 19360 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6c5ecb1c4f542b7c09ef36201ce0b413e3c2089e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb16b651ea9d180d5cfd+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67d32a3ec490fd861d862971d7785f2b09b4d194a979085ca624e5d5d4c2ee1 +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..344d99d0624ebe8e1c04d8fd053d32bb8b18efcb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a3ff299b77d41425f3ddbf6b205847b1158cc20d89f1aea55c0f2d50e49dc0 +size 11183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f24ec35c85d4ccbdfd8305bcafef3177c3f31476 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ab95be0675292eecd4b501ac040ebdc56436de5b667c1c8635b309be7968a4 +size 103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0369b589a88935d375ceae1e5505d6307ae21d4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1716e6ab08c7d8828b58c7a4ac4fbd4cc7a6574e6db4932ffc53020aa28bb0 +size 19832 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c80fb0e7908815a3097d909ec9bb9769a2f1974f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030a86fef9f262d80476617fdba0b793965fed279d62c7e0b6a6766befde4626 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..27f0ebd69a19693e4112f84c10b24a863c69293f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f894bf1ee79204a45884e8a3af4127e518849e995cbf7e8d136700c1198843fa +size 79431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db3f1df14c555ea73afef99f21ed016b63befcee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d94fd1f20a72bcd3092ed00f14c78dd8622cc06a2d6343090e9de5d76513b74 +size 318464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2eec8cdb9a46e8ed97f1d9e8eb8c0a6f34ab4cf5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7219d56c85106e3db6dc64691e64d0c47eef47e8d69b18d0d780f1e4efd4434 +size 329762 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9e16c5738b2c3a77e7b8a03523c50d5c09e13471 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c329043e0317bf828d20d346b60debcff57580515609f6b5ffc06091922ca4a6 +size 20501 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d97a0670e46a6a998ca1f8019c881027502d308 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de8368a717cfd6dfec57+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7534f49d9e6d81d2ad3c7f3c0535230f910ac6ad0135fff8519cfc39ab1e29bf +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..43d0dec6fbda3a30f2dcb1f2554b99c5e9b1d936 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab169a10d51ed4ac056160c9a66d20aad7e5d770506d25f9022b1ec795494236 +size 16631 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5cf0c64ec2866ad773cd7a9c51831906e6010d4e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e0765cf6df2204e3664e+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977848c5fb76c6b48cf2a2c3d911d47366b76f1a23010177a494a420057b968d +size 134144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..75f0de8d03f6239ddbe6e17aa47e4e56f52fbed5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e969500151ea3164de6555c44cc134ca744cdf5dc03e1ded8fcd7a096e40130 +size 68279 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bdd5e3f8e06f24bcf1c13cd1c47592e96fc82b40 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e05c75311a31c070885f7868daca8a266e3ffd4f1e9a86172b84cd31aeb9b5 +size 257024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d679c066583f37660b33e2f87e9775bde5aab73a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8abd89baa6b7133c794c01d4337a62b818823dda6d240f331369394e62eda164 +size 268322 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e7e98abc16c79167b76d6d682779cd369bd99c37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e882dda7ca8d1e7ddcf2c54143922356b4f41b29afbc979ecca776884fae42c +size 22951 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0c09ab1a8da36c904ed15ed9714fd2e3f967d72b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e9bcfc17d832317203bd+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5d8306237c3b7cdbfc9b553a0bc614372b7ecc5caeaf606a7ea9a38061dd0f +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..373234af314f1667446490db9cbb1091b7d15328 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8804e4b1c31ce0a1faf1c20a2accde86fe1531d3d6b6b3b7955ef1ee66b4982b +size 80893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e86da427162569f8b07383986bcbb7249f5498c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ffa5f4adb99f3617cc34b352c717cfdc4f72247c0f9b13b31fc4c2d0f42cdc +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8bbc67eb48b68b2a265130ec0d277f5b56326e47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb9e4f974acc74d766dc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cd13dbecd2127b395f61cf0ba83e6e34aff02c6228bf029515635b706cf976c +size 244351 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2df2ab269e3006b628a43a78c09f0fd04c07b48a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e6693f31ad55428431f8380303e8dc91ca44830232eaea952d4b260f95957fb +size 22558 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9ca2b767cb4dead3fa8ea93745da00ed3603450d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fce469267b2ad1b5d80e+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33e9cf514f5e1aaae87bf65497ff896c9e6f5c3a618ca6d39e66362e5762167 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..07063f7d526068909f4270de2de5d9ad099179d3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23182cdd2cb7cbce5b56f6fa076bec977ea03ff720ab4631f540baee2823a468 +size 81347 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b06a2191e2fdc9c43b76e91c1e5de2a0001d94a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fffbb7a74f126b7cac4f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd77d420f86830cfb8ddcd832b8cf18b204370423d9edebced3953397f794e2 +size 226304