{ "metadata": { "ParamSize": 195, "ParamBytes": 15475417088.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "729c4c7a185236666cb4cdb08785c5d9" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "55a6d380db1112c6958f21a502e63d98" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "10b74a139ca5f25d551f28d7d50d721c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f2ca74d87704cc5cc9f53eacf7822109" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5c749c090a1205d14051d3a3b9d02da6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "86ce9211660290e2600bdf0bdd1dbc36" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d7e024f4b2b9dcbde1e4f720c869f6de" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8d093fcc819e40143bc7b7831eaf23e3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0d01182bb59084e34c1733024476212" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ce823a0fbc513f1f8e40cb212a72789f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1e35a3f3548debb775aaaefe1384aef6" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 758120448, "records": [ { "name": "model.tok_embeddings.weight", "shape": [ 92544, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 758120448, "byteOffset": 0 } ], "md5sum": "609be4bdc7b659208e0148add7e9768a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5ad0181f71b256edb486d5358bacbe72" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "71878d379d2adbe623bfe2a5a07e7f68" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4b7f536fe056d9354a634fccfe6e6dea" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1b92948a5540025e75fcfb6c53daca8a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "29226d0f8b83693ed0c9beb3dfdc4009" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "80b4709ce4ab261e79434fb9aadca93f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "23eeebab8df880834ecf1b09e500624a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8aec71916794e9bb7d505d09be979f38" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c0c63988c943b24ff181177078db3bdf" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7ac75f762846f864f11d9f2525c47e99" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f93fcba4aab175daed648d3b93e53faf" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50acf387f02515ed40b084449ca55085" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f73a314647a4354dc81eeff83b9215ae" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6038b47c28aff59e3deeb56884dcf73" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "45e84a38854aeb748a3983005e8a549b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a847ec8d7074126cddc95721dc25d3ca" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3a25888ba3b5eeb453ab1a71b927b703" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8aeeff3981feeed9d9aec45a869d2333" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25db3a13d9b739d8303cac85b42ec608" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bfba6ac3f6ff63dfb1bba7fec7ad9475" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cb619806bc019335601180086307a12a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "334c9a9a0a8c881a8ea87d1e9872bba8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6c50fa86f95717d6806680cbaf3d214" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d96360db91cad893efacd8c34bad6c64" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "22b6c343418ed0285ab1e263487b83c3" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b3b12537179f5df3f99e68cc8dec5a8a" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79d8351e44e7b499a730571499d9e6a9" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "570a4e9b537ca694e8d76b8434605763" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "48f62084ff7ca9413af779593bdc2e9f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e0817da5c73abd585f97f3dad6d9bf8f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eaf934e3769596225ce929e426a9c627" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6125c636b4a17039277010559aa81664" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "34e9a1b9f14480a4f413d671a10b4da5" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "84185c3cc8ee90a43951543dde9f12f7" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dfc31ea4b804374b0ce86d8b83609da4" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d134eb2be366e0a135c80c655dd94045" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8971494221fe6468e98b63a0901e6a44" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b9f3d66e79c5e5f621a716abaace0e2" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4253311ba3a64d28968f67e727c5eddd" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "01319daaa5f55bc5c075213358d066f4" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c1ecd3b24426b7d4a76d60df3a0b1a66" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "36fe875659461f19de1fa1505bc7f16a" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "27a5053871e58a23e46de60314c5e132" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3db5a2b4dfe67a104edb478e8764eed9" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "40ea7ded93317537d9fbc25461066e7a" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5aab7d40bc30c62670d80e6fe2f4cb07" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc1285b51e263abefe0ca209d39ec83b" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8908ae0673fc8d88bb46d7b7e9153a22" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "12d19f304d4840525c56b800e4ee8561" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "46e0b4d63ed303efa8dc72695cae92e6" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc5e46a285372d57f306e0529399fd7c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "875aa36b0f2c4311d75f8dd197abdbec" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c6e16f4f2dad0a3f3f2c5a925e2ce0e8" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6763ec840b5eeb590d046909b69b6e8d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b29c09bdc1639834b8591f26a72ea8ee" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8d9676b077e9afd18a474fef7f3aec6b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a4fa73f33e7ab3b8579735787cc6353c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d09d7184e34d50b316c1baf3aa2d4d43" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c105d013e113e947edeb7550cdc47679" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "88915a46bf9e97047be6f3d4575019ed" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f4a60a847d0e9a1a4fb79d50a9b6bcca" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4b678d4845ca7dbc9caba09a9719f9dc" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4f6d0fa26c10ef255f1b018287f73905" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0095a8249055516257568ce4d05acab7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2662907fda7eac107c8fb01e21deea9c" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9b9c6adcf5b8cefa4ae8fec8abc2706f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b3666d24336ac56f92c60aa40d4b25fd" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "40920887c4119e6b62d091fa08a86226" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3055a39cea32faa9200969462240426b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "10a017a7167f69f10cd2430441bb965b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56f26bd116d728dd3eb9339a5596f43f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9e4e4db902619220a2d228b3ab1655de" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1e677296bfc01d73d4a0a942629162bf" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5070a935672c09cadcfc6248dc797e3f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a0991f934a273006a15b2388f394d5f3" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2189ce4149c6a984fdccd77b297bac26" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dfcbe8edde91b8d5a00ac6a4902bfed0" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9dcadf50cf75d597e4ade4f56d3d5462" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "15b7f65244441a5d2fc13918a55adad2" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fa558ef96017633fa0798abab4584a5f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ba7565a1739102f92a69e28de7ad5612" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c5f8d190a4f65ee91a90f2ce0997b2d9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d90d10b8db78986b763b45d083c08a0c" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a9de11a91e114fd14e86175a39dd25f4" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "93ed464670f272fa5b36214cc6be2299" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "21c6a435691508e8abd3b5ed2c7a9449" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ed889019796270ce32ca01e0d04245a9" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "288ae5d56146712b6d0733de7091f02c" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bef70aff6cf9d1b8b9f21a282bd954f2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b534ebde4eabce8b144c6e39a4f15315" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "52280b8a7e0da6420411df60eb3183ed" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5e652220463dd20f647161f2ea63821b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "51285786e2c34cff103b9c756019ed3c" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ae49a7c257a33dd4c741e55ab36e4497" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8f349151ec046343ce2519ea4197bffe" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0adf5f8bd5ad65240cb3e05313aadf39" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0236ca031d9ff98adc32a67bbe188c6e" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02226be1d9446c53067784cec413b821" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "57889d56c2f68834898a974286c38599" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed7bf76c0495935865a2b2fe726df220" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8f6d45e4489a41c03b289347ba096f12" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a51a642d6badb57cf05b364c3dc933b1" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f49b05eb4c4535d51c38f2787e497d2c" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3f83a94914a0d05b4e4cbf6831641a78" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bcc6206fb9388051f6e87df239c55d34" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8001981bd5c486b1a11e3cdd840a9109" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "839b87f723a308cd396bf031c82cd051" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9ca01cc498d77f46a09e8585930a9581" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "17e388c934131ed3436ec750da5cd764" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0bb3373cbda5966e6db0f59ff0c76bbd" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2976305efece53c1a2545f3d9c42dd56" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9f3387bec2dd8f54e9165f9f930e160d" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3d1a4dbfe14d2bd7122407606ce2057d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e80df739c32ad793d3003f9f595ffb9e" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fcac7fd7f870c99207807eedfbf82f08" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bee00ac1fb377a17c7a1afc7d8db9e96" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3f93f19da3b9c3b2ccdef73f8c6e76f8" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 758120448, "records": [ { "name": "output.weight", "shape": [ 92544, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 758120448, "byteOffset": 0 } ], "md5sum": "2ceab196c9dcb4e2d0c259dc305c22bb" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 532480, "records": [ { "name": "model.layers.0.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 106496 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 114688 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 122880 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 131072 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 139264 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 147456 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 155648 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 163840 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 172032 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 180224 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 188416 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 196608 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 204800 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 212992 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 221184 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 229376 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 237568 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 245760 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 253952 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 262144 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 270336 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 278528 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 286720 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 294912 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 303104 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 311296 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 319488 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 327680 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 335872 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 344064 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 352256 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 360448 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 368640 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 376832 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 385024 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 393216 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 401408 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 409600 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 417792 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 425984 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 434176 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 442368 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 450560 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 458752 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 466944 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 475136 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 483328 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 491520 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 499712 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 507904 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 516096 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 524288 } ], "md5sum": "f52ec756a4945d7239dfe8af77bf195f" } ] }