diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f4a7669aad54852ffb4753a583171eb62fd5ecd5 --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "architectures": [ + "Cohere2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "cache_implementation": "hybrid", + "eos_token_id": 255001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 36864, + "layer_norm_eps": 1e-05, + "logit_scale": 0.25, + "max_position_embeddings": 262144, + "model_type": "cohere2", + "num_attention_heads": 96, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "order_of_interleaved_layers": "local_attn_first", + "pad_token_id": 0, + "position_embedding_type": "rope_gptj", + "rope_scaling": null, + "rope_theta": 50000, + "rotary_pct": 1.0, + "sliding_window": 4096, + "sliding_window_pattern": 4, + "torch_dtype": "bfloat16", + "transformers_version": "4.50.3", + "unsloth_fixed": true, + "unsloth_version": "2025.3.19", + "use_cache": true, + "use_embedding_sharing": true, + "use_gated_activation": true, + "use_parallel_block": true, + "use_parallel_embedding": true, + "vocab_size": 256000 +} \ No newline at end of file diff --git a/model-00001-of-00049.safetensors b/model-00001-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51830b008b9f67bbac9ce5fccf98fecca050cb22 --- /dev/null +++ b/model-00001-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b510a976376c61d1387063aec33104a962b451416d69d901089758bb00d9e8 +size 6291456144 diff --git a/model-00002-of-00049.safetensors b/model-00002-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad7bb77cbfacb9ba80175db3ce6d6b24d236f27d --- /dev/null +++ b/model-00002-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:485573b6df21b07e5abfe52465ee630254cfd95c92c5a326a09cad1bd7c34b82 +size 4932527624 diff --git a/model-00003-of-00049.safetensors b/model-00003-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e102df8f973a32e752e3b007a4846365f3119d8 --- /dev/null +++ b/model-00003-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e26dc2c3b74642c46975070a267243ffa438f98cf70583dd26cd14717e63d2 +size 4278215728 diff --git a/model-00004-of-00049.safetensors b/model-00004-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b93def5d604b6626b156a8e6982afe8ecd574c25 --- /dev/null +++ b/model-00004-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e7286aa5eabbe9e0f357fd6f82ff31f58399b46e7e0f1f5858fe54f95fce99 +size 4932552312 diff --git a/model-00005-of-00049.safetensors b/model-00005-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bf8a41ff7785145a51de4b761fb1ab53dc9ee92 --- /dev/null +++ b/model-00005-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4be97e749586fdf2b47d84b39c2a823f8ea6b2d929b9498d54b57cbaf3a2921 +size 4278215728 diff --git a/model-00006-of-00049.safetensors b/model-00006-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec2602fbe3cc8434fb5b366130140d56a235bba0 --- /dev/null +++ b/model-00006-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01c6be21720b6d902078429a53302320ad41521d07e92700a3a44ea180bd0a4 +size 4278215728 diff --git a/model-00007-of-00049.safetensors b/model-00007-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c517579630f2047b8b15042a6c630c52d1966b6a --- /dev/null +++ b/model-00007-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0627f4d85516d9053859829d10f4a563948dece10186d91a5ceeab74295d728 +size 4932552312 diff --git a/model-00008-of-00049.safetensors b/model-00008-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84b06c1a78c4d978461d1e397bc6470089249618 --- /dev/null +++ b/model-00008-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89eafea8d952fd04e65eacb9df5b4bf94f8c46baa4154d86c30094e8dba52e7a +size 4278215728 diff --git a/model-00009-of-00049.safetensors b/model-00009-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64b4f6ec933f7af9ce97d2b6d3365af99920f673 --- /dev/null +++ b/model-00009-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457e1d57147b0c14c0e34f027195168922bfaf7f3445f2af28dc9f9004e2b669 +size 4278215744 diff --git a/model-00010-of-00049.safetensors b/model-00010-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc585cab07c16b98558d3b16ebe90a6b1aa2fbcd --- /dev/null +++ b/model-00010-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b261740e39b6b6954229517e27f828c0fe5eb214f48e9755e9f4bcd99ce38a +size 4932552328 diff --git a/model-00011-of-00049.safetensors b/model-00011-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fda44b8da6a0b7f61373ef0d8503d4b215a1f1a --- /dev/null +++ b/model-00011-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545568b5a803b5257212a35bc1ac0736d9f37905245f755f4e06b3850c24f56a +size 4278215736 diff --git a/model-00012-of-00049.safetensors b/model-00012-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb26f0da057161dc102e6fc192cbcec4361618c6 --- /dev/null +++ b/model-00012-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13f5f89ca0b1fea3b1bdee3bb13e16720be21f218f0dbb7218ac5b908f635fc +size 4278215736 diff --git a/model-00013-of-00049.safetensors b/model-00013-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11940b059ab735fdae45cbd3a9eba1a341a0e788 --- /dev/null +++ b/model-00013-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f296046a3bbbbb3b5017ed980e616d4ad05c73d096a853e54dcf894927ad67f +size 4932552328 diff --git a/model-00014-of-00049.safetensors b/model-00014-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90557bfe47dc34e9b8fb43f70bf4dcd40afef6e7 --- /dev/null +++ b/model-00014-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30c04bfce0e5aaabad43a9fbdb2d8d41f18efce2e894fd931018076f615219e +size 4278215736 diff --git a/model-00015-of-00049.safetensors b/model-00015-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c96420b3c4943b4b95cd783cb886cede4f6c8322 --- /dev/null +++ b/model-00015-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e416d1349c91bbe5d0092bcad15d713700790b624ed086a20df1fd04163556af +size 4278215736 diff --git a/model-00016-of-00049.safetensors b/model-00016-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed7ffa856aa83957b719708fcae5eacac070ffcc --- /dev/null +++ b/model-00016-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9ec56197319edecbbdff22f944878882fadb374d5f13f5c44bb08cb2f3a33e +size 4932552328 diff --git a/model-00017-of-00049.safetensors b/model-00017-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f41c92f356e957e3702ac162eb852806f1b5b16 --- /dev/null +++ b/model-00017-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096c5fbf956a3079843fac773deb062ed7f62bb3fbdeb3444f368d979b867d76 +size 4278215736 diff --git a/model-00018-of-00049.safetensors b/model-00018-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98bb49c55f2e662a2d3f83d6638e202ec0a91a3f --- /dev/null +++ b/model-00018-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b253dc17d4deaec0b2d7b1c109a1c3d4544d3e0f073a36427015b1c38bd40d82 +size 4278215736 diff --git a/model-00019-of-00049.safetensors b/model-00019-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f705933d18c734c2280fe3b32026e8e6fd3a06ae --- /dev/null +++ b/model-00019-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9603a1d5f3c7da0dd4cc8991e2cd127960ba2225971ec5b31d6c4c467ee7960 +size 4932552328 diff --git a/model-00020-of-00049.safetensors b/model-00020-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7259c5e9325376544a03a41178980c854ce9d286 --- /dev/null +++ b/model-00020-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5391fe292aff4f8c44a92bc3cabf0abc6977eef57b8a078d3c3cc5cbcf5e6f4f +size 4278215736 diff --git a/model-00021-of-00049.safetensors b/model-00021-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f343d6d428d8c9be8e1de75595d9caf64a0cadc --- /dev/null +++ b/model-00021-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6dffff92b839db4c87a8b63e3d2f0ec74c8b5cfa7f996fbcdbbb2f4663c0927 +size 4278215736 diff --git a/model-00022-of-00049.safetensors b/model-00022-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4924689dcab451f023dd2bd4e9cf6e348e4ae99 --- /dev/null +++ b/model-00022-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec1409b81d39255adf574b503eeebd8835381b14f91618a140d739da40ee48c +size 4932552328 diff --git a/model-00023-of-00049.safetensors b/model-00023-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8512aa3047cbed5f88a0d6fd5212305901005f43 --- /dev/null +++ b/model-00023-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee7cf45bdea89cf73f4d6a587324afc1ef88dc26dde0b3cb587916dbe54cdcb +size 4278215736 diff --git a/model-00024-of-00049.safetensors b/model-00024-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc9386b4806cf092a27c6a253c0a99e4cbbf5fe9 --- /dev/null +++ b/model-00024-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18417d0c9376fdf5aaa77c9d47eb8a505962c80da1ad6f0bfaa713e3e0c0d5ee +size 4278215736 diff --git a/model-00025-of-00049.safetensors b/model-00025-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0754398ae672b318f319f1ebe4f0b8848302de0c --- /dev/null +++ b/model-00025-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3363af6ffa39e1d7fbf5488cff6f358c248788ca6871ca0df08c436ec06d257 +size 4932552328 diff --git a/model-00026-of-00049.safetensors b/model-00026-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1531b662fdec1053d163dd979bb34e64135ef55e --- /dev/null +++ b/model-00026-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1100f0d0de95a5eb6ccefba32c58bc7a692c6d1e8d00e7ce9e24da06eb1eab +size 4278215736 diff --git a/model-00027-of-00049.safetensors b/model-00027-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe4e645267691fceaaa7ffd5bec8fe410a46ad67 --- /dev/null +++ b/model-00027-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fe4a19d17611006fa62c5beb7f85f8211527cd829420f3d52b8fec6fbb163a +size 4278215736 diff --git a/model-00028-of-00049.safetensors b/model-00028-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767af14698161ff521dbd6aceba7e2a9d2f63da4 --- /dev/null +++ b/model-00028-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b06d697dca0655fc69d8c2e5e100ff9a7ca4c105e8684d755d17b623297380 +size 4932552328 diff --git a/model-00029-of-00049.safetensors b/model-00029-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c81e9d67d0d3f47166f36ad573f79ac7641c5f47 --- /dev/null +++ b/model-00029-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c675c6becb47b8960df36772eeca74a965328a79e76995ec7b99fb1b56ff10 +size 4278215736 diff --git a/model-00030-of-00049.safetensors b/model-00030-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fab8dbeb9898cedc999fb4893e9de00a73e7b0d7 --- /dev/null +++ b/model-00030-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df68be08ed81f4ae1dfc01191078df24ae82256a31727c79319492e6912fefd2 +size 4278215736 diff --git a/model-00031-of-00049.safetensors b/model-00031-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a17752468b982502bbcc450023b3fb332ac5400 --- /dev/null +++ b/model-00031-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecff21c1a0c7fb2818608a2ec1b2aa3dfe8c47b8768c1d1d531195f39d3b9e65 +size 4932552328 diff --git a/model-00032-of-00049.safetensors b/model-00032-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2db67b22a7f059967de3fdb0f83f835e408eea3a --- /dev/null +++ b/model-00032-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90353213c0865c246c0e2f11ea2475ff67a3685e44ef081795677fca7f9305a +size 4278215736 diff --git a/model-00033-of-00049.safetensors b/model-00033-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d136626d8247fa2f364cf2f6c12bb387b047e17c --- /dev/null +++ b/model-00033-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee733dddd12515c2bf44b1198d48a083d728402318b6c6a9945f5cd0eccc6f4 +size 4278215736 diff --git a/model-00034-of-00049.safetensors b/model-00034-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..764d1698e87e282d47657db34c3ba82f0877db68 --- /dev/null +++ b/model-00034-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba24822d19b295292f5b7cadd0a285ec555a7395c534b49eb9678e5c03fe8d9 +size 4932552328 diff --git a/model-00035-of-00049.safetensors b/model-00035-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1dfd54a8eb267b1cf532eae797e3cb454873a59a --- /dev/null +++ b/model-00035-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac2a0edfdfe02f15450068051c925253e92526813a14ff179ac58a19e7513a1 +size 4278215736 diff --git a/model-00036-of-00049.safetensors b/model-00036-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63e02aaf9a72e3bb94ade59318296996d2e95718 --- /dev/null +++ b/model-00036-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e418b8c6c5e2e52efa9e2ad76791ebcd5fae477dd4456ff02535a20a2bab583 +size 4278215736 diff --git a/model-00037-of-00049.safetensors b/model-00037-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6b4f5f49bd17d6e3651c337c62110e8e36e4d75 --- /dev/null +++ b/model-00037-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c61d13e9190ee802175d9e87d95eef87beb13587f371e0b5cd2b1edfee7e59 +size 4932552328 diff --git a/model-00038-of-00049.safetensors b/model-00038-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1ecf94111c50e8b3e5bb56d8db89c0b1e9468ea --- /dev/null +++ b/model-00038-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389ff526ee7f8031a464dcdaf8baf423a6b225206b5716415a4ab17dcd2d8cf7 +size 4278215736 diff --git a/model-00039-of-00049.safetensors b/model-00039-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85c31f0022e8d015c3acb45b552b505d32efe745 --- /dev/null +++ b/model-00039-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05cda093c7a907d3b9dc57e1262ccd986fa9cfd730d8da90dfcaad79de30cfc +size 4278215736 diff --git a/model-00040-of-00049.safetensors b/model-00040-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6202c3d81ced1ec87084d4b49e3c42492b3230ef --- /dev/null +++ b/model-00040-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed1afe77efac6d4907ce1ca701fc2912c7323c95986f96f3d0fe9ff63bc8e94 +size 4932552328 diff --git a/model-00041-of-00049.safetensors b/model-00041-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05d87a19775e9fab59afff36a9ec158c1762376c --- /dev/null +++ b/model-00041-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a622f7db46d89cecffb16d48081af286b0931bf62c8c554b5f78e60de96652fb +size 4278215736 diff --git a/model-00042-of-00049.safetensors b/model-00042-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6e744254cf47f601c4a23f17d59f0161a129f9d --- /dev/null +++ b/model-00042-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92539d2d95dc636421101be8832055b13d49eece12b95e72a80dbe5bb913f5b5 +size 4278215736 diff --git a/model-00043-of-00049.safetensors b/model-00043-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e660fecb839f833f8f86f632f7de4de3b8222d0 --- /dev/null +++ b/model-00043-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d0dd6b575104594db55420675ff6f2108ad38ae5a2dc99ea1191092dad72945 +size 4932552328 diff --git a/model-00044-of-00049.safetensors b/model-00044-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66c4533064d0899a362ebbaacdff69934f8c94f4 --- /dev/null +++ b/model-00044-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559f7b08d10e8930e911bc5eb96230c8ba87472a991d2a2d5769ff358224060e +size 4278215736 diff --git a/model-00045-of-00049.safetensors b/model-00045-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b90a98a661fc695060c7e175f94ea35225cbf5a --- /dev/null +++ b/model-00045-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923a0c2c46e8adfe6361ea470a650d036a887d3c7e1c258be0ea0c24a7c5cfdc +size 4278215736 diff --git a/model-00046-of-00049.safetensors b/model-00046-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..775f0cb0de904bf7900d3f7ecec463749ae40e5b --- /dev/null +++ b/model-00046-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab316d92fd2a4213f63f88ad9a17a28c56d165ab70ed64d6c9f7934d7addba97 +size 4932552328 diff --git a/model-00047-of-00049.safetensors b/model-00047-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df779b21040307423ba3bab5bc7a9f927b92b584 --- /dev/null +++ b/model-00047-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9599674b74fac9daa53dac2ea4e5d477225f9143d7b6655bc6fb073e09de4d +size 4278215736 diff --git a/model-00048-of-00049.safetensors b/model-00048-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3abdd28efd3b0747b17afcecb0ad157d152a207c --- /dev/null +++ b/model-00048-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70721e08b4b847d92739fecb7cdf1c9718d1be434f59e02941d69a62154b40c9 +size 4278215736 diff --git a/model-00049-of-00049.safetensors b/model-00049-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..522798cc00d2ff779d5663ee175885621ef731ec --- /dev/null +++ b/model-00049-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1710395039425ff62fd9f51033a982ac19454fb3b8bbee9796eef6c4b16bd728 +size 4278265088 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..847668e4f8456c745ef364f9eb8ce175139cd29b --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,521 @@ +{ + "metadata": { + "total_size": 222115160064 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00049.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00049.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00049.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.14.input_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.15.input_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.input_layernorm.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.17.input_layernorm.weight": "model-00015-of-00049.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.18.input_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.19.input_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.20.input_layernorm.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.21.input_layernorm.weight": "model-00018-of-00049.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.22.input_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.23.input_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.input_layernorm.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.25.input_layernorm.weight": "model-00021-of-00049.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.26.input_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.27.input_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.29.input_layernorm.weight": "model-00024-of-00049.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.30.input_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.31.input_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00049.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.34.input_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.35.input_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00049.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.39.input_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00049.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.43.input_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.input_layernorm.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00049.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.47.input_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.input_layernorm.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.49.input_layernorm.weight": "model-00039-of-00049.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00049.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.50.input_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.51.input_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.input_layernorm.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.53.input_layernorm.weight": "model-00042-of-00049.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.54.input_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.55.input_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.input_layernorm.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.57.input_layernorm.weight": "model-00045-of-00049.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.58.input_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.59.input_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.60.input_layernorm.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.61.input_layernorm.weight": "model-00048-of-00049.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.62.input_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.63.input_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00049.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00008-of-00049.safetensors", + "model.norm.weight": "model-00049-of-00049.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8eb9ef63af01dfa2c350573b543b14275370944 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e13c779896a87c94c9e0df9482975c8b8485265 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953b2730d23ca19e7dca96f75f3e10b497bb679290b06d8981190bff2039fc72 +size 20124922 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8116a7350643d60d5d023e6a1c6240769a0ca32 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,351 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255000": { + "content": "<|START_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255001": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255002": { + "content": "<|YES_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255003": { + "content": "<|NO_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255004": { + "content": "<|GOOD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255005": { + "content": "<|BAD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255006": { + "content": "<|USER_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255007": { + "content": "<|CHATBOT_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255008": { + "content": "<|SYSTEM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255009": { + "content": "<|USER_0_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255010": { + "content": "<|USER_1_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255011": { + "content": "<|USER_2_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255012": { + "content": "<|USER_3_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255013": { + "content": "<|USER_4_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255014": { + "content": "<|USER_5_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255015": { + "content": "<|USER_6_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255016": { + "content": "<|USER_7_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255017": { + "content": "<|USER_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255018": { + "content": "<|USER_9_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255019": { + "content": "<|START_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255020": { + "content": "<|END_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255021": { + "content": "<|START_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255022": { + "content": "<|END_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255023": { + "content": "<|START_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255024": { + "content": "<|END_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255025": { + "content": "<|START_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255026": { + "content": "<|END_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255027": { + "content": "<|EXTRA_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255028": { + "content": "<|NEW_FILE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255029": { + "content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255030": { + "content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255031": { + "content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255032": { + "content": "<|END_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "chat_template": "{% if 'role' in messages[0] %}{{- bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + message['content'] | trim + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + message['content'] | trim + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>' + message['content'] | trim + '<|END_RESPONSE|><|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>' }}{% endif %}{% else %}{{- bos_token }}{% for message in messages %}{% if message['from'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + message['value'] | trim + '<|END_OF_TURN_TOKEN|>' }}{% elif message['from'] == 'human' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + message['value'] | trim + '<|END_OF_TURN_TOKEN|>' }}{% elif message['from'] == 'gpt' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>' + message['value'] | trim + '<|END_RESPONSE|><|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>' }}{% endif %}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "extra_special_tokens": {}, + "legacy": true, + "merges_file": null, + "model_max_length": 262144, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "CohereTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "vocab_file": null +}