Felladrin committed on
Commit
9822074
·
0 Parent(s):

Initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ datasets:
5
+ - Open-Orca/slimorca-deduped-cleaned-corrected
6
+ language:
7
+ - en
8
+ base_model:
9
+ - Felladrin/Minueza-2-96M
10
+ tags:
11
+ - llama-factory
12
+ ---
13
+
14
+ # Minueza-2-96M-Instruct (Variant 07)
15
+
16
+ This model is a fine-tuned version of [Felladrin/Minueza-2-96M](https://huggingface.co/Felladrin/Minueza-2-96M) on the English [Open-Orca/slimorca-deduped-cleaned-corrected](https://huggingface.co/datasets/Open-Orca/slimorca-deduped-cleaned-corrected) dataset.
17
+
18
+ ## Usage
19
+
20
+ ```sh
21
+ pip install transformers==4.51.1 torch==2.6.0
22
+ ```
23
+
24
+ ```python
25
+ from transformers import pipeline, TextStreamer
26
+ import torch
27
+
28
+ generate_text = pipeline(
29
+ "text-generation",
30
+ model="Felladrin/Minueza-2-96M-Instruct-Variant-07",
31
+ device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
32
+ )
33
+
34
+ messages = [
35
+ {
36
+ "role": "system",
37
+ "content": "You are an AI assistant that follows instruction extremely well. Help as much as you can.",
38
+ },
39
+ {
40
+ "role": "user",
41
+ "content": "Could you explain how does the Internet work?",
42
+ },
43
+ ]
44
+
45
+ generate_text(
46
+ generate_text.tokenizer.apply_chat_template(
47
+ messages, tokenize=False, add_generation_prompt=True
48
+ ),
49
+ streamer=TextStreamer(generate_text.tokenizer, skip_special_tokens=True),
50
+ max_new_tokens=512,
51
+ do_sample=True,
52
+ temperature=0.7,
53
+ top_p=0.9,
54
+ top_k=0,
55
+ min_p=0.1,
56
+ repetition_penalty=1.17,
57
+ )
58
+ ```
59
+
60
+ ## Training hyperparameters
61
+
62
+ The following hyperparameters were used during training:
63
+
64
+ - learning_rate: 5.8e-05
65
+ - train_batch_size: 4
66
+ - eval_batch_size: 4
67
+ - seed: 42
68
+ - gradient_accumulation_steps: 32
69
+ - total_train_batch_size: 128
70
+ - optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
71
+ - lr_scheduler_type: cosine
72
+ - lr_scheduler_warmup_ratio: 0.1
73
+ - num_epochs: 3
74
+
75
+ ## Framework versions
76
+
77
+ - Transformers 4.51.1
78
+ - Pytorch 2.6.0+cu124
79
+ - Datasets 3.5.0
80
+ - Tokenizers 0.21.0
81
+
82
+ ## License
83
+
84
+ This model is licensed under the Apache License 2.0.
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["LlamaForCausalLM"],
3
+ "attention_bias": false,
4
+ "attention_dropout": 0.1,
5
+ "bos_token_id": 1,
6
+ "eos_token_id": 2,
7
+ "head_dim": 56,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 672,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 2688,
12
+ "max_position_embeddings": 4096,
13
+ "mlp_bias": false,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 8,
17
+ "num_key_value_heads": 4,
18
+ "pretraining_tp": 1,
19
+ "rms_norm_eps": 1e-6,
20
+ "rope_scaling": null,
21
+ "rope_theta": 500000.0,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.1",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968f3623a929d54e511d62e2fa66eece90b591309e3434702be924c39ab9f55f
3
+ size 192018920
runs/Apr13_09-26-50_249cc294beac/events.out.tfevents.1744536581.249cc294beac ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff87a859534448f71e95a5ecee95394342c6bf50e8fe9e33d88927b509a9d43
3
+ size 230838
runs/Apr13_09-26-50_249cc294beac/events.out.tfevents.1744547113.249cc294beac ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b08cd5023fe0c56bc6d330e458c451f032c90930c1a35490207079d195c823
3
+ size 377
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<|im_start|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<|im_end|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<|im_start|>",
29
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "<|im_end|>",
32
+ "extra_special_tokens": {},
33
+ "model_max_length": 4096,
34
+ "pad_token": "<|im_end|>",
35
+ "padding_side": "right",
36
+ "split_special_tokens": false,
37
+ "tokenizer_class": "PreTrainedTokenizerFast",
38
+ "truncation_side": "right",
39
+ "unk_token": "<unk>"
40
+ }