yujonglee committed on
Commit
0c05259
·
verified ·
1 Parent(s): 731ba8a
README.md CHANGED
@@ -4,5 +4,6 @@
4
 
5
  # HyprLLM (SM)
6
 
7
- - [Dataset](https://huggingface.co/datasets/yujonglee/hypr-llm-data2/tree/b57a5c1687f0299d20a6c7440733060cc306bb56)
8
- - [Experiment](https://wandb.ai/yujonglee/hypr-llm/runs/qeote5lt)
 
 
4
 
5
  # HyprLLM (SM)
6
 
7
+ - [Base Model](https://huggingface.co/unsloth/Qwen3-1.7B-unsloth-bnb-4bit)
8
+ - [Dataset](https://huggingface.co/datasets/yujonglee/hypr-llm-data2/tree/4436d698bc4ec6469e282d10907fe62da7445c38)
9
+ - [Experiment](https://wandb.ai/yujonglee/hypr-llm/runs/ytpq0fu8)
added_tokens.json CHANGED
@@ -1,3 +1,28 @@
1
  {
2
- "<image_soft_token>": 262144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
 
1
  {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
  }
chat_template.jinja CHANGED
@@ -1,47 +1,144 @@
1
- {{ bos_token }}
2
- {%- if messages[0]['role'] == 'system' -%}
3
- {%- if messages[0]['content'] is string -%}
4
- {%- set first_user_prefix = messages[0]['content'] + '
5
-
6
- ' -%}
7
- {%- else -%}
8
- {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
-
10
- ' -%}
11
- {%- endif -%}
12
- {%- set loop_messages = messages[1:] -%}
13
- {%- else -%}
14
- {%- set first_user_prefix = "" -%}
15
- {%- set loop_messages = messages -%}
16
- {%- endif -%}
17
- {%- for message in loop_messages -%}
18
- {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
- {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
- {%- endif -%}
21
- {%- if (message['role'] == 'assistant') -%}
22
- {%- set role = "model" -%}
23
- {%- else -%}
24
- {%- set role = message['role'] -%}
25
- {%- endif -%}
26
- {{ '<start_of_turn>' + role + '
27
- ' + (first_user_prefix if loop.first else "") }}
28
- {%- if message['content'] is string -%}
29
- {{ message['content'] | trim }}
30
- {%- elif message['content'] is iterable -%}
31
- {%- for item in message['content'] -%}
32
- {%- if item['type'] == 'image' -%}
33
- {{ '<start_of_image>' }}
34
- {%- elif item['type'] == 'text' -%}
35
- {{ item['text'] | trim }}
36
- {%- endif -%}
37
- {%- endfor -%}
38
- {%- else -%}
39
- {{ raise_exception("Invalid content type") }}
40
- {%- endif -%}
41
- {{ '<end_of_turn>
42
- ' }}
43
- {%- endfor -%}
44
- {%- if add_generation_prompt -%}
45
- {{ '<start_of_turn>model
46
- ' }}
47
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {%- if tools %}
3
+ {{- '<|im_start|>system
4
+ ' }}
5
+ {%- if messages[0].role == 'system' %}
6
+ {{- messages[0].content + '
7
+
8
+ ' }}
9
+ {%- endif %}
10
+ {{- "# Tools
11
+
12
+ You may call one or more functions to assist with the user query.
13
+
14
+ You are provided with function signatures within <tools></tools> XML tags:
15
+ <tools>" }}
16
+ {%- for tool in tools %}
17
+ {{- "
18
+ " }}
19
+ {{- tool | tojson }}
20
+ {%- endfor %}
21
+ {{- "
22
+ </tools>
23
+
24
+ For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
25
+ <tool_call>
26
+ {\"name\": <function-name>, \"arguments\": <args-json-object>}
27
+ </tool_call><|im_end|>
28
+ " }}
29
+ {%- else %}
30
+ {%- if messages[0].role == 'system' %}
31
+ {{- '<|im_start|>system
32
+ ' + messages[0].content + '<|im_end|>
33
+ ' }}
34
+ {%- endif %}
35
+ {%- endif %}
36
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
37
+ {%- for forward_message in messages %}
38
+ {%- set index = (messages|length - 1) - loop.index0 %}
39
+ {%- set message = messages[index] %}
40
+ {%- set current_content = message.content if message.content is not none else '' %}
41
+ {%- set tool_start = '<tool_response>' %}
42
+ {%- set tool_start_length = tool_start|length %}
43
+ {%- set start_of_message = current_content[:tool_start_length] %}
44
+ {%- set tool_end = '</tool_response>' %}
45
+ {%- set tool_end_length = tool_end|length %}
46
+ {%- set start_pos = (current_content|length) - tool_end_length %}
47
+ {%- if start_pos < 0 %}
48
+ {%- set start_pos = 0 %}
49
+ {%- endif %}
50
+ {%- set end_of_message = current_content[start_pos:] %}
51
+ {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
52
+ {%- set ns.multi_step_tool = false %}
53
+ {%- set ns.last_query_index = index %}
54
+ {%- endif %}
55
+ {%- endfor %}
56
+ {%- for message in messages %}
57
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
58
+ {{- '<|im_start|>' + message.role + '
59
+ ' + message.content + '<|im_end|>' + '
60
+ ' }}
61
+ {%- elif message.role == "assistant" %}
62
+ {%- set content = message.content %}
63
+ {%- set reasoning_content = '' %}
64
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
65
+ {%- set reasoning_content = message.reasoning_content %}
66
+ {%- else %}
67
+ {%- if '</think>' in message.content %}
68
+ {%- set content = (message.content.split('</think>')|last).lstrip('
69
+ ') %}
70
+ {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('
71
+ ') %}
72
+ {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('
73
+ ') %}
74
+ {%- endif %}
75
+ {%- endif %}
76
+ {%- if loop.index0 > ns.last_query_index %}
77
+ {%- if loop.last or (not loop.last and reasoning_content) %}
78
+ {{- '<|im_start|>' + message.role + '
79
+ <think>
80
+ ' + reasoning_content.strip('
81
+ ') + '
82
+ </think>
83
+
84
+ ' + content.lstrip('
85
+ ') }}
86
+ {%- else %}
87
+ {{- '<|im_start|>' + message.role + '
88
+ ' + content }}
89
+ {%- endif %}
90
+ {%- else %}
91
+ {{- '<|im_start|>' + message.role + '
92
+ ' + content }}
93
+ {%- endif %}
94
+ {%- if message.tool_calls %}
95
+ {%- for tool_call in message.tool_calls %}
96
+ {%- if (loop.first and content) or (not loop.first) %}
97
+ {{- '
98
+ ' }}
99
+ {%- endif %}
100
+ {%- if tool_call.function %}
101
+ {%- set tool_call = tool_call.function %}
102
+ {%- endif %}
103
+ {{- '<tool_call>
104
+ {"name": "' }}
105
+ {{- tool_call.name }}
106
+ {{- '", "arguments": ' }}
107
+ {%- if tool_call.arguments is string %}
108
+ {{- tool_call.arguments }}
109
+ {%- else %}
110
+ {{- tool_call.arguments | tojson }}
111
+ {%- endif %}
112
+ {{- '}
113
+ </tool_call>' }}
114
+ {%- endfor %}
115
+ {%- endif %}
116
+ {{- '<|im_end|>
117
+ ' }}
118
+ {%- elif message.role == "tool" %}
119
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
120
+ {{- '<|im_start|>user' }}
121
+ {%- endif %}
122
+ {{- '
123
+ <tool_response>
124
+ ' }}
125
+ {{- message.content }}
126
+ {{- '
127
+ </tool_response>' }}
128
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
129
+ {{- '<|im_end|>
130
+ ' }}
131
+ {%- endif %}
132
+ {%- endif %}
133
+ {%- endfor %}
134
+ {%- if add_generation_prompt %}
135
+ {{- '<|im_start|>assistant
136
+ ' }}
137
+ {%- if enable_thinking is defined and enable_thinking is false %}
138
+ {{- '<think>
139
+
140
+ </think>
141
+
142
+ ' }}
143
+ {%- endif %}
144
+ {%- endif %}
config.json CHANGED
@@ -1,99 +1,32 @@
1
  {
2
  "architectures": [
3
- "Gemma3ForConditionalGeneration"
4
  ],
5
- "boi_token_index": 255999,
6
- "bos_token_id": 2,
7
- "eoi_token_index": 256000,
8
- "eos_token_id": 106,
9
- "image_token_index": 262144,
 
10
  "initializer_range": 0.02,
11
- "mm_tokens_per_image": 256,
12
- "model_type": "gemma3",
13
- "pad_token_id": 0,
14
- "text_config": {
15
- "attention_bias": false,
16
- "attention_dropout": 0.0,
17
- "attn_logit_softcapping": null,
18
- "cache_implementation": "hybrid",
19
- "final_logit_softcapping": null,
20
- "head_dim": 256,
21
- "hidden_activation": "gelu_pytorch_tanh",
22
- "hidden_size": 2560,
23
- "initializer_range": 0.02,
24
- "intermediate_size": 10240,
25
- "layer_types": [
26
- "sliding_attention",
27
- "sliding_attention",
28
- "sliding_attention",
29
- "sliding_attention",
30
- "sliding_attention",
31
- "full_attention",
32
- "sliding_attention",
33
- "sliding_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "sliding_attention",
37
- "full_attention",
38
- "sliding_attention",
39
- "sliding_attention",
40
- "sliding_attention",
41
- "sliding_attention",
42
- "sliding_attention",
43
- "full_attention",
44
- "sliding_attention",
45
- "sliding_attention",
46
- "sliding_attention",
47
- "sliding_attention",
48
- "sliding_attention",
49
- "full_attention",
50
- "sliding_attention",
51
- "sliding_attention",
52
- "sliding_attention",
53
- "sliding_attention",
54
- "sliding_attention",
55
- "full_attention",
56
- "sliding_attention",
57
- "sliding_attention",
58
- "sliding_attention",
59
- "sliding_attention"
60
- ],
61
- "max_position_embeddings": 131072,
62
- "model_type": "gemma3_text",
63
- "num_attention_heads": 8,
64
- "num_hidden_layers": 34,
65
- "num_key_value_heads": 4,
66
- "query_pre_attn_scalar": 256,
67
- "rms_norm_eps": 1e-06,
68
- "rope_local_base_freq": 10000.0,
69
- "rope_scaling": {
70
- "factor": 8.0,
71
- "rope_type": "linear"
72
- },
73
- "rope_theta": 1000000.0,
74
- "sliding_window": 1024,
75
- "sliding_window_pattern": 6,
76
- "torch_dtype": "bfloat16",
77
- "use_cache": true,
78
- "vocab_size": 262208
79
- },
80
  "torch_dtype": "bfloat16",
81
- "transformers_version": "4.53.0",
82
  "unsloth_fixed": true,
83
- "unsloth_version": "2025.6.8",
84
- "vision_config": {
85
- "attention_dropout": 0.0,
86
- "hidden_act": "gelu_pytorch_tanh",
87
- "hidden_size": 1152,
88
- "image_size": 896,
89
- "intermediate_size": 4304,
90
- "layer_norm_eps": 1e-06,
91
- "model_type": "siglip_vision_model",
92
- "num_attention_heads": 16,
93
- "num_channels": 3,
94
- "num_hidden_layers": 27,
95
- "patch_size": 14,
96
- "torch_dtype": "bfloat16",
97
- "vision_use_head": false
98
- }
99
  }
 
1
  {
2
  "architectures": [
3
+ "Qwen3ForCausalLM"
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "head_dim": 128,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 6144,
13
+ "max_position_embeddings": 40960,
14
+ "max_window_layers": 28,
15
+ "model_type": "qwen3",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 28,
18
+ "num_key_value_heads": 8,
19
+ "pad_token_id": 151654,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.52.4",
27
  "unsloth_fixed": true,
28
+ "unsloth_version": "2025.6.4",
29
+ "use_cache": true,
30
+ "use_sliding_window": false,
31
+ "vocab_size": 151936
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
generation_config.json CHANGED
@@ -1,13 +1,14 @@
1
  {
2
- "bos_token_id": 2,
3
- "cache_implementation": "hybrid",
4
  "do_sample": true,
5
  "eos_token_id": [
6
- 1,
7
- 106
8
  ],
9
- "pad_token_id": 0,
10
- "top_k": 64,
 
 
11
  "top_p": 0.95,
12
- "transformers_version": "4.53.0"
13
  }
 
1
  {
2
+ "bos_token_id": 151643,
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
+ 151645,
6
+ 151643
7
  ],
8
+ "max_length": 40960,
9
+ "pad_token_id": 151654,
10
+ "temperature": 0.6,
11
+ "top_k": 20,
12
  "top_p": 0.95,
13
+ "transformers_version": "4.52.4"
14
  }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:870fded56ac7c974367a6c9b6d404a820a3799fb9c9f01108764761941e87f24
3
+ size 3441185608
model_f16.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c62fef61a0bd6a33d74c2bc69322c1bd88f083c4839a114b5025e9e1c3540
3
- size 7767803328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f1a9a093d9b126ec886a55458a028801dc1900fcd549937b35245a95b59ea3
3
+ size 3447349376
model_q4_k_m.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23419289a54012f93b3709b8b9d9b111694fbdc4b90f3bb1817c20ca2a54efd5
3
- size 2489893568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28e6090992dd1fc902fae6de0587e402d748f24201ad811d28205ff0a31c8faa
3
+ size 1107409024
model_q8_0.gguf CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24ca38d0f3dcac13374c4920f1b5b5166c08c1fd6337c628c7658f88a66b799b
3
- size 4130401728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48df35c8238e675628f786957fe342442b40594d2e54988a4065f1714e55a968
3
+ size 1834426496
special_tokens_map.json CHANGED
@@ -1,30 +1,28 @@
1
  {
2
- "boi_token": "<start_of_image>",
3
- "bos_token": {
4
- "content": "<bos>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- "eoi_token": "<end_of_image>",
 
 
 
 
 
 
11
  "eos_token": {
12
- "content": "<end_of_turn>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
- "image_token": "<image_soft_token>",
19
  "pad_token": {
20
- "content": "<pad>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false
25
- },
26
- "unk_token": {
27
- "content": "<unk>",
28
  "lstrip": false,
29
  "normalized": false,
30
  "rstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
  "eos_token": {
18
+ "content": "<|im_end|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
 
24
  "pad_token": {
25
+ "content": "<|vision_pad|>",
 
 
 
 
 
 
 
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
- size 33384568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff