Add model

Browse files

Files changed (14) hide show

README.md +3 -2
added_tokens.json +26 -1
chat_template.jinja +144 -47
config.json +25 -92
generation_config.json +8 -7
merges.txt +0 -0
model.safetensors +3 -0
model_f16.gguf +2 -2
model_q4_k_m.gguf +2 -2
model_q8_0.gguf +2 -2
special_tokens_map.json +17 -19
tokenizer.json +2 -2
tokenizer_config.json +0 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -4,5 +4,6 @@
 # HyprLLM (SM)
-- [Dataset](https://huggingface.co/datasets/yujonglee/hypr-llm-data2/tree/b57a5c1687f0299d20a6c7440733060cc306bb56)
-- [Experiment](https://wandb.ai/yujonglee/hypr-llm/runs/qeote5lt)

 # HyprLLM (SM)
+- [Base Model](https://huggingface.co/unsloth/Qwen3-1.7B-unsloth-bnb-4bit)
+- [Dataset](https://huggingface.co/datasets/yujonglee/hypr-llm-data2/tree/4436d698bc4ec6469e282d10907fe62da7445c38)
+- [Experiment](https://wandb.ai/yujonglee/hypr-llm/runs/ytpq0fu8)

added_tokens.json CHANGED Viewed

@@ -1,3 +1,28 @@
 {
-  "<image_soft_token>": 262144
 }

 {
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
 }

chat_template.jinja CHANGED Viewed

@@ -1,47 +1,144 @@
-{{ bos_token }}
-{%- if messages[0]['role'] == 'system' -%}
-    {%- if messages[0]['content'] is string -%}
-        {%- set first_user_prefix = messages[0]['content'] + '
-' -%}
-    {%- else -%}
-        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
-' -%}
-    {%- endif -%}
-    {%- set loop_messages = messages[1:] -%}
-{%- else -%}
-    {%- set first_user_prefix = "" -%}
-    {%- set loop_messages = messages -%}
-{%- endif -%}
-{%- for message in loop_messages -%}
-    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
-        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
-    {%- endif -%}
-    {%- if (message['role'] == 'assistant') -%}
-        {%- set role = "model" -%}
-    {%- else -%}
-        {%- set role = message['role'] -%}
-    {%- endif -%}
-    {{ '<start_of_turn>' + role + '
-' + (first_user_prefix if loop.first else "") }}
-    {%- if message['content'] is string -%}
-        {{ message['content'] | trim }}
-    {%- elif message['content'] is iterable -%}
-        {%- for item in message['content'] -%}
-            {%- if item['type'] == 'image' -%}
-                {{ '<start_of_image>' }}
-            {%- elif item['type'] == 'text' -%}
-                {{ item['text'] | trim }}
-            {%- endif -%}
-        {%- endfor -%}
-    {%- else -%}
-        {{ raise_exception("Invalid content type") }}
-    {%- endif -%}
-    {{ '<end_of_turn>
-' }}
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-    {{ '<start_of_turn>model
-' }}
-{%- endif -%}

+{#- System preamble. With tools: emit a single system turn containing the leading system message (if any) followed by the tool-calling instructions and one JSON-serialized signature per tool. Without tools: emit the leading system message, if present, as its own turn. #}
+{%- if tools %}
+    {{- '<|im_start|>system
+' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '
+' }}
+    {%- endif %}
+    {{- "# Tools
+You may call one or more functions to assist with the user query.
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>" }}
+    {#- Each tool is written on its own line as JSON. #}
+    {%- for tool in tools %}
+        {{- "
+" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "
+</tools>
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{\"name\": <function-name>, \"arguments\": <args-json-object>}
+</tool_call><|im_end|>
+" }}
+{#- No tools: only a leading system message produces output here. #}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system
+' + messages[0].content + '<|im_end|>
+' }}
+    {%- endif %}
+{%- endif %}
+{#- Find the index of the most recent user message that is not a wrapped tool response. Messages are visited from last to first; ns.multi_step_tool stays true until the first match, so only the match closest to the end is recorded. If nothing matches, ns.last_query_index keeps its initial value (the last message index). #}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for forward_message in messages %}
+    {#- Mirror the forward loop position to walk the list in reverse. #}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- set message = messages[index] %}
+    {#- Null content is treated as an empty string so the slicing below is safe. #}
+    {%- set current_content = message.content if message.content is not none else '' %}
+    {%- set tool_start = '<tool_response>' %}
+    {%- set tool_start_length = tool_start|length %}
+    {%- set start_of_message = current_content[:tool_start_length] %}
+    {%- set tool_end = '</tool_response>' %}
+    {%- set tool_end_length = tool_end|length %}
+    {#- Clamp to 0 so content shorter than the closing tag does not yield a negative slice start. #}
+    {%- set start_pos = (current_content|length) - tool_end_length %}
+    {%- if start_pos < 0 %}
+        {%- set start_pos = 0 %}
+    {%- endif %}
+    {%- set end_of_message = current_content[start_pos:] %}
+    {#- A user message counts as a real query unless it both starts with <tool_response> and ends with </tool_response>. #}
+    {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{#- Render every message as a ChatML turn. User and non-leading system messages are emitted verbatim; assistant messages get optional <think> reasoning and serialized tool calls; consecutive tool messages are grouped into one user turn of <tool_response> blocks. #}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '
+' + message.content + '<|im_end|>' + '
+' }}
+    {%- elif message.role == "assistant" %}
+        {#- Assistant turns that only carry tool_calls may have null content; normalize it to '' (same rule as the last-query scan) so the membership test, lstrip and concatenations below do not fail. #}
+        {%- set content = message.content if message.content is not none else '' %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {#- No explicit reasoning field: split inline reasoning off the content at the closing think tag. Inside this branch message.content is necessarily a non-empty string. #}
+            {%- if '</think>' in content %}
+                {%- set content = (message.content.split('</think>')|last).lstrip('
+') %}
+                {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('
+') %}
+                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('
+') %}
+            {%- endif %}
+        {%- endif %}
+        {#- Reasoning is only re-emitted for assistant turns after the last real user query; earlier turns are rendered with their content only. #}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '
+<think>
+' + reasoning_content.strip('
+') + '
+</think>
+' + content.lstrip('
+') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '
+' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '
+' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {#- Separate tool calls from preceding content and from each other with a newline. #}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '
+' }}
+                {%- endif %}
+                {#- Accept both the wrapped form (tool_call.function) and the flat form. #}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>
+{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {#- String arguments are emitted as-is; anything else is JSON-serialized. #}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}
+</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>
+' }}
+    {%- elif message.role == "tool" %}
+        {#- Open a user turn only for the first tool message of a run, and close it only after the last one. #}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '
+<tool_response>
+' }}
+        {{- message.content }}
+        {{- '
+</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>
+' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{#- Generation prompt: when add_generation_prompt is set, open an assistant turn. If enable_thinking is defined and explicitly false, also emit an empty think block right after the turn header. #}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant
+' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>
+</think>
+' }}
+    {%- endif %}
+{%- endif %}

config.json CHANGED Viewed

@@ -1,99 +1,32 @@
 {
   "architectures": [
-    "Gemma3ForConditionalGeneration"
   ],
-  "boi_token_index": 255999,
-  "bos_token_id": 2,
-  "eoi_token_index": 256000,
-  "eos_token_id": 106,
-  "image_token_index": 262144,
   "initializer_range": 0.02,
-  "mm_tokens_per_image": 256,
-  "model_type": "gemma3",
-  "pad_token_id": 0,
-  "text_config": {
-    "attention_bias": false,
-    "attention_dropout": 0.0,
-    "attn_logit_softcapping": null,
-    "cache_implementation": "hybrid",
-    "final_logit_softcapping": null,
-    "head_dim": 256,
-    "hidden_activation": "gelu_pytorch_tanh",
-    "hidden_size": 2560,
-    "initializer_range": 0.02,
-    "intermediate_size": 10240,
-    "layer_types": [
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention"
-    ],
-    "max_position_embeddings": 131072,
-    "model_type": "gemma3_text",
-    "num_attention_heads": 8,
-    "num_hidden_layers": 34,
-    "num_key_value_heads": 4,
-    "query_pre_attn_scalar": 256,
-    "rms_norm_eps": 1e-06,
-    "rope_local_base_freq": 10000.0,
-    "rope_scaling": {
-      "factor": 8.0,
-      "rope_type": "linear"
-    },
-    "rope_theta": 1000000.0,
-    "sliding_window": 1024,
-    "sliding_window_pattern": 6,
-    "torch_dtype": "bfloat16",
-    "use_cache": true,
-    "vocab_size": 262208
-  },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.53.0",
   "unsloth_fixed": true,
-  "unsloth_version": "2025.6.8",
-  "vision_config": {
-    "attention_dropout": 0.0,
-    "hidden_act": "gelu_pytorch_tanh",
-    "hidden_size": 1152,
-    "image_size": 896,
-    "intermediate_size": 4304,
-    "layer_norm_eps": 1e-06,
-    "model_type": "siglip_vision_model",
-    "num_attention_heads": 16,
-    "num_channels": 3,
-    "num_hidden_layers": 27,
-    "patch_size": 14,
-    "torch_dtype": "bfloat16",
-    "vision_use_head": false
-  }
 }

 {
   "architectures": [
+    "Qwen3ForCausalLM"
   ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
   "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151654,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
   "unsloth_fixed": true,
+  "unsloth_version": "2025.6.4",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
 }

generation_config.json CHANGED Viewed

@@ -1,13 +1,14 @@
 {
-  "bos_token_id": 2,
-  "cache_implementation": "hybrid",
   "do_sample": true,
   "eos_token_id": [
-    1,
-    106
   ],
-  "pad_token_id": 0,
-  "top_k": 64,
   "top_p": 0.95,
-  "transformers_version": "4.53.0"
 }

 {
+  "bos_token_id": 151643,
   "do_sample": true,
   "eos_token_id": [
+    151645,
+    151643
   ],
+  "max_length": 40960,
+  "pad_token_id": 151654,
+  "temperature": 0.6,
+  "top_k": 20,
   "top_p": 0.95,
+  "transformers_version": "4.52.4"
 }

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:870fded56ac7c974367a6c9b6d404a820a3799fb9c9f01108764761941e87f24
+size 3441185608

model_f16.gguf CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:044c62fef61a0bd6a33d74c2bc69322c1bd88f083c4839a114b5025e9e1c3540
-size 7767803328

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4f1a9a093d9b126ec886a55458a028801dc1900fcd549937b35245a95b59ea3
+size 3447349376

model_q4_k_m.gguf CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23419289a54012f93b3709b8b9d9b111694fbdc4b90f3bb1817c20ca2a54efd5
-size 2489893568

 version https://git-lfs.github.com/spec/v1
+oid sha256:28e6090992dd1fc902fae6de0587e402d748f24201ad811d28205ff0a31c8faa
+size 1107409024

model_q8_0.gguf CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24ca38d0f3dcac13374c4920f1b5b5166c08c1fd6337c628c7658f88a66b799b
-size 4130401728

 version https://git-lfs.github.com/spec/v1
+oid sha256:48df35c8238e675628f786957fe342442b40594d2e54988a4065f1714e55a968
+size 1834426496

special_tokens_map.json CHANGED Viewed

@@ -1,30 +1,28 @@
 {
-  "boi_token": "<start_of_image>",
-  "bos_token": {
-    "content": "<bos>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eoi_token": "<end_of_image>",
   "eos_token": {
-    "content": "<end_of_turn>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "image_token": "<image_soft_token>",
   "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
   "eos_token": {
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<|vision_pad|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
-size 33384568

 version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff