nathanrchn committed on
Commit
95b8d87
·
verified ·
1 Parent(s): 2614136

remove unnecessary chat template and fix eos token

Browse files
chat_template.jinja DELETED
@@ -1,327 +0,0 @@
1
- {%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
2
- {%- if param_spec.type == "array" -%}
3
- {%- if param_spec['items'] -%}
4
- {%- if param_spec['items']['type'] == "string" -%}
5
- {{- "string[]" }}
6
- {%- elif param_spec['items']['type'] == "number" -%}
7
- {{- "number[]" }}
8
- {%- elif param_spec['items']['type'] == "integer" -%}
9
- {{- "number[]" }}
10
- {%- elif param_spec['items']['type'] == "boolean" -%}
11
- {{- "boolean[]" }}
12
- {%- else -%}
13
- {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
14
- {%- if inner_type == "object | object" or inner_type|length > 50 -%}
15
- {{- "any[]" }}
16
- {%- else -%}
17
- {{- inner_type + "[]" }}
18
- {%- endif -%}
19
- {%- endif -%}
20
- {%- if param_spec.nullable -%}
21
- {{- " | null" }}
22
- {%- endif -%}
23
- {%- else -%}
24
- {{- "any[]" }}
25
- {%- if param_spec.nullable -%}
26
- {{- " | null" }}
27
- {%- endif -%}
28
- {%- endif -%}
29
- {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
30
- {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
31
- {%- if param_spec.type | length > 1 -%}
32
- {{- param_spec.type | join(" | ") }}
33
- {%- else -%}
34
- {{- param_spec.type[0] }}
35
- {%- endif -%}
36
- {%- elif param_spec.oneOf -%}
37
- {#- Handle oneOf schemas - check for complex unions and fallback to any #}
38
- {%- set has_object_variants = false -%}
39
- {%- for variant in param_spec.oneOf -%}
40
- {%- if variant.type == "object" -%}
41
- {%- set has_object_variants = true -%}
42
- {%- endif -%}
43
- {%- endfor -%}
44
- {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
45
- {{- "any" }}
46
- {%- else -%}
47
- {%- for variant in param_spec.oneOf -%}
48
- {{- render_typescript_type(variant, required_params) -}}
49
- {%- if variant.description %}
50
- {{- "// " + variant.description }}
51
- {%- endif -%}
52
- {%- if variant.default is defined %}
53
- {{ "// default: " + variant.default|tojson }}
54
- {%- endif -%}
55
- {%- if not loop.last %}
56
- {{- " | " }}
57
- {% endif -%}
58
- {%- endfor -%}
59
- {%- endif -%}
60
- {%- elif param_spec.type == "string" -%}
61
- {%- if param_spec.enum -%}
62
- {{- '"' + param_spec.enum|join('" | "') + '"' -}}
63
- {%- else -%}
64
- {{- "string" }}
65
- {%- if param_spec.nullable %}
66
- {{- " | null" }}
67
- {%- endif -%}
68
- {%- endif -%}
69
- {%- elif param_spec.type == "number" -%}
70
- {{- "number" }}
71
- {%- elif param_spec.type == "integer" -%}
72
- {{- "number" }}
73
- {%- elif param_spec.type == "boolean" -%}
74
- {{- "boolean" }}
75
- {%- elif param_spec.type == "object" -%}
76
- {%- if param_spec.properties -%}
77
- {{- "{\n" }}
78
- {%- for prop_name, prop_spec in param_spec.properties.items() -%}
79
- {{- prop_name -}}
80
- {%- if prop_name not in (param_spec.required or []) -%}
81
- {{- "?" }}
82
- {%- endif -%}
83
- {{- ": " }}
84
- {{ render_typescript_type(prop_spec, param_spec.required or []) }}
85
- {%- if not loop.last -%}
86
- {{-", " }}
87
- {%- endif -%}
88
- {%- endfor -%}
89
- {{- "}" }}
90
- {%- else -%}
91
- {{- "object" }}
92
- {%- endif -%}
93
- {%- else -%}
94
- {{- "any" }}
95
- {%- endif -%}
96
- {%- endmacro -%}
97
-
98
- {%- macro render_tools(tools) -%}
99
- {%- for tool in tools %}
100
- {{- "// " + tool.description + "\n" }}
101
- {{- "type "+ tool.name + " = " }}
102
- {%- if tool.parameters and tool.parameters.properties %}
103
- {{- "(_: {\n" }}
104
- {%- for param_name, param_spec in tool.parameters.properties.items() %}
105
- {%- if param_spec.description %}
106
- {{- "// " + param_spec.description + "\n" }}
107
- {%- endif %}
108
- {{- param_name }}
109
- {%- if param_name not in (tool.parameters.required or []) -%}
110
- {{- "?" }}
111
- {%- endif -%}
112
- {{- ": " }}
113
- {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
114
- {%- if param_spec.default is defined -%}
115
- {%- if param_spec.enum %}
116
- {{- ", // default: " + param_spec.default }}
117
- {%- elif param_spec.oneOf %}
118
- {{- "// default: " + param_spec.default }}
119
- {%- else %}
120
- {{- ", // default: " + param_spec.default|tojson }}
121
- {%- endif -%}
122
- {%- endif -%}
123
- {%- if not loop.last %}
124
- {{- ",\n" }}
125
- {%- else %}
126
- {{- "\n" }}
127
- {%- endif -%}
128
- {%- endfor %}
129
- {{- "}) => any;" }}
130
- {%- else -%}
131
- {{- "() => any;" }}
132
- {%- endif -%}
133
- {%- if not loop.last -%}
134
- {{- "\n" }}
135
- {%- endif -%}
136
- {%- endfor %}
137
- {%- endmacro -%}
138
-
139
- {{ bos_token }}
140
-
141
- {%- set system_token = '<|system_start|>' -%}
142
- {%- set end_system_token = '<|system_end|>' -%}
143
- {%- set developer_token = '<|developer_start|>' -%}
144
- {%- set end_developer_token = '<|developer_end|>' -%}
145
- {%- set user_token = '<|user_start|>' -%}
146
- {%- set end_user_token = '<|user_end|>' -%}
147
- {%- set assistant_token = '<|assistant_start|>' -%}
148
- {%- set end_assistant_token = '<|assistant_end|>' -%}
149
- {%- set inner_token = '<|inner_prefix|>' -%}
150
- {%- set outer_token = '<|inner_suffix|>' -%}
151
- {%- set tool_calls_token = '<|tools_prefix|>' -%}
152
- {%- set end_tool_calls_token = '<|tools_suffix|>' -%}
153
-
154
- {%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
155
-
156
- {%- if messages and messages[0].role == 'system' -%}
157
- {%- if "content" in messages[0] -%}
158
- {%- if messages[0].content is string -%}
159
- {{ system_token + messages[0].content + end_system_token }}
160
- {%- elif messages[0].content is mapping and "text" in messages[0].content -%}
161
- {{ system_token + messages[0].content.text + end_system_token }}
162
- {%- else -%}
163
- {{- raise_exception("Invalid system message") -}}
164
- {%- endif -%}
165
- {%- else -%}
166
- {{- raise_exception("Invalid system message") -}}
167
- {%- endif -%}
168
- {%- set loop_messages = messages[1:] -%}
169
- {%- else -%}
170
- {{ system_token + 'You are Apertus, a helpful assistant created by the SwissAI initiative.\nKnowledge cutoff: 2024-04\nCurrent date: ' + strftime_now('%Y-%m-%d') + end_system_token }}
171
- {%- set loop_messages = messages -%}
172
- {%- endif -%}
173
-
174
- {{ developer_token + 'Deliberation: ' }}
175
- {%- if enable_thinking is defined and enable_thinking -%}
176
- {{ 'enabled\n' }}
177
- {%- else -%}
178
- {{ 'disabled\n' }}
179
- {%- endif -%}
180
- {%- if tools is defined and tools -%}
181
- {{ 'Tool Capabilities:\n' + render_tools(tools) }}
182
- {%- else -%}
183
- {{ 'Tool Capabilities: disabled' }}
184
- {%- endif -%}
185
- {{ end_developer_token }}
186
-
187
- {%- for message in loop_messages -%}
188
- {%- if message.role == 'user' -%}
189
- {%- set ns.in_inner = false -%}
190
- {%- if ns.in_tool -%}
191
- {{ ']' }}
192
- {%- set ns.in_tool = false -%}
193
- {%- endif -%}
194
- {%- if ns.in_assistant -%}
195
- {{ end_assistant_token }}
196
- {%- set ns.in_assistant = false -%}
197
- {%- endif -%}
198
- {%- if "content" in message -%}
199
- {{ user_token }}
200
- {%- if message.content is string -%}
201
- {{ message.content }}
202
- {%- elif message.content is mapping and "parts" in message.content -%}
203
- {%- set parts = message.content.parts -%}
204
- {%- for part in parts -%}
205
- {%- if part.type == "text" -%}
206
- {{ part.text }}
207
- {%- else -%}
208
- {{- raise_exception("Invalid user part: " + part.type) -}}
209
- {%- endif -%}
210
- {%- endfor -%}
211
- {%- else -%}
212
- {{- raise_exception("Invalid user message: " + message.role) -}}
213
- {%- endif -%}
214
- {{ end_user_token }}
215
- {%- endif -%}
216
- {%- elif message.role == 'assistant' -%}
217
- {%- if not ns.in_assistant -%}
218
- {{ assistant_token }}
219
- {%- set ns.in_assistant = true -%}
220
- {%- endif -%}
221
- {%- if "content" in message -%}
222
- {%- if message.content is string and (ns.assistant_format is none or ns.assistant_format == "string") -%}
223
- {%- if ns.in_tool -%}
224
- {{ ']' }}
225
- {%- set ns.in_tool = false -%}
226
- {%- endif -%}
227
- {%- set ns.assistant_format = "string" -%}
228
- {{ message.content }}
229
- {%- elif message.content is mapping and "blocks" in message.content and (ns.assistant_format is none or ns.assistant_format == "mapping") -%}
230
- {%- set ns.assistant_format = "mapping" -%}
231
- {%- set blocks = message.content.blocks -%}
232
- {%- for block in blocks -%}
233
- {%- if block.type == 'thoughts' -%}
234
- {%- if ns.in_tool -%}
235
- {{ ']' }}
236
- {%- set ns.in_tool = false -%}
237
- {%- endif -%}
238
- {%- if not ns.in_inner -%}
239
- {%- set ns.in_inner = true -%}
240
- {{ inner_token }}
241
- {%- endif -%}
242
- {{ block.text }}
243
- {%- elif block.type == 'tool_calls' -%}
244
- {%- if ns.in_tool -%}
245
- {{ ']' }}
246
- {%- set ns.in_tool = false -%}
247
- {%- endif -%}
248
- {%- if ns.in_inner and not loop.first and block.calls|length == 1 and block.calls[0].name == 'display_answers' -%}
249
- {%- set ns.in_inner = false -%}
250
- {{ outer_token }}
251
- {%- endif -%}
252
- {{ tool_calls_token + '[' }}
253
- {%- for tool_call in block.calls -%}
254
- {{- '{"' + tool_call.name + '": ' + tool_call.arguments + '}' }}
255
- {%- if not loop.last -%}
256
- {{- ", " }}
257
- {%- endif -%}
258
- {%- endfor -%}
259
- {{ ']' + end_tool_calls_token }}
260
- {%- elif block.type == 'tool_outputs' -%}
261
- {%- if ns.in_tool -%}
262
- {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
263
- {%- endif -%}
264
- {{ '[' }}
265
- {%- for tool_output in block.outputs -%}
266
- {{- tool_output.output }}
267
- {%- if not loop.last -%}
268
- {{- ", " }}
269
- {%- endif -%}
270
- {%- endfor -%}
271
- {{- ']' }}
272
- {%- elif block.type == 'response' -%}
273
- {%- if ns.in_tool -%}
274
- {{ ']' }}
275
- {%- set ns.in_tool = false -%}
276
- {%- endif -%}
277
- {%- if (not loop.first and ns.in_inner) or (ns.in_assistant and ns.in_inner) -%}
278
- {%- set ns.in_inner = false -%}
279
- {{ outer_token }}
280
- {%- endif -%}
281
- {{ block.text }}
282
- {%- else -%}
283
- {{- raise_exception("Invalid assistant block type: " + block.type) -}}
284
- {%- endif -%}
285
- {%- endfor -%}
286
- {%- else -%}
287
- {{- raise_exception("Invalid assistant content") -}}
288
- {%- endif -%}
289
- {%- else -%}
290
- {{- raise_exception("Invalid assistant message") -}}
291
- {%- endif -%}
292
- {%- if "tool_calls" in message and message.tool_calls -%}
293
- {{ tool_calls_token + '[' }}
294
- {%- for tool_call in message.tool_calls -%}
295
- {%- if tool_call.type == 'function' -%}
296
- {%- set function = tool_call.function -%}
297
- {{- '{"' + function.name + '": ' + function.arguments + '}' }}
298
- {%- if not loop.last -%}
299
- {{- ", " }}
300
- {%- endif -%}
301
- {%- else -%}
302
- {{- raise_exception("Invalid tool call type: " + tool_call.type) -}}
303
- {%- endif -%}
304
- {%- endfor -%}
305
- {{ ']' + end_tool_calls_token }}
306
- {%- endif -%}
307
- {%- elif message.role == 'tool' -%}
308
- {%- if not ns.in_assistant -%}
309
- {{- raise_exception("Tool message outside of assistant") -}}
310
- {%- endif -%}
311
- {%- if not ns.in_tool -%}
312
- {{ '[' }}
313
- {%- set ns.in_tool = true -%}
314
- {%- else -%}
315
- {{ ", "}}
316
- {%- endif -%}
317
- {{ message.content }}
318
- {%- else -%}
319
- {{- raise_exception("Invalid message role") -}}
320
- {%- endif -%}
321
- {%- endfor -%}
322
- {%- if ns.in_tool -%}
323
- {{ ']' }}
324
- {%- endif -%}
325
- {%- if add_generation_prompt -%}
326
- {{ assistant_token }}
327
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
- "eos_token_id": [2, 68, 72],
5
  "transformers_version": "4.54.0.dev0"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
  "transformers_version": "4.54.0.dev0"
6
  }
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|assistant_end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "</s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -8006,7 +8006,7 @@
8006
  },
8007
  "bos_token": "<s>",
8008
  "clean_up_tokenization_spaces": false,
8009
- "eos_token": "<|assistant_end|>",
8010
  "extra_special_tokens": {},
8011
  "model_input_names": [
8012
  "input_ids",
 
8006
  },
8007
  "bos_token": "<s>",
8008
  "clean_up_tokenization_spaces": false,
8009
+ "eos_token": "</s>",
8010
  "extra_special_tokens": {},
8011
  "model_input_names": [
8012
  "input_ids",