yujiepan committed on
Commit
2e33456
·
verified ·
1 Parent(s): 5ea8a1b

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +8 -1
  2. tokenizer_config.json +17 -2
special_tokens_map.json CHANGED
@@ -14,6 +14,9 @@
14
  "<|IMAGE|>",
15
  "<|VIDEO|>"
16
  ],
 
 
 
17
  "eos_token": {
18
  "content": "<|im_end|>",
19
  "lstrip": false,
@@ -21,11 +24,15 @@
21
  "rstrip": false,
22
  "single_word": false
23
  },
 
24
  "pad_token": {
25
  "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
- }
 
 
 
31
  }
 
14
  "<|IMAGE|>",
15
  "<|VIDEO|>"
16
  ],
17
+ "audio_bos_token": "<|audio_bos|>",
18
+ "audio_eos_token": "<|audio_eos|>",
19
+ "audio_token": "<|AUDIO|>",
20
  "eos_token": {
21
  "content": "<|im_end|>",
22
  "lstrip": false,
 
24
  "rstrip": false,
25
  "single_word": false
26
  },
27
+ "image_token": "<|IMAGE|>",
28
  "pad_token": {
29
  "content": "<|endoftext|>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false
34
+ },
35
+ "video_token": "<|VIDEO|>",
36
+ "vision_bos_token": "<|vision_bos|>",
37
+ "vision_eos_token": "<|vision_eos|>"
38
  }
tokenizer_config.json CHANGED
@@ -193,16 +193,31 @@
193
  "<|IMAGE|>",
194
  "<|VIDEO|>"
195
  ],
 
 
 
196
  "bos_token": null,
197
  "chat_template": "{% set audio_count = namespace(value=0) %}{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_bos|><|IMAGE|><|vision_eos|>{% elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content %}{% set audio_count.value = audio_count.value + 1 %}{% if add_audio_id %}Audio {{ audio_count.value }}: {% endif %}<|audio_bos|><|AUDIO|><|audio_eos|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_bos|><|VIDEO|><|vision_eos|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
198
  "clean_up_tokenization_spaces": false,
199
  "eos_token": "<|im_end|>",
200
  "errors": "replace",
201
- "extra_special_tokens": {},
 
 
 
 
 
 
 
 
 
202
  "model_max_length": 32768,
203
  "pad_token": "<|endoftext|>",
204
  "processor_class": "Qwen2_5OmniProcessor",
205
  "split_special_tokens": false,
206
  "tokenizer_class": "Qwen2Tokenizer",
207
- "unk_token": null
 
 
 
208
  }
 
193
  "<|IMAGE|>",
194
  "<|VIDEO|>"
195
  ],
196
+ "audio_bos_token": "<|audio_bos|>",
197
+ "audio_eos_token": "<|audio_eos|>",
198
+ "audio_token": "<|AUDIO|>",
199
  "bos_token": null,
200
  "chat_template": "{% set audio_count = namespace(value=0) %}{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_bos|><|IMAGE|><|vision_eos|>{% elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content %}{% set audio_count.value = audio_count.value + 1 %}{% if add_audio_id %}Audio {{ audio_count.value }}: {% endif %}<|audio_bos|><|AUDIO|><|audio_eos|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_bos|><|VIDEO|><|vision_eos|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
201
  "clean_up_tokenization_spaces": false,
202
  "eos_token": "<|im_end|>",
203
  "errors": "replace",
204
+ "extra_special_tokens": {
205
+ "audio_bos_token": "<|audio_bos|>",
206
+ "audio_eos_token": "<|audio_eos|>",
207
+ "audio_token": "<|AUDIO|>",
208
+ "image_token": "<|IMAGE|>",
209
+ "video_token": "<|VIDEO|>",
210
+ "vision_bos_token": "<|vision_bos|>",
211
+ "vision_eos_token": "<|vision_eos|>"
212
+ },
213
+ "image_token": "<|IMAGE|>",
214
  "model_max_length": 32768,
215
  "pad_token": "<|endoftext|>",
216
  "processor_class": "Qwen2_5OmniProcessor",
217
  "split_special_tokens": false,
218
  "tokenizer_class": "Qwen2Tokenizer",
219
+ "unk_token": null,
220
+ "video_token": "<|VIDEO|>",
221
+ "vision_bos_token": "<|vision_bos|>",
222
+ "vision_eos_token": "<|vision_eos|>"
223
  }