qqc1989 committed on
Commit
3cb8955
·
verified ·
1 Parent(s): a3b1a17

update the video models

Browse files
Files changed (43) hide show
  1. .gitattributes +1 -0
  2. main +3 -0
  3. qwen2_5-vl-3b-video-ax650/Qwen2.5-VL-3B-Instruct_vision_nhwc.axmodel +3 -0
  4. qwen2_5-vl-3b-video-ax650/model.embed_tokens.weight.bfloat16.bin +3 -0
  5. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l0_together.axmodel +3 -0
  6. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l10_together.axmodel +3 -0
  7. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l11_together.axmodel +3 -0
  8. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l12_together.axmodel +3 -0
  9. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l13_together.axmodel +3 -0
  10. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l14_together.axmodel +3 -0
  11. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l15_together.axmodel +3 -0
  12. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l16_together.axmodel +3 -0
  13. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l17_together.axmodel +3 -0
  14. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l18_together.axmodel +3 -0
  15. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l19_together.axmodel +3 -0
  16. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l1_together.axmodel +3 -0
  17. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l20_together.axmodel +3 -0
  18. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l21_together.axmodel +3 -0
  19. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l22_together.axmodel +3 -0
  20. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l23_together.axmodel +3 -0
  21. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l24_together.axmodel +3 -0
  22. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l25_together.axmodel +3 -0
  23. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l26_together.axmodel +3 -0
  24. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l27_together.axmodel +3 -0
  25. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l28_together.axmodel +3 -0
  26. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l29_together.axmodel +3 -0
  27. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l2_together.axmodel +3 -0
  28. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l30_together.axmodel +3 -0
  29. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l31_together.axmodel +3 -0
  30. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l32_together.axmodel +3 -0
  31. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l33_together.axmodel +3 -0
  32. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l34_together.axmodel +3 -0
  33. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l35_together.axmodel +3 -0
  34. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l3_together.axmodel +3 -0
  35. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l4_together.axmodel +3 -0
  36. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l5_together.axmodel +3 -0
  37. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l6_together.axmodel +3 -0
  38. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l7_together.axmodel +3 -0
  39. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l8_together.axmodel +3 -0
  40. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l9_together.axmodel +3 -0
  41. qwen2_5-vl-3b-video-ax650/qwen2_5_vl_post.axmodel +3 -0
  42. qwen2_tokenizer_video_308.py +163 -0
  43. run_qwen2_5_vl_video.sh +21 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.axmodel filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  *.axmodel filter=lfs diff=lfs merge=lfs -text
37
+ main filter=lfs diff=lfs merge=lfs -text
main ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ff91a56c40a95babe72fad409621026c045be888a8ced3baa28a58fcbdf4cc
3
+ size 6601952
qwen2_5-vl-3b-video-ax650/Qwen2.5-VL-3B-Instruct_vision_nhwc.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:035e9118aa3f106f35f6cf4caa6829a8c97695693b42aea009e43ae39e9b1a59
3
+ size 777801587
qwen2_5-vl-3b-video-ax650/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84907567aa829c6f24cadbdeb68c3c44d25fc0a8be8e917fd603cb64f72810d
3
+ size 622329856
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5b66385e068162bf861993ab3498d8e4793bd412e8357fd281e98c09f84eff
3
+ size 86933188
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de08eff43d84faa3ed3c46ef171f241bdd73a77a3fc4e029af3223fd18303ad
3
+ size 86935492
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b81ac35c7966ea343d3b449232cfc42f8e02003ed3df5e65a6daf1ab9b56df0
3
+ size 86935588
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6eb5cf26888149648316c0a6b430f174c3d2e25504975df075f2caf1a8d15b6
3
+ size 86936132
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bd67d0240f5281ebafe01896ce863fa624390c9750c05e4ac4497a839596ef
3
+ size 86935524
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8860c974531207ca25f0e0a0d576e876259683ad8f4b748c5d2a1dbc69eafb7f
3
+ size 86935428
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0bb269587734c9984d6e5af6c3e9b03b15fc920fce7c0568e30881ddaac1adb
3
+ size 86935588
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3466ed08253764c2457169fe6edc1aaed9d5a746c34f29d7073173a0e7223b
3
+ size 86935652
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ed757a43338e23bb648361ce76b8c366b89f041eee3ed25a0b22098b0e4f24
3
+ size 86935300
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2566389327e5148b929322196c6cbcb4a4b3f0299fcbf698c255b807b282cd99
3
+ size 86935428
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2c28522ea2549c4c015b8049de1e5aaf5d4afe6ddef68bfd92c12bf47b64191
3
+ size 86934788
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a98e7b824f05bc86d8e9f0cc61ebcf2415350daf185ef02c5f9677352244b0ef
3
+ size 86929860
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1262a309f5eb9d85521e86b6a719428652b2be3a163acd3a13e9a688a506e171
3
+ size 86934212
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656c2bf1d3af4d0154fed5b5d057b9dc24788f3f68cc04b12259c9d8f3ef1493
3
+ size 86935076
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15dd153042c32e822e192f852d7c4ba3d467673f1bda34c58236123782d2aa92
3
+ size 86935620
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c22900e7d11d99c619893cb0a03e7889269b5ff47578c597af3220e35e83a5
3
+ size 86934404
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l24_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25780d4c7db782d98c9827cfdb68ebd495c111990499043bc0fb91e64eab4d7
3
+ size 86935780
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l25_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:443f7c63087635d5a8fe0ac1b51e5ed693c2888c6d1ab9ac2a48b52734e0275e
3
+ size 86934948
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l26_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0d10a3b792f1289a8a79bc68a51134001eaadebd0c7fb9110a7ed80f670658
3
+ size 86935652
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l27_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ff2024170ba4838554ee1ef531cc568a7dea348142fb48e6cd6d996a18eea7
3
+ size 86934628
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l28_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d082101d3c6fd2c09a9f82e505c1890d2bb8e5d69fe213f2725fe4c0ad0bd17
3
+ size 86934948
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l29_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12048a8795683759c909e08ecbc50ae2cdb888ae7b1e4f2899af6e29a729459e
3
+ size 86935940
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3bdfdf9ce858e7e1637a8dac177f060da6894b198c7f9c5b3d6f8f887f61dd
3
+ size 86930788
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l30_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9846af323c192eaee10689d55d37bccf13f62e74124bd12a33495851bae91e2
3
+ size 86935204
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l31_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b42294d688dbdfefd53b657102d86eff35a817a82de5472f390c9e40bbd1a7e8
3
+ size 86934692
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l32_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c60192ff6b15787e848ffefccb70116f2f286714fb9237a31e0aa24ce465a33
3
+ size 86935236
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l33_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc6aebb37f2f0482776e54bed85b3148587c289fb0bfbc4403a58ed0dc4f66ed
3
+ size 86935940
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l34_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c72fd3eb94bd8ca52b1badc07b206bc94392baea543ab0b88e97b6b052b618
3
+ size 86936068
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l35_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:133e9c854d671dbabb949fdd99618d0db0af34d4cc91d286d437e42d385d0d3e
3
+ size 86935748
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34994d1b3ff6e3f4461035307fa244ed712bcf7509c3fa44cf1e4199c91c9592
3
+ size 86933508
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1e4df37b58bec3a18ef6b7de667618939991800c3e2c31b84ee4ed06842491
3
+ size 86932612
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeaa73ad355583c0e500d6fe912ade8815bd173d275f62a1da37a799e7ee9e77
3
+ size 86935204
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e7486c37d3dc3ccfbb34aa5fde22751fa7d9dbd641c68db5ae6300c41745361
3
+ size 86935364
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16efb1338f77d6b38ac8492faf16978a218452dd126751396afe9c6feed7349f
3
+ size 86935972
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l8_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5108611a45e285f7c8429cbfb8370e8bf568e01d52729f673b6f13dd5e22b4a2
3
+ size 86936036
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l9_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcfb4e58c68991faab36dc6ca4eabd47b711b382b50097da0220484be29b0dfa
3
+ size 86935940
qwen2_5-vl-3b-video-ax650/qwen2_5_vl_post.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775af6247fa36dfebc62b4b2b5a485816b532ceba82545e224753c4ddbd07c0c
3
+ size 339965928
qwen2_tokenizer_video_308.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, PreTrainedTokenizerFast
2
+ from transformers.tokenization_utils_base import AddedToken
3
+ from http.server import HTTPServer, BaseHTTPRequestHandler
4
+ import json
5
+ import argparse
6
+
7
+
8
+ class Tokenizer_Http():
9
+
10
+ def __init__(self):
11
+
12
+ path = 'qwen2_5-vl-tokenizer'
13
+ self.tokenizer = AutoTokenizer.from_pretrained(path,
14
+ trust_remote_code=True,
15
+ use_fast=False)
16
+
17
+ def encode(self, content):
18
+ text = [f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n']
19
+ input_ids = self.tokenizer(text)
20
+ return input_ids["input_ids"][0]
21
+
22
+ def encode_vpm(self, content="描述一下这个视频的内容"):
23
+
24
+ # official implementation
25
+ text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pa
d|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_
pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|vide
o_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|vision_end|>{content}<|im_end|>\n<|im_start|>assistant\n"
26
+
27
+ # better for quantized models
28
+ # text = f"<|im_start|>user\n{content}<|im_end|>\n<|im_start|>user\n<|vision_start|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|v
ideo_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><
|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|
><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|video_pad|><|vision_end|><|im_end|>\n<|im_start|>assistant\n"
29
+
30
+ output_kwargs = {'text_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'images_kwargs': {'return_tensors': 'pt'}, 'audio_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'videos_kwargs': {'return_tensors': 'pt'}, 'common_kwargs': {'return_tensors': 'pt'}}
31
+
32
+ text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
33
+ return text_inputs["input_ids"].tolist()[0]
34
+
35
+ def decode(self, token_ids):
36
+ return self.tokenizer.decode(token_ids,
37
+ clean_up_tokenization_spaces=False)
38
+
39
+ @property
40
+ def bos_id(self):
41
+ return self.tokenizer.bos_token_id
42
+
43
+ @property
44
+ def eos_id(self):
45
+ return self.tokenizer.eos_token_id
46
+
47
+ @property
48
+ def bos_token(self):
49
+ return self.tokenizer.bos_token
50
+
51
+ @property
52
+ def eos_token(self):
53
+ return self.tokenizer.eos_token
54
+
55
+
56
+ tokenizer = Tokenizer_Http()
57
+
58
+ print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id,
59
+ tokenizer.eos_token)
60
+ token_ids = tokenizer.encode_vpm()
61
+ # [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198,
62
+ # 151646,
63
+ # 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648,
64
+ # 151647,
65
+ # 198, 5501, 7512, 279, 2168, 19620, 13, 151645, 151644, 77091, 198]
66
+ # 118
67
+ print(token_ids)
68
+ print(len(token_ids))
69
+ token_ids = tokenizer.encode("hello world")
70
+ # [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198, 14990, 1879, 151645, 151644, 77091, 198]
71
+ # 47
72
+ print(token_ids)
73
+ print(len(token_ids))
74
+
75
+
76
+ class Request(BaseHTTPRequestHandler):
77
+ # Define a new request handler class by subclassing BaseHTTPRequestHandler
78
+ timeout = 5
79
+ server_version = 'Apache'
80
+
81
+ def do_GET(self):
82
+ print(self.path)
83
+ # Handle GET requests: this code runs whenever a client sends a GET request to this server
84
+ self.send_response(200)
85
+ self.send_header("type", "get") #设置响应头,可省略或设置多个
86
+ self.end_headers()
87
+
88
+ if self.path == '/bos_id':
89
+ bos_id = tokenizer.bos_id
90
+ # print(bos_id)
91
+ # to json
92
+ if bos_id is None:
93
+ msg = json.dumps({'bos_id': -1})
94
+ else:
95
+ msg = json.dumps({'bos_id': bos_id})
96
+ elif self.path == '/eos_id':
97
+ eos_id = tokenizer.eos_id
98
+ if eos_id is None:
99
+ msg = json.dumps({'eos_id': -1})
100
+ else:
101
+ msg = json.dumps({'eos_id': eos_id})
102
+ else:
103
+ msg = 'error'
104
+
105
+ print(msg)
106
+ msg = str(msg).encode() #转为str再转为byte格式
107
+
108
+ self.wfile.write(msg) #将byte格式的信息返回给客户端
109
+
110
+ def do_POST(self):
111
+ # Handle POST requests: this code runs whenever a client sends a POST request to this server
112
+ data = self.rfile.read(int(
113
+ self.headers['content-length'])) #获取从客户端传入的参数(byte格式)
114
+ data = data.decode() #将byte格式转为str格式
115
+
116
+ self.send_response(200)
117
+ self.send_header("type", "post") #设置响应头,可省略或设置多个
118
+ self.end_headers()
119
+
120
+ if self.path == '/encode':
121
+ req = json.loads(data)
122
+ print(req)
123
+ prompt = req['text']
124
+ b_img_prompt = False
125
+ if 'img_prompt' in req:
126
+ b_img_prompt = req['img_prompt']
127
+ if b_img_prompt:
128
+ token_ids = tokenizer.encode_vpm(prompt)
129
+ else:
130
+ token_ids = tokenizer.encode(prompt)
131
+
132
+ if token_ids is None:
133
+ msg = json.dumps({'token_ids': -1})
134
+ else:
135
+ msg = json.dumps({'token_ids': token_ids})
136
+
137
+ elif self.path == '/decode':
138
+ req = json.loads(data)
139
+ token_ids = req['token_ids']
140
+ text = tokenizer.decode(token_ids)
141
+ if text is None:
142
+ msg = json.dumps({'text': ""})
143
+ else:
144
+ msg = json.dumps({'text': text})
145
+ else:
146
+ msg = 'error'
147
+ print(msg)
148
+ msg = str(msg).encode() #转为str再转为byte格式
149
+
150
+ self.wfile.write(msg) #将byte格式的信息返回给客户端
151
+
152
+
153
+ if __name__ == "__main__":
154
+
155
+ args = argparse.ArgumentParser()
156
+ args.add_argument('--host', type=str, default='localhost')
157
+ args.add_argument('--port', type=int, default=8080)
158
+ args = args.parse_args()
159
+
160
+ host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1'
161
+ print('http://%s:%s' % host)
162
+ server = HTTPServer(host, Request) #根据地址端口号和新定义的类,创建服务器实例
163
+ server.serve_forever() #开启服务
run_qwen2_5_vl_video.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./main \
2
+ --template_filename_axmodel "qwen2_5-vl-3b-video-ax650/qwen2_5_vl_p512_l%d_together.axmodel" \
3
+ --axmodel_num 36 \
4
+ --filename_vpm_resampler_axmodedl "qwen2_5-vl-3b-video-ax650/Qwen2.5-VL-3B-Instruct_vision_nhwc.axmodel" \
5
+ --tokenizer_type 2 \
6
+ --bos 0 --eos 0 \
7
+ --dynamic_load_axmodel_layer 0 \
8
+ --use_mmap_load_embed 1 \
9
+ --filename_tokenizer_model "http://127.0.0.1:12345" \
10
+ --filename_post_axmodel "qwen2_5-vl-3b-video-ax650/qwen2_5_vl_post.axmodel" \
11
+ --use_topk 0 \
12
+ --filename_tokens_embed "qwen2_5-vl-3b-video-ax650/model.embed_tokens.weight.bfloat16.bin" \
13
+ --tokens_embed_num 151936 \
14
+ --tokens_embed_size 2048 \
15
+ --live_print 1 \
16
+ --continue 1 \
17
+ --img_width 308 \
18
+ --img_height 308 \
19
+ --vision_start_token_id 151652 \
20
+ --post_config_path post_config.json \
21
+ --prompt "$1" --image "$2"