Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import numpy as np
|
|
9 |
from PIL import Image
|
10 |
from transformers import (
|
11 |
Qwen2VLForConditionalGeneration,
|
|
|
12 |
AutoProcessor,
|
13 |
TextIteratorStreamer,
|
14 |
)
|
@@ -58,7 +59,7 @@ def downsample_video(video_path):
|
|
58 |
return frames
|
59 |
|
60 |
# Model and Processor Setup
|
61 |
-
QV_MODEL_ID = "
|
62 |
qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
|
63 |
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
64 |
QV_MODEL_ID,
|
@@ -66,9 +67,9 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
66 |
torch_dtype=torch.float16
|
67 |
).to("cuda").eval()
|
68 |
|
69 |
-
COREOCR_MODEL_ID = "prithivMLmods/
|
70 |
coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
|
71 |
-
coreocr_model =
|
72 |
COREOCR_MODEL_ID,
|
73 |
trust_remote_code=True,
|
74 |
torch_dtype=torch.bfloat16
|
@@ -117,11 +118,11 @@ def model_inference(message, history, use_coreocr):
|
|
117 |
if use_coreocr:
|
118 |
processor = coreocr_processor
|
119 |
model = coreocr_model
|
120 |
-
model_name = "
|
121 |
else:
|
122 |
processor = qwen_processor
|
123 |
model = qwen_model
|
124 |
-
model_name = "
|
125 |
|
126 |
prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
127 |
all_images = [item["image"] for item in content if item["type"] == "image"]
|
@@ -165,7 +166,7 @@ demo = gr.ChatInterface(
|
|
165 |
multimodal=True,
|
166 |
cache_examples=False,
|
167 |
theme="bethecloud/storj_theme",
|
168 |
-
additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use
|
169 |
)
|
170 |
|
171 |
demo.launch(debug=True, ssr_mode=False)
|
|
|
9 |
from PIL import Image
|
10 |
from transformers import (
|
11 |
Qwen2VLForConditionalGeneration,
|
12 |
+
Qwen2_5_VLForConditionalGeneration,
|
13 |
AutoProcessor,
|
14 |
TextIteratorStreamer,
|
15 |
)
|
|
|
59 |
return frames
|
60 |
|
61 |
# Model and Processor Setup
|
62 |
+
QV_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
|
63 |
qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
|
64 |
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
65 |
QV_MODEL_ID,
|
|
|
67 |
torch_dtype=torch.float16
|
68 |
).to("cuda").eval()
|
69 |
|
70 |
+
COREOCR_MODEL_ID = "prithivMLmods/docscopeOCR-7B-050425-exp"
|
71 |
coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
|
72 |
+
coreocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
73 |
COREOCR_MODEL_ID,
|
74 |
trust_remote_code=True,
|
75 |
torch_dtype=torch.bfloat16
|
|
|
118 |
if use_coreocr:
|
119 |
processor = coreocr_processor
|
120 |
model = coreocr_model
|
121 |
+
model_name = "DocScopeOCR"
|
122 |
else:
|
123 |
processor = qwen_processor
|
124 |
model = qwen_model
|
125 |
+
model_name = "CoreOCR"
|
126 |
|
127 |
prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
128 |
all_images = [item["image"] for item in content if item["type"] == "image"]
|
|
|
166 |
multimodal=True,
|
167 |
cache_examples=False,
|
168 |
theme="bethecloud/storj_theme",
|
169 |
+
additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use DocScopeOCR")],
|
170 |
)
|
171 |
|
172 |
demo.launch(debug=True, ssr_mode=False)
|