prithivMLmods committed on
Commit
a5e1c7c
·
verified ·
1 Parent(s): ddae8ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  from PIL import Image
10
  from transformers import (
11
  Qwen2VLForConditionalGeneration,
 
12
  AutoProcessor,
13
  TextIteratorStreamer,
14
  )
@@ -58,7 +59,7 @@ def downsample_video(video_path):
58
  return frames
59
 
60
  # Model and Processor Setup
61
- QV_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
62
  qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
63
  qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
64
  QV_MODEL_ID,
@@ -66,9 +67,9 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
66
  torch_dtype=torch.float16
67
  ).to("cuda").eval()
68
 
69
- COREOCR_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
70
  coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
71
- coreocr_model = Qwen2VLForConditionalGeneration.from_pretrained(
72
  COREOCR_MODEL_ID,
73
  trust_remote_code=True,
74
  torch_dtype=torch.bfloat16
@@ -117,11 +118,11 @@ def model_inference(message, history, use_coreocr):
117
  if use_coreocr:
118
  processor = coreocr_processor
119
  model = coreocr_model
120
- model_name = "CoreOCR"
121
  else:
122
  processor = qwen_processor
123
  model = qwen_model
124
- model_name = "Qwen2VL OCR"
125
 
126
  prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
127
  all_images = [item["image"] for item in content if item["type"] == "image"]
@@ -165,7 +166,7 @@ demo = gr.ChatInterface(
165
  multimodal=True,
166
  cache_examples=False,
167
  theme="bethecloud/storj_theme",
168
- additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use Qwen2VL OCR")],
169
  )
170
 
171
  demo.launch(debug=True, ssr_mode=False)
 
9
  from PIL import Image
10
  from transformers import (
11
  Qwen2VLForConditionalGeneration,
12
+ Qwen2_5_VLForConditionalGeneration,
13
  AutoProcessor,
14
  TextIteratorStreamer,
15
  )
 
59
  return frames
60
 
61
  # Model and Processor Setup
62
+ QV_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
63
  qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
64
  qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
65
  QV_MODEL_ID,
 
67
  torch_dtype=torch.float16
68
  ).to("cuda").eval()
69
 
70
+ COREOCR_MODEL_ID = "prithivMLmods/docscopeOCR-7B-050425-exp"
71
  coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
72
+ coreocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
73
  COREOCR_MODEL_ID,
74
  trust_remote_code=True,
75
  torch_dtype=torch.bfloat16
 
118
  if use_coreocr:
119
  processor = coreocr_processor
120
  model = coreocr_model
121
+ model_name = "DocScopeOCR"
122
  else:
123
  processor = qwen_processor
124
  model = qwen_model
125
+ model_name = "CoreOCR"
126
 
127
  prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
128
  all_images = [item["image"] for item in content if item["type"] == "image"]
 
166
  multimodal=True,
167
  cache_examples=False,
168
  theme="bethecloud/storj_theme",
169
+ additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use DocScopeOCR")],
170
  )
171
 
172
  demo.launch(debug=True, ssr_mode=False)