gizemsarsinlar committed on
Commit 3d9d048 · verified · 1 Parent(s): 1fec0c7

Update app.py

Files changed (1)
  1. app.py +11 -16
app.py CHANGED
@@ -3,8 +3,9 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoProcessor
 import torch
 from PIL import Image
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-# Prompt templates
 user_prompt = '<|user|>\n'
 assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
@@ -30,13 +31,12 @@ def run_example(image, text_input=None, model_id=model_name):
     prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
     image = Image.fromarray(image).convert("RGB")
 
-    inputs = processor(prompt, image, return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        generate_ids = model.generate(
-            **inputs,
-            max_new_tokens=1000,
-            eos_token_id=processor.tokenizer.eos_token_id
-        )
+    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
+    generate_ids = model.generate(
+        **inputs,
+        max_new_tokens=1000,
+        eos_token_id=processor.tokenizer.eos_token_id
+    )
     generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
     response = processor.batch_decode(
         generate_ids,
@@ -53,7 +53,7 @@ css = """
 }
 """
 
-with gr.Blocks(css=css, title="Phi-3.5 Vision Instruct Demo") as demo:
+with gr.Blocks(css=css) as demo:
     gr.Markdown("## Phi-3.5 Vision Instruct Demo with Example Inputs")
 
     with gr.Tab(label="Phi-3.5 Input"):
@@ -82,12 +82,7 @@ with gr.Blocks(css=css, title="Phi-3.5 Vision Instruct Demo") as demo:
             examples_per_page=3
         )
 
-        submit_btn.click(
-            fn=run_example,
-            inputs=[input_img, text_input, model_selector],
-            outputs=output_text
-        )
+        submit_btn.click(run_example, [input_img, text_input, model_selector], [output_text])
 
-# Queue and launch without share (handled automatically on Spaces)
 demo.queue()
-demo.launch()
+demo.launch()
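
Note on the changes above: dropping the torch.no_grad() wrapper is not a behavior change, since transformers' generate() already runs without gradient tracking internally, and the hard-coded .to("cuda:0") together with the flash-attn install assumes a single-GPU Space. For context, a minimal sketch of the model/processor loading that the new lines rely on, which is not part of this diff (the model id, dtype, and attention setting below are assumptions):

import torch
from transformers import AutoModelForCausalLM, AutoProcessor

# Assumed setup, not shown in this commit: load Phi-3.5-vision with remote code
# and flash-attention 2, and keep it on the first GPU to match .to("cuda:0") above.
model_name = "microsoft/Phi-3.5-vision-instruct"  # assumed model id
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    _attn_implementation="flash_attention_2",  # enabled by the flash-attn pip install added above
).to("cuda:0")

On hardware with more than one device (or CPU-only), the removed form .to(model.device) would be the safer target; for a single-GPU Space the two are equivalent.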