mrfakename committed
Commit b855287 · verified · 1 Parent(s): a543525

Update app.py

Files changed (1)
app.py +50 -108
app.py CHANGED
@@ -1,122 +1,64 @@
-import spaces
 import gradio as gr
 import torch
-from transformers import AutoModel, AutoTokenizer

-# Load model and tokenizer
-model_path = "apple/DiffuCoder-7B-cpGRPO"
-device = "cuda" if torch.cuda.is_available() else "cpu"

-model = AutoModel.from_pretrained(
-    model_path,
-    torch_dtype=torch.bfloat16,
-    trust_remote_code=True
-).to(device).eval()

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-tokenizer.eos_token = "<|im_end|>"

 @spaces.GPU
-def generate_code(query, temperature=0.4, top_p=0.95, max_new_tokens=256):
-    # Format prompt using chat template
-    messages = [
-        {"role": "system", "content": "You are a helpful coding assistant."},
-        {"role": "user", "content": query.strip()}
-    ]
-
-    # Apply chat template - this creates the prompt but doesn't include assistant response
-    prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    # Tokenize only the prompt (without any assistant response)
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs.input_ids.to(device)
-    attention_mask = inputs.attention_mask.to(device)
-
-    # Calculate initial prompt length - this is where the assistant response will start
-    initial_prompt_len = input_ids.shape[1]
-
-    # Track EOS status
-    eos_detected = False
-
-    # Generate with token streaming
-    TOKEN_PER_STEP = 1
-    steps = min(max_new_tokens // TOKEN_PER_STEP, 512)  # Limit to max 512 steps
-
-    # This will accumulate only the assistant's response
-    assistant_response = ""
-
-    for i in range(steps):
-        if eos_detected:
-            break
-
-        output = model.diffusion_generate(
-            input_ids,
-            attention_mask=attention_mask,
-            max_new_tokens=TOKEN_PER_STEP,
-            output_history=True,
-            return_dict_in_generate=True,
-            steps=1,
-            temperature=temperature,
-            top_p=top_p,
-            alg="entropy",
-            alg_temp=0.,
-        )
-
-        # Get only the new tokens generated in this step
-        new_token_ids = output.sequences[0, -TOKEN_PER_STEP:]
-
-        # Check for EOS token in the new tokens
-        if tokenizer.eos_token_id in new_token_ids:
-            # If EOS is found, stop after this token
-            eos_detected = True
-            # Remove EOS token from output
-            new_token_ids = new_token_ids[new_token_ids != tokenizer.eos_token_id]
-            if new_token_ids.numel() == 0:
-                # Only EOS was generated, nothing to add
-                break
-
-        # Decode only the new tokens
-        new_text = tokenizer.decode(
-            new_token_ids,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=False
-        )
-
-        # Update input for next step
-        input_ids = output.sequences
-        attention_mask = torch.cat([
-            attention_mask,
-            torch.ones(1, 1, dtype=attention_mask.dtype, device=device)
-        ], dim=1)
-
-        # Append to assistant response and yield
-        assistant_response += new_text
-        # Remove any trailing special tokens
-        clean_response = assistant_response.replace('<|dlm_pad|>', '').strip()
-        yield clean_response
-
-        if eos_detected:
-            break

-# Create Gradio interface
 demo = gr.Interface(
-    fn=generate_code,
     inputs=[
-        gr.Textbox(label="Code Request", lines=3,
-                   placeholder="Describe the code you want..."),
-        gr.Slider(0.1, 1.0, value=0.4, label="Temperature"),
-        gr.Slider(0.5, 1.0, value=0.95, label="Top-p"),
-        gr.Slider(32, 512, value=256, step=32, label="Max Tokens")
     ],
-    outputs=gr.Textbox(label="Generated Code", lines=10),
-    title="🧠 DiffuCoder Code Generator",
-    description="Generate code with Apple's DiffuCoder-7B model"
 )

-# Run the demo
 if __name__ == "__main__":
-    demo.queue().launch()
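Editor's note: the removed app streamed partial output by making generate_code a generator; gr.Interface re-renders the output component on every yield, and demo.queue() enables that streaming path (it is on by default in newer Gradio releases). A minimal sketch of the same pattern, with a dummy loop standing in for the removed per-token diffusion_generate calls:

import time
import gradio as gr

def stream_text(prompt):
    # Stand-in for the removed loop that generated one token per step
    # and yielded the accumulated assistant response each time.
    out = ""
    for word in prompt.split():
        out += word + " "
        time.sleep(0.1)    # simulate one generation step
        yield out.strip()  # each yield refreshes the output textbox

demo = gr.Interface(fn=stream_text, inputs=gr.Textbox(), outputs=gr.Textbox())

if __name__ == "__main__":
    demo.queue().launch()  # queue() enables generator streaming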
 
 
 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModel
+from nextstep.models.gen_pipeline import NextStepPipeline
+import spaces  # used below as the @spaces.GPU decorator

+HF_HUB = "stepfun-ai/NextStep-1-Large"

+# Load model & tokenizer
+tokenizer = AutoTokenizer.from_pretrained(HF_HUB, local_files_only=True, trust_remote_code=True)
+model = AutoModel.from_pretrained(HF_HUB, local_files_only=True, trust_remote_code=True)
+pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device="cuda", dtype=torch.bfloat16)

+IMG_SIZE = 512

 @spaces.GPU
+def generate(main_prompt, positive_prompt, negative_prompt):
+    image = pipeline.generate_image(
+        main_prompt,
+        hw=(IMG_SIZE, IMG_SIZE),
+        num_images_per_caption=1,
+        positive_prompt=positive_prompt,
+        negative_prompt=negative_prompt,
+        cfg=7.5,
+        cfg_img=1.0,
+        cfg_schedule="constant",
+        use_norm=False,
+        num_sampling_steps=28,
+        timesteps_shift=1.0,
+        seed=3407,
+    )[0]
+    return image

 demo = gr.Interface(
+    fn=generate,
     inputs=[
+        gr.Textbox(label="Main Prompt", lines=2, placeholder="Describe your dream image..."),
+        gr.Textbox(label="Positive Prompt", lines=1, placeholder="Extra quality boosters..."),
+        gr.Textbox(label="Negative Prompt", lines=1, placeholder="Things to avoid..."),
     ],
+    outputs=gr.Image(label="Generated Image"),
+    title="NextStep Image Generator",
+    description="Simple Gradio demo for NextStep-1-Large with editable prompts.",
+    examples=[
+        [
+            'A realistic photograph of a wall with "NextStep-1.1 is coming" prominently displayed',
+            'masterpiece, film grained, best quality.',
+            'lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry.'
+        ],
+        [
+            'A cinematic shot of a futuristic city at sunset',
+            'ultra-detailed, 8k, volumetric lighting, masterpiece',
+            'blurry, lowres, noise, distortion'
+        ],
+        [
+            'A fantasy painting of a dragon flying over a castle',
+            'high detail, vibrant colors, epic composition',
+            'washed out colors, bad anatomy, low detail'
+        ],
+    ]
 )

 if __name__ == "__main__":
+    demo.launch()
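Editor's note: for a quick smoke test of the new pipeline outside the Space, the same calls can be run as a plain script. A minimal sketch, assuming the nextstep package and the NextStep-1-Large weights are already available locally (the Space itself passes local_files_only=True) and that generate_image returns a list of PIL images:

import torch
from transformers import AutoTokenizer, AutoModel
from nextstep.models.gen_pipeline import NextStepPipeline

HF_HUB = "stepfun-ai/NextStep-1-Large"

# Same setup as the new app.py, minus the Gradio layer
tokenizer = AutoTokenizer.from_pretrained(HF_HUB, trust_remote_code=True)
model = AutoModel.from_pretrained(HF_HUB, trust_remote_code=True)
pipeline = NextStepPipeline(tokenizer=tokenizer, model=model).to(device="cuda", dtype=torch.bfloat16)

# One generation with the same arguments the Space uses
image = pipeline.generate_image(
    "A cinematic shot of a futuristic city at sunset",
    hw=(512, 512),
    num_images_per_caption=1,
    positive_prompt="ultra-detailed, 8k, volumetric lighting, masterpiece",
    negative_prompt="blurry, lowres, noise, distortion",
    cfg=7.5,
    cfg_img=1.0,
    cfg_schedule="constant",
    use_norm=False,
    num_sampling_steps=28,
    timesteps_shift=1.0,
    seed=3407,
)[0]
image.save("nextstep_sample.png")  # assumes a PIL.Image return value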