janbanot committed on
Commit
4631bc7
·
1 Parent(s): d54309a

fix: output streaming

Browse files
Files changed (1) hide show
  1. app.py +25 -20
app.py CHANGED
@@ -5,8 +5,9 @@ from transformers import (
5
  AutoModelForCausalLM,
6
  AutoTokenizer,
7
  BitsAndBytesConfig,
8
- TextStreamer,
9
  )
 
10
 
11
  MODEL_ID = "speakleash/Bielik-11B-v2.3-Instruct"
12
  MODEL_NAME = MODEL_ID.split("/")[-1]
@@ -38,9 +39,7 @@ def test(prompt):
38
  top_k = 0
39
  top_p = 0
40
 
41
- streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
42
-
43
- model.generation_config.pad_token_id = tokenizer.pad_token_id
44
  system = "Jesteś chatboem udzielającym odpowiedzi na pytania w języku polskim"
45
  messages = []
46
 
@@ -62,27 +61,33 @@ def test(prompt):
62
  model_input_ids = tokenizer_output.input_ids
63
  model_attention_mask = tokenizer_output.attention_mask
64
 
65
- outputs = model.generate(
66
- model_input_ids,
67
- attention_mask=model_attention_mask,
68
- streamer=streamer,
69
- max_new_tokens=max_tokens,
70
- do_sample=True if temperature else False,
71
- temperature=temperature,
72
- top_k=top_k,
73
- top_p=top_p,
74
- )
75
-
76
- answer = tokenizer.batch_decode(outputs, skip_special_tokens=False)
77
- # Extract just the assistant's response after last user message
78
- response = answer[0].split("<|im_end|>")[2].strip()
79
- return response
 
 
 
 
 
 
80
 
81
 
82
  demo = gr.Interface(
83
  fn=test,
84
  inputs=gr.Textbox(label="Your question", placeholder="Type your question here..."),
85
- outputs=gr.Text(label="Answer"),
86
  title="Polish Chatbot",
87
  description="Ask questions in Polish to the Bielik-11B-v2.3-Instruct model"
88
  )
 
5
  AutoModelForCausalLM,
6
  AutoTokenizer,
7
  BitsAndBytesConfig,
8
+ TextIteratorStreamer,
9
  )
10
+ from threading import Thread
11
 
12
  MODEL_ID = "speakleash/Bielik-11B-v2.3-Instruct"
13
  MODEL_NAME = MODEL_ID.split("/")[-1]
 
39
  top_k = 0
40
  top_p = 0
41
 
42
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
 
43
  system = "Jesteś chatboem udzielającym odpowiedzi na pytania w języku polskim"
44
  messages = []
45
 
 
61
  model_input_ids = tokenizer_output.input_ids
62
  model_attention_mask = tokenizer_output.attention_mask
63
 
64
+ generate_kwargs = {
65
+ "input_ids": model_input_ids,
66
+ "attention_mask": model_attention_mask,
67
+ "streamer": streamer,
68
+ "max_new_tokens": max_tokens,
69
+ "do_sample": True if temperature else False,
70
+ "temperature": temperature,
71
+ "top_k": top_k,
72
+ "top_p": top_p,
73
+ }
74
+
75
+ t = Thread(target=model.generate, kwargs=generate_kwargs)
76
+ t.start()
77
+
78
+ partial_response = ""
79
+ for new_token in streamer:
80
+ partial_response += new_token
81
+ # Stop if we hit any of the special tokens
82
+ if "<|im_end|>" in partial_response or "<|endoftext|>" in partial_response:
83
+ break
84
+ yield partial_response
85
 
86
 
87
  demo = gr.Interface(
88
  fn=test,
89
  inputs=gr.Textbox(label="Your question", placeholder="Type your question here..."),
90
+ outputs=gr.Textbox(label="Answer", lines=5),
91
  title="Polish Chatbot",
92
  description="Ask questions in Polish to the Bielik-11B-v2.3-Instruct model"
93
  )