Spaces: Running on Zero
Update app.py
app.py CHANGED
```diff
@@ -62,10 +62,51 @@ with gr.Blocks() as demo:
         infer = _init_infer()
         state['video_path'] = video_path
         yield 'finished initialization, responding...', state
-        if mode
-        yield
+        if mode != 'Conversation':
+            yield 'waiting video input...', state
+        query = message
+        if video_path:
+            message = {
+                "role": "user",
+                "content": [
+                    {"type": "video", "video": video_path},
+                    {"type": "text", "text": query if query else default_query},
+                ],
+            }
+
         else:
-
+            message = {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": query if query else default_query},
+                ],
+            }
+        image_inputs, video_inputs = process_vision_info([message])
+        texts = infer.processor.apply_chat_template([message], tokenize=False, add_generation_prompt=True, return_tensors='pt')
+        past_ids = state.get('past_ids', None)
+        if past_ids is not None:
+            texts = '<|im_end|>\n' + texts[infer.system_prompt_offset:]
+        inputs = infer.processor(
+            text=texts,
+            images=image_inputs,
+            videos=video_inputs,
+            return_tensors="pt",
+        )
+        inputs.to(infer.model.device)
+        if past_ids is not None:
+            inputs['input_ids'] = torch.cat([past_ids, inputs.input_ids], dim=1)
+        outputs = infer.model.generate(
+            **inputs, past_key_values=state.get('past_key_values', None),
+            return_dict_in_generate=True, do_sample=do_sample,
+            repetition_penalty=repetition_penalty,
+            max_new_tokens=512,
+        )
+        state['past_key_values'] = outputs.past_key_values
+        state['past_ids'] = outputs.sequences[:, :-1]
+        response = infer.processor.decode(outputs.sequences[0, inputs.input_ids.size(1):], skip_special_tokens=True)
+        print(response)
+        return response, state
+
     def gr_chatinterface_chatbot_clear_fn():
         return {}, {}, 0, 0
     gr_chatinterface = gr.ChatInterface(
```
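The added block implements multi-turn decoding with KV-cache reuse: each turn stores `past_key_values` and the already-processed token ids in the Gradio `state`, and the next turn tokenizes only the new user message (prefixed with `'<|im_end|>\n'`, with the cached system prompt sliced off via `system_prompt_offset`) before handing the cache back to `generate`. Below is a minimal, text-only sketch of that pattern, not the app's actual code: `model`/`processor` are assumed to be any `transformers` generative model and its processor, `system_prompt_offset` mirrors the app's own attribute (it is not a transformers API), and the explicit attention-mask rebuild is an assumption the diff itself does not make.

```python
# Sketch of the incremental-decoding pattern from the diff (text-only turns).
import torch

def chat_turn(model, processor, message, state, system_prompt_offset=0):
    """Run one chat turn, reusing the KV cache built on previous turns."""
    text = processor.apply_chat_template(
        [message], tokenize=False, add_generation_prompt=True
    )
    past_ids = state.get('past_ids')
    if past_ids is not None:
        # Later turns: the system prompt is already in the cache, so slice it
        # off and close the previous assistant turn with '<|im_end|>\n'.
        text = '<|im_end|>\n' + text[system_prompt_offset:]
    inputs = processor(text=text, return_tensors='pt').to(model.device)
    if past_ids is not None:
        # generate() still needs the full token history alongside the cache,
        # and the attention mask must cover that history too (assumption:
        # no padding, so a mask of ones is safe).
        inputs['input_ids'] = torch.cat([past_ids, inputs.input_ids], dim=1)
        inputs['attention_mask'] = torch.ones_like(inputs.input_ids)
    outputs = model.generate(
        **inputs,
        past_key_values=state.get('past_key_values'),
        return_dict_in_generate=True,
        max_new_tokens=512,
    )
    # The returned cache covers every token except the last one sampled, so
    # the next turn re-feeds outputs.sequences[:, :-1] as its prefix.
    state['past_key_values'] = outputs.past_key_values
    state['past_ids'] = outputs.sequences[:, :-1]
    new_tokens = outputs.sequences[0, inputs.input_ids.size(1):]
    return processor.decode(new_tokens, skip_special_tokens=True), state
```

Storing `outputs.sequences[:, :-1]` rather than the full sequence is the load-bearing detail: `generate` never computes key/values for the final sampled token, so the saved prefix must stay exactly one token shorter than the sequence to line up with the cache on the next turn.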