File size: 2,489 Bytes
356bab3 5cd1d6b 1487f7e 93d849e 949b582 4041d63 5cd1d6b a952e20 4041d63 a952e20 1487f7e 4041d63 1487f7e 5cd1d6b 3c10179 1487f7e 5cd1d6b 4041d63 5cd1d6b 506f1dd 5cd1d6b 042492f 5cd1d6b 356bab3 4041d63 5cd1d6b 1487f7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
import os
from moviepy.editor import VideoFileClip
from transformers import pipeline
# Inference pipelines are built once at import time and reused by every request.
asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# Most recent transcript: written by transcribe_and_summarize, read by answer_question.
stored_transcript = ""
def transcribe_and_summarize(video_file):
    """Transcribe the audio track of a video file and summarize the transcript.

    The audio is extracted to a temporary WAV file, passed to the module-level
    ``asr`` pipeline, and the resulting text is stored in the module-level
    ``stored_transcript`` so that ``answer_question`` can use it as context.
    Transcripts shorter than 50 whitespace-separated words are not summarized.

    Args:
        video_file: Path to the uploaded video, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On failure the first
        element is a message starting with ``"Error: "`` and the second is an
        empty string; no exception is propagated to the caller.
    """
    global stored_transcript
    # Function-scope import so this block does not depend on the file's import header.
    import tempfile

    if video_file is None:
        return "Error: No file provided.", ""

    audio_path = None
    try:
        # A unique temp file per call: a fixed file name would be overwritten
        # when two requests are processed at the same time.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        video = VideoFileClip(video_file)
        try:
            if video.audio is None:
                return "Error: The video has no audio track.", ""
            video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            # Release the readers held by the clip.
            video.close()

        transcription_result = asr(audio_path, return_timestamps=True)
        transcribed_text = " ".join(
            segment['text'] for segment in transcription_result['chunks']
        )
        stored_transcript = transcribed_text  # Save for Q&A

        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            # truncation=True keeps long transcripts within the model's input limit
            # instead of failing inside the summarizer.
            summary_result = summarizer(
                transcribed_text,
                max_length=500,
                min_length=100,
                do_sample=False,
                truncation=True,
            )
            summarized_text = summary_result[0]['summary_text']
        return transcribed_text, summarized_text
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error: {str(e)}", ""
    finally:
        # Best-effort cleanup of the extracted audio.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
def answer_question(question):
    """Answer a question using the most recently stored transcript as context.

    Args:
        question: The user's question text; may be empty or ``None`` when the
            input box was left blank.

    Returns:
        The ``'answer'`` field of the ``qa_pipeline`` result, or a short
        instruction string when the question is blank or no transcript has
        been stored yet.
    """
    # Guard against a blank question before it reaches the QA pipeline.
    if question is None or not question.strip():
        return "Please enter a question."
    if not stored_transcript:
        return "Please transcribe a video first."
    result = qa_pipeline(question=question, context=stored_transcript)
    return result['answer']
# Gradio UI: video/transcript/summary row with its button, then the Q&A row with its button.
# NOTE(review): the buttons are assumed to sit below their rows rather than inside them — confirm.
with gr.Blocks() as iface:
    with gr.Row():
        video_input = gr.Video(label="Upload Video (.mp4)")
        transcribed_text = gr.Textbox(label="Transcribed Text", lines=6)
        summarized_text = gr.Textbox(label="Summarized Text", lines=6)
    transcribe_btn = gr.Button("Transcribe and Summarize")

    with gr.Row():
        question_input = gr.Textbox(label="Ask a question about the transcript")
        answer_output = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Get Answer")

    # Event wiring, registered in the same order as before.
    transcribe_btn.click(
        fn=transcribe_and_summarize,
        inputs=video_input,
        outputs=[transcribed_text, summarized_text],
    )
    ask_btn.click(
        fn=answer_question,
        inputs=question_input,
        outputs=answer_output,
    )

# Serve on the port named by the PORT1 environment variable, defaulting to 7860.
port = int(os.environ.get("PORT1", 7860))
iface.launch(share=True, server_port=port)
|