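"""Gradio app: upload a video, transcribe it with Distil-Whisper, summarize the
transcript with BART, and ask extractive questions about it with DistilBERT."""
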
import gradio as gr
import os
from moviepy.editor import VideoFileClip  # moviepy 1.x import path; moviepy>=2.0 uses "from moviepy import VideoFileClip"
from transformers import pipeline

# Load models
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
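# Note: each pipeline downloads its model from the Hugging Face Hub on first run;
# pass device=0 to pipeline(...) to run on a GPU if one is available.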

# Global variable to store transcript for Q&A
stored_transcript = ""
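# Note: a module-level global is shared across every browser session of the app;
# gr.State is the usual per-user alternative.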

def transcribe_and_summarize(video_file):
    global stored_transcript

    if video_file is None:
        return "Error: No file provided.", ""

    try:
        # Extract the audio track to a temporary WAV file
        video = VideoFileClip(video_file)
        if video.audio is None:
            video.close()
            return "Error: The video has no audio track.", ""
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        video.close()

        # return_timestamps=True is required for clips longer than ~30 seconds
        transcription_result = asr(audio_path, return_timestamps=True)
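        # The result dict carries a 'chunks' list; each entry looks roughly like
        # {'timestamp': (0.0, 5.2), 'text': ' Hello and welcome...'} (illustrative values)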
        transcribed_text = " ".join([segment['text'] for segment in transcription_result['chunks']])
        stored_transcript = transcribed_text  # Save for Q&A

        # Summarize; bart-large-cnn accepts at most 1024 input tokens, so long
        # transcripts are truncated rather than raising an error
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            summary_result = summarizer(transcribed_text, max_length=500, min_length=100, do_sample=False, truncation=True)
            summarized_text = summary_result[0]['summary_text']

        return transcribed_text, summarized_text

    except Exception as e:
        return f"Error: {str(e)}", ""

def answer_question(question):
    global stored_transcript
    if not stored_transcript:
        return "Please transcribe a video first."
    if not question:
        return "Please enter a question."
    # Extractive QA: the answer is a span copied verbatim from the transcript
    result = qa_pipeline(question=question, context=stored_transcript)
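    # result is a dict like {'score': 0.97, 'start': 10, 'end': 24, 'answer': '...'}
    # (illustrative values)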
    return result['answer']

# Gradio interface: a transcribe-and-summarize section followed by a Q&A section
with gr.Blocks() as iface:
    with gr.Row():
        video_input = gr.Video(label="Upload Video (.mp4)")
        transcribed_text = gr.Textbox(label="Transcribed Text", lines=6)
        summarized_text = gr.Textbox(label="Summarized Text", lines=6)

    transcribe_btn = gr.Button("Transcribe and Summarize")
    transcribe_btn.click(fn=transcribe_and_summarize, inputs=video_input, outputs=[transcribed_text, summarized_text])

    with gr.Row():
        question_input = gr.Textbox(label="Ask a question about the transcript")
        answer_output = gr.Textbox(label="Answer")

    ask_btn = gr.Button("Get Answer")
    ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Launch on the port from the PORT1 environment variable (falls back to Gradio's
# default 7860); share=True also creates a temporary public gradio.live link
port = int(os.environ.get('PORT1', 7860))
iface.launch(share=True, server_port=port)
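
# Dependencies (a sketch; exact versions may vary): ffmpeg on the system PATH, plus
#   pip install gradio "moviepy<2" transformers torch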