import gradio as gr
import os
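# moviepy 1.x import path; moviepy 2.x moved VideoFileClip to the top-level package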
from moviepy.editor import VideoFileClip
from transformers import pipeline
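# Assumed runtime dependencies (not declared in this file): gradio, moviepy 1.x,
# transformers, torch, and an ffmpeg binary on PATH for audio extraction.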

# Load models
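# - distil-whisper/distil-small.en: English-only distilled Whisper for speech recognition
# - facebook/bart-large-cnn: abstractive summarization
# - distilbert-base-cased-distilled-squad: extractive question answering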
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

stored_transcript = ""  # module-level state: shared across all sessions of the app

def transcribe_and_summarize(video_file):
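    """Extract the audio track, transcribe it, and summarize the transcript."""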
    global stored_transcript

    if video_file is None:
        return "Error: No file provided.", ""

    try:
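        # Write the audio track to a temporary 16-bit PCM WAV that the ASR pipeline can read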
        video = VideoFileClip(video_file)
        if video.audio is None:
            return "Error: The uploaded video has no audio track.", ""
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        video.close()

        # return_timestamps=True enables chunked long-form transcription
        transcription_result = asr(audio_path, return_timestamps=True)
        transcribed_text = " ".join(segment['text'].strip() for segment in transcription_result['chunks'])
        os.remove(audio_path)  # clean up the temporary WAV
        stored_transcript = transcribed_text

        # Skip very short transcripts; bart-large-cnn also truncates inputs beyond ~1024 tokens,
        # so truncation=True keeps long transcripts from raising an error.
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            summary_result = summarizer(transcribed_text, max_length=500, min_length=100,
                                        do_sample=False, truncation=True)
            summarized_text = summary_result[0]['summary_text']

        return transcribed_text, summarized_text

    except Exception as e:
        return f"Error: {str(e)}", ""

def answer_question(question):
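    """Answer a question using extractive QA over the stored transcript."""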
    global stored_transcript
    if not stored_transcript:
        return "Please transcribe a video first."
    if not question or not question.strip():
        return "Please enter a question."
    # The QA pipeline strides over long contexts, so transcripts beyond the model window still work
    result = qa_pipeline(question=question, context=stored_transcript)
    return result['answer']

# Custom CSS: black background with yellow (#FFFF33) accents
with gr.Blocks(css="""
body { background-color: black !important; }
.gradio-container { color: #FFFF33 !important; }
button { background-color: #FFFF33 !important; color: black !important; border: none !important; }
input, textarea, .gr-textbox, .gr-video { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; }
""") as iface:
    gr.HTML("<h1 style='color:#FFFF33'>πŸŽ₯ Video Transcriber, Summarizer & Q&A Tool</h1>")
    gr.HTML("<p style='color:#CCCC33'>Upload a video to get a transcript, summary, and ask questions about its content.</p>")

    with gr.Tab("πŸ“ Transcription & Summary"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("πŸš€ Transcribe and Summarize")
        transcribed_text = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text = gr.Textbox(label="Summarized Text", lines=8, interactive=False)

        transcribe_btn.click(fn=transcribe_and_summarize, inputs=video_input, outputs=[transcribed_text, summarized_text])

    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(label="Ask a question based on the transcript", placeholder="E.g., What is the main topic?")
        ask_btn = gr.Button("πŸ” Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)

        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Launch the app; read the port from the environment (PORT1 here), defaulting to Gradio's 7860
port = int(os.environ.get('PORT1', 7860))
iface.launch(share=True, server_port=port)