import gradio as gr
import os
from moviepy.editor import VideoFileClip
from transformers import pipeline
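# NOTE: `moviepy.editor` is the moviepy 1.x import path; moviepy 2.x removed that
# module, where the equivalent import is `from moviepy import VideoFileClip`.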
# Load models
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")  # English-only Distil-Whisper ASR
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # abstractive summarization
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")  # extractive Q&A
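# Module-level state: the most recent transcript, read later by the Q&A tab.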
stored_transcript = ""
def transcribe_from_video(video_file):
    """Extract the audio track from an uploaded video, transcribe it, and summarize the transcript."""
    global stored_transcript
    if video_file is None:
        return "Error: No video file provided.", ""
    try:
        video = VideoFileClip(video_file)
        if video.audio is None:
            return "Error: The video has no audio track.", ""
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        transcription_result = asr(audio_path, return_timestamps=True)
        transcribed_text = " ".join(chunk["text"].strip() for chunk in transcription_result["chunks"])
        stored_transcript = transcribed_text
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            # Scale the summary to roughly 15-30% of the transcript's word count,
            # with floors so shorter inputs still get a usable length range.
            word_count = len(transcribed_text.split())
            max_summary_length = max(50, int(word_count * 0.3))
            min_summary_length = max(20, int(word_count * 0.15))
            summary = summarizer(
                transcribed_text,
                max_length=max_summary_length,
                min_length=min_summary_length,
                do_sample=False
            )
            summarized_text = summary[0]["summary_text"]
        return transcribed_text, summarized_text
    except Exception as e:
        return f"Error: {str(e)}", ""
def transcribe_from_audio(audio_file):
    """Transcribe recorded audio and summarize the transcript."""
    global stored_transcript
    if audio_file is None:
        return "Error: No audio recorded.", ""
    try:
        transcription_result = asr(audio_file, return_timestamps=True)
        transcribed_text = " ".join(chunk["text"].strip() for chunk in transcription_result["chunks"])
        stored_transcript = transcribed_text
        if len(transcribed_text.split()) < 50:
            summarized_text = "Text too short to summarize."
        else:
            # Same length heuristic as transcribe_from_video.
            word_count = len(transcribed_text.split())
            max_summary_length = max(50, int(word_count * 0.3))
            min_summary_length = max(20, int(word_count * 0.15))
            summary = summarizer(
                transcribed_text,
                max_length=max_summary_length,
                min_length=min_summary_length,
                do_sample=False
            )
            summarized_text = summary[0]["summary_text"]
        return transcribed_text, summarized_text
    except Exception as e:
        return f"Error: {str(e)}", ""
def answer_question(question):
    """Answer a question against the most recently stored transcript."""
    global stored_transcript
    if not stored_transcript:
        return "Please transcribe a video or record audio first."
    result = qa_pipeline(question=question, context=stored_transcript)
    return result["answer"]
# UI
with gr.Blocks(css="""
body { background-color: black !important; }
.gradio-container { color: #FFFF33 !important; }
button { background-color: #FFFF33 !important; color: black !important; border: none !important; }
input, textarea, .gr-textbox, .gr-video, .gr-audio { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; }
""") as iface:
    gr.HTML("<h1 style='color:#FFFF33'>🎤 Video & Voice Transcriber, Summarizer & Q&A</h1>")
    gr.HTML("<p style='color:#CCCC33'>Upload a video or record speech to get a transcript and summary, then ask questions about it.</p>")
    with gr.Tab("🎥 Video Upload"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("📝 Transcribe from Video")
        transcribed_text_v = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_v = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        transcribe_btn.click(fn=transcribe_from_video, inputs=video_input, outputs=[transcribed_text_v, summarized_text_v])
    with gr.Tab("🎙️ Record Speech"):
        audio_input = gr.Audio(type="filepath", label="Record Audio")
        record_btn = gr.Button("🎧 Transcribe from Audio")
        transcribed_text_a = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_a = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        record_btn.click(fn=transcribe_from_audio, inputs=audio_input, outputs=[transcribed_text_a, summarized_text_a])
    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(label="Ask a question about the transcript", placeholder="E.g., What was the main topic?")
        ask_btn = gr.Button("🔍 Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)
# Launch
port = int(os.environ.get('PORT1', 7860))
# launch() blocks while the server runs and prints the local and public URLs itself,
# so there is no need to capture and print its return value.
iface.launch(share=True, server_port=port)