"""Download a YouTube video, extract its audio, transcribe it, and summarize it.

The script runs top to bottom (it is written for a notebook such as Colab):

1. download the video with pytubefix,
2. extract the audio track to a WAV file with MoviePy,
3. transcribe the audio with a Distil-Whisper ASR pipeline,
4. summarize the transcript with a BART summarization pipeline,
5. save the transcript and the summary as UTF-8 text files.

Setup (shell command, not Python -- in a notebook run it in its own cell
prefixed with ``!``):

    pip install pytubefix moviepy transformers torch
"""
import os
import re

import torch
from pytubefix import YouTube
from transformers import pipeline

try:
    from moviepy.editor import VideoFileClip  # MoviePy 1.x layout
except ImportError:
    # MoviePy 2.x removed the ``moviepy.editor`` module.
    from moviepy import VideoFileClip

# Directory that receives the video, the audio and the text outputs.
OUTPUT_DIR = "/content"

# Characters that are invalid or dangerous in file names on common platforms
# (path separators, Windows-reserved punctuation, control characters).
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with unsafe file-name characters replaced by ``_``.

    Leading/trailing spaces and dots are stripped; if nothing is left the
    placeholder ``"video"`` is returned so a usable file name always exists.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", name).strip(" .")
    return cleaned or "video"


# ---- STEP 1: Download YouTube Video ----
url = "https://www.youtube.com/watch?v=VgxnyKnB3qc&ab"
yt = YouTube(url)
title = yt.title
# The raw title is kept for display; only the sanitized form goes into paths,
# because a title containing e.g. "/" would otherwise point somewhere else.
safe_title = _safe_filename(title)
print(f"Downloading: {title}")

video_stream = yt.streams.get_highest_resolution()
if video_stream is None:
    raise RuntimeError(f"No downloadable video stream found for {url}")

# Pass the directory and the file name separately and trust the path that
# download() reports, rather than smuggling a full path through `filename`.
expected_video_path = os.path.join(OUTPUT_DIR, f"{safe_title}.mp4")
downloaded_path = video_stream.download(
    output_path=OUTPUT_DIR,
    filename=f"{safe_title}.mp4",
)
video_path = downloaded_path or expected_video_path
print(f"Video saved as: {video_path}")

# ---- STEP 2: Extract Audio from Video ----
output_audio = os.path.join(OUTPUT_DIR, f"{safe_title}.wav")
video = VideoFileClip(video_path)
try:
    if video.audio is None:
        raise RuntimeError(f"Video has no audio track: {video_path}")
    video.audio.write_audiofile(output_audio)
finally:
    # Release the reader resources held by the clip even if writing fails.
    video.close()
print(f"Audio extracted: {output_audio}")

# ---- STEP 3: Transcribe Audio ----
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)


def transcribe_audio(audio_file, *, chunk_length_s=15):
    """Transcribe *audio_file* with the module-level ``asr`` pipeline.

    Args:
        audio_file: Path of the audio file to transcribe.
        chunk_length_s: Length in seconds of the windows the pipeline splits
            the audio into. Chunking is what allows recordings longer than a
            single Whisper window to be transcribed in full.

    Returns:
        The transcribed text (the ``"text"`` entry of the pipeline result).
    """
    print("Transcribing audio...")
    transcription_result = asr(audio_file, chunk_length_s=chunk_length_s)
    return transcription_result["text"]


transcribed_text = transcribe_audio(output_audio)
print("Transcription Complete:\n", transcribed_text[:500])  # Preview first 500 characters

# ---- STEP 4: Summarize Transcription ----
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_text(text, *, max_chunk_words=600):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    The text is split into chunks of at most ``max_chunk_words`` words so
    that a long transcript does not exceed the summarization model's input
    limit; each chunk is summarized on its own and the partial summaries are
    joined with single spaces. A text that fits in one chunk is summarized in
    a single call.

    Args:
        text: The text to summarize.
        max_chunk_words: Maximum number of whitespace-separated words per
            chunk sent to the model.

    Returns:
        The summary, or ``"Text too short to summarize."`` when *text* has
        fewer than 50 words.
    """
    words = text.split()
    if len(words) < 50:
        return "Text too short to summarize."

    if len(words) <= max_chunk_words:
        # Short enough for one pass: summarize the text exactly as given.
        chunks = [text]
    else:
        word_groups = [
            words[start:start + max_chunk_words]
            for start in range(0, len(words), max_chunk_words)
        ]
        # Fold a very short tail into the previous chunk so the model is
        # never asked to produce min_length tokens from a handful of words.
        if len(word_groups) > 1 and len(word_groups[-1]) < 50:
            tail = word_groups.pop()
            word_groups[-1].extend(tail)
        chunks = [" ".join(group) for group in word_groups]

    print("Summarizing text...")
    partial_summaries = []
    for chunk in chunks:
        summary_result = summarizer(
            chunk,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,  # safety net if a chunk still exceeds the limit
        )
        partial_summaries.append(summary_result[0]["summary_text"])
    return " ".join(partial_summaries)


summarized_text = summarize_text(transcribed_text)
print("\nSummary:\n", summarized_text)

# ---- OPTIONAL: Save Results to File ----
# Explicit UTF-8 so non-ASCII characters do not depend on the platform's
# default encoding.
transcription_path = os.path.join(OUTPUT_DIR, f"{safe_title}_transcription.txt")
with open(transcription_path, "w", encoding="utf-8") as f:
    f.write(transcribed_text)

summary_path = os.path.join(OUTPUT_DIR, f"{safe_title}_summary.txt")
with open(summary_path, "w", encoding="utf-8") as f:
    f.write(summarized_text)

print("Transcription & Summary saved!")