Mong muốn:

Lựa chọn tốt nhất là dùng package yt-dlp.

AIO metadata

def get_smallest_webm_audio(url):
    """Return metadata for the smallest audio-only webm format of a video.

    Runs ``yt-dlp -J <url>`` to fetch the video's metadata as JSON, keeps the
    formats whose ``ext`` is ``"webm"``, whose ``vcodec`` is ``"none"`` and
    whose ``filesize`` is known, and picks the one with the smallest
    ``filesize``.

    Args:
        url: Video URL handed to the ``yt-dlp`` command-line tool.

    Returns:
        A dict with the keys ``format_id``, ``filesize``, ``duration``,
        ``title`` and ``ext`` describing the selected format, or ``None``
        when no format matches, when ``yt-dlp`` cannot be started or exits
        with an error, or when its output is not valid JSON. An error
        message is printed in the failure cases.
    """
    # Imported locally so the snippet is usable on its own: the file only
    # imports these modules further down, after this function is called.
    import json
    import subprocess

    # Only the CLI call and the JSON parsing can raise the errors handled
    # below, so the try body is limited to those two steps.
    try:
        result = subprocess.run(
            ["yt-dlp", "-J", url],
            capture_output=True,
            text=True,
            check=True,
        )
        metadata = json.loads(result.stdout)
    except OSError as e:
        # Raised when the yt-dlp executable is missing or not runnable.
        print(f"Error running yt-dlp: {e}")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e.stderr}")
        return None
    except json.JSONDecodeError:
        print("Error decoding JSON output from yt-dlp.")
        return None

    # "vcodec" == "none" marks an audio-only format; entries without a
    # filesize cannot be compared and are skipped.
    candidates = [
        f
        for f in metadata.get("formats") or []
        if f.get("ext") == "webm"
        and f.get("vcodec") == "none"
        and f.get("filesize") is not None
    ]
    if not candidates:
        return None

    smallest = min(candidates, key=lambda f: f["filesize"])
    return {
        "format_id": smallest.get("format_id"),
        "filesize": smallest.get("filesize"),
        "duration": metadata.get("duration"),
        "title": metadata.get("title"),
        "ext": smallest.get("ext"),
    }
    
# The URL must be a bare string: wrapping it in "<...>" (markdown autolink
# syntax) would pass the angle brackets to yt-dlp as part of the URL.
url = "https://www.youtube.com/watch?v=ry9SYnV3svc"  # Replace with your video URL
smallest_webm_audio = get_smallest_webm_audio(url)
if smallest_webm_audio:
    print("Smallest webm audio format:")
    print(smallest_webm_audio)
else:
    print("No webm audio format with filesize found.")

Get video size (size in MB of the first audio-only format that reports a filesize)

import yt_dlp

def get_youtube_video_size_mb(video_url):
    """Return the size in MB of the first audio-only format with a known size.

    Extracts the video's metadata through the ``yt_dlp`` Python API without
    downloading anything, then scans the ``formats`` list in order.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        ``filesize`` of the first format whose ``vcodec`` is ``"none"`` and
        whose ``filesize`` is set, divided by 1024 * 1024; ``None`` when no
        such format exists or when yt-dlp fails to extract the metadata.
    """
    # "listformats" is deliberately not set: the format list is read from the
    # returned info dict. NOTE(review): that option appears to only print the
    # format table -- confirm against the installed yt-dlp version.
    ydl_opts = {"quiet": True}

    try:
        # Only `yt_dlp` is imported by the file, so the class must be
        # referenced through the module; a bare `YoutubeDL` is undefined.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
    except yt_dlp.utils.DownloadError:
        # Extraction failed (bad URL, network error, unavailable video, ...).
        return None

    for f in (result or {}).get("formats") or []:
        size = f.get("filesize")
        # vcodec == "none" indicates an audio-only format.
        if f.get("vcodec") == "none" and size is not None:
            return size / (1024 * 1024)  # Convert bytes to MB

    return None

Get video duration

import yt_dlp

def get_youtube_video_duration_s(video_url):
    """Return the duration of a video in seconds.

    Extracts the video's metadata through the ``yt_dlp`` Python API without
    downloading anything and reads its ``duration`` field.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        The ``duration`` value from the extracted metadata, or ``None`` when
        the field is absent or yt-dlp fails to extract the metadata.
    """
    ydl_opts = {"quiet": True}

    try:
        # Only `yt_dlp` is imported by the file, so the class must be
        # referenced through the module; a bare `YoutubeDL` is undefined.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
    except yt_dlp.utils.DownloadError:
        # Extraction failed (bad URL, network error, unavailable video, ...).
        return None

    return result.get("duration") if result else None

Download

Không thể download bằng Python package yt-dlp (gọi qua API `yt_dlp`): lần nào cũng gặp lỗi HTTP 403.

An error occurred: ERROR: unable to download video data: HTTP Error 403: Forbidden

Cách tốt nhất để download vẫn là dùng yt-dlp, nhưng gọi qua CLI của nó (bằng `subprocess`) thay vì dùng Python API!

import subprocess
import json

def download_yt(url, output_dir="."):
    """Download the smallest audio-only format of a video with the yt-dlp CLI.

    First runs ``yt-dlp -J <url>`` to list the available formats, selects the
    audio-only format (``vcodec == "none"``) with the smallest ``filesize``,
    then runs ``yt-dlp -f <format_id>`` to download it.

    Args:
        url: Video URL handed to the ``yt-dlp`` command-line tool.
        output_dir: Directory the file is written to; the file name follows
            the yt-dlp output template ``%(title)s.%(ext)s``.

    Raises:
        subprocess.CalledProcessError: If the metadata call exits with a
            non-zero status.
        ValueError: If the video has no audio-only format.
    """
    # Get video formats info. check=True surfaces a failed metadata call
    # directly instead of as a JSON error on empty output.
    cmd = ["yt-dlp", "-J", url]
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    formats = json.loads(result.stdout)["formats"]

    # Keep audio-only formats (vcodec == "none").
    audio_formats = [f for f in formats if f.get("vcodec") == "none"]
    if not audio_formats:
        raise ValueError(f"No audio-only format found for {url}")

    # "filesize" may be present with a null value, in which case
    # dict.get's default is not used; `or` maps both a missing and a null
    # size to infinity so such formats are only picked as a last resort.
    best_format = min(
        audio_formats,
        key=lambda f: f.get("filesize") or float("inf"),
    )
    format_id = best_format["format_id"]
    print("👉👉👉 format_id: ", format_id)

    # Download with selected format
    cmd = [
        "yt-dlp",
        "-f", format_id,
        "-o", f"{output_dir}/%(title)s.%(ext)s",
        url,
    ]
    subprocess.run(cmd)

# Usage (the URL is a bare string; "<...>" markdown autolink brackets would
# be passed to yt-dlp as part of the URL)
download_yt("https://www.youtube.com/watch?v=ry9SYnV3svc", "../fake_storage_account/audios/audioDirectory")