Mong muốn:
Ngon nhất là dùng package yt-dlp.
def get_smallest_webm_audio(url):
    """Return metadata for the smallest audio-only WebM format of a video.

    Runs ``yt-dlp -J <url>`` to dump the video's metadata as JSON, keeps the
    formats whose ``ext`` is ``"webm"``, whose ``vcodec`` is ``"none"`` and
    whose ``filesize`` is not ``None``, and picks the one with the smallest
    ``filesize``.

    Args:
        url: URL of the video to inspect.

    Returns:
        A dict with the keys ``format_id``, ``filesize``, ``duration``,
        ``title`` and ``ext`` describing the selected format, or ``None``
        when no format qualifies, when yt-dlp cannot be run, or when its
        output is not valid JSON (an error message is printed in the last
        two cases).
    """
    # Imported here so the function is self-contained: the module-level
    # imports of these modules appear after the first call site.
    import json
    import subprocess

    try:
        # -J makes yt-dlp print the video metadata as a single JSON document.
        result = subprocess.run(
            ["yt-dlp", "-J", url],
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError:
        # Raised by subprocess when the yt-dlp executable cannot be found.
        print("Error running yt-dlp: executable not found on PATH.")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e.stderr}")
        return None

    try:
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("Error decoding JSON output from yt-dlp.")
        return None

    # `or []` also covers a "formats" key that is present but null.
    formats = metadata.get("formats") or []

    # Audio-only WebM formats whose size is known.
    candidates = [
        f
        for f in formats
        if f.get("ext") == "webm"
        and f.get("vcodec") == "none"
        and f.get("filesize") is not None
    ]
    if not candidates:
        return None

    smallest = min(candidates, key=lambda f: f["filesize"])
    return {
        "format_id": smallest.get("format_id"),
        "filesize": smallest.get("filesize"),
        "duration": metadata.get("duration"),
        "title": metadata.get("title"),
        "ext": smallest.get("ext"),
    }
# Example: look up the smallest audio-only WebM format of one video.
url = "https://www.youtube.com/watch?v=ry9SYnV3svc"  # Replace with your video URL
smallest_webm_audio = get_smallest_webm_audio(url)
if smallest_webm_audio:
    print("Smallest webm audio format:")
    print(smallest_webm_audio)
else:
    print("No webm audio format with filesize found.")
import yt_dlp
def get_youtube_video_size_mb(video_url):
    """Get the size in MB of the first audio-only format of a video.

    Extracts the video's metadata with ``yt_dlp.YoutubeDL`` (nothing is
    downloaded) and returns the ``filesize`` of the first format whose
    ``vcodec`` is ``"none"`` and whose ``filesize`` is not ``None``.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        The file size in MB (bytes / 1024 / 1024) as a float, or ``None``
        when no such format exists or when anything goes wrong.
    """
    try:
        ydl_opts = {"listformats": True, "quiet": True}
        # The file imports the module as `yt_dlp`, so the class must be
        # reached through it; a bare `YoutubeDL` is an undefined name.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
        formats = result.get("formats") or []
        for f in formats:
            size = f.get("filesize")
            # vcodec == "none" indicates an "audio only" format.
            if f.get("vcodec") == "none" and size is not None:
                return size / (1024 * 1024)  # Convert bytes to MB
        return None
    except Exception:
        # Best-effort lookup: every failure is reported as None.
        return None
import yt_dlp
def get_youtube_video_duration_s(video_url):
    """Get the duration of a YouTube video in seconds.

    Extracts the video's metadata with ``yt_dlp.YoutubeDL`` (nothing is
    downloaded) and returns its ``duration`` field.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        The value of the ``duration`` metadata field, or ``None`` when the
        field is absent or when anything goes wrong.
    """
    try:
        ydl_opts = {"quiet": True}
        # The file imports the module as `yt_dlp`, so the class must be
        # reached through it; a bare `YoutubeDL` is an undefined name.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
        return result.get("duration")
    except Exception:
        # Best-effort lookup: every failure is reported as None.
        return None
Không thể download bằng Python package yt-dlp được, toàn ra lỗi 403:
An error occurred: ERROR: unable to download video data: HTTP Error 403: Forbidden
Cách hay nhất để download vẫn là dùng yt-dlp, nhưng thông qua CLI của nó!
import subprocess
import json
def download_yt(url, output_dir="."):
    """Download the smallest audio-only format of a video via the yt-dlp CLI.

    First runs ``yt-dlp -J <url>`` to list the available formats, selects
    the audio-only format (``vcodec == "none"``) with the smallest
    ``filesize``, then runs ``yt-dlp -f <format_id>`` to download it into
    ``output_dir`` as ``<title>.<ext>``.

    Args:
        url: URL of the video to download.
        output_dir: Directory the file is written to. Defaults to the
            current directory.

    Returns:
        None. Failures (yt-dlp missing or exiting with an error, unparsable
        metadata, no audio-only format) are reported with ``print`` and end
        the function early.
    """
    # Get video formats info
    try:
        result = subprocess.run(
            ["yt-dlp", "-J", url],
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError:
        # Raised by subprocess when the yt-dlp executable cannot be found.
        print("Error running yt-dlp: executable not found on PATH.")
        return
    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e.stderr}")
        return

    try:
        metadata = json.loads(result.stdout)
    except json.JSONDecodeError:
        print("Error decoding JSON output from yt-dlp.")
        return

    # `or []` also covers a "formats" key that is present but null.
    formats = metadata.get("formats") or []

    # Find audio format with smallest filesize
    audio_formats = [f for f in formats if f.get("vcodec") == "none"]
    if not audio_formats:
        print("No audio-only format found.")
        return

    def _size_or_inf(fmt):
        # dict.get's default only applies to a missing key; a key that is
        # present with value None must also sort last, and None cannot be
        # compared with a number.
        size = fmt.get("filesize")
        return float("inf") if size is None else size

    best_format = min(audio_formats, key=_size_or_inf)
    format_id = best_format["format_id"]
    print("👉👉👉 format_id: ", format_id)

    # Download with selected format
    download = subprocess.run(
        [
            "yt-dlp",
            "-f", format_id,
            "-o", f"{output_dir}/%(title)s.%(ext)s",
            url,
        ]
    )
    if download.returncode != 0:
        print(f"yt-dlp download failed with exit code {download.returncode}.")
# Usage: download the smallest audio-only format into the given directory.
download_yt("https://www.youtube.com/watch?v=ry9SYnV3svc", "../fake_storage_account/audios/audioDirectory")