From b62f9211edd9fbc59ab87b82296889905ac8746e Mon Sep 17 00:00:00 2001 From: elucida Date: Sun, 22 Dec 2024 15:23:31 +0800 Subject: [PATCH] add ydl_args config to customize YoutubeDL ydl_opts This makes it easy to pass options such as proxy and cookies when downloading (see https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py for the available options), which helps fix issues like #347 and #348. It works like #350 but is more general. --- video2dataset/data_reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/video2dataset/data_reader.py b/video2dataset/data_reader.py index 4dda4734..1a39e6d8 100644 --- a/video2dataset/data_reader.py +++ b/video2dataset/data_reader.py @@ -165,11 +165,13 @@ class YtDlpDownloader: download_size: preferred height of video to download. Will try to download smallest video >=download_size download_audio_rate: same as size but with audio yt_metadata_args: see get_yt_metadata function docstring + ydl_args: see YoutubeDL docstring """ # TODO: maybe we just include height and width in the metadata_args def __init__(self, yt_args, tmp_dir, encode_formats): self.metadata_args = yt_args.get("yt_metadata_args", {}) + self.ydl_args = yt_args.get("ydl_args", {}) self.video_size = yt_args.get("download_size", 360) self.audio_rate = yt_args.get("download_audio_rate", 44100) self.tmp_dir = tmp_dir @@ -194,6 +196,7 @@ def __call__(self, url): if self.encode_formats.get("audio", None): audio_path_m4a = f"{self.tmp_dir}/{str(uuid.uuid4())}.m4a" ydl_opts = { + **self.ydl_args, "outtmpl": audio_path_m4a, "format": audio_fmt_string, "quiet": True, @@ -217,6 +220,7 @@ def __call__(self, url): if self.encode_formats.get("video", None): video_path = f"{self.tmp_dir}/{str(uuid.uuid4())}.mp4" ydl_opts = { + **self.ydl_args, "outtmpl": video_path, "format": video_format_string, "quiet": True,