Skip to content

Commit

Permalink
Fixed some mistakes and updated YouTube(...).download() function
Browse files Browse the repository at this point in the history
  • Loading branch information
henrique-coder committed Dec 27, 2024
1 parent 8eab945 commit 2c1a6a6
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 18 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ youtube.download(
audio_stream=None, # The audio stream generated by .analyze_audio_streams(). (default: None)
output_path=Path.cwd(), # The output path to save the downloaded video and/or audio to. If a directory is provided, the file name will be generated based on the video title and ID, like 'title - [id].extension'. If a file is provided, the file will be saved with the provided name. (default: Path.cwd())
ffmpeg_path='local', # The path to the ffmpeg executable. If 'local', the ffmpeg executable will be searched in the PATH environment variable. (default: 'local')
pre_allocate_space=False, # Whether to pre-allocate disk space for the file, which helps avoid disk fragmentation. (default: False)
use_ram_buffer=True, # Whether to use a RAM buffer to download the file. (default: True)
max_connections='auto', # The maximum number of connections to use for downloading the file. (default: 'auto')
connection_speed=80, # The connection speed in Mbps. (default: 80)
overwrite=True, # Overwrite the file if it already exists. Otherwise, a "_1", "_2", etc. suffix will be added. (default: True)
Expand Down
51 changes: 33 additions & 18 deletions streamsnapper/platforms/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

# Local imports
from ..exceptions import EmptyDataError, InvalidDataError, ScrapingError
from ..functions import format_string, get_value
from ..functions import format_string, get_value, strip
from ..merger import Merger


Expand Down Expand Up @@ -99,6 +99,8 @@ def extract(self, url: Optional[str] = None, ytdlp_data: Optional[Dict[Any, Any]
ScrapingError: If an error occurs while scraping the YouTube video.
"""

self._source_url = url

if ytdlp_data:
self._raw_youtube_data = ytdlp_data
elif not url:
Expand All @@ -109,8 +111,6 @@ def extract(self, url: Optional[str] = None, ytdlp_data: Optional[Dict[Any, Any]
if not video_id:
raise ValueError(f'Invalid YouTube video URL: "{url}"')

url = f'https://www.youtube.com/watch?v={video_id}'

try:
with YoutubeDL(self._ydl_opts) as ydl:
self._raw_youtube_data = ydl.extract_info(url=url, download=False, process=True)
Expand Down Expand Up @@ -170,9 +170,11 @@ def analyze_info(self, check_thumbnails: bool = False, retrieve_dislike_count: b
pass

general_info = {
'fullUrl': f'https://www.youtube.com/watch?v={id_}',
'sourceUrl': self._source_url,
'shortUrl': f'https://youtu.be/{id_}',
'embedUrl': f'https://www.youtube.com/embed/{id_}',
'youtubeMusicUrl': f'https://music.youtube.com/watch?v={id_}',
'fullUrl': f'https://www.youtube.com/watch?v={id_}',
'id': id_,
'title': title,
'cleanTitle': clean_title,
Expand All @@ -191,10 +193,10 @@ def analyze_info(self, check_thumbnails: bool = False, retrieve_dislike_count: b
'uploadTimestamp': get_value(data, 'timestamp', ['release_timestamp']),
'availability': get_value(data, 'availability'),
'chapters': chapters,
'commentCount': get_value(data, 'comment_count', default_to=0),
'likeCount': get_value(data, 'like_count'),
'commentCount': get_value(data, 'comment_count', convert_to=int, default_to=0),
'likeCount': get_value(data, 'like_count', convert_to=int),
'dislikeCount': dislike_count,
'followCount': get_value(data, 'channel_follower_count'),
'followCount': get_value(data, 'channel_follower_count', convert_to=int),
'language': get_value(data, 'language'),
'thumbnails': [
f'https://img.youtube.com/vi/{id_}/maxresdefault.jpg',
Expand Down Expand Up @@ -343,7 +345,7 @@ def extract_stream_info(stream: Dict[Any, Any]) -> Dict[str, Optional[Union[str,
youtube_format_id = get_value(stream, 'format_id', convert_to=int)

data = {
'url': unquote(get_value(stream, 'url')),
'url': get_value(stream, 'url', convert_to=[unquote, strip]),
'codec': codec_parts[0] if codec_parts else None,
'codecVariant': codec_parts[1] if len(codec_parts) > 1 else None,
'rawCodec': codec,
Expand Down Expand Up @@ -464,7 +466,7 @@ def extract_stream_info(stream: Dict[Any, Any]) -> Dict[str, Optional[Union[str,
youtube_format_note = get_value(stream, 'format_note')

data = {
'url': unquote(get_value(stream, 'url')),
'url': get_value(stream, 'url', convert_to=[unquote, strip]),
'codec': codec_parts[0] if codec_parts else None,
'codecVariant': codec_parts[1] if len(codec_parts) > 1 else None,
'rawCodec': codec,
Expand Down Expand Up @@ -524,7 +526,7 @@ def analyze_subtitle_streams(self) -> None:
subtitle_streams[stream] = [
{
'extension': get_value(subtitle, 'ext'),
'url': get_value(subtitle, 'url', convert_to=unquote),
'url': get_value(subtitle, 'url', convert_to=[unquote, strip]),
'language': get_value(subtitle, 'name'),
}
for subtitle in data[stream]
Expand All @@ -538,6 +540,8 @@ def download(
audio_stream: Optional[Dict[str, Any]] = None,
output_path: Union[str, PathLike] = Path.cwd(),
ffmpeg_path: Union[str, PathLike, Literal['local']] = 'local',
pre_allocate_space: bool = False,
use_ram_buffer: bool = True,
max_connections: Union[int, Literal['auto']] = 'auto',
connection_speed: float = 80,
overwrite: bool = True,
Expand All @@ -557,6 +561,8 @@ def download(
audio_stream: The audio stream generated by .analyze_audio_streams(). (default: None)
output_path: The output path to save the downloaded video and/or audio to. If a directory is provided, the file name will be generated based on the video title and ID, like 'title - [id].extension'. If a file is provided, the file will be saved with the provided name. (default: Path.cwd())
ffmpeg_path: The path to the ffmpeg executable. If 'local', the ffmpeg executable will be searched in the PATH environment variable. (default: 'local')
pre_allocate_space: Whether to pre-allocate disk space for the file, which helps avoid disk fragmentation. (default: False)
use_ram_buffer: Whether to use a RAM buffer to download the file. (default: True)
max_connections: The maximum number of connections to use for downloading the file. (default: 'auto')
connection_speed: The connection speed in Mbps. (default: 80)
overwrite: Overwrite the file if it already exists. Otherwise, a "_1", "_2", etc. suffix will be added. (default: True)
Expand All @@ -569,6 +575,7 @@ def download(
Raises:
EmptyDataError: If no YouTube data is available. Please call .extract() first.
InsufficientSpaceError: If there is not enough space to download the file.
"""

if not self._raw_youtube_data:
Expand Down Expand Up @@ -600,20 +607,24 @@ def download(
max_connections=max_connections,
connection_speed=connection_speed,
overwrite=overwrite,
show_progress_bar=show_progress_bar,
show_progress_bars=show_progress_bar,
timeout=timeout,
)
video_downloader.download(video_stream['url'], output_video_path)
video_downloader.download(
video_stream['url'], output_video_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
)

output_audio_path = Path(tmp_path, f'.tmp-audio-{self.general_info["id"]}.{audio_stream["extension"]}')
audio_downloader = TurboDL(
max_connections=max_connections,
connection_speed=connection_speed,
overwrite=overwrite,
show_progress_bar=show_progress_bar,
show_progress_bars=show_progress_bar,
timeout=timeout,
)
audio_downloader.download(audio_stream['url'], output_audio_path)
audio_downloader.download(
audio_stream['url'], output_audio_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
)

merger = Merger(logging=logging)
merger.merge(
Expand All @@ -633,10 +644,12 @@ def download(
max_connections=max_connections,
connection_speed=connection_speed,
overwrite=overwrite,
show_progress_bar=show_progress_bar,
show_progress_bars=show_progress_bar,
timeout=timeout,
)
downloader.download(video_stream['url'], output_path)
downloader.download(
video_stream['url'], output_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
)

return Path(downloader.output_path)
elif audio_stream:
Expand All @@ -649,10 +662,12 @@ def download(
max_connections=max_connections,
connection_speed=connection_speed,
overwrite=overwrite,
show_progress_bar=show_progress_bar,
show_progress_bars=show_progress_bar,
timeout=timeout,
)
downloader.download(audio_stream['url'], output_path)
downloader.download(
audio_stream['url'], output_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
)

return Path(downloader.output_path)

Expand Down

0 comments on commit 2c1a6a6

Please sign in to comment.