Fixed some mistakes and updated the YouTube(...).download() function

henrique-coder · Dec 27, 2024 · 2c1a6a6
1 parent 8eab945
commit 2c1a6a6
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -93,6 +93,8 @@ youtube.download(
     audio_stream=None,  # The audio stream generated by .analyze_audio_streams(). (default: None)
     output_path=Path.cwd(),  # The output path to save the downloaded video and/or audio to. If a directory is provided, the file name will be generated based on the video title and ID, like 'title - [id].extension'. If a file is provided, the file will be saved with the provided name. (default: Path.cwd())
     ffmpeg_path='local',  # The path to the ffmpeg executable. If 'local', the ffmpeg executable will be searched in the PATH environment variable. (default: 'local')
+    pre_allocate_space=False,  # Whether to pre-allocate space for the file, useful to avoid disk fragmentation. (default: False)
+    use_ram_buffer=True,  # Whether to use a RAM buffer to download the file. (default: True)
     max_connections='auto',  # The maximum number of connections to use for downloading the file. (default: 'auto')
     connection_speed=80,  # The connection speed in Mbps. (default: 80)
     overwrite=True,  # Overwrite the file if it already exists. Otherwise, a "_1", "_2", etc. suffix will be added. (default: True)

diff --git a/streamsnapper/platforms/youtube.py b/streamsnapper/platforms/youtube.py
@@ -22,7 +22,7 @@
 
 # Local imports
 from ..exceptions import EmptyDataError, InvalidDataError, ScrapingError
-from ..functions import format_string, get_value
+from ..functions import format_string, get_value, strip
 from ..merger import Merger
 
 
@@ -99,6 +99,8 @@ def extract(self, url: Optional[str] = None, ytdlp_data: Optional[Dict[Any, Any]
             ScrapingError: If an error occurs while scraping the YouTube video.
         """
 
+        self._source_url = url
+
         if ytdlp_data:
             self._raw_youtube_data = ytdlp_data
         elif not url:
@@ -109,8 +111,6 @@ def extract(self, url: Optional[str] = None, ytdlp_data: Optional[Dict[Any, Any]
             if not video_id:
                 raise ValueError(f'Invalid YouTube video URL: "{url}"')
 
-            url = f'https://www.youtube.com/watch?v={video_id}'
-
             try:
                 with YoutubeDL(self._ydl_opts) as ydl:
                     self._raw_youtube_data = ydl.extract_info(url=url, download=False, process=True)
@@ -170,9 +170,11 @@ def analyze_info(self, check_thumbnails: bool = False, retrieve_dislike_count: b
                     pass
 
         general_info = {
-            'fullUrl': f'https://www.youtube.com/watch?v={id_}',
+            'sourceUrl': self._source_url,
             'shortUrl': f'https://youtu.be/{id_}',
             'embedUrl': f'https://www.youtube.com/embed/{id_}',
+            'youtubeMusicUrl': f'https://music.youtube.com/watch?v={id_}',
+            'fullUrl': f'https://www.youtube.com/watch?v={id_}',
             'id': id_,
             'title': title,
             'cleanTitle': clean_title,
@@ -191,10 +193,10 @@ def analyze_info(self, check_thumbnails: bool = False, retrieve_dislike_count: b
             'uploadTimestamp': get_value(data, 'timestamp', ['release_timestamp']),
             'availability': get_value(data, 'availability'),
             'chapters': chapters,
-            'commentCount': get_value(data, 'comment_count', default_to=0),
-            'likeCount': get_value(data, 'like_count'),
+            'commentCount': get_value(data, 'comment_count', convert_to=int, default_to=0),
+            'likeCount': get_value(data, 'like_count', convert_to=int),
             'dislikeCount': dislike_count,
-            'followCount': get_value(data, 'channel_follower_count'),
+            'followCount': get_value(data, 'channel_follower_count', convert_to=int),
             'language': get_value(data, 'language'),
             'thumbnails': [
                 f'https://img.youtube.com/vi/{id_}/maxresdefault.jpg',
@@ -343,7 +345,7 @@ def extract_stream_info(stream: Dict[Any, Any]) -> Dict[str, Optional[Union[str,
             youtube_format_id = get_value(stream, 'format_id', convert_to=int)
 
             data = {
-                'url': unquote(get_value(stream, 'url')),
+                'url': get_value(stream, 'url', convert_to=[unquote, strip]),
                 'codec': codec_parts[0] if codec_parts else None,
                 'codecVariant': codec_parts[1] if len(codec_parts) > 1 else None,
                 'rawCodec': codec,
@@ -464,7 +466,7 @@ def extract_stream_info(stream: Dict[Any, Any]) -> Dict[str, Optional[Union[str,
             youtube_format_note = get_value(stream, 'format_note')
 
             data = {
-                'url': unquote(get_value(stream, 'url')),
+                'url': get_value(stream, 'url', convert_to=[unquote, strip]),
                 'codec': codec_parts[0] if codec_parts else None,
                 'codecVariant': codec_parts[1] if len(codec_parts) > 1 else None,
                 'rawCodec': codec,
@@ -524,7 +526,7 @@ def analyze_subtitle_streams(self) -> None:
             subtitle_streams[stream] = [
                 {
                     'extension': get_value(subtitle, 'ext'),
-                    'url': get_value(subtitle, 'url', convert_to=unquote),
+                    'url': get_value(subtitle, 'url', convert_to=[unquote, strip]),
                     'language': get_value(subtitle, 'name'),
                 }
                 for subtitle in data[stream]
@@ -538,6 +540,8 @@ def download(
         audio_stream: Optional[Dict[str, Any]] = None,
         output_path: Union[str, PathLike] = Path.cwd(),
         ffmpeg_path: Union[str, PathLike, Literal['local']] = 'local',
+        pre_allocate_space: bool = False,
+        use_ram_buffer: bool = True,
         max_connections: Union[int, Literal['auto']] = 'auto',
         connection_speed: float = 80,
         overwrite: bool = True,
@@ -557,6 +561,8 @@ def download(
             audio_stream: The audio stream generated by .analyze_audio_streams(). (default: None)
             output_path: The output path to save the downloaded video and/or audio to. If a directory is provided, the file name will be generated based on the video title and ID, like 'title - [id].extension'. If a file is provided, the file will be saved with the provided name. (default: Path.cwd())
             ffmpeg_path: The path to the ffmpeg executable. If 'local', the ffmpeg executable will be searched in the PATH environment variable. (default: 'local')
+            pre_allocate_space: Whether to pre-allocate space for the file, useful to avoid disk fragmentation. (default: False)
+            use_ram_buffer: Whether to use a RAM buffer to download the file. (default: True)
             max_connections: The maximum number of connections to use for downloading the file. (default: 'auto')
             connection_speed: The connection speed in Mbps. (default: 80)
             overwrite: Overwrite the file if it already exists. Otherwise, a "_1", "_2", etc. suffix will be added. (default: True)
@@ -569,6 +575,7 @@ def download(
 
         Raises:
             EmptyDataError: If no YouTube data is available. Please call .extract() first.
+            InsufficientSpaceError: If there is not enough space to download the file.
         """
 
         if not self._raw_youtube_data:
@@ -600,20 +607,24 @@ def download(
                 max_connections=max_connections,
                 connection_speed=connection_speed,
                 overwrite=overwrite,
-                show_progress_bar=show_progress_bar,
+                show_progress_bars=show_progress_bar,
                 timeout=timeout,
             )
-            video_downloader.download(video_stream['url'], output_video_path)
+            video_downloader.download(
+                video_stream['url'], output_video_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
+            )
 
             output_audio_path = Path(tmp_path, f'.tmp-audio-{self.general_info["id"]}.{audio_stream["extension"]}')
             audio_downloader = TurboDL(
                 max_connections=max_connections,
                 connection_speed=connection_speed,
                 overwrite=overwrite,
-                show_progress_bar=show_progress_bar,
+                show_progress_bars=show_progress_bar,
                 timeout=timeout,
             )
-            audio_downloader.download(audio_stream['url'], output_audio_path)
+            audio_downloader.download(
+                audio_stream['url'], output_audio_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
+            )
 
             merger = Merger(logging=logging)
             merger.merge(
@@ -633,10 +644,12 @@ def download(
                 max_connections=max_connections,
                 connection_speed=connection_speed,
                 overwrite=overwrite,
-                show_progress_bar=show_progress_bar,
+                show_progress_bars=show_progress_bar,
                 timeout=timeout,
             )
-            downloader.download(video_stream['url'], output_path)
+            downloader.download(
+                video_stream['url'], output_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
+            )
 
             return Path(downloader.output_path)
         elif audio_stream:
@@ -649,10 +662,12 @@ def download(
                 max_connections=max_connections,
                 connection_speed=connection_speed,
                 overwrite=overwrite,
-                show_progress_bar=show_progress_bar,
+                show_progress_bars=show_progress_bar,
                 timeout=timeout,
             )
-            downloader.download(audio_stream['url'], output_path)
+            downloader.download(
+                audio_stream['url'], output_path, pre_allocate_space=pre_allocate_space, use_ram_buffer=use_ram_buffer
+            )
 
             return Path(downloader.output_path)