Spodcast v0.5.0 which:

Frank de Lange · Frank de Lange · commit 0877f04cb1b2 · 2022-06-30T16:52:41.000Z
- fixes #13 (Cannot download episodes anymore)
- uses _librespot-python_ interfaces instead of raw web API access (needed to fix #13)
- cannot yet determine the decrypted file size for Spotify-hosted episodes (which used to work), so it only looks at the file name to determine whether an episode has already been downloaded. To retry a corrupted download, remove the partially downloaded file and try again.
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = spodcast
-version = 0.4.9
+version = 0.5.0
 description = A caching Spotify podcast to RSS proxy.
 long_description = file:README.md
 long_description_content_type = text/markdown
@@ -20,7 +20,8 @@ platforms = any
 packages =
     spodcast
 install_requires =
-    librespot >= 0.0.1
+    librespot >= 0.0.5
+    pybase62
     ffmpeg-python
     setuptools
 include_package_data =
diff --git a/spodcast/app.py b/spodcast/app.py
@@ -2,8 +2,9 @@
 
 from itertools import islice
 from librespot.audio.decoders import AudioQuality
+from librespot.metadata import ShowId, EpisodeId
 
-from spodcast.podcast import download_episode, get_show_episodes
+from spodcast.podcast import download_episode, get_episodes
 from spodcast.utils import regex_input_for_urls
 from spodcast.spodcast import Spodcast
 
@@ -15,10 +16,15 @@ def client(args) -> None:
 
     if args.urls:
         for spotify_url in args.urls:
-            episode_id, show_id = regex_input_for_urls(spotify_url)
-            log.debug(f"episode_id {episode_id}. show_id {show_id}")
-            if episode_id is not None:
+            episode_id_str, show_id_str = regex_input_for_urls(spotify_url)
+            log.debug(f"episode_id_str {episode_id_str}. show_id_str {show_id_str}")
+            if episode_id_str is not None:
+                episode_id = EpisodeId.from_base62(episode_id_str)
+                log.debug("episode_id: %s", episode_id)
                 download_episode(episode_id)
-            elif show_id is not None:
-                for episode in islice(get_show_episodes(show_id), Spodcast.CONFIG.get_max_episodes()):
-                    download_episode(episode)
+            elif show_id_str is not None:
+                show_id = ShowId.from_base62(show_id_str)
+                log.debug("show_id: %s", show_id)
+                for episode_id in islice(get_episodes(show_id), Spodcast.CONFIG.get_max_episodes()):
+                    log.debug("episode_id: %s", episode_id)
+                    download_episode(episode_id)
diff --git a/spodcast/const.py b/spodcast/const.py
@@ -1,22 +1,8 @@
-ERROR = 'error'
-ITEMS = 'items'
-NAME = 'name'
-DESCRIPTION = "description"
-ID = 'id'
-URL = 'url'
-URI = 'uri'
-EXTERNAL_URLS = 'external_urls'
-SPOTIFY = 'spotify'
-RELEASE_DATE = 'release_date'
-IMAGES = 'images'
+TYPE = 'type'
 LIMIT = 'limit'
 OFFSET = 'offset'
 CREDENTIALS_PREFIX = 'spodcast-cred'
-AUTHORIZATION = 'Authorization'
-DURATION_MS = 'duration_ms'
-SHOW = 'show'
-TYPE = 'type'
 USER_READ_EMAIL = 'user-read-email'
-PLAYLIST_READ_PRIVATE = 'playlist-read-private'
-USER_LIBRARY_READ = 'user-library-read'
 FILE_EXISTS = -1
+OPEN_SPOTIFY_URL = 'open.spotify.com'
+IMAGE_CDN = lambda image_id_hex: f'https://i.scdn.co/image/{image_id_hex}'
diff --git a/spodcast/podcast.py b/spodcast/podcast.py
@@ -6,68 +6,61 @@
 from html import escape
 import urllib.parse
 
-from librespot.metadata import EpisodeId
-
+import base62
+from base62 import CHARSET_INVERTED
 import ffmpeg
 
-from spodcast.const import ERROR, ID, ITEMS, NAME, SHOW, DURATION_MS, DESCRIPTION, RELEASE_DATE, URI, URL, EXTERNAL_URLS, IMAGES, SPOTIFY, FILE_EXISTS
+from librespot import util
+from librespot.metadata import ShowId, EpisodeId
+from librespot.core import ApiClient
+
+from spodcast.const import FILE_EXISTS, IMAGE_CDN
 from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version
-from spodcast.spotapi import EPISODE_INFO_URL, SHOWS_URL, EPISODE_DOWNLOAD_URL, ANON_PODCAST_DOMAIN
-from spodcast.utils import clean_filename
+from spodcast.utils import clean_filename, uri_to_url
 from spodcast.spodcast import Spodcast
 
 log = logging.getLogger(__name__)
 
-def get_info(episode_id_str, target="episode"):
-    log.info("Fetching episode information...")
-    (raw, info) = Spodcast.invoke_url(f'{EPISODE_INFO_URL}/{episode_id_str}')
-    if not info:
-        log.error('INVALID EPISODE ID')
-
-    log.debug("episode info: %s", info)
-
-    if ERROR in info:
-        return None, None
 
-    if target == "episode":
+def hex_to_spotify_id(hex_id):
+    return base62.encodebytes(util.hex_to_bytes(hex_id), CHARSET_INVERTED)
 
-        podcast_name = info[SHOW][NAME]
-        episode_name = info[NAME]
-        duration_ms = info[DURATION_MS]
-        description = info[DESCRIPTION]
-        release_date = info[RELEASE_DATE]
-        uri = info[URI]
 
-        return podcast_name, duration_ms, episode_name, description, release_date, uri
+def get_show_info(show_id_hex):
+    log.info("Fetching show information...")
+    show_id = ShowId.from_hex(show_id_hex)
+    uri = f'spotify:show:{hex_to_spotify_id(show_id_hex)}'
+    info = Spodcast.SESSION.api().get_metadata_4_show(show_id)
+    link = uri_to_url(uri)
+    description = info.description
+    image = IMAGE_CDN(util.bytes_to_hex(info.cover_image.image[1].file_id))
 
-    elif target == "show":
-        podcast_name = info[SHOW][NAME]
-        link = info[SHOW][EXTERNAL_URLS][SPOTIFY]
-        description = info[SHOW][DESCRIPTION]
-        image = info[SHOW][IMAGES][0][URL]
+    return link, description, image
 
-        return podcast_name, link, description, image
 
+def get_episode_info(episode_id_hex):
+    log.info("Fetching episode information...")
+    episode_id = EpisodeId.from_hex(episode_id_hex)
+    uri = f'spotify:episode:{hex_to_spotify_id(episode_id_hex)}'
+    info = Spodcast.SESSION.api().get_metadata_4_episode(episode_id)
+    podcast_name = info.show.name
+    podcast_id = util.bytes_to_hex(info.show.gid)
+    episode_name = info.name
+    duration_ms = info.duration
+    description = info.description
+    external_url = info.external_url if info.external_url else None
+    pt = info.publish_time
+    release_date = f'{pt.year}-{pt.month}-{pt.day}T{pt.hour}:{pt.minute}:00Z'
 
-def get_show_episodes(show_id_str) -> list:
-    episodes = []
-    offset = 0
-    limit = 50
+    return podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, external_url
 
-    log.info("Fetching episodes...")
-    while True:
-        resp = Spodcast.invoke_url_with_params(
-            f'{SHOWS_URL}/{show_id_str}/episodes', limit=limit, offset=offset)
-        offset += limit
-        for episode in resp[ITEMS]:
-            episodes.append([episode[ID], episode[RELEASE_DATE]])
-        if len(resp[ITEMS]) < limit:
-            break
 
-    # some shows list episodes in the wrong order so reverse sort them by release date
-    episodes.sort(key=lambda x: datetime.strptime(x[1], "%Y-%m-%d"), reverse=True)
+def get_episodes(show_id):
+    info = Spodcast.SESSION.api().get_metadata_4_show(show_id)
+    episodes = info.episode
+    episodes.sort(key = lambda x: datetime.strptime(f'{x.publish_time.year}-{x.publish_time.month}-{x.publish_time.day}T{x.publish_time.hour}:{x.publish_time.minute}:00Z', "%Y-%m-%dT%H:%M:%SZ"), reverse=True)
 
-    return [episode[0] for episode in episodes]
+    return [util.bytes_to_hex(episode.gid) for episode in episodes]
 
 
 def download_file(url, filepath):
@@ -101,14 +94,24 @@ def download_file(url, filepath):
 
     return filepath, os.path.getsize(filepath), mimetype
 
+
 def download_stream(stream, filepath):
     size = stream.input_stream.size
+
     mp3_filepath = os.path.splitext(filepath)[0] + ".mp3"
     mimetype = "audio/ogg"
 
     if (
-        ((os.path.isfile(filepath)
-        and abs(size - os.path.getsize(filepath)) < 1000)
+        # "FILE SIZE CHECK TEMPORARILY OUT OF ORDER"
+        # Need to find a way to get decrypted content size
+        # from Spotify to enable file size checks, for now
+        # this only checks for the presence of a file with
+        # the same name. To recover from failed downloads
+        # simply remove incomplete files
+        #
+        #((os.path.isfile(filepath)
+        #and abs(size - os.path.getsize(filepath)) < 1000)
+        (os.path.isfile(filepath)
         or (Spodcast.CONFIG.get_transcode()
         and os.path.isfile(mp3_filepath)))
         and Spodcast.CONFIG.get_skip_existing_files()
@@ -145,75 +148,75 @@ def download_stream(stream, filepath):
 
 
 def download_episode(episode_id) -> None:
-    podcast_name, duration_ms, episode_name, description, release_date, uri = get_info(episode_id, "episode")
-
-    if podcast_name is None:
-        log.warning('Skipping episode (podcast NOT FOUND)')
-    elif episode_name is None:
-        log.warning('Skipping episode (episode NOT FOUND)')
-    else:
-        filename = clean_filename(podcast_name + ' - ' + episode_name)
-        log.debug(Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id)))
-        download_url = Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id))[1]["data"]["episode"]["audio"]["items"][-1]["url"]
-        log.debug(f"download_url: {download_url}")
-        show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/'))
-        os.makedirs(show_directory, exist_ok=True)
-
-        if ANON_PODCAST_DOMAIN in download_url:
-            episode_stream_id = EpisodeId.from_base62(episode_id)
-            stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY)
-            basename = f"{filename}.ogg"
-            filepath = os.path.join(show_directory, basename)
-            path, size, mimetype = download_stream(stream, filepath)
-            basename = os.path.basename(path) # may have changed due to transcoding
-        else:
-            basename=f"{filename}.mp3"
-            filepath = os.path.join(show_directory, basename)
-            path, size, mimetype = download_file(download_url, filepath)
+    try:
+        podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, download_url = get_episode_info(episode_id)
 
-        if size == FILE_EXISTS:
-            log.info(f"Skipped {podcast_name}: {episode_name}")
+        if podcast_name is None:
+            log.warning('Skipping episode (podcast NOT FOUND)')
+        elif episode_name is None:
+            log.warning('Skipping episode (episode NOT FOUND)')
         else:
-            log.warning(f"Downloaded {podcast_name}: {episode_name}")
+            filename = clean_filename(podcast_name + ' - ' + episode_name)
+            show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/'))
+            os.makedirs(show_directory, exist_ok=True)
+
+            if download_url is None:
+                episode_stream_id = EpisodeId.from_hex(episode_id)
+                stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY)
+                basename = f"{filename}.ogg"
+                filepath = os.path.join(show_directory, basename)
+                path, size, mimetype = download_stream(stream, filepath)
+                basename = os.path.basename(path) # may have changed due to transcoding
+            else:
+                basename=f"{filename}.mp3"
+                filepath = os.path.join(show_directory, basename)
+                path, size, mimetype = download_file(download_url, filepath)
+
+            if size == FILE_EXISTS:
+                log.info(f"Skipped {podcast_name}: {episode_name}")
+            else:
+                log.warning(f"Downloaded {podcast_name}: {episode_name}")
+
+                if Spodcast.CONFIG.get_rss_feed():
+                    episode_info = {
+                            "mimetype": mimetype,
+                            "medium": "audio",
+                            "duration": int(duration_ms/1000),
+                            "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%dT%H:%M:%SZ")),
+                            "title": escape(episode_name), "guid": uri, "description": escape(description),
+                            "filename": urllib.parse.quote(basename),
+                            "size": int(size) }
+                    info_file = open(os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}"), "w")
+                    info_file.write(json.dumps(episode_info))
+                    info_file.close()
 
             if Spodcast.CONFIG.get_rss_feed():
-                episode_info = {
-                        "mimetype": mimetype,
-                        "medium": "audio",
-                        "duration": int(duration_ms/1000),
-                        "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%d")),
-                        "title": escape(episode_name), "guid": uri, "description": escape(description),
-                        "filename": urllib.parse.quote(basename),
-                        "size": int(size) }
-                info_file = open(os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}"), "w")
-                info_file.write(json.dumps(episode_info))
-                info_file.close()
-
-        if Spodcast.CONFIG.get_rss_feed():
-            show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}")
-            if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int():
-                podcast_name, link, description, image = get_info(episode_id, "show")
-                show_info = {}
-                if os.path.isfile(show_index_file_name):
-                    with open(show_index_file_name, encoding='utf-8') as file:
-                        show_info = json.load(file)
-                        file.close()
-                show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str())
-                show_info["title"] = escape(podcast_name)
-                show_info["link"] = link
-                show_info["description"] = escape(description)
-                show_info["image"] = RSS_FEED_SHOW_IMAGE 
-                show_index_file = open(show_index_file_name, "w")
-                show_index_file.write(json.dumps(show_info))
-                show_index_file.close()
-
-            show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}")
-            if not os.path.isfile(show_image_name):
-                download_file(image, show_image_name)
-
-            rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME)
-            if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int():
-                rss_file = open(rss_file_name, "w")
-                rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str()))
-                rss_file.close()
-
+                show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}")
+                if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int():
+                    podcast_link, podcast_description, podcast_image = get_show_info(podcast_id)
+                    show_info = {}
+                    if os.path.isfile(show_index_file_name):
+                        with open(show_index_file_name, encoding='utf-8') as file:
+                            show_info = json.load(file)
+                            file.close()
+                    show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str())
+                    show_info["title"] = escape(podcast_name)
+                    show_info["link"] = podcast_link
+                    show_info["description"] = escape(podcast_description)
+                    show_info["image"] = RSS_FEED_SHOW_IMAGE 
+                    show_index_file = open(show_index_file_name, "w")
+                    show_index_file.write(json.dumps(show_info))
+                    show_index_file.close()
+
+                show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}")
+                if not os.path.isfile(show_image_name):
+                    download_file(podcast_image, show_image_name)
+
+                rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME)
+                if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int():
+                    rss_file = open(rss_file_name, "w")
+                    rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str()))
+                    rss_file.close()
+
+    except ApiClient.StatusCodeException as status:
+        log.warning("episode %s, StatusCodeException: %s", episode_id, status)
diff --git a/spodcast/spodcast.py b/spodcast/spodcast.py
@@ -137,6 +137,7 @@ def invoke_url_with_params(cls, url, limit, offset, **kwargs):
     @classmethod
     def invoke_url(cls, url, tryCount=0):
         headers = cls.get_auth_header()
+        Spodcast.LOG.debug(headers)
         response = requests.get(url, headers=headers)
         responsetext = response.text
         responsejson = response.json()
diff --git a/spodcast/spotapi.py b/spodcast/spotapi.py
diff --git a/spodcast/utils.py b/spodcast/utils.py
@@ -5,6 +5,7 @@
 from typing import List, Tuple
 
 from spodcast.spodcast import Spodcast
+from spodcast.const import OPEN_SPOTIFY_URL
 
 valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
 
@@ -47,3 +48,6 @@ def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
     cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist)
     return cleaned_filename
 
+def uri_to_url(spotify_id):
+    (spotify,sp_type,sp_id) = spotify_id.split(':')
+    return f'https://{OPEN_SPOTIFY_URL}/{sp_type}/{sp_id}'