Skip to content

Commit

Permalink
Various search improvements (#375)
Browse files Browse the repository at this point in the history
* remove "Lyrics" keyword from search

* search with Youtube Music for better results (#373,#374)

- add ytmusicapi as dependency
- modify dump_json() and find_and_download_songs() to use YT Music

* add function to get closest matching string via Levenshtein edit distance

* implement getting closest match in search results

* update CI and package setup to python>=3.8

* Update .github/workflows/tests.yml

---------

Co-authored-by: Sathyajith Bhat <sathya@sathyasays.com>
  • Loading branch information
Maritsu and SathyaBhat authored Jul 4, 2024
1 parent dc1d24c commit 934b6ee
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11, 3.12]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ yt-dlp>=2023.3.4
spotipy~=2.21
mutagen~=1.45
rich~=12.0
urllib3~=1.26
urllib3~=1.26
ytmusicapi~=1.6.0
Levenshtein~=0.25.1
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
setup(
name="spotify_dl",
version=VERSION,
python_requires=">=3.7",
python_requires=">=3.8",
install_requires=requirements,
author="Sathya Bhat",
author_email="sathya@sathyasays.com",
Expand All @@ -36,10 +36,11 @@
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Internet",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
Expand Down
17 changes: 17 additions & 0 deletions spotify_dl/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
import Levenshtein


def get_closest_match(results, expected) -> str:
    """
    Returns closest matching result based on Levenshtein edit distance.

    :param results: iterable of candidate strings to compare against expected
    :param expected: the string every candidate is measured against
    :return: the candidate with the smallest edit distance to expected; when
        several candidates share that distance the first one encountered is
        returned, and an empty string is returned when results is empty
    """
    # min() returns the first minimal element it encounters, which matches a
    # strict "<" scan, and default="" covers empty input without ever calling
    # the key function.
    return min(
        results,
        key=lambda candidate: Levenshtein.distance(candidate, expected),
        default="",
    )


def sanitize(name, replace_with=""):
"""
Removes some of the reserved characters from the name so it can be saved
Expand Down
34 changes: 27 additions & 7 deletions spotify_dl/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import mutagen
import csv
import yt_dlp
import ytmusicapi
from mutagen.easyid3 import EasyID3
from mutagen.id3 import APIC, ID3
from mutagen.mp3 import MP3
from spotify_dl.scaffold import log
from spotify_dl.utils import sanitize
from spotify_dl.utils import sanitize, get_closest_match
from spotify_dl.constants import DOWNLOAD_LIST


Expand All @@ -33,16 +34,26 @@ def dump_json(songs):
:param songs: the songs for which the JSON should be output
"""
for song in songs:
query = f"{song.get('artist')} - {song.get('name')} Lyrics".replace(
query = f"{song.get('artist')} - {song.get('name')}".replace(
":", ""
).replace('"', "")

ydl_opts = {"quiet": True}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
ytJson = ydl.extract_info("ytsearch:" + query, False)
print(json.dumps(ytJson.get("entries")))
ytJson = {}
with ytmusicapi.YTMusic() as ym:
# Reduce results to array of titles and video IDs
result_titles, result_ids = zip(*map(
lambda d: (f"{d['artists'][0]['name']} - {d['title']}".replace(":", "").replace('"', ""), d["videoId"]),
ym.search(query, filter="songs")
))
# Get ID of closest matching result by finding index in titles list
videoId = result_ids[result_titles.index(get_closest_match(result_titles, query))]

ytJson = ydl.extract_info(f"https://music.youtube.com/watch?v={videoId}", False)
print(json.dumps([ytJson])) # insert into array so that the format stays the same
except Exception as e: # skipcq: PYL-W0703
log.debug(e)
print(
Expand Down Expand Up @@ -144,7 +155,7 @@ def set_tags(temp, filename, kwargs):
def find_and_download_songs(kwargs):
"""
function handles actual download of the songs
the youtube_search lib is used to search for songs and get best url
the ytmusicapi lib is used to search for songs and get best url via YT Music
:param kwargs: dictionary of key value arguments to be used in download
"""
sponsorblock_postprocessor = []
Expand All @@ -160,7 +171,7 @@ def find_and_download_songs(kwargs):
int(temp[-1].replace("\n", "")),
)

query = f"{artist} - {name} Lyrics".replace(":", "").replace('"', "")
query = f"{artist} - {name}".replace(":", "").replace('"', "")
print(f"Initiating download for {query}.")

file_name = kwargs["file_name_f"](
Expand Down Expand Up @@ -200,6 +211,15 @@ def find_and_download_songs(kwargs):
print(f"File {mp3file_path} already exists, we do not overwrite it ")
continue

with ytmusicapi.YTMusic() as ym:
# Reduce search results to array of titles and video IDs
result_titles, result_ids = zip(*map(
lambda d: (f"{d['artists'][0]['name']} - {d['title']}".replace(":", "").replace('"', ""), d["videoId"]),
ym.search(query, filter="songs")
))
# Get ID of closest matching result by finding index in titles list
video_id = result_ids[result_titles.index(get_closest_match(result_titles, query))]

outtmpl = f"{file_path}.%(ext)s"
ydl_opts = {
"proxy": kwargs.get("proxy"),
Expand Down Expand Up @@ -227,7 +247,7 @@ def find_and_download_songs(kwargs):
ydl_opts["postprocessors"].append(mp3_postprocess_opts.copy())
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
ydl.download([query])
ydl.download([f"https://music.youtube.com/watch?v={video_id}"])
except Exception as e: # skipcq: PYL-W0703
log.debug(e)
print(f"Failed to download {name}, make sure yt_dlp is up to date")
Expand Down

0 comments on commit 934b6ee

Please sign in to comment.