Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use multiprocessing.Pool to massively parallelise download using multiple cores #290

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ unins
.coverage
.vscode/**
.venv/*
venv/*

# macOS temporary files
.DS_Store


27 changes: 26 additions & 1 deletion spotify_dl/spotify_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from spotify_dl.spotify import fetch_tracks, parse_spotify_url, validate_spotify_url, get_item_name
from spotify_dl.youtube import download_songs, default_filename, playlist_num_filename

import multiprocessing


def spotify_dl():
"""Main entry point of the script."""
Expand Down Expand Up @@ -94,12 +96,35 @@ def spotify_dl():
songs = fetch_tracks(sp, item_type, url)
else:
songs = {}

# list of parameter dictionaries to be sent to download_songs function
# Every song from every playlist/album/track URL will be appended to this list
params_list = []

if args.download is True:
file_name_f = default_filename
if args.keep_playlist_order:
file_name_f = playlist_num_filename
if save_path is not None:
download_songs(songs, save_path, args.format_str, args.skip_mp3, args.keep_playlist_order, args.no_overwrites, args.skip_non_music_sections, file_name_f)
for song in songs:
params = {}
params["song"] = song
params["download_directory"] = save_path
params["format_string"] = args.format_str
params["skip_mp3"] = args.skip_mp3
params["keep_playlist_order"] = args.keep_playlist_order
params["no_overwrites"] = args.no_overwrites
params["skip_non_music_sections"] = args.skip_non_music_sections
params["file_name_f"] = file_name_f
params_list.append(params)

n_cores = multiprocessing.cpu_count()
# using 1 less than the available cores to avoid overloading the system
cores_to_use = n_cores - 1 if n_cores > 1 else 1
pool = multiprocessing.Pool(processes = cores_to_use)
# map will run the function on each parameter dictionary in parallel.
pool.map(download_songs, params_list)
pool.close()


if __name__ == '__main__':
Expand Down
169 changes: 89 additions & 80 deletions spotify_dl/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@ def playlist_num_filename(song):
return f"{song.get('playlist_num')} - {default_filename(song)}"


def download_songs(songs, download_directory, format_string, skip_mp3,
keep_playlist_order=False, no_overwrites=False, skip_non_music_sections=False,
file_name_f=default_filename):
def download_songs(params):
"""
Downloads songs from the YouTube URL passed to either current directory or download_directory, if it is passed.
:param songs: Dictionary of songs and associated artist
:param song: Song object and metadata
:param download_directory: Location where to save
:param format_string: format string for the file conversion
:param skip_mp3: Whether to skip conversion to MP3
Expand All @@ -33,86 +31,97 @@ def download_songs(songs, download_directory, format_string, skip_mp3,
:param skip_non_music_sections: Whether we should skip Non-Music sections using SponsorBlock API
:param file_name_f: optional func(song) -> str that returns a filename for the download (without extension)
"""

# destructuring parameters
song = params['song']
download_directory = params['download_directory']
format_string = params['format_string']
skip_mp3 = params['skip_mp3']
keep_playlist_order = params['keep_playlist_order'] if 'keep_playlist_order' in params else False
no_overwrites = params['no_overwrites'] if 'no_overwrites' in params else False
skip_non_music_sections = params['skip_non_music_sections'] if 'skip_non_music_sections' in params else False
file_name_f = params['file_name_f'] if 'file_name_f' in params else default_filename

overwrites = not no_overwrites
log.debug(f"Downloading to {download_directory}")
for song in songs:
query = f"{song.get('artist')} - {song.get('name')} Lyrics".replace(":", "").replace("\"", "")
download_archive = path.join(download_directory, 'downloaded_songs.txt')

file_name = file_name_f(song)
file_path = path.join(download_directory, file_name)

sponsorblock_remove_list = ['music_offtopic'] if skip_non_music_sections else []

outtmpl = f"{file_path}.%(ext)s"
ydl_opts = {
'format': format_string,
'download_archive': download_archive,
'outtmpl': outtmpl,
'default_search': 'ytsearch',
'noplaylist': True,
'no_color': False,
'postprocessors': [
{
'key': 'SponsorBlock',
'categories': sponsorblock_remove_list,
},
{
'key': 'ModifyChapters',
'remove_sponsor_segments': ['music_offtopic'],
'force_keyframes': True,
}],
'postprocessor_args': ['-metadata', 'title=' + song.get('name'),
'-metadata', 'artist=' + song.get('artist'),
'-metadata', 'album=' + song.get('album')]

query = f"{song.get('artist')} - {song.get('name')} Lyrics".replace(":", "").replace("\"", "")
download_archive = path.join(download_directory, 'downloaded_songs.txt')

file_name = file_name_f(song)
file_path = path.join(download_directory, file_name)

sponsorblock_remove_list = ['music_offtopic'] if skip_non_music_sections else []

outtmpl = f"{file_path}.%(ext)s"
ydl_opts = {
'format': format_string,
'download_archive': download_archive,
'outtmpl': outtmpl,
'default_search': 'ytsearch',
'noplaylist': True,
'no_color': False,
'postprocessors': [
{
'key': 'SponsorBlock',
'categories': sponsorblock_remove_list,
},
{
'key': 'ModifyChapters',
'remove_sponsor_segments': ['music_offtopic'],
'force_keyframes': True,
}],
'postprocessor_args': ['-metadata', 'title=' + song.get('name'),
'-metadata', 'artist=' + song.get('artist'),
'-metadata', 'album=' + song.get('album')]
}
if not skip_mp3:
mp3_postprocess_opts = {
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}
if not skip_mp3:
mp3_postprocess_opts = {
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}
ydl_opts['postprocessors'].append(mp3_postprocess_opts.copy())

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl_opts['postprocessors'].append(mp3_postprocess_opts.copy())

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
try:
ydl.download([query])
except Exception as e:
log.debug(e)
print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query))
return

if not skip_mp3:
mp3filename = f"{file_path}.mp3"
mp3file_path = path.join(mp3filename)
if overwrites or not path.exists(mp3file_path):
try:
ydl.download([query])
except Exception as e:
song_file = MP3(mp3file_path, ID3=EasyID3)
except mutagen.MutagenError as e:
log.debug(e)
print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query))
continue

if not skip_mp3:
mp3filename = f"{file_path}.mp3"
mp3file_path = path.join(mp3filename)
if overwrites or not path.exists(mp3file_path):
try:
song_file = MP3(mp3file_path, ID3=EasyID3)
except mutagen.MutagenError as e:
log.debug(e)
print('Failed to download: {}, please ensure YouTubeDL is up-to-date. '.format(query))
continue
song_file['date'] = song.get('year')
if keep_playlist_order:
song_file['tracknumber'] = str(song.get('playlist_num'))
else:
song_file['tracknumber'] = str(song.get('num')) + '/' + str(song.get('num_tracks'))
song_file['genre'] = song.get('genre')
song_file.save()
song_file = MP3(mp3filename, ID3=ID3)
cover = song.get('cover')
if cover is not None:
if cover.lower().startswith('http'):
req = urllib.request.Request(cover)
else:
raise ValueError from None
with urllib.request.urlopen(req) as resp: # nosec
song_file.tags['APIC'] = APIC(
encoding=3,
mime='image/jpeg',
type=3, desc=u'Cover',
data=resp.read()
)
song_file.save()
return
song_file['date'] = song.get('year')
if keep_playlist_order:
song_file['tracknumber'] = str(song.get('playlist_num'))
else:
print('File {} already exists, we do not overwrite it '.format(mp3filename))
song_file['tracknumber'] = str(song.get('num')) + '/' + str(song.get('num_tracks'))
song_file['genre'] = song.get('genre')
song_file.save()
song_file = MP3(mp3filename, ID3=ID3)
cover = song.get('cover')
if cover is not None:
if cover.lower().startswith('http'):
req = urllib.request.Request(cover)
else:
raise ValueError from None
with urllib.request.urlopen(req) as resp: # nosec
song_file.tags['APIC'] = APIC(
encoding=3,
mime='image/jpeg',
type=3, desc=u'Cover',
data=resp.read()
)
song_file.save()
else:
print('File {} already exists, we do not overwrite it '.format(mp3filename))
26 changes: 18 additions & 8 deletions tests/test_youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ def test_download_one_false_skip():
'num': 6,
'num_tracks': 15,
'year': '1994'}]
yt.download_songs(songs, download_directory=os.path.dirname(os.path.realpath(__file__)),
format_string='best',
skip_mp3=False)
params = {}
params["song"] = songs[0]
params["download_directory"] = os.path.dirname(os.path.realpath(__file__))
params["format_string"] = 'best'
params["skip_mp3"] = False
yt.download_songs(params)
music = MP3("tests/Eagles - Hotel California - Live On MTV, 1994.mp3", ID3=EasyID3)
tags = ID3("tests/Eagles - Hotel California - Live On MTV, 1994.mp3")
assert (music['artist'][0] == 'Eagles')
Expand All @@ -42,8 +45,12 @@ def test_download_one_true_skip():
'num': 6,
'num_tracks': 15,
'year': '1994'}]
yt.download_songs(songs, download_directory='~/Downloads', format_string='best',
skip_mp3=False)
params = {}
params["song"] = songs[0]
params["download_directory"] = '~/Downloads'
params["format_string"] = 'best'
params["skip_mp3"] = False
yt.download_songs(params)

def test_download_cover_none():
songs = [
Expand All @@ -55,9 +62,12 @@ def test_download_cover_none():
'num': 7,
'num_tracks': 16,
'year': '1974'}]
yt.download_songs(songs, download_directory=os.path.dirname(os.path.realpath(__file__)),
format_string='best',
skip_mp3=False)
params = {}
params["song"] = songs[0]
params["download_directory"] = 'os.path.dirname(os.path.realpath(__file__))'
params["format_string"] = 'best'
params["skip_mp3"] = False
yt.download_songs(params)
music = MP3("tests/Queen - The Fairy Feller's Master-Stroke - Remastered 2011.mp3", ID3=EasyID3)
tags = ID3("tests/Queen - The Fairy Feller's Master-Stroke - Remastered 2011.mp3")
assert (music['artist'][0] == 'Queen')
Expand Down