-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathmusicscan-cli
executable file
·449 lines (406 loc) · 16.3 KB
/
musicscan-cli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# imports: standard
import logging
import json
import sys
import html
from collections import Counter
import os
import re
# imports: third party
from colorama import Fore, init
# imports: custom
import musicscan
# constants
# file that log output is written to (passed to logging.basicConfig in main)
LOG_FILE = 'musicscan.log'
# file holding, one per line, the directories that have already been processed
PROCESSED_DIRS_FILE = 'processed-dirs.txt'
# number of "*" characters in the separator lines printed to the console
SEPARATOR_WIDTH = 60
# a simplified directory name is only used as a search string when it contains
# at least three word characters, an optional space, then three more
SHORTEST_ALBUM_DIR_REGEX = re.compile(r'\w{3,}\ ?\w{3,}')
def determine_color(evaluation=None, counts=None):
    """
    Pick the console colour used to report a match result.

    A truthy ``evaluation`` takes precedence: "full" gives green, "partial"
    gives yellow and any other value gives red. Otherwise a truthy ``counts``
    mapping is consulted: green when it holds at least one "full" entry,
    yellow when it holds at least one "partial" entry. In every remaining
    case the colour is red.
    """
    if evaluation:
        if evaluation == "full":
            return Fore.GREEN
        if evaluation == "partial":
            return Fore.YELLOW
        return Fore.RED

    if counts:
        if counts["full"] > 0:
            return Fore.GREEN
        if counts["partial"] > 0:
            return Fore.YELLOW

    # nothing to go on, or counts without any full/partial match
    return Fore.RED
def add_processed_dir_record(path):
    """
    Append ``path`` as a new line to the processed-directories file.

    This is best effort: any failure is logged as an error and not raised.
    """
    logging.debug("adding path '%s' to processed dirs file", path)
    try:
        with open(PROCESSED_DIRS_FILE, "a") as records:
            records.write(path + "\n")
    except Exception as err:
        logging.error("could not add record of processed directory. %s", err)
def get_processed_dirs():
    """
    Return the lines of the processed-directories file as a list of strings.

    Trailing newlines are stripped from each line. If the file cannot be
    read, the problem is logged as an error and an empty list is returned.
    """
    processed = []
    try:
        with open(PROCESSED_DIRS_FILE, "r") as records:
            processed = [entry.rstrip('\n') for entry in records]
    except Exception as err:
        logging.error("could not read in processed dirs from %s. %s", PROCESSED_DIRS_FILE, err)
    return processed
def download_torrent_file(torrent_id, torrent_name, api, match_evaluation):
    """
    Ask the API object to save the .torrent file for ``torrent_id``.

    The file name is built as "<torrent_name>.<torrent_id>.torrent" and
    handed to ``api.save_torrent`` together with the id and the match
    evaluation. A falsy result from ``save_torrent`` is reported on the
    console in red; nothing is returned either way.
    """
    torrent_filename = '{0}.{1}.torrent'.format(torrent_name, torrent_id)
    print("------- Downloading torrent file: '{0}'".format(torrent_filename))

    logging.info("Saving torrent file: '%s'", torrent_filename)
    if api.save_torrent(torrent_id, torrent_filename, match_evaluation):
        return

    print("{0}-------- Saving file failed!".format(Fore.RED))
def get_group_torrents(group):
    """
    Return the list of torrents belonging to a search-result group.

    A group with a "torrents" key yields that value as is. A group that
    carries a "torrentId" key itself is treated as a single torrent and
    returned wrapped in a list. Anything else yields an empty list.
    """
    if "torrents" in group:
        return group["torrents"]
    if "torrentId" in group:
        return [group]
    return []
# search API and attempt to match
def search_and_match(api, release, music_threshold, searched_torrent_ids, max_search_results, **kwargs):
    """
    Run one API search and compare each new torrent it returns to a release.

    ``kwargs`` are forwarded to ``api.torrent_search`` as the search fields.
    Torrents whose id is already in ``searched_torrent_ids`` are skipped;
    every other torrent is fetched, matched against ``release`` and
    evaluated with ``music_threshold``. A truthy evaluation triggers a
    download of the torrent file.

    Returns a dict with:
      "evaluations": one evaluation per torrent checked in this call
                     (falsy evaluations are included)
      "torrent_ids": the ``searched_torrent_ids`` set that was passed in,
                     extended in place with the ids checked here
    """
    evaluations = []

    # query the API and report how many groups came back
    groups = api.torrent_search(max_search_results, **kwargs)
    print("-- search result groups: {0}".format(len(groups)))

    for group in groups:
        candidates = get_group_torrents(group)
        print("--- {group_artist} - {group_name} ({year}): {torrent_count} {torrent_plural}".format(
            group_artist=html.unescape(group.get("artist", "(no artist)")),
            group_name=html.unescape(group["groupName"]),
            year=group.get("groupYear", None),
            torrent_count=len(candidates),
            torrent_plural="torrent" if len(candidates) == 1 else "torrents"
        ))

        for candidate in candidates:
            torrent_id = candidate["torrentId"]

            # never check the same torrent twice for one release
            if torrent_id in searched_torrent_ids:
                print("---- skipping torrent {0}, as already checked".format(torrent_id))
                continue

            print("---- checking torrent {0}".format(torrent_id))
            details = api.torrent(torrent_id)
            torrent_filepath_unescaped = html.unescape(details.get("filePath", "missing_filePath"))
            print("----- torrent filePath: {0}".format(torrent_filepath_unescaped))

            # parse the remote file list, match it to the local files, then
            # reduce the match figures to a single evaluation
            remote_files = musicscan.filecompare.parse_torrent_files(details["fileList"])
            match = musicscan.filecompare.match_torrent_files(remote_files, release)
            verdict = musicscan.filecompare.evaluate_match(match, music_threshold)
            print("{color}----- torrent file match: by size (music) = {0}%, by size (all) = {1}%, by name {2}%".format(
                match["audio_filesize_matches_pct"],
                match["filesize_matches_pct"],
                match["filename_matches_pct"],
                color=determine_color(evaluation=verdict)
            ))
            logging.info("match_evaluation = %s", verdict)

            # a truthy evaluation (partial/full) means the torrent is worth saving
            if verdict:
                print("------ match = {0}".format(verdict))
                download_torrent_file(torrent_id, torrent_filepath_unescaped, api, verdict)

            # record the outcome and remember that this torrent was checked
            evaluations.append(verdict)
            searched_torrent_ids.add(torrent_id)

    return {
        "evaluations": evaluations,
        "torrent_ids": searched_torrent_ids
    }
def generate_searches(release):
    """
    Build the sets of search fields to try for a release, in order.

    search 1: artist, album, format
    search 2: album, year, format
    search 3: directory name, year, format
    search 4: directory name, format

    Every search also carries the release's has_log and has_cue values.
    Searches in which any field is None are dropped, so the returned list
    may hold fewer than four entries.
    """
    # simplify the last component of the release path
    folder_name = os.path.basename(os.path.normpath(release["dirpath"]))
    dirpath_simplified = musicscan.findmusiclocal.simplify_album(folder_name)
    logging.info("dirpath_simplified (simplify_album) = '%s'", dirpath_simplified)

    # NOTE(review): this runs simplify_album a second time, while the log
    # label below says "re.sub" - confirm which step was intended here
    dirpath_simplified = musicscan.findmusiclocal.simplify_album(dirpath_simplified)
    logging.info("dirpath_simplified (re.sub) = '%s'", dirpath_simplified)

    # too little text left to be a useful search string
    if SHORTEST_ALBUM_DIR_REGEX.search(dirpath_simplified) is None:
        dirpath_simplified = None

    # pull out the fields used by the searches
    tags = release["tags"]
    artist = tags["artist"]
    album = tags["album"]
    audio_format = tags["audio_format"]
    has_log = release["has_log"]
    has_cue = release["has_cue"]
    year = tags["year"]

    def build(**leading):
        # leading fields first, then the fields shared by every search
        leading.update(format=audio_format, haslog=has_log, hascue=has_cue)
        return leading

    searches = [
        build(artistname=artist, groupname=album),
        build(groupname=album, year=year),
        build(searchstr=dirpath_simplified, year=year),
        build(searchstr=dirpath_simplified),
    ]
    logging.debug("generated searches (before filtering):\n%s", json.dumps(searches, indent=2))

    # keep only the searches whose fields are all populated
    searches = [candidate for candidate in searches if None not in candidate.values()]
    logging.debug("generated searches (after filtering):\n%s", json.dumps(searches, indent=2))

    return searches
def process_release(release, api, music_threshold, processed_dirs, max_search_results):
    """
    Search the API for one local release and report the matches found.

    A release whose "dirpath" is already in ``processed_dirs`` is skipped
    and nothing is recorded for it again. Otherwise the release is tagged
    (stored under release["tags"]), each generated search is run in turn
    until one produces a "full" match or the searches are exhausted, and
    the directory is then appended to the processed-directories file.

    release            -- dict with at least "dirpath" and "audio_files"
    api                -- API object handed on to search_and_match
    music_threshold    -- threshold handed on to search_and_match
    processed_dirs     -- container of already processed directory paths
    max_search_results -- result limit handed on to search_and_match
    """
    # evaluations and checked torrent ids accumulated over all searches
    release_match_evaluations = []
    release_searched_ids = set()

    print("*" * SEPARATOR_WIDTH)
    print("Processing '{0}'...".format(release["dirpath"]))

    # already handled in an earlier run: skip without re-recording the path
    if release["dirpath"] in processed_dirs:
        logging.info("%s found in processed_dirs. skipping", release["dirpath"])
        print("- skipping as has been previously processed")
        return

    # attempt a basic identification of the release, add as tags
    release["tags"] = musicscan.findmusiclocal.get_release_basics(release["audio_files"])
    populated_tags = [
        "{0}: {1}".format(k, v) for k, v in release["tags"].items() if v is not None
    ]
    print("- essential tags = {0}".format(", ".join(populated_tags)))

    for search_fields in generate_searches(release):
        logging.info("searching API for: %s", search_fields)
        print("- searching API for: {0}".format(
            ", ".join(["{0}={1}".format(k, v) for k, v in search_fields.items()])
        ))
        search_matches = search_and_match(api, release, music_threshold,
                                          release_searched_ids, max_search_results, **search_fields)

        # show evaluation counts for this search
        counts = Counter(search_matches["evaluations"])
        print("{color}---- match summary. full: {0}, partial: {1}, no match: {2}".format(
            counts["full"], counts["partial"], counts[None], color=determine_color(counts=counts)
        ))
        release_match_evaluations.extend(search_matches["evaluations"])

        # a full match makes the remaining, broader searches unnecessary
        if "full" in release_match_evaluations:
            break

        # remember checked ids so later searches do not repeat them
        release_searched_ids.update(search_matches["torrent_ids"])

    logging.info("end of processing for this release")
    add_processed_dir_record(release["dirpath"])
def _build_arg_parser():
    """Create the argparse parser describing the command-line interface."""
    import argparse

    class CustomHelpFormatter(argparse.HelpFormatter):
        # help formatter that prints "-x, --long METAVAR" with the metavar once
        def __init__(self, prog):
            super().__init__(prog, max_help_position=40, width=80)

        def _format_action_invocation(self, action):
            # positionals and flags without a value keep the stock layout
            if not action.option_strings or action.nargs == 0:
                return super()._format_action_invocation(action)
            default = self._get_default_metavar_for_optional(action)
            args_string = self._format_args(action, default)
            return ', '.join(action.option_strings) + ' ' + args_string

    parser = argparse.ArgumentParser(
        description='Scan directories for music folders and compare to WhatAPI',
        formatter_class=CustomHelpFormatter
    )
    # input path
    parser.add_argument(
        "inputpath",
        help="path to scan"
    )
    # directory threshold
    parser.add_argument(
        '-d', '--dirsmax',
        metavar='COUNT',
        type=int,
        default=12,
        help='maximum number of subdirectories per directory (default: %(default)s)'
    )
    # file threshold
    parser.add_argument(
        '-f', '--filesmax',
        metavar='COUNT',
        type=int,
        default=50,
        help='maximum number of files per directory (default: %(default)s)'
    )
    # log level
    parser.add_argument(
        '-l', '--loglevel',
        metavar='LOGLEVEL',
        default='info',
        choices=['debug', 'info', 'warning', 'error', 'critical'],
        help='loglevel for log file (default: %(default)s)'
    )
    # max search results threshold
    parser.add_argument(
        '-m', '--maxsearch',
        metavar='COUNT',
        type=int,
        default=100,
        help='maximum number of search results (default: %(default)s)'
    )
    # output directory for downloaded torrent files
    parser.add_argument(
        '-o', '--output',
        metavar='PATH',
        default='.',
        help='output path for .torrent files (default: %(default)s)'
    )
    # reset the record of processed directories
    parser.add_argument(
        '-r', '--reset',
        action='store_true',
        default=False,
        help='scan all rather than skipping previously scanned'
    )
    # server URL
    parser.add_argument(
        '-s', '--server',
        metavar='URL',
        default='https://redacted.ch',
        help='server URL (default: %(default)s)'
    )
    # music file matching threshold
    parser.add_argument(
        '-t', '--threshold',
        metavar='PCT',
        type=int,
        default=80,
        help='matching local music threshold (default: %(default)s)'
    )
    # config file (for login details)
    group_config = parser.add_argument_group("config file")
    group_config.add_argument(
        '-c', '--config',
        metavar='FILE',
        default='server.conf',
        help='config file with login details (default: %(default)s)'
    )
    # login
    group_login = parser.add_argument_group("login details")
    # account password
    group_login.add_argument(
        '-p', '--password',
        metavar='PASSWORD',
        help='your password'
    )
    # account username
    group_login.add_argument(
        '-u', '--username',
        metavar='USERNAME',
        help='your username'
    )
    return parser


def _configure_logging(loglevel):
    """Send log output at the named level to LOG_FILE, replacing its contents."""
    numeric_level = getattr(logging, loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % loglevel)
    logging.basicConfig(
        format='%(asctime)s %(levelname)s:%(message)s',
        filename=LOG_FILE,
        filemode='w',
        level=numeric_level
    )


def main():
    """
    Command-line entry point.

    Parses the arguments, sets up logging, connects to the API, then
    processes every release found under the input path and prints how many
    torrent files were saved. Exits with status 1 when the API connection
    or the torrent save path cannot be set up.
    """
    # initialise colorama
    init(autoreset=True)

    args = _build_arg_parser().parse_args()
    username = args.username
    password = args.password

    _configure_logging(args.loglevel)

    # reset processed dirs file if requested
    if args.reset:
        # opening in write mode truncates the file
        with open(PROCESSED_DIRS_FILE, 'w'):
            pass

    # only membership is tested against the processed dirs, so hold them in a set
    processed_dirs = set(get_processed_dirs())

    # load cookies
    cookies = musicscan.cookies.get_cookies()

    # without a full username/password pair, read the login from the config file
    # temporary hack: parse config file (whatapi config parser broken for python3)
    if not (username and password):
        username, password = musicscan.api.get_username_and_password(args.config)

    # connect to API
    print("Getting connection to API...")
    try:
        api = musicscan.api.WhatAPIExtended(
            # the config file is never handed to the API object itself
            config_file=None,
            username=username,
            password=password,
            cookies=cookies,
            server=args.server
        )
    except Exception as e:
        print("API connection failed")
        logging.error("API connection failed. %s", e)
        # bail
        sys.exit(1)
    else:
        print("API connection established")
        logging.info("API connection success")

    # set torrent file save path
    print("Setting torrent save path...")
    if api.set_torrent_file_save_path(args.output):
        print("OK")
    else:
        print("FAILED")
        sys.exit(1)

    # find local releases (directories with music files)
    for release in musicscan.findmusiclocal.find_releases(args.inputpath, args.filesmax, args.dirsmax):
        if release:
            logging.info("release:\n%s", json.dumps(release, indent=2))
            process_release(release, api, args.threshold, processed_dirs, args.maxsearch)

    # show summary
    print("*" * SEPARATOR_WIDTH)
    print("Summary:\n")
    print("torrent files saved: {0}".format(api.torrent_files_written))
# run the command-line interface only when this file is executed as a script
if __name__ == "__main__":
    main()