# musicscan-cli

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# imports: standard
import logging
import json
import sys
import html
from collections import Counter
import os
import re

# imports: third party
from colorama import Fore, init

# imports: custom
import musicscan


# constants
# file that logging.basicConfig() in main() writes the log to
LOG_FILE = 'musicscan.log'
# text file holding one already-processed directory path per line
PROCESSED_DIRS_FILE = 'processed-dirs.txt'
# number of '*' characters printed as a console separator line
SEPARATOR_WIDTH = 60
# a simplified directory name is only used as a search string if it contains
# two runs of 3+ word characters, optionally separated by a single space
SHORTEST_ALBUM_DIR_REGEX = re.compile(r'\w{3,}\ ?\w{3,}')


def determine_color(evaluation=None, counts=None):
    """Pick the console colour for a match result.

    A truthy ``evaluation`` takes precedence: "full" gives green, "partial"
    gives yellow and anything else gives red. Otherwise, a truthy ``counts``
    mapping is consulted: green if it holds at least one "full", yellow if it
    holds at least one "partial". Red is the fallback in every other case.
    """
    if evaluation:
        if evaluation == "full":
            return Fore.GREEN
        if evaluation == "partial":
            return Fore.YELLOW
        return Fore.RED
    if counts:
        # best result wins: check "full" before "partial"
        for level, level_color in (("full", Fore.GREEN), ("partial", Fore.YELLOW)):
            if counts[level] > 0:
                return level_color
    return Fore.RED


def add_processed_dir_record(path):
    """Append ``path`` as a new line to PROCESSED_DIRS_FILE.

    Best effort: any failure is written to the log as an error and is not
    propagated to the caller.
    """
    logging.debug("adding path '%s' to processed dirs file", path)
    try:
        with open(PROCESSED_DIRS_FILE, "a") as records:
            record_line = path + "\n"
            records.write(record_line)
    except Exception as err:
        logging.error("could not add record of processed directory. %s", err)


def get_processed_dirs():
    """Read the directory paths recorded in PROCESSED_DIRS_FILE.

    Returns:
        A list with one entry per line of the file, each with its trailing
        newline removed. An empty list is returned when the file does not
        exist or cannot be read.
    """
    try:
        with open(PROCESSED_DIRS_FILE, "r") as f:
            # iterate the file object directly instead of materialising
            # an intermediate list with readlines()
            return [line.rstrip('\n') for line in f]
    except FileNotFoundError:
        # a missing file only means nothing has been processed yet
        # (e.g. the very first run), so it is not reported as an error
        logging.info("no processed dirs file found at %s. starting with an empty list",
                PROCESSED_DIRS_FILE)
    except Exception as e:
        logging.error("could not read in processed dirs from %s. %s", PROCESSED_DIRS_FILE, e)
    return []


def download_torrent_file(torrent_id, torrent_name, api, match_evaluation):
    """Save a torrent file through ``api`` and report the outcome on the console.

    The file name is built as "<torrent_name>.<torrent_id>.torrent" and passed,
    together with ``torrent_id`` and ``match_evaluation``, to
    ``api.save_torrent``. A falsy result from that call prints a failure
    message in red. Nothing is returned.
    """
    torrent_filename = '{0}.{1}.torrent'.format(torrent_name, torrent_id)
    print("------- Downloading torrent file: '{0}'".format(torrent_filename))
    logging.info("Saving torrent file: '%s'", torrent_filename)
    if api.save_torrent(torrent_id, torrent_filename, match_evaluation):
        # saved fine, nothing more to report
        return
    print("{0}-------- Saving file failed!".format(Fore.RED))


def get_group_torrents(group):
    """Return the torrents belonging to a search-result group as a list.

    A group with a "torrents" key yields that value unchanged. A group that
    instead carries a "torrentId" key is treated as a single torrent and is
    returned wrapped in a one-element list. Anything else yields an empty list.
    """
    if "torrents" in group:
        return group["torrents"]
    if "torrentId" in group:
        # the group is itself a single torrent entry
        return [group]
    return []


def search_and_match(api, release, music_threshold, searched_torrent_ids, max_search_results, **kwargs):
    """Search the API and attempt to match each found torrent to a local release.

    ``kwargs`` are forwarded to ``api.torrent_search`` as the search fields.
    Every torrent of every returned group whose ID is not yet in
    ``searched_torrent_ids`` is fetched, compared against ``release`` and
    evaluated; a truthy evaluation triggers a download of the torrent file.

    Returns a dict with:
        "evaluations": the evaluation of each torrent checked by this call
        "torrent_ids": ``searched_torrent_ids`` itself, which is updated in
                       place with the IDs checked by this call
    """
    evaluations = []
    groups = api.torrent_search(max_search_results, **kwargs)
    print("-- search result groups: {0}".format(len(groups)))
    for group in groups:
        torrents = get_group_torrents(group)
        artist = html.unescape(group.get("artist", "(no artist)"))
        name = html.unescape(group["groupName"])
        plural = "torrent" if len(torrents) == 1 else "torrents"
        print("--- {group_artist} - {group_name} ({year}): {torrent_count} {torrent_plural}".format(
                group_artist = artist,
                group_name = name,
                year = group.get("groupYear", None),
                torrent_count = len(torrents),
                torrent_plural = plural
        ))
        for torrent in torrents:
            torrent_id = torrent["torrentId"]
            # never check the same torrent twice for one release
            if torrent_id in searched_torrent_ids:
                print("---- skipping torrent {0}, as already checked".format(torrent_id))
                continue
            print("---- checking torrent {0}".format(torrent_id))
            details = api.torrent(torrent_id)
            file_path = html.unescape(details.get("filePath", "missing_filePath"))
            print("----- torrent filePath: {0}".format(file_path))
            # parse the torrent's file list, match it to the local files, evaluate
            remote_files = musicscan.filecompare.parse_torrent_files(details["fileList"])
            match = musicscan.filecompare.match_torrent_files(remote_files, release)
            evaluation = musicscan.filecompare.evaluate_match(match, music_threshold)
            print("{color}----- torrent file match: by size (music) = {0}%, by size (all) = {1}%, by name {2}%".format(
                    match["audio_filesize_matches_pct"],
                    match["filesize_matches_pct"],
                    match["filename_matches_pct"],
                    color = determine_color(evaluation=evaluation)
            ))
            logging.info("match_evaluation = %s", evaluation)
            # any truthy evaluation means the torrent file gets downloaded
            if evaluation:
                print("------ match = {0}".format(evaluation))
                download_torrent_file(torrent_id, file_path, api, evaluation)
            evaluations.append(evaluation)
            searched_torrent_ids.add(torrent_id)

    return {
        "evaluations": evaluations,
        "torrent_ids": searched_torrent_ids
    }


def generate_searches(release):
    """
    Build the list of API searches to try for a release, most specific first.

    search 1: artist, album, format
    search 2: album, year, format
    search 3: directory name, year, format
    search 4: directory name, format

    Every search also carries the release's haslog/hascue flags. The
    directory name is simplified first and is dropped (set to None) when too
    little text is left to search for. Searches containing any None value are
    removed from the returned list.
    """
    # simplify the last component of the release path
    # (presumably strips disc markers and some symbols - simplify_album is defined elsewhere)
    dirpath_simplified = musicscan.findmusiclocal.simplify_album(
            os.path.basename(os.path.normpath(release["dirpath"]))
    )
    logging.info("dirpath_simplified (simplify_album) = '%s'", dirpath_simplified)
    # replace non alpha-numeric characters with spaces
    # (this step used to call simplify_album a second time, so the
    # substitution described here and in the log message never happened)
    dirpath_simplified = re.sub(r'[\W_]+', ' ', dirpath_simplified).strip()
    logging.info("dirpath_simplified (re.sub) = '%s'", dirpath_simplified)
    # check if string contains at least some characters
    if not SHORTEST_ALBUM_DIR_REGEX.search(dirpath_simplified):
        dirpath_simplified = None
    # build searches
    searches = [
            dict(
                    artistname=release["tags"]["artist"],
                    groupname=release["tags"]["album"],
                    format=release["tags"]["audio_format"],
                    haslog=release["has_log"],
                    hascue=release["has_cue"]
            ),
            dict(
                    groupname=release["tags"]["album"],
                    year=release["tags"]["year"],
                    format=release["tags"]["audio_format"],
                    haslog=release["has_log"],
                    hascue=release["has_cue"]
            ),
            dict(
                    searchstr=dirpath_simplified,
                    year=release["tags"]["year"],
                    format=release["tags"]["audio_format"],
                    haslog=release["has_log"],
                    hascue=release["has_cue"]
            ),
            dict(
                    searchstr=dirpath_simplified,
                    format=release["tags"]["audio_format"],
                    haslog=release["has_log"],
                    hascue=release["has_cue"]
            )
    ]
    logging.debug("generated searches (before filtering):\n%s", json.dumps(searches, indent=2))
    # remove searches with None values (a missing tag or an unusable directory name)
    searches = [ s for s in searches if None not in s.values() ]
    logging.debug("generated searches (after filtering):\n%s", json.dumps(searches, indent=2))
    # return
    return searches


def process_release(release, api, music_threshold, processed_dirs, max_search_results):
    """Search the API for one local release and record it as processed.

    Args:
        release: dict describing the local release; "dirpath" and
            "audio_files" are read here and "tags" is written.
        api: API object handed on to search_and_match().
        music_threshold: matching threshold handed on to search_and_match().
        processed_dirs: collection of directory paths handled on earlier
            runs; a release whose "dirpath" is in it is skipped.
        max_search_results: result limit handed on to search_and_match().

    The searches from generate_searches() are run in order and stop as soon
    as any torrent evaluates to "full". The directory is then appended to the
    processed-directories file. Nothing is returned.
    """
    # match evaluations and searched torrent IDs collected for this release
    release_match_evaluations = []
    release_searched_ids = set()
    # start console output
    print("*" * SEPARATOR_WIDTH)
    print("Processing '{0}'...".format(release["dirpath"]))
    # skip directories that have previously been processed
    if release["dirpath"] in processed_dirs:
        logging.info("%s found in processed_dirs. skipping", release["dirpath"])
        print("- skipping as has been previously processed")
        return

    # attempt a basic identification of the release, add as tags
    release["tags"] = musicscan.findmusiclocal.get_release_basics(release["audio_files"])
    populated_tags = [
            "{0}: {1}".format(k, v) for k, v in release["tags"].items() if v is not None
    ]
    print("- essential tags = {0}".format(", ".join(populated_tags)))

    # search and match
    for search_fields in generate_searches(release):
        logging.info("searching API for: %s", search_fields)
        print("- searching API for: {0}".format(
                ", ".join("{0}={1}".format(k, v) for k, v in search_fields.items())
        ))
        search_matches = search_and_match(api, release, music_threshold,
                release_searched_ids, max_search_results, **search_fields)
        # show evaluation counts
        counts = Counter(search_matches["evaluations"])
        print("{color}---- match summary. full: {0}, partial: {1}, no match: {2}".format(
                counts["full"], counts["partial"], counts[None], color=determine_color(counts=counts)
        ))
        release_match_evaluations.extend(search_matches["evaluations"])
        # a full match makes the remaining, less specific searches unnecessary
        if "full" in release_match_evaluations:
            break
        # add searched IDs to set (to avoid repeating checking the same torrents)
        release_searched_ids.update(search_matches["torrent_ids"])

    # end of processing of this release
    logging.info("end of processing for this release")
    add_processed_dir_record(release["dirpath"])


def main():
    """Command-line entry point.

    Parses the arguments, sets up logging, loads the record of processed
    directories, connects to the API, then processes every release found
    under the input path and prints a summary.
    """
    # argparse is only needed by this function
    import argparse

    # initialise colorama
    init(autoreset=True)

    class CustomHelpFormatter(argparse.HelpFormatter):
        """Help formatter that lists an option's strings once, followed by a single metavar."""

        def __init__(self, prog):
            super().__init__(prog, max_help_position=40, width=80)

        def _format_action_invocation(self, action):
            takes_value = action.option_strings and action.nargs != 0
            if not takes_value:
                # positionals and flags keep the stock formatting
                return super()._format_action_invocation(action)
            metavar = self._get_default_metavar_for_optional(action)
            return ', '.join(action.option_strings) + ' ' + self._format_args(action, metavar)

    # argparse parser, using the custom help format
    parser = argparse.ArgumentParser(
            description='Scan directories for music folders and compare to WhatAPI',
            formatter_class=CustomHelpFormatter
    )
    # input path
    parser.add_argument("inputpath", help="path to scan")
    # directory threshold
    parser.add_argument('-d', '--dirsmax', metavar='COUNT', type=int, default=12,
            help='maximum number of subdirectories per directory (default: %(default)s)')
    # file threshold
    parser.add_argument('-f', '--filesmax', metavar='COUNT', type=int, default=50,
            help='maximum number of files per directory (default: %(default)s)')
    # log level
    parser.add_argument('-l', '--loglevel', metavar='LOGLEVEL', default='info',
            choices=['debug', 'info', 'warning', 'error', 'critical'],
            help='loglevel for log file (default: %(default)s)')
    # max search results threshold
    parser.add_argument('-m', '--maxsearch', metavar='COUNT', type=int, default=100,
            help='maximum number of search results (default: %(default)s)')
    # output directory for downloaded torrent files
    parser.add_argument('-o', '--output', metavar='PATH', default='.',
            help='output path for .torrent files (default: %(default)s)')
    # reset the record of processed directories
    parser.add_argument('-r', '--reset', action='store_true', default=False,
            help='scan all rather than skipping previously scanned')
    # server URL
    parser.add_argument('-s', '--server', metavar='URL', default='https://redacted.ch',
            help='server URL (default: %(default)s)')
    # music file matching threshold
    parser.add_argument('-t', '--threshold', metavar='PCT', type=int, default=80,
            help='matching local music threshold (default: %(default)s)')
    # config file (for login details)
    group_config = parser.add_argument_group("config file")
    group_config.add_argument('-c', '--config', metavar='FILE', default='server.conf',
            help='config file with login details (default: %(default)s)')
    # login details: account password and username
    group_login = parser.add_argument_group("login details")
    group_login.add_argument('-p', '--password', metavar='PASSWORD', help='your password')
    group_login.add_argument('-u', '--username', metavar='USERNAME', help='your username')
    # parse arguments
    args = parser.parse_args()

    # set up logging
    numeric_level = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.loglevel)
    logging.basicConfig(
            format='%(asctime)s %(levelname)s:%(message)s',
            filename=LOG_FILE,
            filemode='w',
            level=numeric_level
    )

    # reset processed dirs file if requested
    if args.reset:
        # opening in non-appended write mode clears the contents
        with open(PROCESSED_DIRS_FILE, 'w'):
            pass
    # load processed dirs
    processed_dirs = get_processed_dirs()

    # load cookies
    cookies = musicscan.cookies.get_cookies()
    # use the credentials from the command line only when both were given
    username, password = args.username, args.password
    if not (username and password):
        # temporary hack: parse config file (whatapi config parser broken for python3)
        username, password = musicscan.api.get_username_and_password(args.config)

    # connect to API (the config file is never handed to the API itself)
    print("Getting connection to API...")
    try:
        api = musicscan.api.WhatAPIExtended(
                config_file=None,
                username=username,
                password=password,
                cookies=cookies,
                server=args.server
        )
    except Exception as e:
        print("API connection failed")
        logging.error("API connection failed. %s", e)
        # bail
        sys.exit(1)
    print("API connection established")
    logging.info("API connection success")

    # set torrent file save path
    print("Setting torrent save path...")
    if not api.set_torrent_file_save_path(args.output):
        print("FAILED")
        sys.exit(1)
    print("OK")

    # find local releases (directories with music files)
    for release in musicscan.findmusiclocal.find_releases(args.inputpath, args.filesmax, args.dirsmax):
        if not release:
            continue
        logging.info("release:\n%s", json.dumps(release, indent=2))
        process_release(release, api, args.threshold, processed_dirs, args.maxsearch)

    # show summary
    print("*" * SEPARATOR_WIDTH)
    print("Summary:\n")
    print("torrent files saved: {0}".format(api.torrent_files_written))


# run the command-line interface only when executed as a script, not when imported
if __name__ == "__main__":
    main()