whipper-team · ubitux · Mar 5, 2017 · Mar 5, 2017
diff --git a/.travis.yml b/.travis.yml
@@ -10,7 +10,7 @@ install:
     # Dependencies
     - sudo apt-get -qq update
     - sudo pip install --upgrade -qq pip
-    - sudo apt-get -qq install cdparanoia cdrdao flac libcdio-dev libiso9660-dev libsndfile1-dev python-cddb python-gobject python-musicbrainzngs python-mutagen python-setuptools sox swig
+    - sudo apt-get -qq install cdparanoia cdrdao flac libcdio-dev libiso9660-dev libsndfile1-dev python-gobject python-musicbrainzngs python-mutagen python-setuptools sox swig
     - sudo pip install pycdio requests
 
     # Testing dependencies

diff --git a/README.md b/README.md
@@ -72,7 +72,6 @@ Whipper relies on the following packages in order to run correctly and provide a
 - [python-musicbrainzngs](https://github.com/alastair/python-musicbrainzngs), for metadata lookup
 - [python-mutagen](https://pypi.python.org/pypi/mutagen), for tagging support
 - [python-setuptools](https://pypi.python.org/pypi/setuptools), for installation, plugins support
-- [python-cddb](http://cddb-py.sourceforge.net/), for showing but not using metadata if disc not available in the MusicBrainz DB
 - [pycdio](https://pypi.python.org/pypi/pycdio/) (to avoid bugs please use `pycdio` **0.20** & `libcdio` >= **0.90** or, with previous `libcdio` versions, `pycdio` **0.17**), for drive identification
   - Required for drive offset and caching behavior to be stored in the configuration file
 - [requests](https://pypi.python.org/pypi/requests) for retrieving AccurateRip database entries

diff --git a/whipper/command/cd.py b/whipper/command/cd.py
@@ -27,7 +27,7 @@
 import gobject
 from whipper.command.basecommand import BaseCommand
 from whipper.common import (
-    accurip, config, drive, program, task
+    accurip, config, drive, program, task, simplefreedb
 )
 from whipper.program import cdrdao, cdparanoia, utils
 from whipper.result import result
@@ -126,10 +126,27 @@ def do(self):
         if not self.program.metadata:
             # fall back to FreeDB for lookup
             cddbid = self.ittoc.getCDDBValues()
-            cddbmd = self.program.getCDDB(cddbid)
-            if cddbmd:
+
+            sfdb = simplefreedb.SimpleFreeDB()
+            discid, ntrks = cddbid[0], cddbid[1]
+            offsets, nsecs = cddbid[2:-1], cddbid[-1]
+            matches = sfdb.query(discid, ntrks, offsets, nsecs)
+            if matches:
+                match = matches[0]  # TODO: honor --prompt
+                cddbmd = match['artist_title']
                 sys.stdout.write('FreeDB identifies disc as %s\n' % cddbmd)
 
+                if self.options.allow_freedb:
+                    logger.warning('Using FreeDB metadata, which is not '
+                                   'recommended due to its very low quality '
+                                   'standards.')
+                    logger.warning('You are strongly encouraged to submit CD '
+                                   'information to the MusicBrainz database.')
+                    fdb_data = sfdb.read(match['category'], match['discid'])
+                    self.program.metadata = \
+                        self.program.craftMusicBrainzFromFreeDB(fdb_data)
+
+        if not self.program.metadata:
             # also used by rip cd info
             if not getattr(self.options, 'unknown', False):
                 logger.critical("unable to retrieve disc metadata, "
@@ -282,6 +299,11 @@ def add_arguments(self):
                                  action="store_true", dest="unknown",
                                  help="whether to continue ripping if "
                                  "the CD is unknown", default=False)
+        self.parser.add_argument('--allow-freedb',
+                                 action="store_true", dest="allow_freedb",
+                                 help="whether to allow relying on low "
+                                 "quality CDDB/FreeDB metadata if nothing "
+                                 "else is available", default=False)
         self.parser.add_argument('--cdr',
                                  action="store_true", dest="cdr",
                                  help="whether to continue ripping if "

diff --git a/whipper/common/program.py b/whipper/common/program.py
@@ -248,28 +248,40 @@ def getPath(self, outdir, template, mbdiscid, metadata, track_number=None):
         template = re.sub(r'%(\w)', r'%(\1)s', template)
         return os.path.join(outdir, template % v)
 
-    def getCDDB(self, cddbdiscid):
+    def craftMusicBrainzFromFreeDB(self, freedb_data):
         """
-        @param cddbdiscid: list of id, tracks, offsets, seconds
-
-        @rtype: str
+        Create MusicBrainz-style metadata from a FreeDB entry.
+
+        Every track is credited to the disc artist.
+
+        @param freedb_data: a parsed FreeDB entry holding the optional keys
+                            C{artist}, C{title}, C{year} and C{tracks}, or
+                            None when no entry could be read
+        @type  freedb_data: dict or None
+
+        @return: the crafted metadata, or None if freedb_data is None
+        @rtype:  L{mbngs.DiscMetadata} or None
         """
-        # FIXME: convert to nonblocking?
-        import CDDB
-        try:
-            code, md = CDDB.query(cddbdiscid)
-            logger.debug('CDDB query result: %r, %r', code, md)
-            if code == 200:
-                return md['title']
-
-        except IOError, e:
-            # FIXME: for some reason errno is a str ?
-            if e.errno == 'socket error':
-                self._stdout.write("Warning: network error: %r\n" % (e, ))
-            else:
-                raise
+        if freedb_data is None:
+            # SimpleFreeDB.read() returns None for a non-210 response;
+            # report "no metadata" instead of crashing on .get() below.
+            return None
+
+        disc = mbngs.DiscMetadata()
 
-        return None
+        disc.artist = disc.sortName = freedb_data.get('artist')
+        disc.release = str(freedb_data.get('year', '0000'))
+        disc.title = disc.releaseTitle = freedb_data.get('title')
+
+        tracks = []
+        for track_title in freedb_data.get('tracks', []):
+            track = mbngs.TrackMetadata()
+            track.artist = track.sortName = disc.artist
+            track.title = track_title
+            tracks.append(track)
+        disc.tracks = tracks
+
+        return disc
 
     def getMusicBrainz(self, ittoc, mbdiscid, release=None, country=None,
                        prompt=False):
@@ -455,7 +467,9 @@ def getTagList(self, number):
             if self.metadata.release is not None:
                 tags['DATE'] = self.metadata.release
 
-            if number > 0:
+            has_mbinfo = (mbidTrack and mbidTrackArtist and mbidAlbum
+                          and mbidTrackAlbum and mbDiscId)
+            if number > 0 and has_mbinfo:
                 tags['MUSICBRAINZ_TRACKID'] = mbidTrack
                 tags['MUSICBRAINZ_ARTISTID'] = mbidTrackArtist
                 tags['MUSICBRAINZ_ALBUMID'] = mbidAlbum

diff --git a/whipper/common/simplefreedb.py b/whipper/common/simplefreedb.py
@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+
+# Copyright (C) 2017 Clément Bœsch <u@pkh.me>
+
+# This file is part of whipper.
+#
+# whipper is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# whipper is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with whipper.  If not, see <http://www.gnu.org/licenses/>.
+
+import re
+import socket
+import getpass
+import urllib
+import whipper
+
+
+class SimpleFreeDB(object):
+    """
+    Minimal FreeDB client issuing CDDB commands over HTTP.
+
+    Only the two commands needed to look a disc up are implemented:
+    C{cddb query} (L{query}) and C{cddb read} (L{read}).
+    """
+
+    URL = 'http://freedb.freedb.org/~cddb/cddb.cgi'
+    PROTO = 6
+
+    def __init__(self):
+        # The hello string is sent with every request, so build and quote
+        # it only once.
+        user = getpass.getuser()
+        host = socket.gethostname()
+        client = 'whipper'
+        hello = '%s %s %s %s' % (user, host, client, whipper.__version__)
+        self._hello = urllib.quote_plus(hello)
+        # Matches a slash that is not preceded by a backslash.
+        self._slash_split_regex = re.compile(r'(?<!\\)/')
+
+    def _cddb_cmd(self, cmd):
+        """
+        Send a C{cddb} command to the server and return its response.
+
+        @param cmd: the command and its arguments, without the leading
+                    C{cddb} keyword
+        @type  cmd: str
+
+        @return: the response body decoded as UTF-8
+        @rtype:  unicode
+        """
+        cmd_arg = urllib.quote_plus('cddb ' + cmd)
+        data_url = (self.URL, cmd_arg, self._hello, self.PROTO)
+        url = '%s?cmd=%s&hello=%s&proto=%d' % data_url
+        req = urllib.urlopen(url)
+        try:
+            return req.read().decode('utf-8')
+        finally:
+            # Release the connection even if reading or decoding fails.
+            req.close()
+
+    @staticmethod
+    def _get_code(line):
+        """
+        Extract the numeric response code from a status line.
+
+        @return: the code, or None if the line does not start with an
+                 integer
+        @rtype:  int or None
+        """
+        fields = line.split(None, 1)
+        try:
+            return int(fields[0])
+        except (IndexError, ValueError):
+            return None
+
+    def _split_dtitle(self, dtitle):
+        """
+        Split a DTITLE value into artist and disc title.
+
+        The value is split on its first unescaped slash. When there is no
+        such slash the whole value is returned as both artist and title,
+        which is how the FreeDB database format defines a DTITLE without
+        separator.
+
+        @rtype: tuple of (artist, title)
+        """
+        # Note: we can not use a simple dtitle.split('/') here because the
+        # slash could be escaped.
+        parts = self._slash_split_regex.split(dtitle, maxsplit=1)
+        if len(parts) == 1:
+            whole = dtitle.strip()
+            return whole, whole
+        artist, title = parts
+        return artist.rstrip(), title.lstrip()
+
+    def _craft_match(self, category, discid_str, dtitle):
+        """
+        Build a match dictionary from the fields of a query result line.
+
+        @param discid_str: the disc id as a hexadecimal string
+
+        @rtype: dict
+        """
+        artist, title = self._split_dtitle(dtitle)
+        return {'category': category,
+                'discid': int(discid_str, 16),
+                'artist_title': dtitle,
+                'artist': artist,
+                'title': title}
+
+    def query(self, discid, ntrks, offsets, nsecs):
+        """
+        Look up the FreeDB entries matching a disc.
+
+        @param discid:  the CDDB disc id
+        @type  discid:  int
+        @param ntrks:   the number of tracks
+        @type  ntrks:   int
+        @param offsets: the track offsets
+        @type  offsets: iterable of int
+        @param nsecs:   the number of seconds
+        @type  nsecs:   int
+
+        @return: one dictionary per match with the keys C{category},
+                 C{discid}, C{artist_title}, C{artist} and C{title};
+                 empty if nothing matched or the response was empty
+        @rtype:  list of dict
+        """
+        data_q = (discid, ntrks, ' '.join(str(x) for x in offsets), nsecs)
+        cmd = 'query %08x %d %s %d' % data_q
+        lines = self._cddb_cmd(cmd).splitlines()
+        if not lines:
+            return []
+        code = self._get_code(lines[0])
+        matches = []
+        if code == 200:
+            # Single match, carried by the status line itself.
+            line = lines[0]
+            matches.append(self._craft_match(*line.split(None, 3)[1:]))
+        elif code in (210, 211):
+            # List of matches, one per line, terminated by a lone dot.
+            for line in lines[1:]:
+                if line == '.':
+                    break
+                matches.append(self._craft_match(*line.split(None, 2)))
+        return matches
+
+    def read(self, category, discid):
+        """
+        Fetch and parse a FreeDB entry.
+
+        @param category: the category of the entry, as returned by L{query}
+        @type  category: str
+        @param discid:   the disc id of the entry, as returned by L{query}
+        @type  discid:   int
+
+        @return: a dictionary with the keys C{artist_title}, C{artist},
+                 C{title} and C{tracks} (track titles in track order),
+                 plus C{year} and C{genre} when the entry provides them;
+                 None if the server did not answer with code 210
+        @rtype:  dict or None
+
+        @raise ValueError: if the entry has no DTITLE
+        """
+        cmd = 'read %s %08x' % (category, discid)
+        lines = self._cddb_cmd(cmd).splitlines()
+        if not lines or self._get_code(lines[0]) != 210:
+            return None
+
+        entry = {}
+        dtitle = ''
+        tracks = {}
+
+        for line in lines[1:]:
+            if line == '.':
+                break
+            if not line or line.startswith('#'):
+                continue
+            key, sep, value = line.partition('=')
+            if not sep or not value:
+                # Not a KEY=value line, or nothing to store.
+                continue
+            if key == 'DTITLE':
+                # A value may be split over several lines with the same key.
+                dtitle += value
+            elif key == 'DYEAR':
+                try:
+                    entry['year'] = int(value)
+                except ValueError:
+                    # Not a usable year: leave it out of the entry.
+                    pass
+            elif key == 'DGENRE':
+                entry['genre'] = value
+            elif key.startswith('TTITLE'):
+                try:
+                    n = int(key[6:])
+                except ValueError:
+                    # No track index after TTITLE: ignore the line.
+                    continue
+                tracks[n] = tracks.get(n, '') + value
+
+        if not dtitle:
+            raise ValueError('Unable to parse DTITLE "%s"' % dtitle)
+        artist, title = self._split_dtitle(dtitle)
+        entry['artist_title'] = dtitle
+        entry['artist'] = artist
+        entry['title'] = title
+
+        entry['tracks'] = [tracks[n] for n in sorted(tracks)]
+
+        return entry
+
+
+def main():
+    """Self-check the DTITLE splitting and dump the sample disc entries."""
+    test_queries = (
+        # 200, 1 match
+        (0xfd0ce112, 18, (150, 16732, 27750, 43075, 58800, 71690, 86442,
+                          101030, 111812, 128367, 136967, 152115, 164812,
+                          180337, 194072, 201690, 211652, 230517), 3299),
+        # 211, inexact but 1 match
+        (0xb70e170e, 14, (150, 20828, 36008, 53518, 71937, 90777, 109374,
+                          128353, 150255, 172861, 192062, 216672, 235357,
+                          253890), 3609),
+    )
+    import pprint
+    fdb = SimpleFreeDB()
+    dtitle_split_data = 'foo \\/ bar / bla / baz'
+    dtitle_split_ref = ('foo \\/ bar', 'bla / baz')
+    assert fdb._split_dtitle(dtitle_split_data) == dtitle_split_ref
+    assert fdb._split_dtitle('no separator') == ('no separator',) * 2
+    for query in test_queries:
+        for match in fdb.query(*query):
+            pprint.pprint(fdb.read(match['category'], match['discid']))
+
+
+if __name__ == '__main__':
+    main()