Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Controversial Freedb support #138

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ install:
# Dependencies
- sudo apt-get -qq update
- sudo pip install --upgrade -qq pip
- sudo apt-get -qq install cdparanoia cdrdao flac libcdio-dev libiso9660-dev libsndfile1-dev python-cddb python-gobject python-musicbrainzngs python-mutagen python-setuptools sox swig
- sudo apt-get -qq install cdparanoia cdrdao flac libcdio-dev libiso9660-dev libsndfile1-dev python-gobject python-musicbrainzngs python-mutagen python-setuptools sox swig
- sudo pip install pycdio requests

# Testing dependencies
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ Whipper relies on the following packages in order to run correctly and provide a
- [python-musicbrainzngs](https://github.com/alastair/python-musicbrainzngs), for metadata lookup
- [python-mutagen](https://pypi.python.org/pypi/mutagen), for tagging support
- [python-setuptools](https://pypi.python.org/pypi/setuptools), for installation, plugins support
- [python-cddb](http://cddb-py.sourceforge.net/), for showing but not using metadata if disc not available in the MusicBrainz DB
- [pycdio](https://pypi.python.org/pypi/pycdio/) (to avoid bugs please use `pycdio` **0.20** & `libcdio` >= **0.90** or, with previous `libcdio` versions, `pycdio` **0.17**), for drive identification
- Required for drive offset and caching behavior to be stored in the configuration file
- [requests](https://pypi.python.org/pypi/requests) for retrieving AccurateRip database entries
Expand Down
28 changes: 25 additions & 3 deletions whipper/command/cd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import gobject
from whipper.command.basecommand import BaseCommand
from whipper.common import (
accurip, config, drive, program, task
accurip, config, drive, program, task, simplefreedb
)
from whipper.program import cdrdao, cdparanoia, utils
from whipper.result import result
Expand Down Expand Up @@ -126,10 +126,27 @@ def do(self):
if not self.program.metadata:
# fall back to FreeDB for lookup
cddbid = self.ittoc.getCDDBValues()
cddbmd = self.program.getCDDB(cddbid)
if cddbmd:

sfdb = simplefreedb.SimpleFreeDB()
discid, ntrks = cddbid[0], cddbid[1]
offsets, nsecs = cddbid[2:-1], cddbid[-1]
matches = sfdb.query(discid, ntrks, offsets, nsecs)
if matches:
match = matches[0] # TODO: honor --prompt
cddbmd = match['artist_title']
sys.stdout.write('FreeDB identifies disc as %s\n' % cddbmd)

if self.options.allow_freedb:
logger.warning('Using FreeDB metadata, which is not '
'recommended due to its very low quality '
'standards.')
logger.warning('You are strongly encouraged to submit CD '
'information to the MusicBrainz database.')
fdb_data = sfdb.read(match['category'], match['discid'])
self.program.metadata = \
self.program.craftMusicBrainzFromFreeDB(fdb_data)

if not self.program.metadata:
# also used by rip cd info
if not getattr(self.options, 'unknown', False):
logger.critical("unable to retrieve disc metadata, "
Expand Down Expand Up @@ -282,6 +299,11 @@ def add_arguments(self):
action="store_true", dest="unknown",
help="whether to continue ripping if "
"the CD is unknown", default=False)
self.parser.add_argument('--allow-freedb',
action="store_true", dest="allow_freedb",
help="whether to allow relying on low "
"quality CDDB/FreeDB metadata if nothing "
"else is available", default=False)
self.parser.add_argument('--cdr',
action="store_true", dest="cdr",
help="whether to continue ripping if "
Expand Down
39 changes: 19 additions & 20 deletions whipper/common/program.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,28 +248,25 @@ def getPath(self, outdir, template, mbdiscid, metadata, track_number=None):
template = re.sub(r'%(\w)', r'%(\1)s', template)
return os.path.join(outdir, template % v)

def getCDDB(self, cddbdiscid):
def craftMusicBrainzFromFreeDB(self, freedb_data):
"""
@param cddbdiscid: list of id, tracks, offsets, seconds

@rtype: str
Create MusicBrainz-style metadata from a FreeDB entry.
"""
# FIXME: convert to nonblocking?
import CDDB
try:
code, md = CDDB.query(cddbdiscid)
logger.debug('CDDB query result: %r, %r', code, md)
if code == 200:
return md['title']

except IOError, e:
# FIXME: for some reason errno is a str ?
if e.errno == 'socket error':
self._stdout.write("Warning: network error: %r\n" % (e, ))
else:
raise
disc = mbngs.DiscMetadata()

return None
disc.artist = disc.sortName = freedb_data.get('artist')
disc.release = str(freedb_data.get('year', '0000'))
disc.title = disc.releaseTitle = freedb_data.get('title')

tracks = []
for track_title in freedb_data.get('tracks', []):
track = mbngs.TrackMetadata()
track.artist = track.sortName = disc.artist
track.title = track_title
tracks.append(track)
disc.tracks = tracks

return disc

def getMusicBrainz(self, ittoc, mbdiscid, release=None, country=None,
prompt=False):
Expand Down Expand Up @@ -455,7 +452,9 @@ def getTagList(self, number):
if self.metadata.release is not None:
tags['DATE'] = self.metadata.release

if number > 0:
has_mbinfo = (mbidTrack and mbidTrackArtist and mbidAlbum
and mbidTrackAlbum and mbDiscId)
if number > 0 and has_mbinfo:
tags['MUSICBRAINZ_TRACKID'] = mbidTrack
tags['MUSICBRAINZ_ARTISTID'] = mbidTrackArtist
tags['MUSICBRAINZ_ALBUMID'] = mbidAlbum
Expand Down
148 changes: 148 additions & 0 deletions whipper/common/simplefreedb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4

# Copyright (C) 2017 Clément Bœsch <u@pkh.me>

# This file is part of whipper.
#
# whipper is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# whipper is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with whipper. If not, see <http://www.gnu.org/licenses/>.

import re
import socket
import getpass
import urllib
import whipper


class SimpleFreeDB(object):
    """Minimal client for the FreeDB (CDDB) HTTP interface.

    Only the two commands needed for a metadata lookup are implemented:
    ``cddb query`` (find the entries matching a disc) and ``cddb read``
    (fetch one entry). Responses are decoded as UTF-8.
    """

    # CGI endpoint every command is sent to.
    URL = 'http://freedb.freedb.org/~cddb/cddb.cgi'
    # Value sent as the ``proto`` request parameter.
    PROTO = 6

    def __init__(self):
        """Prepare the ``hello`` parameter and the DTITLE splitting regex."""
        user = getpass.getuser()
        host = socket.gethostname()
        client = 'whipper'
        hello = '%s %s %s %s' % (user, host, client, whipper.__version__)
        self._hello = urllib.quote_plus(hello)
        # Matches a slash that is NOT preceded by a backslash, so that
        # escaped slashes inside an artist name are not split on.
        self._slash_split_regex = re.compile(r'(?<!\\)/')

    def _cddb_cmd(self, cmd):
        """
        Send a ``cddb`` command to the server and return the decoded body.

        @param cmd: command without the leading ``cddb`` keyword
        @type  cmd: str

        @rtype: unicode
        """
        cmd_arg = urllib.quote_plus('cddb ' + cmd)
        data_url = (self.URL, cmd_arg, self._hello, self.PROTO)
        url = '%s?cmd=%s&hello=%s&proto=%d' % data_url
        req = urllib.urlopen(url)
        # Always release the connection, even if reading/decoding fails.
        try:
            return req.read().decode('utf-8')
        finally:
            req.close()

    @staticmethod
    def _get_code(line):
        """
        Return the numeric status code that starts a response line.

        @raise ValueError: if the first word of the line is not an integer
        @rtype: int
        """
        return int(line.split(None, 1)[0])

    def _split_dtitle(self, dtitle):
        """
        Split a DTITLE value into ``(artist, title)``.

        The split happens on the first unescaped slash. When there is no
        such slash the whole value is used for both artist and title
        instead of failing.

        @rtype: tuple of (str, str)
        """
        # Note: we can not use a simple dtitle.split('/') here because the
        # slash could be escaped.
        parts = re.split(self._slash_split_regex, dtitle, maxsplit=1)
        if len(parts) < 2:
            whole = dtitle.strip()
            return whole, whole
        artist, title = parts
        return artist.rstrip(), title.lstrip()

    def _craft_match(self, category, discid_str, dtitle):
        """
        Build the dict describing one query match.

        @param discid_str: disc id as a hexadecimal string
        @rtype: dict
        """
        artist, title = self._split_dtitle(dtitle)
        return {'category': category,
                'discid': int(discid_str, 16),
                'artist_title': dtitle,
                'artist': artist,
                'title': title}

    def _parse_query_response(self, data):
        """
        Turn the raw text of a ``cddb query`` response into match dicts.

        Code 200 carries a single match on the status line itself; codes
        210 and 211 are followed by one match per line until a lone ``.``.
        Any other code, or an empty response, yields no match.

        @rtype: list of dict
        """
        lines = data.splitlines()
        if not lines or not lines[0].strip():
            return []

        code = self._get_code(lines[0])
        candidates = []
        if code == 200:
            candidates.append(lines[0].split(None, 3)[1:])
        elif code in (210, 211):
            for line in lines[1:]:
                if line == '.':
                    break
                candidates.append(line.split(None, 2))

        # A usable match has exactly category, disc id and DTITLE; anything
        # else is a malformed line and is skipped rather than crashing.
        return [self._craft_match(*fields)
                for fields in candidates if len(fields) == 3]

    def _parse_read_response(self, data):
        """
        Turn the raw text of a ``cddb read`` response into an entry dict.

        @return: dict with ``artist_title``, ``artist``, ``title``,
                 ``tracks`` and, when present and valid, ``year`` and
                 ``genre``; None if the status code is not 210.
        @raise ValueError: if the entry carries no DTITLE at all
        """
        lines = data.splitlines()
        if not lines or not lines[0].strip():
            return None
        if self._get_code(lines[0]) != 210:
            return None

        entry = {}
        dtitle = ''
        tracks = {}

        for line in lines[1:]:
            if line == '.':
                break
            if not line or line.startswith('#'):
                continue
            key, sep, value = line.partition('=')
            if not sep:
                # Not a KEY=value line; ignore it.
                continue
            if key.startswith('TTITLE'):
                try:
                    n = int(key[6:])
                except ValueError:
                    continue
                # An audio CD holds at most 99 tracks; ignoring indexes
                # outside that range keeps the gap filling below bounded.
                if 0 <= n < 99:
                    # Empty titles are recorded too, so that the position
                    # in the final list still matches the track number.
                    # Repeated keys are continuation lines.
                    tracks[n] = tracks.get(n, '') + value
                continue
            if not value:
                continue
            if key == 'DTITLE':
                # Repeated DTITLE keys are continuation lines.
                dtitle += value
            elif key == 'DYEAR':
                try:
                    entry['year'] = int(value)
                except ValueError:
                    # Unusable year: leave the key out.
                    pass
            elif key == 'DGENRE':
                entry['genre'] = value

        if not dtitle:
            raise ValueError('Unable to parse DTITLE "%s"' % dtitle)
        artist, title = self._split_dtitle(dtitle)
        entry['artist_title'] = dtitle
        entry['artist'] = artist
        entry['title'] = title

        # Fill holes with empty titles so index i is always track i.
        last = max(tracks) if tracks else -1
        entry['tracks'] = [tracks.get(i, '') for i in range(last + 1)]

        return entry

    def query(self, discid, ntrks, offsets, nsecs):
        """
        Look up the entries matching a disc.

        @param discid:  CDDB disc id
        @type  discid:  int
        @param ntrks:   number of tracks
        @type  ntrks:   int
        @param offsets: track offsets
        @type  offsets: iterable of int
        @param nsecs:   disc length in seconds
        @type  nsecs:   int

        @return: one dict per match (see L{_craft_match}), possibly empty
        @rtype: list of dict
        """
        data_q = (discid, ntrks, ' '.join(str(x) for x in offsets), nsecs)
        cmd = 'query %08x %d %s %d' % data_q
        return self._parse_query_response(self._cddb_cmd(cmd))

    def read(self, category, discid):
        """
        Fetch one entry.

        @param category: category, as returned by L{query}
        @type  category: str
        @param discid:   disc id, as returned by L{query}
        @type  discid:   int

        @return: the parsed entry, or None if the server did not return it
        @rtype: dict or None
        @raise ValueError: if the entry carries no DTITLE at all
        """
        cmd = 'read %s %08x' % (category, discid)
        return self._parse_read_response(self._cddb_cmd(cmd))


def main():
    """Manual smoke test: self-check DTITLE splitting, then hit FreeDB.

    Each sample disc is queried, and every entry the query reports is
    read and pretty-printed. Requires network access.
    """
    import pprint

    # 200, 1 match
    exact_disc = (
        0xfd0ce112, 18,
        (150, 16732, 27750, 43075, 58800, 71690, 86442, 101030, 111812,
         128367, 136967, 152115, 164812, 180337, 194072, 201690, 211652,
         230517),
        3299,
    )
    # 211, inexact but 1 match
    inexact_disc = (
        0xb70e170e, 14,
        (150, 20828, 36008, 53518, 71937, 90777, 109374, 128353, 150255,
         172861, 192062, 216672, 235357, 253890),
        3609,
    )

    client = SimpleFreeDB()

    # Only the first unescaped slash separates artist from title.
    sample = 'foo \\/ bar / bla / baz'
    expected = ('foo \\/ bar', 'bla / baz')
    assert client._split_dtitle(sample) == expected

    for disc in (exact_disc, inexact_disc):
        discid, ntrks, offsets, nsecs = disc
        for hit in client.query(discid, ntrks, offsets, nsecs):
            entry = client.read(hit['category'], hit['discid'])
            pprint.pprint(entry)


if __name__ == '__main__':
main()