Merge pull request #94 from sertansenturk/ported_code_refactor

Ported code refactor
sertansenturk · Dec 14, 2019 · 158913a · 158913a
2 parents ae930b9 + b16ac95
commit 158913a
Show file tree

Hide file tree

Showing 65 changed files with 1,369 additions and 5,358 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,21 +1,40 @@
-# we only need to point to python for the tests to run
 language: python
-python:
-  - "2.7"
-#  - "3.3"
-#  - "3.4"
-#  - "3.5"
-
-# command to install dependencies
+matrix:
+  include:
+    # - name: "Docker build"
+    #   services: docker
+    #   python: 3.7
+    #   env: TOX_ENV=docker
+    #   before_install:
+    #     - docker --version
+    #     - pip install -U pip
+    #     - pip install tox-travis
+    #   install: # installed by tox
+    #   script: tox -e $TOX_ENV
+    #   after_success: # do nothing
+    - name: "Python 3.5"
+      python: 3.5
+      env: TOX_PYTHON_ENV=py35
+    - name: "Python 3.6"
+      python: 3.6
+      env: TOX_PYTHON_ENV=py36
+    - name: "Python 3.7"
+      python: 3.7
+      env: TOX_PYTHON_ENV=py37
+    - name: "Python 3.8"
+      python: 3.8
+      env: TOX_PYTHON_ENV=py38
+before_install:
+  - ls -la
+  - pip install -U pip
+  - pip install tox-travis
+  - pip install codecov
 install:
-#  - pip install -r requirements.txt
-  - pip install flake8
-
-# command to run before the tests
-before_script:
-  - "flake8 tomato"
-
-# command to run tests
+  - python setup.py install
+  # - pip install . # install package
+  # - pip install -r requirements.txt
+  - pip show tomato
 script:
-#  - nosetests unittests
-  - nosetests --with-coverage
+  - tox -e $TOX_PYTHON_ENV,flake8 #,pylint
+after_success:
+  - codecov # submit coverage to https://codecov.io/gh/sertansenturk/tomato/
diff --git a/Changelog.md b/Changelog.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## tomato v0.13.0
+
+- Refactored the code (in particular metadata and score processing), which was previously ported from different makam analysis libraries in v0.11.0: Pull requests [#84](https://github.com/sertansenturk/tomato/pull/84) and [#94](https://github.com/sertansenturk/tomato/pull/94)
+
 ## tomato v0.12.3
 
 - Solved the problem with loading the makam/tonic estimation model from pickle (removed `morty` dependency): [Pull request #91](https://github.com/sertansenturk/tomato/pull/91)

diff --git a/demos/audio_analysis_demo.ipynb b/demos/audio_analysis_demo.ipynb
diff --git a/demos/complete_analysis_demo.ipynb b/demos/complete_analysis_demo.ipynb
diff --git a/demos/joint_analysis_demo.ipynb b/demos/joint_analysis_demo.ipynb
diff --git a/demos/score_analysis_demo.ipynb b/demos/score_analysis_demo.ipynb
diff --git a/demos/score_conversion_demo.ipynb b/demos/score_conversion_demo.ipynb
@@ -6,7 +6,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from tomato.symbolic.scoreconverter import ScoreConverter\n",
+    "from tomato.symbolic.symbtrconverter import SymbTrConverter\n",
     "import pprint \n",
     "import os\n"
    ]
@@ -36,7 +36,7 @@
     "svg_paper_size = 'junior-legal'  # The paper size of the svg output pages\n",
     "\n",
     "# instantiate analyzer object\n",
-    "scoreConverter = ScoreConverter()\n"
+    "symbTrConverter = SymbTrConverter()\n"
    ]
   },
   {
@@ -53,7 +53,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "xml_output, ly_output, svg_output, txt_ly_mapping = scoreConverter.convert(\n",
+    "xml_output, ly_output, svg_output, txt_ly_mapping = symbTrConverter.convert(\n",
     "    txt_filename, mu2_filename, symbtr_name=symbtr_name, mbid=work_mbid, \n",
     "    render_metadata=render_metadata, xml_out=xml_filename, ly_out=ly_filename, \n",
     "    svg_out=svg_filename_template, svg_paper_size=svg_paper_size)\n"
@@ -74,16 +74,16 @@
    "source": [
     "# convert symbtr-txt file to MusicXML format. Use the metadata in the mu2 header as\n",
     "# complementary information\n",
-    "xml_file = scoreConverter.txt_mu2_to_musicxml(\n",
+    "xml_file = symbTrConverter.txt_mu2_to_musicxml(\n",
     "    txt_filename, mu2_filename, xml_out=xml_filename, symbtr_name=symbtr_name,\n",
     "    mbid=work_mbid)\n",
     "\n",
     "# convert the MusicXML to LilyPond\n",
-    "ly_file, ly_txt_mapping = scoreConverter.musicxml_to_lilypond(\n",
+    "ly_file, ly_txt_mapping = symbTrConverter.musicxml_to_lilypond(\n",
     "    xml_file, ly_out=ly_filename, render_metadata=render_metadata)\n",
     "\n",
     "# convert the LilyPond to svg\n",
-    "svg_files = scoreConverter.lilypond_to_svg(\n",
+    "svg_files = symbTrConverter.lilypond_to_svg(\n",
     "    ly_file, svg_out=svg_filename_template, \n",
     "    paper_size=svg_paper_size, ly_txt_mapping=ly_txt_mapping)\n"
    ]
@@ -98,14 +98,14 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
   }
  },
  "nbformat": 4,

diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ numpy>=1.9.0  # numerical operations
 scipy>=0.17.0  # temporary mat file saving for MCR binary inputs
 pandas>=0.18.0  # tabular data processing
 matplotlib>=1.5.1  # plotting
-json_tricks==2.1  # saving json files with classes and numpy arrays
+json_tricks==3.12.1  # saving json files with classes and numpy arrays
 eyeD3>=0.7.5  # reading metadata embedded in the audio recordings
 six>=1.10.0  # Python 2*3 support
 future>=0.15.2  # Python 2*3 support

diff --git a/setup.py b/setup.py
@@ -3,7 +3,10 @@
 
 import os
 import subprocess
+import zipfile
+
 from tomato import __version__
+
 try:
     import ConfigParser  # python 2
 except ImportError:
@@ -12,7 +15,6 @@
     from urllib2 import urlopen  # python 2
 except ImportError:
     from urllib.request import urlopen  # python 3
-import zipfile
 
 try:
     from setuptools import setup

diff --git a/...to/symbolic/musicxml2lilypond/__init__.py → tests/__init__.py b/...to/symbolic/musicxml2lilypond/__init__.py → tests/__init__.py
diff --git a/tests/test_converter.py b/tests/test_converter.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+
+from tomato.converter import Converter
+
+
+def test_hz_to_cent_negative_hz_track():
+    """A negative value in the Hz track is expected to raise ValueError."""
+    # GIVEN
+    hz_track = np.array([-50])
+    ref_freq = 25.0  # plain float: np.float was removed in NumPy 1.24
+
+    # WHEN; THEN
+    with pytest.raises(ValueError):
+        Converter.hz_to_cent(hz_track, ref_freq)
+
+
+def test_hz_to_cent_negative_ref_freq():
+    """A negative reference frequency is expected to raise ValueError."""
+    # GIVEN
+    hz_track = np.array([50])
+    ref_freq = -5.0
+
+    # WHEN; THEN
+    with pytest.raises(ValueError):
+        Converter.hz_to_cent(hz_track, ref_freq)
+
+
+def test_hz_to_cent_negative_min_freq():
+    """A negative minimum frequency is expected to raise ValueError."""
+    # GIVEN
+    hz_track = np.array([50])
+    ref_freq = 25.0
+    min_freq = -5.0
+
+    # WHEN; THEN
+    with pytest.raises(ValueError):
+        Converter.hz_to_cent(hz_track, ref_freq, min_freq)
+
+
+def test_hz_to_cent_ref_less_than_min():
+    """A reference frequency below min_freq should raise ValueError."""
+    # GIVEN
+    hz_track = np.array([50])
+    ref_freq = 25.0
+    min_freq = 30.0
+
+    # WHEN; THEN
+    with pytest.raises(ValueError):
+        Converter.hz_to_cent(hz_track, ref_freq, min_freq)
+
+
+def test_hz_to_cent_hz_less_than_min():
+    """Hz values below min_freq are expected to be converted to NaN."""
+    # GIVEN
+    hz_track = np.array([20])
+    ref_freq = 35.0
+    min_freq = 30.0
+
+    # WHEN
+    result = Converter.hz_to_cent(hz_track, ref_freq, min_freq)
+
+    # THEN
+    expected = np.array([np.nan])
+    np.testing.assert_array_equal(result, expected)
diff --git a/tomato/__init__.py b/tomato/__init__.py
@@ -1,2 +1,2 @@
 # coding=utf-8
-__version__ = '0.12.3'
+__version__ = '0.13.0'
diff --git a/tomato/analyzer.py b/tomato/analyzer.py
@@ -24,10 +24,15 @@
 # scores for the description and discovery of Ottoman-Turkish makam music.
 # PhD thesis, Universitat Pompeu Fabra, Barcelona, Spain.
 
-from abc import ABCMeta, abstractmethod, abstractproperty
-from .io import IO
 import logging
 import warnings
+from abc import ABCMeta, abstractmethod, abstractproperty
+
+from .io import IO
+
+# fetched via getLogger so that application-level logging config applies
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 class Analyzer(object):
@@ -75,7 +78,7 @@ def _parse_inputs(self, **kwargs):
         for feature, val in kwargs.items():
             if feature not in self._inputs:
                 warn_str = u'Unrelated feature {0:s}: It will be kept, ' \
-                           u'but it will not be used in the audio analysis.' \
+                           u'but will not be used in the analysis.' \
                            u''.format(feature)
                 warnings.warn(warn_str, stacklevel=2)
             precomputed_features[feature] = val
@@ -91,7 +94,7 @@ def _partial_caller(flag, func, *input_args, **input_kwargs):
                 return func(*input_args, **input_kwargs)
             except (RuntimeError, KeyError, IndexError, ValueError,
                     TypeError, AttributeError):
-                logging.warning('{0:s} failed.'.format(func.__name__))
+                logger.exception('{0:s} failed.'.format(func.__name__))
                 return None
         else:  # flag is the precomputed feature itself
             return flag

diff --git a/tomato/audio/ahenk.py b/tomato/audio/ahenk.py
@@ -25,6 +25,8 @@
 # PhD thesis, Universitat Pompeu Fabra, Barcelona, Spain.
 
 import numpy as np
+
+from ..converter import Converter
 from ..io import IO
 
 
@@ -44,7 +46,7 @@ def identify(cls, tonic_freq, symbol_in):
             symbol_in, tonic_dict)
 
         # get the transposition in cents, rounded to the closest semitone
-        cent_dist = cls._hz_to_cent(tonic_freq, tonic_bolahenk_freq)
+        cent_dist = Converter.hz_to_cent(tonic_freq, tonic_bolahenk_freq)
         mod_cent_dist = np.mod(cent_dist, cls.CENTS_IN_OCTAVE)
 
         # if the distance is more than 1150 cents wrap it to minus
@@ -58,14 +60,14 @@ def identify(cls, tonic_freq, symbol_in):
 
         # create the stats dictionary
         distance_to_bolahenk = {
-            'performed': {'value': mod_cent_dist.tolist()[0], 'unit': 'cent'},
+            'performed': {'value': mod_cent_dist, 'unit': 'cent'},
             'theoretical': {'value': mod_cent_approx, 'unit': 'cent'}}
         ahenk_dict = {'name': '', 'slug': '', 'makam': makam,
                       'tonic_symbol': tonic_symbol,
                       'distance_to_bolahenk': distance_to_bolahenk,
-                      'deviation': {'value': mod_cent_dev.tolist()[0],
+                      'deviation': {'value': mod_cent_dev,
                                     'unit': 'cent'},
-                      'abs_deviation': {'value': abs_mod_cent_dev.tolist()[0],
+                      'abs_deviation': {'value': abs_mod_cent_dev,
                                         'unit': 'cent'}}
 
         # get the ahenk
@@ -102,17 +104,3 @@ def _get_tonic_symbol_from_makam(symbol_in, tonic_dict):
                     raise KeyError("The tonic of this makam is not known.")
                 break
         return tonic_symbol, tonic_bolahenk_freq
-
-    @staticmethod
-    def _hz_to_cent(hz_track, ref_freq):
-        """--------------------------------------------------------------------
-        Converts an array of Hertz values into cents.
-        -----------------------------------------------------------------------
-        hz_track : The 1-D array of Hertz values
-        ref_freq    : Reference frequency for cent conversion
-        --------------------------------------------------------------------"""
-        hz_track = np.array(hz_track)
-
-        # The 0 Hz values are removed, not only because they are meaningless,
-        # but also logarithm of 0 is problematic.
-        return np.log2(hz_track[hz_track > 0] / ref_freq) * 1200.0
diff --git a/tomato/audio/audioanalyzer.py b/tomato/audio/audioanalyzer.py
@@ -32,23 +32,24 @@
 
 import numpy as np
 import six
-from ..metadata.recording import Recording
-from musicbrainzngs import NetworkError
-from musicbrainzngs import ResponseError
-from tomato.audio.makamtonic.toniclastnote import TonicLastNote
+from musicbrainzngs import NetworkError, ResponseError
 
+from ..analyzer import Analyzer
+from ..io import IO
+from ..metadata.recording import Recording as RecordingMetadata
+from ..plotter import Plotter
 from .ahenk import Ahenk
 from .makamtonic.knnclassifier import KNNClassifier as MakamClassifier
+from .makamtonic.toniclastnote import TonicLastNote
 from .notemodel import NoteModel
 from .pitchdistribution import PitchDistribution
 from .pitchfilter import PitchFilter
 from .predominantmelody import PredominantMelody
 from .seyir import Seyir
-from ..analyzer import Analyzer
-from ..io import IO
-from ..plotter import Plotter
 
-logging.basicConfig(level=logging.INFO)
+# fetched via getLogger so that application-level logging config applies
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 
 class AudioAnalyzer(Analyzer):
@@ -74,7 +73,6 @@ def __init__(self, verbose=False):
                                     'distance_method': 'bhat'}
 
         # extractors
-        self._metadata_getter = Recording(get_work_attributes=True)
         self._pitch_extractor = PredominantMelody(filter_pitch=False)  #
         # filter_pitch uses Essentia PitchFilter, which is not as good as our
         # Python implementation
@@ -92,8 +90,8 @@ def analyze(self, filepath='', **kwargs):
         filepath = IO.make_unicode(filepath)
 
         # metadata
-        audio_f['metadata'] = self._call_audio_metadata(audio_f['metadata'],
-                                                        filepath)
+        audio_f['metadata'] = self._call_audio_metadata(
+            audio_f['metadata'], filepath)
 
         # predominant melody extraction
         audio_f['pitch'] = self._partial_caller(
@@ -146,6 +144,7 @@ def analyze(self, filepath='', **kwargs):
         return audio_f
 
     def _call_audio_metadata(self, audio_meta, filepath):
+
         if audio_meta is False:  # metadata crawling is disabled
             audio_meta = None
         elif audio_meta is None:  # no MBID is given, attempt to get
@@ -195,7 +194,7 @@ def crawl_musicbrainz_metadata(self, rec_in):
             rec_in = IO.make_unicode(rec_in)
             tic = timeit.default_timer()
             self.vprint(u"- Getting relevant metadata of {0:s}".format(rec_in))
-            audio_meta = self._metadata_getter.from_musicbrainz(rec_in)
+            audio_meta = RecordingMetadata.from_musicbrainz(rec_in)
 
             self.vprint_time(tic, timeit.default_timer())
             return audio_meta
@@ -334,9 +333,6 @@ def compute_note_models(self, pitch_distribution, tonic, makamstr):
     def set_pitch_extractor_params(self, **kwargs):
         self._set_params('_pitch_extractor', **kwargs)
 
-    def set_metadata_getter_params(self, **kwargs):
-        self._set_params('_metadata_getter', **kwargs)
-
     def set_pitch_filter_params(self, **kwargs):
         self._set_params('_pitch_filter', **kwargs)