[WIP] Add pre-commit hooks for black, isort, autoflake and docformatter #590

Draft: wants to merge 4 commits into base: master

14 changes: 14 additions & 0 deletions .github/workflows/pre-commit.yaml
@@ -0,0 +1,14 @@
name: pre-commit

on:
  pull_request:
  push:
    branches: [master]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
      - uses: pre-commit/action@v3.0.0
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
repos:
  - repo: https://github.com/psf/black
    rev: 23.7.0
    hooks:
      - id: black

  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort

  - repo: https://github.com/pycqa/autoflake
    rev: v2.2.0
    hooks:
      - id: autoflake

  - repo: https://github.com/pycqa/docformatter
    rev: v1.7.5
    hooks:
      - id: docformatter
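
Note (not part of the diff): with this configuration, the same hooks the CI workflow above runs can also be run locally. A minimal sketch, assuming pre-commit is installed (for example via pip install pre-commit) and invoked from a git checkout of the repository; the subprocess wrapper is only for illustration:

import subprocess

# Register pre-commit as this clone's git pre-commit hook.
subprocess.run(["pre-commit", "install"], check=True)

# Run black, isort, autoflake and docformatter across the whole repository once.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
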
77 changes: 33 additions & 44 deletions docs/source/contributing_examples/example.py
@@ -1,4 +1,4 @@
"""Example Dataset Loader
"""Example Dataset Loader.

.. admonition:: Dataset Info
:class: dropdown
@@ -11,7 +11,6 @@
4. Describe the type of music included in the dataset
5. Indicate any relevant papers related to the dataset
6. Include a description about how the data can be accessed and the license it uses (if applicable)

"""
import csv
import json
@@ -22,9 +21,11 @@
# -- example imports you won't use
import librosa
import numpy as np
from smart_open import open # if you use the open function, make sure you include this line!
from smart_open import (
open, # if you use the open function, make sure you include this line!
)

from mirdata import download_utils, jams_utils, core, annotations
from mirdata import annotations, core, download_utils, jams_utils

# -- Add any relevant citations here
BIBTEX = """
@@ -54,19 +55,19 @@
"default": "1.0",
"test": "sample",
"1.0": core.Index(filename="example_index_1.0.json"),
"sample": core.Index(filename="example_index_sample.json")
"sample": core.Index(filename="example_index_sample.json"),
}

# -- REMOTES is a dictionary containing all files that need to be downloaded.
# -- The keys should be descriptive (e.g. 'annotations', 'audio').
# -- When having data that can be partially downloaded, remember to set up
# -- correctly destination_dir to download the files following the correct structure.
REMOTES = {
'remote_data': download_utils.RemoteFileMetadata(
filename='a_zip_file.zip',
url='http://website/hosting/the/zipfile.zip',
checksum='00000000000000000000000000000000', # -- the md5 checksum
destination_dir='path/to/unzip' # -- relative path for where to unzip the data, or None
"remote_data": download_utils.RemoteFileMetadata(
filename="a_zip_file.zip",
url="http://website/hosting/the/zipfile.zip",
checksum="00000000000000000000000000000000", # -- the md5 checksum
destination_dir="path/to/unzip", # -- relative path for where to unzip the data, or None
),
}

@@ -102,8 +103,8 @@ class Track(core.Track):
annotation (EventData): a description of this annotation

"""

def __init__(self, track_id, data_home, dataset_name, index, metadata):

# -- this sets the following attributes:
# -- * track_id
# -- * _dataset_name
@@ -117,7 +118,7 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
index=index,
metadata=metadata,
)

# -- add any dataset specific attributes here
self.audio_path = self.get_path("audio")
self.annotation_path = self.get_path("annotation")
@@ -146,12 +147,11 @@ def annotation(self) -> Optional[annotations.EventData]:
# -- any memory heavy information (like audio) properties
@property
def audio(self) -> Optional[Tuple[np.ndarray, float]]:
"""The track's audio
"""The track's audio.

Returns:
* np.ndarray - audio signal
* float - sample rate

"""
return load_audio(self.audio_path)

@@ -172,7 +172,7 @@ def to_jams(self):
# -- if the dataset contains multitracks, you can define a MultiTrack similar to a Track
# -- you can delete the block of code below if the dataset has no multitracks
class MultiTrack(core.MultiTrack):
"""Example multitrack class
"""Example multitrack class.

Args:
mtrack_id (str): multitrack id
@@ -188,11 +188,9 @@ class MultiTrack(core.MultiTrack):

Cached Properties:
annotation (EventData): a description of this annotation

"""
def __init__(
self, mtrack_id, data_home, dataset_name, index, track_class, metadata
):

def __init__(self, mtrack_id, data_home, dataset_name, index, track_class, metadata):
# -- this sets the following attributes:
# -- * mtrack_id
# -- * _dataset_name
@@ -232,12 +230,11 @@ def annotation(self) -> Optional[annotations.EventData]:

@property
def audio(self) -> Optional[Tuple[np.ndarray, float]]:
"""The track's audio
"""The track's audio.

Returns:
* np.ndarray - audio signal
* float - sample rate

"""
return load_audio(self.audio_path)

@@ -247,16 +244,15 @@ def to_jams(self):
"""Jams: the track's data in jams format"""
return jams_utils.jams_converter(
audio_path=self.mix_path,
annotation_data=[(self.annotation, None)],
...
chord_data=[(self.annotation, None)],
)
# -- see the documentation for `jams_utils.jams_converter` for all fields


# -- this decorator allows this function to take a string or an open bytes file as input
# -- and in either case converts it to an open file handle.
# -- It also checks if the file exists
# -- and, if None is passed, None will be returned
# -- and, if None is passed, None will be returned
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
"""Load a Example audio file.
@@ -267,7 +263,6 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
Returns:
* np.ndarray - the audio signal
* float - The sample rate of the audio file

"""
# -- for example, the code below. This should be dataset specific!
# -- By default we load to mono
@@ -277,15 +272,15 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:

# -- Write any necessary loader functions for loading the dataset's data


# -- this decorator allows this function to take a string or an open file as input
# -- and in either case converts it to an open file handle.
# -- It also checks if the file exists
# -- and, if None is passed, None will be returned
# -- and, if None is passed, None will be returned
@io.coerce_to_string_io
def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:

# -- because of the decorator, the file is already open
reader = csv.reader(fhandle, delimiter=' ')
reader = csv.reader(fhandle, delimiter=" ")
intervals = []
annotation = []
for line in reader:
@@ -295,16 +290,14 @@ def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
# there are several annotation types in annotations.py
# They should be initialized with data, followed by their units
# see annotations.py for a complete list of types and units.
annotation_data = annotations.EventData(
np.array(intervals), "s", np.array(annotation), "open"
)
annotation_data = annotations.EventData(np.array(intervals), "s", np.array(annotation), "open")
return annotation_data


# -- use this decorator so the docs are complete
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
"""The Example dataset
"""
"""The Example dataset."""

def __init__(self, data_home=None, version="default"):
super().__init__(
@@ -320,40 +313,36 @@ def __init__(self, data_home=None, version="default"):
)

# -- if your dataset has a top-level metadata file, write a loader for it here
# -- you do not have to include this function if there is no metadata
# -- you do not have to include this function if there is no metadata
@core.cached_property
def _metadata(self):
metadata_path = os.path.join(self.data_home, 'example_metadata.csv')
metadata_path = os.path.join(self.data_home, "example_metadata.csv")

# load metadata however makes sense for your dataset
metadata_path = os.path.join(data_home, 'example_metadata.json')
with open(metadata_path, 'r') as fhandle:
metadata_path = os.path.join(data_home, "example_metadata.json")
with open(metadata_path, "r") as fhandle:
metadata = json.load(fhandle)

return metadata

# -- if your dataset needs to overwrite the default download logic, do it here.
# -- this function is usually not necessary unless you need very custom download logic
def download(
self, partial_download=None, force_overwrite=False, cleanup=False
):
"""Download the dataset
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
"""Download the dataset.

Args:
partial_download (list or None):
A list of keys of remotes to partially download.
If None, all data is downloaded
force_overwrite (bool):
If True, existing files are overwritten by the downloaded files.
If True, existing files are overwritten by the downloaded files.
cleanup (bool):
Whether to delete any zip/tar files after extracting.

Raises:
ValueError: if invalid keys are passed to partial_download
IOError: if a downloaded file's checksum is different from expected

"""
# see download_utils.downloader for basic usage - if you only need to call downloader
# once, you do not need this function at all.
# only write a custom function if you need it!
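
Note (not part of the diff): once this template is adapted to a real dataset, the loader is typically exercised the way test_example.py below does. A minimal sketch under those assumptions; the import name, data path, and track id are placeholders rather than anything introduced by this PR:

import example  # assumed import name for the adapted loader module

# Instantiate the dataset against local test resources, as in test_example.py.
dataset = example.Dataset("tests/resources/mir_datasets/dataset", version="test")

# The io.coerce_to_* decorators let the load_* functions accept plain paths
# (or open file handles); the paths below are placeholders.
y, sr = example.load_audio("tests/resources/mir_datasets/dataset/audio/some_id.wav")
events = example.load_annotation("tests/resources/mir_datasets/dataset/annotation/some_id.lab")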

9 changes: 3 additions & 6 deletions docs/source/contributing_examples/make_example_index.py
@@ -2,6 +2,7 @@
import glob
import json
import os

from mirdata.validate import md5

DATASET_INDEX_PATH = "../mirdata/datasets/indexes/dataset_index.json"
@@ -19,9 +20,7 @@ def make_dataset_index(dataset_data_path):
# top-key level tracks
index_tracks = {}
for track_id in track_ids:
audio_checksum = md5(
os.path.join(dataset_data_path, "Wavfile/{}.wav".format(track_id))
)
audio_checksum = md5(os.path.join(dataset_data_path, "Wavfile/{}.wav".format(track_id)))
annotation_checksum = md5(
os.path.join(dataset_data_path, "annotation/{}.lab".format(track_id))
)
@@ -48,8 +47,6 @@ def main(args):

if __name__ == "__main__":
PARSER = argparse.ArgumentParser(description="Make dataset index file.")
PARSER.add_argument(
"dataset_data_path", type=str, help="Path to dataset data folder."
)
PARSER.add_argument("dataset_data_path", type=str, help="Path to dataset data folder.")

main(PARSER.parse_args())
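
Note (not part of the diff): the script above writes an index JSON that maps each track id to relative file paths and their md5 checksums. A hypothetical sketch of the shape it produces, written as a Python dict; keys and checksum values are illustrative only:

# Hypothetical index structure; the paths follow the Wavfile/ and annotation/
# layout used in make_dataset_index above, and the checksums are placeholders.
example_index = {
    "version": "1.0",
    "tracks": {
        "some_id": {
            "audio": ["Wavfile/some_id.wav", "d41d8cd98f00b204e9800998ecf8427e"],
            "annotation": ["annotation/some_id.lab", "d41d8cd98f00b204e9800998ecf8427e"],
        },
    },
}
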
5 changes: 1 addition & 4 deletions docs/source/contributing_examples/test_example.py
@@ -1,5 +1,4 @@
"""Tests for example dataset
"""
"""Tests for example dataset."""
import numpy as np
import pytest

@@ -37,7 +36,6 @@ def test_track():


def test_to_jams():

default_trackid = "some_id"
data_home = "tests/resources/mir_datasets/dataset"
dataset = example.Dataset(data_home, version="test")
@@ -73,4 +71,3 @@ def test_metadata():
dataset = example.Dataset(data_home, version="test")
metadata = dataset._metadata
assert metadata["some_id"] == "something"

11 changes: 3 additions & 8 deletions mirdata/__init__.py
@@ -4,17 +4,13 @@

from .version import version as __version__


DATASETS = [
d.name
for d in pkgutil.iter_modules(
[os.path.dirname(os.path.abspath(__file__)) + "/datasets"]
)
d.name for d in pkgutil.iter_modules([os.path.dirname(os.path.abspath(__file__)) + "/datasets"])
]


def list_datasets():
"""Get a list of all mirdata dataset names
"""Get a list of all mirdata dataset names.

Returns:
list: list of dataset names as strings
@@ -23,7 +19,7 @@ def list_datasets():


def initialize(dataset_name, data_home=None, version="default"):
"""Load a mirdata dataset by name
"""Load a mirdata dataset by name.

Example:
.. code-block:: python
@@ -45,7 +41,6 @@

Returns:
Dataset: a mirdata.core.Dataset object

"""
if dataset_name not in DATASETS:
raise ValueError("Invalid dataset {}".format(dataset_name))
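
Note (not part of the diff): for context, the two helpers touched above are the usual entry points into mirdata. A minimal sketch of how they are called; the dataset name and data path are placeholders:

import mirdata

print(mirdata.list_datasets())  # names of all available dataset loaders

# Raises ValueError if the name is not in DATASETS (see above).
dataset = mirdata.initialize("some_dataset", data_home="/path/to/data")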