Skip to content

Commit

Permalink
Add pcodec (zarr-developers#501)
Browse files Browse the repository at this point in the history
* added PCodec

* fix line length and print statements

* docs

* mock pcodec on rtd

* fix typo

* add dtype details

* changed import style for pcodec

* fix flake8

* revert import changes

* fix errors due to changes in pcodec API

* change import style

* skip coverage of failed import path

* skip pcodec tests if not installed
  • Loading branch information
rabernat authored Feb 24, 2024
1 parent 0878717 commit 4abe4be
Show file tree
Hide file tree
Showing 89 changed files with 250 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
run: |
conda activate env
export DISABLE_NUMCODECS_AVX2=""
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
- name: List installed packages
shell: "bash -l {0}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-osx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
run: |
conda activate env
export DISABLE_NUMCODECS_AVX2=""
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
- name: List installed packages
shell: "bash -l {0}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
shell: "bash -l {0}"
run: |
conda activate env
python -m pip install -v -e .[test,test_extras,msgpack,zfpy]
python -m pip install -v -e .[test,test_extras,msgpack,zfpy,pcodec]
- name: List installed packages
shell: "bash -l {0}"
Expand Down
2 changes: 1 addition & 1 deletion c-blosc
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __getattr__(cls, name):
return Mock()


MOCK_MODULES = ['msgpack']
MOCK_MODULES = ['msgpack', 'pcodec']
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)


Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Contents
abc
registry
blosc
pcodec
lz4
zfpy
zstd
Expand Down
10 changes: 10 additions & 0 deletions docs/pcodec.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
PCodec
======

.. automodule:: numcodecs.pcodec

.. autoclass:: PCodec

.. autoattribute:: codec_id
.. automethod:: encode
.. automethod:: decode
2 changes: 2 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Unreleased
Enhancements
~~~~~~~~~~~~

* Add PCodec
By :user:`Ryan Abernathey <rabernat>`.
* Use PyData theme for docs
By :user:`John Kirkham <jakirkham>`, :issue:`485`.

Expand Down
Binary file added fixture/pcodec/array.00.npy
Binary file not shown.
Binary file added fixture/pcodec/array.01.npy
Binary file not shown.
Binary file added fixture/pcodec/array.02.npy
Binary file not shown.
Binary file added fixture/pcodec/array.03.npy
Binary file not shown.
Binary file added fixture/pcodec/array.04.npy
Binary file not shown.
Binary file added fixture/pcodec/array.05.npy
Binary file not shown.
Binary file added fixture/pcodec/array.06.npy
Binary file not shown.
Binary file added fixture/pcodec/array.07.npy
Binary file not shown.
Binary file added fixture/pcodec/array.08.npy
Binary file not shown.
Binary file added fixture/pcodec/array.09.npy
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.00/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"float_mult_spec": "enabled",
"id": "pcodec",
"int_mult_spec": "enabled",
"level": 8
}
Binary file added fixture/pcodec/codec.00/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.00/encoded.09.dat
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.01/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"float_mult_spec": "enabled",
"id": "pcodec",
"int_mult_spec": "enabled",
"level": 1
}
Binary file added fixture/pcodec/codec.01/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.01/encoded.09.dat
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.02/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"float_mult_spec": "enabled",
"id": "pcodec",
"int_mult_spec": "enabled",
"level": 5
}
Binary file added fixture/pcodec/codec.02/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.02/encoded.09.dat
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.03/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"float_mult_spec": "enabled",
"id": "pcodec",
"int_mult_spec": "enabled",
"level": 9
}
Binary file added fixture/pcodec/codec.03/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.03/encoded.09.dat
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.04/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 262144,
"float_mult_spec": "disabled",
"id": "pcodec",
"int_mult_spec": "disabled",
"level": 8
}
Binary file added fixture/pcodec/codec.04/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.04/encoded.09.dat
Binary file not shown.
8 changes: 8 additions & 0 deletions fixture/pcodec/codec.05/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"delta_encoding_order": null,
"equal_pages_up_to": 300,
"float_mult_spec": "enabled",
"id": "pcodec",
"int_mult_spec": "enabled",
"level": 8
}
Binary file added fixture/pcodec/codec.05/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.05/encoded.09.dat
Binary file not shown.
3 changes: 3 additions & 0 deletions numcodecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,6 @@

from numcodecs.fletcher32 import Fletcher32
register_codec(Fletcher32)

from numcodecs.pcodec import PCodec
register_codec(PCodec)
89 changes: 89 additions & 0 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Optional, Literal

import numcodecs
import numcodecs.abc
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import standalone, ChunkConfig, PagingSpec
except ImportError: # pragma: no cover
standalone = None


# Page-size constant; its value equals the default of PCodec's
# ``equal_pages_up_to`` argument (maximum count of numbers per page).
DEFAULT_MAX_PAGE_N = 262144


class PCodec(numcodecs.abc.Codec):
    """
    PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
    numerical sequences with high compression ratio and fast speed.

    See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.

    PCodec supports only the following numerical dtypes: uint32, uint64, int32,
    int64, float32, and float64.

    Parameters
    ----------
    level : int
        A compression level from 0-12, where 12 takes the longest and compresses
        the most.
    delta_encoding_order : int or None
        Either a delta encoding level from 0-7 or None. If set to None, pcodec
        will try to infer the optimal delta encoding order.
    int_mult_spec : {'enabled', 'disabled'}
        If enabled, pcodec will consider using int mult mode, which can
        substantially improve compression ratio but decrease speed in some cases
        for integer types.
    float_mult_spec : {'enabled', 'disabled'}
        If enabled, pcodec will consider using float mult mode, which can
        substantially improve compression ratio but decrease speed in some cases
        for float types.
    equal_pages_up_to : int
        Divide the chunk into equal pages of up to this many numbers.

    Raises
    ------
    ImportError
        If the optional ``pcodec`` package is not installed.
    """

    codec_id = "pcodec"

    def __init__(
        self,
        level: int = 8,
        delta_encoding_order: Optional[int] = None,
        int_mult_spec: Literal["enabled", "disabled"] = "enabled",
        float_mult_spec: Literal["enabled", "disabled"] = "enabled",
        equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
    ):
        # `standalone` is set to None by the guarded module-level import when
        # pcodec is unavailable; fail at construction time with a clear message.
        if standalone is None:  # pragma: no cover
            raise ImportError(
                "pcodec must be installed to use the PCodec codec."
            )

        # note that we use `level` instead of `compression_level` to
        # match other codecs
        self.level = level
        self.delta_encoding_order = delta_encoding_order
        self.int_mult_spec = int_mult_spec
        self.float_mult_spec = float_mult_spec
        self.equal_pages_up_to = equal_pages_up_to

    def encode(self, buf):
        """Compress ``buf`` with pcodec using this codec's configuration.

        Parameters
        ----------
        buf : buffer-like
            Data to compress; it is first passed through
            ``ensure_contiguous_ndarray``.

        Returns
        -------
        enc
            The result of ``pcodec.standalone.simple_compress``.
        """
        buf = ensure_contiguous_ndarray(buf)

        paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)

        # pcodec calls the level `compression_level`; the remaining options
        # are forwarded under the same names they have on this codec.
        config = ChunkConfig(
            compression_level=self.level,
            delta_encoding_order=self.delta_encoding_order,
            int_mult_spec=self.int_mult_spec,
            float_mult_spec=self.float_mult_spec,
            paging_spec=paging_spec,
        )
        return standalone.simple_compress(buf, config)

    def decode(self, buf, out=None):
        """Decompress data previously produced by :meth:`encode`.

        Parameters
        ----------
        buf : bytes-like
            Compressed data.
        out : buffer-like, optional
            Destination buffer. When given, the data is decompressed into it
            and the contiguous ndarray obtained from
            ``ensure_contiguous_ndarray(out)`` is returned.

        Returns
        -------
        dec
            ``out`` (as a contiguous ndarray) when it was supplied, otherwise
            the result of ``pcodec.standalone.simple_decompress``.
        """
        if out is None:
            return standalone.simple_decompress(buf)

        out = ensure_contiguous_ndarray(out)
        standalone.simple_decompress_into(buf, out)
        return out
11 changes: 11 additions & 0 deletions numcodecs/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,17 @@ def check_encode_decode_array(arr, codec):
assert_array_items_equal(arr, dec)


def check_encode_decode_array_to_bytes(arr, codec):
    """Round-trip ``arr`` through ``codec``, with and without an ``out`` buffer.

    The array is encoded once; the encoded payload is then decoded both into
    a result returned by the codec and into a caller-supplied buffer, and
    each result is compared item-wise against ``arr``.
    """
    encoded = codec.encode(arr)

    # Decode path 1: let the codec produce the output itself.
    decoded = codec.decode(encoded)
    assert_array_items_equal(arr, decoded)

    # Decode path 2: decode into a pre-allocated buffer shaped like the input.
    target = np.empty_like(arr)
    codec.decode(encoded, out=target)
    assert_array_items_equal(arr, target)


def check_config(codec):
config = codec.get_config()
# round-trip through JSON to check serialization
Expand Down
78 changes: 78 additions & 0 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pytest
import numpy as np

from numcodecs.pcodec import PCodec

try:
    # Instantiating the codec raises ImportError when pcodec is not installed.
    PCodec()
except ImportError:  # pragma: no cover
    # Skip every test in this module instead of failing at collection time.
    pytest.skip("pcodec not available", allow_module_level=True)

from numcodecs.tests.common import (
check_encode_decode_array_to_bytes,
check_config,
check_repr,
check_backwards_compatibility,
check_err_decode_object_buffer,
check_err_encode_object_buffer,
)


# Codec configurations under test. The order lines up with the stored
# fixture/pcodec/codec.NN/config.json files (00: defaults, 01-03: levels
# 1/5/9, 04: mult specs disabled, 05: equal_pages_up_to=300), so new entries
# should presumably be appended rather than inserted — confirm against
# check_backwards_compatibility.
codecs = [
PCodec(),
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(float_mult_spec="disabled", int_mult_spec="disabled"),
PCodec(equal_pages_up_to=300),
]


# Input arrays shared by the round-trip and backwards-compatibility tests.
# Ten entries, matching the fixture/pcodec/array.00-09.npy files.
# mix of dtypes: integer, float
# mix of shapes: 1D, 2D
# mix of orders: C, F
arrays = [
np.arange(1000, dtype="u4"),
np.arange(1000, dtype="u8"),
np.arange(1000, dtype="i4"),
np.arange(1000, dtype="i8"),
np.linspace(1000, 1001, 1000, dtype="f4"),
np.linspace(1000, 1001, 1000, dtype="f8"),
np.random.normal(loc=1000, scale=1, size=(100, 10)),
np.asfortranarray(np.random.normal(loc=1000, scale=1, size=(100, 10))),
np.random.randint(0, 2**60, size=1000, dtype="u8"),
np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8"),
]


@pytest.mark.parametrize("arr", arrays)
@pytest.mark.parametrize("codec", codecs)
def test_encode_decode(arr, codec):
    """Round-trip every array through every codec configuration."""
    check_encode_decode_array_to_bytes(arr=arr, codec=codec)


def test_config():
    """Run the shared config check on a codec with a non-default level."""
    check_config(PCodec(level=3))


def test_repr():
    """Run the shared repr check against the expected PCodec repr string."""
    expected = (
        "PCodec(delta_encoding_order=None, equal_pages_up_to=262144, float_mult_spec='enabled', "
        "int_mult_spec='enabled', level=3)"
    )
    check_repr(expected)


def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the pcodec codec id."""
    codec_id = PCodec.codec_id
    check_backwards_compatibility(codec_id, arrays, codecs)


def test_err_decode_object_buffer():
    """Run the shared object-buffer decode error check on a default codec."""
    default_codec = PCodec()
    check_err_decode_object_buffer(default_codec)


def test_err_encode_object_buffer():
    """Run the shared object-buffer encode error check on a default codec."""
    default_codec = PCodec()
    check_err_encode_object_buffer(default_codec)
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ msgpack = [
zfpy = [
"zfpy>=1.0.0",
]
pcodec = [
"pcodec>=0.1.0",
]

[tool.setuptools]
license-files = ["LICENSE.txt"]
Expand Down

0 comments on commit 4abe4be

Please sign in to comment.