Skip to content

Commit

Permalink
Use msgspec for JSON output instead of orjson
Browse files Browse the repository at this point in the history
Slightly faster, and fewer dependencies
  • Loading branch information
craigds committed Dec 11, 2024
1 parent 9e24690 commit cf42a0e
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 27 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ _When adding new entries to the changelog, please include issue/PR numbers where

## Unreleased

- diff: Use [orjson](https://github.com/ijl/orjson?tab=readme-ov-file#orjson) for faster JSON-Lines output. [#1019](https://github.com/koordinates/kart/pull/1019)
- Much faster access to tabular/vector datasets (about 75% more features processed per second) by switching to [msgspec](https://jcristharif.com/msgspec/) - [#1025](https://github.com/koordinates/kart/pull/1025)
- diff: Faster JSON-Lines output (also using msgspec)
- Linux builds now require glibc 2.28+ [#1027](https://github.com/koordinates/kart/pull/1027) - This means minimum distro versions are:
- Debian 10+
- Ubuntu 18.10+
- Fedora 29+
- CentOS/RHEL 8+
- Much faster access to tabular/vector datasets (about 75% more features processed per second) by switching to [msgspec](https://jcristharif.com/msgspec/) - [#1025](https://github.com/koordinates/kart/pull/1025)
- diff: Faster JSON-Lines output (also using msgspec)
- Upgrade to PDAL 2.7 [#1005](https://github.com/koordinates/kart/pull/1005)
- Adds a `--drop-empty-geometry-features` option to `kart export`. [#1007](https://github.com/koordinates/kart/pull/1007)
- Adds diagnostic output to Kart when `KART_DIAGNOSTICS=1` environment variable is set. [#1013](https://github.com/koordinates/kart/pull/1013)
Expand Down
18 changes: 9 additions & 9 deletions kart/json_diff_writers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import orjson
import logging
import threading
from datetime import datetime, timedelta, timezone
Expand All @@ -22,8 +21,9 @@
from kart.output_util import (
dump_json_output,
resolve_output_path,
orjson_encode_default,
msgspec_json_encoder,
)

from kart.tabular.feature_output import feature_as_geojson, feature_as_json
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

Expand Down Expand Up @@ -239,19 +239,19 @@ def _check_output_path(cls, repo, output_path):
def __init__(self, *args, diff_estimate_accuracy=None, delta_filter=None, **kwargs):
# Remaining positional/keyword arguments are forwarded unchanged to the parent class.
super().__init__(*args, **kwargs)
# Output destination, resolved from self.output_path.
self.fp = resolve_output_path(self.output_path)
# Compact separators for the "extracompact" JSON style, None otherwise.
# NOTE(review): not read anywhere in the visible hunk - confirm it is still needed.
self.separators = (",", ":") if self.json_style == "extracompact" else None

self._diff_estimate_accuracy = diff_estimate_accuracy
self.delta_filter = delta_filter
# Re-entrant lock that dump() holds while writing to self.fp.
self._output_lock = threading.RLock()
# https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer
# NOTE(review): this single buffer is reused by every dump() call on this writer;
# if dump() can run on several threads (which _output_lock suggests), confirm
# that every access to the buffer happens while holding the lock.
self._output_buffer = bytearray()

def dump(self, obj):
    """
    Serialise `obj` as one JSON line and write it to the output stream.

    The encoded bytes are written to `self.fp.buffer`, followed by a single
    newline, so that each call produces exactly one JSON-Lines record.

    `self._output_buffer` is reused between calls to avoid reallocating on
    every record. Because that buffer is shared state, encoding, appending the
    newline and writing all happen while holding `self._output_lock`;
    otherwise a concurrent call could overwrite the buffer before this call's
    bytes have been written.
    """
    # https://jcristharif.com/msgspec/perf-tips.html#line-delimited-json
    with self._output_lock:
        # encode_into() writes from offset 0 and resizes the buffer to fit,
        # so no explicit clear is needed between records.
        msgspec_json_encoder.encode_into(obj, self._output_buffer)
        self._output_buffer.extend(b"\n")
        self.fp.buffer.write(self._output_buffer)

def write_header(self):
self.dump(
Expand Down
20 changes: 7 additions & 13 deletions kart/output_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,14 @@
import types
from pathlib import Path

import orjson
import msgspec.json
import pygments
from pygments.lexers import JsonLexer

from .wkt_lexer import WKTLexer

_terminal_formatter = None

# note: `json` and `orjson` libraries aren't quite interchangeable.
# * orjson is much faster, so we use it where we can
# * orjson doesn't support custom separators
# * orjson doesn't support iterencode(), so can't stream unbounded iterators to stdout :(
ORJSON_OPTIONS = {
"compact": 0, # orjson doesn't support custom separators, so extracompact and compact look identical
"extracompact": 0,
"pretty": orjson.OPT_INDENT_2,
}
JSON_PARAMS = {
"compact": {},
"extracompact": {"separators": (",", ":")},
Expand All @@ -47,17 +38,20 @@ def __iter__(self):
return itertools.chain(self._head, *self[:1])


def msgspec_json_encode_default(obj):
    """
    `enc_hook` for `msgspec.json.Encoder`: converts objects that msgspec cannot
    serialise natively into something it can.

    - tuples are returned as lists
    - objects with a `__json__()` method are replaced by that method's result

    Raises NotImplementedError for any other type, which is how an `enc_hook`
    signals to msgspec that the object is unsupported.
    """
    if isinstance(obj, tuple):
        return list(obj)

    if hasattr(obj, "__json__"):
        return obj.__json__()

    # Name the offending type so encoding failures are easy to diagnose.
    raise NotImplementedError(
        f"Objects of type {type(obj).__name__} are not JSON serialisable"
    )


# Module-level encoder, constructed once at import time with the enc_hook above
# so that importers can reuse the same instance instead of building their own.
msgspec_json_encoder = msgspec.json.Encoder(enc_hook=msgspec_json_encode_default)


class ExtendedJsonEncoder(json.JSONEncoder):
Expand Down
1 change: 0 additions & 1 deletion requirements/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ certifi
click~=8.1.7
docutils<0.18
msgspec~=0.18.6
orjson
Pygments
pymysql
rst2txt
Expand Down
2 changes: 0 additions & 2 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ jsonschema==4.17.3
# via -r requirements.in
msgspec==0.18.6
# via -r requirements.in
orjson==3.10.11
# via -r requirements.in
#psycopg2==2.9.9
# via -r requirements/vendor-wheels.txt
pycparser==2.21
Expand Down

0 comments on commit cf42a0e

Please sign in to comment.