Skip to content

Commit

Permalink
Merge pull request #627 from peterstace/separate_twkb_unmarshal_funcs
Browse files Browse the repository at this point in the history
Refactor TWKB unmarshal interface
  • Loading branch information
peterstace authored Jun 26, 2024
2 parents a2b0677 + 85153f5 commit 2d1b48b
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 182 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@
`GEOSCoverageSimplifyVW_r` functions (exposed as `CoverageIsValid` and
`CoverageSimplifyVW`).

- **Breaking change:** Overhauls the TWKB unmarshalling related functions in a
breaking way:

- Removes the `UnmarshalTWKBWithHeaders` and
`UnmarshalTWKBBoundingBoxHeader` functions.

- Modifies the `UnmarshalTWKBEnvelope` function to return an
`ExtendedEnvelope` (which is a regular XY `Envelope` with the addition of
Z and M ranges).

- Adds `UnmarshalTWKBIDList` and `UnmarshalTWKBSize` functions, which return
the (optional) ID list and (optional) size of a TWKB without fully
unmarshalling it.

## v0.50.0

2024-05-07
Expand Down
28 changes: 28 additions & 0 deletions geom/interval.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package geom

// Interval is a closed range of float64 values: both of its endpoints belong
// to the range. Most intervals have two distinct endpoints (they are
// non-degenerate), but two degenerate forms also exist: the interval holding
// exactly one element (its lower and upper bounds coincide) and the interval
// holding no elements at all. The zero value of Interval is the latter, i.e.
// the empty interval.
type Interval struct {
	min      float64
	max      float64
	nonEmpty bool
}

// NewInterval builds a non-empty Interval spanning the two supplied bounds.
// The bounds may be given in either order, and may be equal to each other.
func NewInterval(boundA, boundB float64) Interval {
	lo, hi := boundA, boundB
	if lo > hi {
		// Bounds were supplied in descending order, so flip them.
		lo, hi = hi, lo
	}
	return Interval{min: lo, max: hi, nonEmpty: true}
}

// MinMax reports the lower and upper bounds of the interval, along with a
// flag that is true when the interval is non-empty. When the flag is false,
// the two bounds carry no meaning and should be ignored.
func (i Interval) MinMax() (float64, float64, bool) {
	lo, hi := i.min, i.max
	return lo, hi, i.nonEmpty
}
186 changes: 93 additions & 93 deletions geom/twkb_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,83 +28,66 @@ func UnmarshalTWKB(twkb []byte, nv ...NoValidate) (Geometry, error) {
return g, nil
}

// UnmarshalTWKBWithHeaders parses a Tiny Well Known Binary (TWKB),
// returning the corresponding Geometry, and any bounding box and any IDs
// listed in its header information.
//
// If there is a bounding box header, the bbox slice will be populated with
// two points, a minimum then a maximum. Otherwise, the slice is empty.
//
// If there is an ID list header, the ids slice will be populated with the
// IDs from that header. Otherwise, the slice is empty.
//
// NoValidate{} can be passed in to disable geometry constraint validation.
func UnmarshalTWKBWithHeaders(twkb []byte, nv ...NoValidate) (g Geometry, bbox []Point, ids []int64, err error) {
// UnmarshalTWKBIDList parses just the ID list of a Tiny Well Known Binary
// (TWKB). The bool return flag indicates if the list is present or not (ID
// lists are not mandatory in TWKBs).
func UnmarshalTWKBIDList(twkb []byte) ([]int64, bool, error) {
p := newTWKBParser(twkb)
g, err = p.nextGeometry()
if err != nil {
return Geometry{}, nil, nil, p.annotateError(err)
if err := p.parseHeaders(); err != nil {
return nil, false, p.annotateError(err)
}
if len(nv) == 0 {
if err := g.Validate(); err != nil {
return Geometry{}, nil, nil, err
}
if !p.hasIDs {
return nil, false, nil
}
if p.hasBBox {
bbox, err = UnmarshalTWKBBoundingBoxHeader(twkb)
if err != nil {
return Geometry{}, nil, nil, p.annotateError(err)
}

// When parsing the header, we already validated that if the ID list flag
// is set then we have a collection type. Each collection type starts with
// a uvarint indicating the number of elements, followed by the ID list.
numItems, err := p.parseUnsignedVarint()
if err != nil {
return nil, false, p.annotateError(fmt.Errorf("ID list size uvarint malformed: %w", err))
}

if err := p.parseIDList(int(numItems)); err != nil {
return nil, false, p.annotateError(err)
}
return g, bbox, p.idList, p.annotateError(err)
return p.idList, true, nil
}

// UnmarshalTWKBBoundingBoxHeader checks if the bounding box header
// exists in the Tiny Well Known Binary (TWKB) and if it exists
// returns its minimum and maximum points in the bbox slice
// (otherwise the slice is empty).
//
// Because the results are returned as Points, the X, Y, Z, and M values
// can all be returned. Check the point type to see if the Z and M values
// are valid.
//
// The function returns immediately after parsing the headers.
// Any remaining geometry is not parsed by this function.
func UnmarshalTWKBBoundingBoxHeader(twkb []byte) (bbox []Point, err error) {
// ExtendedEnvelope is an Envelope that may also contain Z and M ranges. It is
// the value returned by UnmarshalTWKBEnvelope, which fills it in from the
// bounding box header of a TWKB.
type ExtendedEnvelope struct {
// XYEnvelope is the regular two-dimensional envelope, holding the X and Y
// extents.
XYEnvelope Envelope
// ZRange is the range of Z values. It is left as the zero value of
// Interval (the empty interval) when no Z range is available.
ZRange Interval
// MRange is the range of M values. It is left as the zero value of
// Interval (the empty interval) when no M range is available.
MRange Interval
}

// UnmarshalTWKBEnvelope parses just the envelope from the header of a Tiny
// Well Known Binary (TWKB). The bool return flag indicates if the envelope is
// present or not (envelopes are not mandatory in TWKBs). The envelope may
// contain Z and M values if they are present in the TWKB.
func UnmarshalTWKBEnvelope(twkb []byte) (ExtendedEnvelope, bool, error) {
p := newTWKBParser(twkb)
bbox, err = p.parseBBoxHeader()
return bbox, p.annotateError(err)
extEnv, err := p.parseBBoxHeader()
if err != nil {
return ExtendedEnvelope{}, false, p.annotateError(err)
}
return extEnv, p.hasBBox, nil
}

// UnmarshalTWKBEnvelope checks if the bounding box header exists
// in the Tiny Well Known Binary (TWKB) and returns an Envelope
// that is non-empty iff the header exists (thus the envelope
// will not be computed from the data, only from a header).
//
// Note that due to the definition of Envelope, only the X and Y
// coordinates will be returned this way, whereas any Z and M
// coordinates will be silently ignored by this function.
//
// The function returns immediately after parsing the headers.
// Any remaining geometry is not parsed by this function.
func UnmarshalTWKBEnvelope(twkb []byte) (Envelope, error) {
// UnmarshalTWKBSize parses the size (in bytes) of the Tiny Well Known Binary
// (TWKB) from its header. This can be used for quickly scanning through a
// sequence of concatenated TWKBs, for reading just bounding boxes, or to
// distribute full parsing to different goroutines. The size is the total size
// of the TWKB (from its start). The bool return flag indicates if the size is
// present or not (sizes are not mandatory in TWKBs).
func UnmarshalTWKBSize(twkb []byte) (int, bool, error) {
p := newTWKBParser(twkb)
if err := p.parseHeaders(); err != nil {
return Envelope{}, p.annotateError(err)
return 0, false, p.annotateError(err)
}
if !p.hasBBox {
return Envelope{}, nil
if !p.hasSize {
return 0, false, nil
}
return NewEnvelope(
XY{
float64(p.bbox[0]) / p.scalings[0],
float64(p.bbox[2]) / p.scalings[1],
},
XY{
float64(p.bbox[0]+p.bbox[1]) / p.scalings[0],
float64(p.bbox[2]+p.bbox[3]) / p.scalings[1],
},
), nil
return p.size, p.hasSize, nil
}

// twkbParser holds all state information for interpreting TWKB buffers
Expand Down Expand Up @@ -132,6 +115,7 @@ type twkbParser struct {

bbox []int64
idList []int64
size int

refpoint [twkbMaxDimensions]int64
}
Expand Down Expand Up @@ -265,6 +249,13 @@ func (p *twkbParser) parseMetadataHeader() error {
p.hasIDs = (metaheader & twkbHasIDs) != 0
p.hasExt = (metaheader & twkbHasExtPrec) != 0
p.isEmpty = (metaheader & twkbIsEmpty) != 0

switch p.kind {
case twkbTypePoint, twkbTypeLineString, twkbTypePolygon:
if p.hasIDs {
return errors.New("ID list is not allowed for Point, LineString, or Polygon")
}
}
return nil
}

Expand Down Expand Up @@ -310,7 +301,8 @@ func (p *twkbParser) parseSize() error {
if err != nil {
return fmt.Errorf("size varint malformed: %w", err)
}
if uint64(p.pos)+bytesRemaining > uint64(len(p.twkb)) {
p.size = p.pos + int(bytesRemaining)
if p.size > len(p.twkb) {
return fmt.Errorf("remaining input (%d bytes) smaller than size varint indicates (%d bytes)", len(p.twkb)-p.pos, bytesRemaining)
}
return nil
Expand All @@ -334,12 +326,12 @@ func (p *twkbParser) parseBBox() error {
return nil
}

func (p *twkbParser) parseBBoxHeader() (bbox []Point, err error) {
if err = p.parseHeaders(); err != nil {
return nil, err
func (p *twkbParser) parseBBoxHeader() (ExtendedEnvelope, error) {
if err := p.parseHeaders(); err != nil {
return ExtendedEnvelope{}, err
}
if !p.hasBBox {
return nil, nil
return ExtendedEnvelope{}, nil
}
switch {
case p.hasZ && p.hasM:
Expand All @@ -353,9 +345,11 @@ func (p *twkbParser) parseBBoxHeader() (bbox []Point, err error) {
maxZ := float64(p.bbox[4]+p.bbox[5]) / p.scalings[2]
maxM := float64(p.bbox[6]+p.bbox[7]) / p.scalings[3]

minPt := NewPoint(Coordinates{XY: XY{minX, minY}, Z: minZ, M: minM, Type: p.ctype})
maxPt := NewPoint(Coordinates{XY: XY{maxX, maxY}, Z: maxZ, M: maxM, Type: p.ctype})
bbox = []Point{minPt, maxPt}
return ExtendedEnvelope{
XYEnvelope: NewEnvelope(XY{minX, minY}, XY{maxX, maxY}),
ZRange: NewInterval(minZ, maxZ),
MRange: NewInterval(minM, maxM),
}, nil
case p.hasZ:
minX := float64(p.bbox[0]) / p.scalings[0]
minY := float64(p.bbox[2]) / p.scalings[1]
Expand All @@ -365,9 +359,10 @@ func (p *twkbParser) parseBBoxHeader() (bbox []Point, err error) {
maxY := float64(p.bbox[2]+p.bbox[3]) / p.scalings[1]
maxZ := float64(p.bbox[4]+p.bbox[5]) / p.scalings[2]

minPt := NewPoint(Coordinates{XY: XY{minX, minY}, Z: minZ, Type: p.ctype})
maxPt := NewPoint(Coordinates{XY: XY{maxX, maxY}, Z: maxZ, Type: p.ctype})
bbox = []Point{minPt, maxPt}
return ExtendedEnvelope{
XYEnvelope: NewEnvelope(XY{minX, minY}, XY{maxX, maxY}),
ZRange: NewInterval(minZ, maxZ),
}, nil
case p.hasM:
minX := float64(p.bbox[0]) / p.scalings[0]
minY := float64(p.bbox[2]) / p.scalings[1]
Expand All @@ -377,21 +372,21 @@ func (p *twkbParser) parseBBoxHeader() (bbox []Point, err error) {
maxY := float64(p.bbox[2]+p.bbox[3]) / p.scalings[1]
maxM := float64(p.bbox[4]+p.bbox[5]) / p.scalings[2]

minPt := NewPoint(Coordinates{XY: XY{minX, minY}, M: minM, Type: p.ctype})
maxPt := NewPoint(Coordinates{XY: XY{maxX, maxY}, M: maxM, Type: p.ctype})
bbox = []Point{minPt, maxPt}
return ExtendedEnvelope{
XYEnvelope: NewEnvelope(XY{minX, minY}, XY{maxX, maxY}),
MRange: NewInterval(minM, maxM),
}, nil
default:
minX := float64(p.bbox[0]) / p.scalings[0]
minY := float64(p.bbox[2]) / p.scalings[1]

maxX := float64(p.bbox[0]+p.bbox[1]) / p.scalings[0]
maxY := float64(p.bbox[2]+p.bbox[3]) / p.scalings[1]

minPt := NewPoint(Coordinates{XY: XY{minX, minY}, Type: p.ctype})
maxPt := NewPoint(Coordinates{XY: XY{maxX, maxY}, Type: p.ctype})
bbox = []Point{minPt, maxPt}
return ExtendedEnvelope{
XYEnvelope: NewEnvelope(XY{minX, minY}, XY{maxX, maxY}),
}, nil
}
return bbox, nil
}

func (p *twkbParser) parsePoint() (Point, error) {
Expand Down Expand Up @@ -502,8 +497,10 @@ func (p *twkbParser) nextMultiPoint() (MultiPoint, error) {
if err != nil {
return MultiPoint{}, fmt.Errorf("num points varint malformed: %w", err)
}
if err := p.parseIDList(int(numPoints)); err != nil {
return MultiPoint{}, err
if p.hasIDs {
if err := p.parseIDList(int(numPoints)); err != nil {
return MultiPoint{}, err
}
}
var pts []Point
for i := 0; i < int(numPoints); i++ {
Expand All @@ -528,8 +525,10 @@ func (p *twkbParser) nextMultiLineString() (MultiLineString, error) {
if err != nil {
return MultiLineString{}, fmt.Errorf("num linestrings varint malformed: %w", err)
}
if err := p.parseIDList(int(numLineStrings)); err != nil {
return MultiLineString{}, err
if p.hasIDs {
if err := p.parseIDList(int(numLineStrings)); err != nil {
return MultiLineString{}, err
}
}
var lines []LineString
for i := 0; i < int(numLineStrings); i++ {
Expand All @@ -554,8 +553,10 @@ func (p *twkbParser) nextMultiPolygon() (MultiPolygon, error) {
if err != nil {
return MultiPolygon{}, fmt.Errorf("num polygons varint malformed: %w", err)
}
if err := p.parseIDList(int(numPolygons)); err != nil {
return MultiPolygon{}, err
if p.hasIDs {
if err := p.parseIDList(int(numPolygons)); err != nil {
return MultiPolygon{}, err
}
}
var polys []Polygon
for i := 0; i < int(numPolygons); i++ {
Expand All @@ -580,8 +581,10 @@ func (p *twkbParser) nextGeometryCollection() (GeometryCollection, error) {
if err != nil {
return GeometryCollection{}, fmt.Errorf("num polygons varint malformed: %w", err)
}
if err := p.parseIDList(int(numGeoms)); err != nil {
return GeometryCollection{}, err
if p.hasIDs {
if err := p.parseIDList(int(numGeoms)); err != nil {
return GeometryCollection{}, err
}
}
var geoms []Geometry
for i := 0; i < int(numGeoms); i++ {
Expand Down Expand Up @@ -632,9 +635,6 @@ func (p *twkbParser) parsePointArray(numPoints int) ([]float64, error) {
}

func (p *twkbParser) parseIDList(numIDs int) error {
if !p.hasIDs {
return nil
}
p.idList = make([]int64, numIDs)
for i := 0; i < numIDs; i++ {
id, err := p.parseSignedVarint()
Expand Down
Loading

0 comments on commit 2d1b48b

Please sign in to comment.