Skip to content

Commit

Permalink
feat(GeoArrow): getBinaryGeometriesFromArrow enhancement (#2785)
Browse files Browse the repository at this point in the history
  • Loading branch information
lixun910 authored Nov 15, 2023
1 parent c56deab commit 992d90e
Show file tree
Hide file tree
Showing 11 changed files with 399 additions and 83 deletions.
1 change: 1 addition & 0 deletions modules/arrow/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"@loaders.gl/gis": "4.0.3",
"@loaders.gl/loader-utils": "4.0.3",
"@loaders.gl/schema": "4.0.3",
"@math.gl/polygon": "4.0.0",
"apache-arrow": "^13.0.0"
},
"gitHead": "c95a4ff72512668a93d9041ce8636bac09333fd5"
Expand Down
229 changes: 209 additions & 20 deletions modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,49 @@
// Copyright (c) vis.gl contributors

import * as arrow from 'apache-arrow';
import {earcut} from '@math.gl/polygon';
import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema';
import {GeoArrowEncoding} from '@loaders.gl/gis';
import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds';
import {TypedArray} from '@loaders.gl/loader-utils';

/**
 * Binary data derived from a geoarrow column; it can be used by e.g. the deck.gl GeoJsonLayer.
 * This is the shape returned by getBinaryGeometriesFromArrow.
 */
export type BinaryDataFromGeoArrow = {
/** Binary format geometries, an array of BinaryFeatureCollection (one entry is pushed per processed chunk) */
binaryGeometries: BinaryFeatures[];
/** Boundary of the binary geometries; starts as [Infinity, Infinity, -Infinity, -Infinity] and is updated from each chunk's coordinates */
bounds: [number, number, number, number];
/** Feature types of the binary geometries, derived from the geoarrow encoding of the column */
featureTypes: {polygon: boolean; point: boolean; line: boolean};
/** (Optional) mean centers of the binary geometries for e.g. polygon filtering; only present when the `meanCenter` option is truthy */
meanCenters?: number[][];
};

/**
 * Binary geometry content returned from getBinaryGeometriesFromChunk
 */
type BinaryGeometryContent = {
/** Array of feature indexes, one entry per vertex */
featureIds: Uint32Array;
/** Flat coordinate array of e.g. x, y or x, y, z */
flatCoordinateArray: Float64Array;
/** Dimension of each position (number of values per vertex) */
nDim: number;
/** Array of geometry offsets: the start index of each primitive geometry */
geomOffset: Int32Array;
/** Array of geometry indices: the start index of each geometry */
geometryIndicies: Uint16Array;
/** (Optional) indices of triangles returned from polygon tessellation (Polygon only) */
triangles?: Uint32Array;
/** (Optional) array of mean center of each geometry */
meanCenters?: Float64Array;
};

// binary geometry template, see deck.gl BinaryGeometry
/**
* binary geometry template, see deck.gl BinaryGeometry
*/
export const BINARY_GEOMETRY_TEMPLATE = {
globalFeatureIds: {value: new Uint32Array(0), size: 1},
positions: {value: new Float32Array(0), size: 2},
Expand All @@ -32,33 +53,40 @@ export const BINARY_GEOMETRY_TEMPLATE = {
featureIds: {value: new Uint32Array(0), size: 1}
};

/**
 * Options accepted by getBinaryGeometriesFromArrow
 */
export type BinaryGeometriesFromArrowOptions = {
/**
 * Option to specify which chunk to get binary geometries from, for progressive rendering.
 * When omitted, all chunks of the column are converted.
 * NOTE(review): getBinaryGeometriesFromArrow tests this value for truthiness, so a
 * chunkIndex of 0 currently behaves like "not set" and converts all chunks.
 */
chunkIndex?: number;
/** Option to also compute mean centers from the geometries, for polygon filtering */
meanCenter?: boolean;
};

/**
* get binary geometries from geoarrow column
*
* @param geoColumn the geoarrow column, e.g. arrowTable.getChildAt(geoColumnIndex)
* @param geoEncoding the geo encoding of the geoarrow column, e.g. getGeoArrowEncoding(arrowTable.schema, geoColumnName)
* @param options options for getting binary geometries {meanCenter: boolean}
* @returns BinaryDataFromGeoArrow
*/
export function getBinaryGeometriesFromArrow(
geoColumn: arrow.Vector,
geoEncoding: GeoArrowEncoding
geoEncoding: GeoArrowEncoding,
options?: BinaryGeometriesFromArrowOptions
): BinaryDataFromGeoArrow {
const featureTypes = {
polygon: geoEncoding === 'geoarrow.multipolygon' || geoEncoding === 'geoarrow.polygon',
point: geoEncoding === 'geoarrow.multipoint' || geoEncoding === 'geoarrow.point',
line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring'
};

const chunks = geoColumn.data;
const chunks = options?.chunkIndex ? [geoColumn.data[options?.chunkIndex]] : geoColumn.data;
let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
let globalFeatureIdOffset = 0;
const binaryGeometries: BinaryFeatures[] = [];

chunks.forEach((chunk) => {
const {featureIds, flatCoordinateArray, nDim, geomOffset} = getBinaryGeometriesFromChunk(
chunk,
geoEncoding
);
const {featureIds, flatCoordinateArray, nDim, geomOffset, triangles} =
getBinaryGeometriesFromChunk(chunk, geoEncoding);

const globalFeatureIds = new Uint32Array(featureIds.length);
for (let i = 0; i < featureIds.length; i++) {
Expand All @@ -79,7 +107,6 @@ export function getBinaryGeometriesFromArrow(

// TODO: check if chunks are sequentially accessed
globalFeatureIdOffset += chunk.length;

// NOTE: deck.gl defines the BinaryFeatures structure must have points, lines, polygons even if they are empty
binaryGeometries.push({
shape: 'binary-feature-collection',
Expand All @@ -99,22 +126,111 @@ export function getBinaryGeometriesFromArrow(
...BINARY_GEOMETRY_TEMPLATE,
...(featureTypes.polygon ? binaryContent : {}),
polygonIndices: {
// NOTE: polygonIndices and primitivePolygonIndices are not used together to render polygon (binary) with holes
// for GeoJsonLayer with binary geometries in deck.gl currently, so we pass geomOffset and triangles.
// use geomOffset as polygonIndices same as primitivePolygonIndices since we are using earcut to get triangule indices
value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
size: 1
},
primitivePolygonIndices: {
value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
size: 1
}
},
...(triangles ? {triangles: {value: triangles, size: 1}} : {})
}
});

bounds = updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds);
});

return {binaryGeometries, bounds, featureTypes};
return {
binaryGeometries,
bounds,
featureTypes,
...(options?.meanCenter
? {meanCenters: getMeanCentersFromBinaryGeometries(binaryGeometries)}
: {})
};
}

/**
 * Compute the mean center of every feature in a list of binary feature collections.
 *
 * For each collection only the first non-empty geometry kind is used, checked in the
 * order points, lines, polygons. Collections with no positions contribute nothing.
 *
 * @param binaryGeometries binary geometries from geoarrow column, an array of BinaryFeatureCollection
 * @returns mean centers of all features, concatenated in collection order
 */
export function getMeanCentersFromBinaryGeometries(binaryGeometries: BinaryFeatures[]): number[][] {
  const collectedCenters: number[][] = [];

  binaryGeometries.forEach((collection: BinaryFeatures) => {
    // Pick the geometry kind that actually carries coordinates in this collection.
    let kind: string | null = null;
    if (collection.points && collection.points.positions.value.length > 0) {
      kind = 'points';
    } else if (collection.lines && collection.lines.positions.value.length > 0) {
      kind = 'lines';
    } else if (collection.polygons && collection.polygons.positions.value.length > 0) {
      kind = 'polygons';
    }

    // Nothing to average in an empty collection.
    if (kind === null) {
      return;
    }
    const content = collection[kind];
    if (!content) {
      return;
    }

    const centers = getMeanCentersFromGeometry(
      content.featureIds.value,
      content.positions.value,
      content.positions.size,
      kind,
      // only polygons carry ring offsets; undefined for the other kinds
      content.primitivePolygonIndices?.value
    );
    // Append one by one (rather than spreading) so very large results cannot overflow the call stack.
    for (const center of centers) {
      collectedCenters.push(center);
    }
  });

  return collectedCenters;
}

/**
 * Get mean centers from raw coordinates and feature ids.
 *
 * Consecutive vertices that share the same feature id are treated as one feature; the
 * first two components (x, y) of their positions are averaged. Any further components
 * (e.g. z) are ignored.
 *
 * @param featureIds Array of feature ids, one entry per vertex
 * @param flatCoordinateArray Array of vertex positions, e.g. x, y or x, y, z
 * @param nDim number of dimensions per position
 * @param geometryType geometry kind; ring-start vertices are skipped only when this is 'polygons'
 * @param primitivePolygonIndices (optional) vertex indices at which each polygon ring starts
 * @returns mean center [x, y] of each feature, in order of appearance
 */
function getMeanCentersFromGeometry(
  featureIds: TypedArray,
  flatCoordinateArray: TypedArray,
  nDim: number,
  geometryType: string,
  primitivePolygonIndices?: TypedArray
): number[][] {
  // Build the ring-start lookup once. Searching primitivePolygonIndices with indexOf for
  // every vertex made this function O(vertices * rings) on large polygon layers.
  const ringStartVertices = new Set<number>();
  if (geometryType === 'polygons' && primitivePolygonIndices) {
    for (let i = 0; i < primitivePolygonIndices.length; i++) {
      ringStartVertices.add(primitivePolygonIndices[i]);
    }
  }

  const meanCenters: number[][] = [];
  const coordinateCount = flatCoordinateArray.length;
  let coordinateIndex = 0;
  while (coordinateIndex < coordinateCount) {
    const featureId = featureIds[coordinateIndex / nDim];
    const center = [0, 0];
    let vertexCountInFeature = 0;
    // consume every consecutive vertex that belongs to the current feature
    while (coordinateIndex < coordinateCount && featureIds[coordinateIndex / nDim] === featureId) {
      // skip the first point of each ring since it is the same as the last point of that ring
      if (!ringStartVertices.has(coordinateIndex / nDim)) {
        center[0] += flatCoordinateArray[coordinateIndex];
        center[1] += flatCoordinateArray[coordinateIndex + 1];
        vertexCountInFeature++;
      }
      coordinateIndex += nDim;
    }
    // NOTE: if every vertex of a feature was skipped the count is 0 and the center is [NaN, NaN]
    center[0] /= vertexCountInFeature;
    center[1] /= vertexCountInFeature;
    meanCenters.push(center);
  }
  return meanCenters;
}

/**
Expand Down Expand Up @@ -142,6 +258,53 @@ function getBinaryGeometriesFromChunk(
}
}

/**
 * Tessellate polygons into triangle indices, so that deck.gl can skip performing the
 * costly triangulation on the main thread.
 *
 * @param polygonIndices Indices within positions of the start of each simple Polygon
 * @param primitivePolygonIndices Indices within positions of the start of each primitive Polygon/ring
 * @param flatCoordinateArray Array of x, y or x, y, z positions
 * @param nDim number of dimensions per position
 * @returns triangle vertex indices of all polygons, expressed as indices into the full positions array
 */
export function getTriangleIndices(
  polygonIndices: Uint16Array,
  primitivePolygonIndices: Int32Array,
  flatCoordinateArray: Float64Array,
  nDim: number
): Uint32Array {
  const allTriangles: number[] = [];
  // cursor into primitivePolygonIndices; it only moves forward across polygons
  let ringCursor = 0;

  for (let polygon = 0; polygon + 1 < polygonIndices.length; polygon++) {
    const firstVertex = polygonIndices[polygon];
    const nextFirstVertex = polygonIndices[polygon + 1];

    // every ring of this polygon except the first one is a hole for earcut,
    // expressed relative to the start of the polygon
    const holes: number[] = [];
    for (; primitivePolygonIndices[ringCursor] < nextFirstVertex; ringCursor++) {
      const ringStart = primitivePolygonIndices[ringCursor];
      if (ringStart > firstVertex) {
        holes.push(ringStart - firstVertex);
      }
    }

    // triangulate only this polygon's slice of the coordinates
    const localTriangles = earcut(
      flatCoordinateArray.subarray(firstVertex * nDim, nextFirstVertex * nDim),
      holes.length > 0 ? holes : undefined,
      nDim
    );

    // earcut indices are local to the slice; shift them back to global vertex indices
    for (const localIndex of localTriangles) {
      allTriangles.push(localIndex + firstVertex);
    }
  }

  return new Uint32Array(allTriangles);
}

/**
* get binary polygons from geoarrow polygon column
* @param chunk one chunk of geoarrow polygon column
Expand Down Expand Up @@ -178,12 +341,14 @@ function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): Bin
}
}

const triangles = getTriangleIndices(geometryIndicies, geomOffset, flatCoordinateArray, nDim);
return {
featureIds,
flatCoordinateArray,
nDim,
geomOffset,
geometryIndicies
geometryIndicies,
triangles
};
}

Expand All @@ -209,11 +374,23 @@ function getBinaryLinesFromChunk(chunk: arrow.Data, geoEncoding: string): Binary

const numOfVertices = flatCoordinateArray.length / nDim;
const featureIds = new Uint32Array(numOfVertices);
for (let i = 0; i < chunk.length; i++) {
const startIdx = geomOffset[i];
const endIdx = geomOffset[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;

if (isMultiLineString) {
const partData = chunk.valueOffsets;
for (let i = 0; i < partData.length - 1; i++) {
const startIdx = geomOffset[partData[i]];
const endIdx = geomOffset[partData[i + 1]];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
} else {
for (let i = 0; i < chunk.length; i++) {
const startIdx = geomOffset[i];
const endIdx = geomOffset[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
}

Expand Down Expand Up @@ -248,8 +425,20 @@ function getBinaryPointsFromChunk(chunk: arrow.Data, geoEncoding: string): Binar

const numOfVertices = flatCoordinateArray.length / nDim;
const featureIds = new Uint32Array(numOfVertices);
for (let i = 0; i < chunk.length; i++) {
featureIds[i] = i;

if (isMultiPoint) {
const partData = chunk.valueOffsets;
for (let i = 0; i < partData.length - 1; i++) {
const startIdx = partData[i];
const endIdx = partData[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
} else {
for (let i = 0; i < chunk.length; i++) {
featureIds[i] = i;
}
}

return {
Expand Down
9 changes: 7 additions & 2 deletions modules/arrow/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,15 @@ export type {GeoArrowEncoding} from '@loaders.gl/gis';
// getGeometryColumnsFromArrowTable,
// getGeoArrowEncoding

export type {BinaryDataFromGeoArrow} from './geoarrow/convert-geoarrow-to-binary-geometry';
export type {
BinaryDataFromGeoArrow,
BinaryGeometriesFromArrowOptions
} from './geoarrow/convert-geoarrow-to-binary-geometry';
export {
BINARY_GEOMETRY_TEMPLATE,
getBinaryGeometriesFromArrow
getBinaryGeometriesFromArrow,
getTriangleIndices,
getMeanCentersFromBinaryGeometries
} from './geoarrow/convert-geoarrow-to-binary-geometry';

export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson';
Expand Down
Binary file modified modules/arrow/test/data/line.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/multiline.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/multipoint.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/point.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/polygon.arrow
Binary file not shown.
Loading

0 comments on commit 992d90e

Please sign in to comment.