Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(GeoArrow): getBinaryGeometriesFromArrow enhancement #2785

Merged
merged 11 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/arrow/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"@loaders.gl/gis": "4.0.3",
"@loaders.gl/loader-utils": "4.0.3",
"@loaders.gl/schema": "4.0.3",
"@math.gl/polygon": "4.0.0",
"apache-arrow": "^13.0.0"
},
"gitHead": "c95a4ff72512668a93d9041ce8636bac09333fd5"
Expand Down
229 changes: 209 additions & 20 deletions modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,49 @@
// Copyright (c) vis.gl contributors

import * as arrow from 'apache-arrow';
import {earcut} from '@math.gl/polygon';
import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema';
import {GeoArrowEncoding} from '@loaders.gl/gis';
import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds';
import {TypedArray} from '@loaders.gl/loader-utils';

/**
* Binary data from geoarrow column and can be used by e.g. deck.gl GeojsonLayer
*/
export type BinaryDataFromGeoArrow = {
/** Binary format geometries, an array of BinaryFeatureCollection */
binaryGeometries: BinaryFeatures[];
/** Boundary of the binary geometries */
bounds: [number, number, number, number];
/** Feature types of the binary geometries */
featureTypes: {polygon: boolean; point: boolean; line: boolean};
/** (Optional) mean centers of the binary geometries for e.g. polygon filtering */
meanCenters?: number[][];
lixun910 marked this conversation as resolved.
Show resolved Hide resolved
};

/**
* Binary geometry content returned from getBinaryGeometriesFromChunk
*/
type BinaryGeometryContent = {
lixun910 marked this conversation as resolved.
Show resolved Hide resolved
// Array of Point feature indexes by vertex
featureIds: Uint32Array;
/** Flat coordinate array of e.g. x, y or x,y,z */
flatCoordinateArray: Float64Array;
/** Dimension of each position (number of values per vertex) */
nDim: number;
/** Array of geometry offsets: the start index of primitive geometry */
geomOffset: Int32Array;
/** Array of geometry indices: the start index of each geometry */
geometryIndicies: Uint16Array;
/** (Optional) indices of triangles returned from polygon tessellation (Polygon only) */
triangles?: Uint32Array;
lixun910 marked this conversation as resolved.
Show resolved Hide resolved
/** (Optional) array of mean center of each geometry */
meanCenters?: Float64Array;
};

// binary geometry template, see deck.gl BinaryGeometry
/**
* binary geometry template, see deck.gl BinaryGeometry
*/
export const BINARY_GEOMETRY_TEMPLATE = {
globalFeatureIds: {value: new Uint32Array(0), size: 1},
positions: {value: new Float32Array(0), size: 2},
Expand All @@ -32,33 +53,40 @@ export const BINARY_GEOMETRY_TEMPLATE = {
featureIds: {value: new Uint32Array(0), size: 1}
};

/**
 * Options accepted by getBinaryGeometriesFromArrow
 */
export type BinaryGeometriesFromArrowOptions = {
/**
 * Index of the single chunk of the geoarrow column to convert (e.g. for progressive rendering).
 * When omitted, every chunk of the column is converted.
 * NOTE(review): getBinaryGeometriesFromArrow tests this value for truthiness, so passing 0
 * currently behaves like "omitted" and converts all chunks - confirm whether that is intended.
 */
chunkIndex?: number;
/**
 * When truthy, the result additionally carries `meanCenters`, computed from the generated
 * binary geometries via getMeanCentersFromBinaryGeometries (e.g. for polygon filtering).
 */
meanCenter?: boolean;
};

/**
* get binary geometries from geoarrow column
*
* @param geoColumn the geoarrow column, e.g. arrowTable.getChildAt(geoColumnIndex)
* @param geoEncoding the geo encoding of the geoarrow column, e.g. getGeoArrowEncoding(arrowTable.schema, geoColumnName)
* @param options options for getting binary geometries {meanCenter: boolean}
* @returns BinaryDataFromGeoArrow
*/
export function getBinaryGeometriesFromArrow(
geoColumn: arrow.Vector,
geoEncoding: GeoArrowEncoding
geoEncoding: GeoArrowEncoding,
options?: BinaryGeometriesFromArrowOptions
): BinaryDataFromGeoArrow {
const featureTypes = {
polygon: geoEncoding === 'geoarrow.multipolygon' || geoEncoding === 'geoarrow.polygon',
point: geoEncoding === 'geoarrow.multipoint' || geoEncoding === 'geoarrow.point',
line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring'
};

const chunks = geoColumn.data;
const chunks = options?.chunkIndex ? [geoColumn.data[options?.chunkIndex]] : geoColumn.data;
let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
let globalFeatureIdOffset = 0;
const binaryGeometries: BinaryFeatures[] = [];

chunks.forEach((chunk) => {
const {featureIds, flatCoordinateArray, nDim, geomOffset} = getBinaryGeometriesFromChunk(
chunk,
geoEncoding
);
const {featureIds, flatCoordinateArray, nDim, geomOffset, triangles} =
getBinaryGeometriesFromChunk(chunk, geoEncoding);

const globalFeatureIds = new Uint32Array(featureIds.length);
for (let i = 0; i < featureIds.length; i++) {
Expand All @@ -79,7 +107,6 @@ export function getBinaryGeometriesFromArrow(

// TODO: check if chunks are sequentially accessed
globalFeatureIdOffset += chunk.length;

// NOTE: deck.gl defines the BinaryFeatures structure must have points, lines, polygons even if they are empty
binaryGeometries.push({
shape: 'binary-feature-collection',
Expand All @@ -99,22 +126,111 @@ export function getBinaryGeometriesFromArrow(
...BINARY_GEOMETRY_TEMPLATE,
...(featureTypes.polygon ? binaryContent : {}),
polygonIndices: {
// NOTE: polygonIndices and primitivePolygonIndices are not used together to render polygon (binary) with holes
// for GeoJsonLayer with binary geometries in deck.gl currently, so we pass geomOffset and triangles.
// use geomOffset as polygonIndices same as primitivePolygonIndices since we are using earcut to get triangle indices
value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
size: 1
},
primitivePolygonIndices: {
value: featureTypes.polygon ? geomOffset : new Uint16Array(0),
size: 1
}
},
...(triangles ? {triangles: {value: triangles, size: 1}} : {})
}
});

bounds = updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds);
});

return {binaryGeometries, bounds, featureTypes};
return {
binaryGeometries,
bounds,
featureTypes,
...(options?.meanCenter
? {meanCenters: getMeanCentersFromBinaryGeometries(binaryGeometries)}
: {})
};
}

/**
 * Compute the mean center of every feature found in a list of binary feature collections.
 *
 * For each collection only ONE geometry kind is considered: the first of `points`, `lines`,
 * `polygons` (checked in that order) whose `positions` array is non-empty. Collections with no
 * non-empty positions contribute nothing.
 *
 * @param binaryGeometries binary geometries from geoarrow column, an array of BinaryFeatureCollection
 * @returns mean centers ([x, y]) of all features, concatenated in collection order
 */
export function getMeanCentersFromBinaryGeometries(binaryGeometries: BinaryFeatures[]): number[][] {
  const globalMeanCenters: number[][] = [];

  for (const binaryGeometry of binaryGeometries) {
    // Pick the first geometry kind that actually holds coordinates
    let binaryGeometryType: string | null = null;
    if (binaryGeometry.points && binaryGeometry.points.positions.value.length > 0) {
      binaryGeometryType = 'points';
    } else if (binaryGeometry.lines && binaryGeometry.lines.positions.value.length > 0) {
      binaryGeometryType = 'lines';
    } else if (binaryGeometry.polygons && binaryGeometry.polygons.positions.value.length > 0) {
      binaryGeometryType = 'polygons';
    }

    if (binaryGeometryType === null) {
      continue;
    }

    const binaryContent = binaryGeometry[binaryGeometryType];
    if (!binaryContent) {
      continue;
    }

    const {featureIds, positions} = binaryContent;
    // primitivePolygonIndices is only present on polygon content; undefined otherwise
    const ringStartIndices = binaryContent.primitivePolygonIndices?.value;

    const centersOfCollection = getMeanCentersFromGeometry(
      featureIds.value,
      positions.value,
      positions.size,
      binaryGeometryType,
      ringStartIndices
    );
    for (const center of centersOfCollection) {
      globalMeanCenters.push(center);
    }
  }

  return globalMeanCenters;
}

/**
 * Get mean centers from raw coordinates and feature ids.
 *
 * Consecutive vertices sharing the same feature id are treated as one feature; the mean of
 * their x/y values (the first two components of each position) becomes that feature's center.
 * For polygons, every vertex whose index appears in `primitivePolygonIndices` is left out of
 * the mean, because the first point of a ring duplicates the ring's last point.
 *
 * @param featureIds Array of feature ids indexed by vertex
 * @param flatCoordinateArray Flat array of vertex positions, e.g. x, y or x, y, z
 * @param nDim number of dimensions per position
 * @param geometryType kind of geometry; ring-start skipping only applies to 'polygons'
 * @param primitivePolygonIndices (polygons only) vertex indices at which each ring starts
 * @returns mean center [x, y] of each feature, in order of appearance. A feature whose
 *   vertices are all skipped yields [NaN, NaN] (division by zero).
 */
function getMeanCentersFromGeometry(
  featureIds: TypedArray,
  flatCoordinateArray: TypedArray,
  nDim: number,
  geometryType: string,
  primitivePolygonIndices?: TypedArray
): number[][] {
  const meanCenters: number[][] = [];
  const vertexCount = flatCoordinateArray.length;

  // Build the ring-start lookup once: a linear indexOf() scan per vertex would make the
  // whole pass O(vertices * rings) on large polygon layers.
  let ringStarts: Set<number> | null = null;
  if (geometryType === 'polygons' && primitivePolygonIndices) {
    ringStarts = new Set<number>();
    for (let i = 0; i < primitivePolygonIndices.length; i++) {
      ringStarts.add(primitivePolygonIndices[i]);
    }
  }

  // vertexIndex walks the flat coordinate array in steps of nDim
  let vertexIndex = 0;
  while (vertexIndex < vertexCount) {
    const featureId = featureIds[vertexIndex / nDim];
    const center = [0, 0];
    let vertexCountInFeature = 0;
    while (vertexIndex < vertexCount && featureIds[vertexIndex / nDim] === featureId) {
      // skip the first point since it is the same as the last point in each ring for polygons
      const isRingStart = ringStarts !== null && ringStarts.has(vertexIndex / nDim);
      if (!isRingStart) {
        center[0] += flatCoordinateArray[vertexIndex];
        center[1] += flatCoordinateArray[vertexIndex + 1];
        vertexCountInFeature++;
      }
      vertexIndex += nDim;
    }
    center[0] /= vertexCountInFeature;
    center[1] /= vertexCountInFeature;
    meanCenters.push(center);
  }
  return meanCenters;
}

/**
Expand Down Expand Up @@ -142,6 +258,53 @@ function getBinaryGeometriesFromChunk(
}
}

/**
 * Tessellate polygons with earcut and return the triangle indices, which allows deck.gl to
 * skip performing costly triangulation on the main thread.
 *
 * @param polygonIndices Indices within positions of the start of each simple Polygon
 *   (consecutive entries delimit one polygon)
 * @param primitivePolygonIndices Indices within positions of the start of each primitive Polygon/ring
 * @param flatCoordinateArray Array of x, y or x, y, z positions
 * @param nDim number of dimensions per position
 * @returns triangle vertex indices, expressed relative to the whole positions array
 */
export function getTriangleIndices(
  polygonIndices: Uint16Array,
  primitivePolygonIndices: Int32Array,
  flatCoordinateArray: Float64Array,
  nDim: number
): Uint32Array {
  const allTriangles: number[] = [];
  // Cursor into primitivePolygonIndices; it only moves forward across polygons
  let ringCursor = 0;
  const polygonCount = polygonIndices.length - 1;

  for (let p = 0; p < polygonCount; p++) {
    const firstVertex = polygonIndices[p];
    const nextPolygonVertex = polygonIndices[p + 1];

    // Ring starts strictly inside (firstVertex, nextPolygonVertex) are holes; earcut expects
    // them relative to the start of the polygon's own coordinate slice
    const holes: number[] = [];
    for (; primitivePolygonIndices[ringCursor] < nextPolygonVertex; ringCursor++) {
      const ringStart = primitivePolygonIndices[ringCursor];
      if (ringStart > firstVertex) {
        holes.push(ringStart - firstVertex);
      }
    }

    // View (not copy) of this polygon's coordinates
    const polygonCoords = flatCoordinateArray.subarray(
      firstVertex * nDim,
      nextPolygonVertex * nDim
    );
    const localTriangles = earcut(polygonCoords, holes.length > 0 ? holes : undefined, nDim);

    // Shift polygon-local indices back into the global vertex numbering
    for (const localIndex of localTriangles) {
      allTriangles.push(localIndex + firstVertex);
    }
  }

  // convert triangles to Uint32Array
  return Uint32Array.from(allTriangles);
}

/**
* get binary polygons from geoarrow polygon column
* @param chunk one chunk of geoarrow polygon column
Expand Down Expand Up @@ -178,12 +341,14 @@ function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): Bin
}
}

const triangles = getTriangleIndices(geometryIndicies, geomOffset, flatCoordinateArray, nDim);
return {
featureIds,
flatCoordinateArray,
nDim,
geomOffset,
geometryIndicies
geometryIndicies,
triangles
};
}

Expand All @@ -209,11 +374,23 @@ function getBinaryLinesFromChunk(chunk: arrow.Data, geoEncoding: string): Binary

const numOfVertices = flatCoordinateArray.length / nDim;
const featureIds = new Uint32Array(numOfVertices);
for (let i = 0; i < chunk.length; i++) {
const startIdx = geomOffset[i];
const endIdx = geomOffset[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;

if (isMultiLineString) {
const partData = chunk.valueOffsets;
for (let i = 0; i < partData.length - 1; i++) {
const startIdx = geomOffset[partData[i]];
const endIdx = geomOffset[partData[i + 1]];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
} else {
for (let i = 0; i < chunk.length; i++) {
const startIdx = geomOffset[i];
const endIdx = geomOffset[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
}

Expand Down Expand Up @@ -248,8 +425,20 @@ function getBinaryPointsFromChunk(chunk: arrow.Data, geoEncoding: string): Binar

const numOfVertices = flatCoordinateArray.length / nDim;
const featureIds = new Uint32Array(numOfVertices);
for (let i = 0; i < chunk.length; i++) {
featureIds[i] = i;

if (isMultiPoint) {
const partData = chunk.valueOffsets;
for (let i = 0; i < partData.length - 1; i++) {
const startIdx = partData[i];
const endIdx = partData[i + 1];
for (let j = startIdx; j < endIdx; j++) {
featureIds[j] = i;
}
}
} else {
for (let i = 0; i < chunk.length; i++) {
featureIds[i] = i;
}
}

return {
Expand Down
9 changes: 7 additions & 2 deletions modules/arrow/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,15 @@ export type {GeoArrowEncoding} from '@loaders.gl/gis';
// getGeometryColumnsFromArrowTable,
// getGeoArrowEncoding

export type {BinaryDataFromGeoArrow} from './geoarrow/convert-geoarrow-to-binary-geometry';
export type {
BinaryDataFromGeoArrow,
BinaryGeometriesFromArrowOptions
} from './geoarrow/convert-geoarrow-to-binary-geometry';
export {
BINARY_GEOMETRY_TEMPLATE,
getBinaryGeometriesFromArrow
getBinaryGeometriesFromArrow,
getTriangleIndices,
getMeanCentersFromBinaryGeometries
} from './geoarrow/convert-geoarrow-to-binary-geometry';

export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson';
Expand Down
Binary file modified modules/arrow/test/data/line.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/multiline.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/multipoint.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/point.arrow
Binary file not shown.
Binary file modified modules/arrow/test/data/polygon.arrow
Binary file not shown.
Loading
Loading