Optionally detect similar features during PK-less reimport. #336

Merged
merged 1 commit on Dec 18, 2020
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,10 @@ Please note that compatibility for 0.x releases (software or repositories) isn't

_When adding new entries to the changelog, please include issue/PR numbers wherever possible._

## 0.8.0 (UNRELEASED)

* Support for detecting features which have changed slightly during a re-import from a data source without a primary key, and re-importing them with the same primary key as last time so they show as edits rather than inserts. [#212](https://github.com/koordinates/sno/issues/212)

## 0.7.1

#### JSON syntax-highlighting fix
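The 0.8.0 entry above describes the behaviour only at a high level. As a rough conceptual sketch (not sno's actual PkGeneratingImportSource logic, which this diff does not show), the idea is: take the features that no longer match exactly, score new against old by how many field values agree, and reuse the old generated primary key for the best match so the change surfaces as an edit rather than a delete plus insert. The function and field names below are made up for illustration.

```python
# Illustrative sketch only -- not sno's implementation. Assumes features are plain
# dicts (as ImportSource.features documents) and that the auto-generated primary
# key column is called "auto_pk" purely for this example.
def reassign_similar_pks(unmatched_old, unmatched_new, limit=50, pk_field="auto_pk"):
    """Give each new feature the primary key of the most similar unmatched old feature.

    Only the first `limit` old features are considered, mirroring the
    --similarity-detection-limit cap added in this PR; limit=0 disables detection.
    """
    candidates = list(unmatched_old[:limit])
    for new_feature in unmatched_new:
        best, best_score = None, 0
        for old_feature in candidates:
            # Count fields (other than the generated PK) whose values are identical.
            score = sum(
                1
                for key, value in new_feature.items()
                if key != pk_field and old_feature.get(key) == value
            )
            if score > best_score:
                best, best_score = old_feature, score
        if best is not None:
            # Reuse the old PK so the re-import shows as an edit, not delete + insert.
            new_feature[pk_field] = best[pk_field]
            candidates.remove(best)
    return unmatched_new
```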
6 changes: 5 additions & 1 deletion sno/base_dataset.py
@@ -178,13 +178,17 @@ def geom_column_name(self):
def features(self):
"""
Yields a dict for every feature. Dicts contain key-value pairs for each feature property,
and geometries use sno.geometry.Geometry objects, as in the following example:
and geometries use sno.geometry.Geometry objects, as in the following example::

{
"fid": 123,
"geom": Geometry(b"..."),
"name": "..."
"last-modified": "..."
}

Each dict is guaranteed to iterate in the same order as the columns are ordered in the schema,
so that zip(schema.columns, feature.values()) matches each field with its column.
"""
for blob in self.feature_blobs():
yield self.get_feature(path=blob.name, data=memoryview(blob))
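The docstring above promises that each feature dict iterates in the same order as the schema's columns. A minimal usage sketch of that guarantee, assuming a `dataset` with `schema.columns` and `features()` as documented, and column objects exposing a `name` attribute (an assumption for this example):

```python
# Pair every value with its column, relying on the documented ordering guarantee.
for feature in dataset.features():
    for column, value in zip(dataset.schema.columns, feature.values()):
        print(f"{column.name} = {value!r}")
```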
2 changes: 1 addition & 1 deletion sno/fast_import.py
@@ -82,7 +82,7 @@ def fast_import_tables(
)

# Add primary keys if needed.
sources = PkGeneratingImportSource.wrap_if_needed(sources, repo)
sources = PkGeneratingImportSource.wrap_sources_if_needed(sources, repo)

cmd = [
"git",
6 changes: 5 additions & 1 deletion sno/import_source.py
@@ -130,13 +130,17 @@ def has_geometry(self):
def features(self):
"""
Yields a dict for every feature. Dicts contain key-value pairs for each feature property,
and geometries use sno.geometry.Geometry objects, as in the following example:
and geometries use sno.geometry.Geometry objects, as in the following example::

{
"fid": 123,
"geom": Geometry(b"..."),
"name": "..."
"last-modified": "..."
}

Each dict is guaranteed to iterate in the same order as the columns are ordered in the schema,
so that zip(schema.columns, feature.values()) matches each field with its column.
"""
raise NotImplementedError()

19 changes: 17 additions & 2 deletions sno/init.py
@@ -132,6 +132,17 @@ def _add_datasets_to_working_copy(repo, *datasets, replace_existing=False):
is_flag=True,
help="Replace existing dataset(s) of the same name.",
)
@click.option(
"--similarity-detection-limit",
hidden=True,
type=click.INT,
default=50,
help=(
"When replacing an existing dataset where primary keys are auto-generated: the maximum number of unmatched "
"features to search through for similar features, so that primary keys can be reassigned for features that "
"are similar but have had minor edits. Zero means that no similarity detection is performed. (Advanced users only)"
),
)
@click.option(
"--allow-empty",
is_flag=True,
@@ -167,6 +178,7 @@ def import_table(
tables,
table_info,
replace_existing,
similarity_detection_limit,
allow_empty,
max_delta_depth,
do_checkout,
@@ -247,8 +259,11 @@ def import_table(
# will result in a new schema object, and thus a new blob for every feature.
# Note that alignment works better if we add the generated-pk-column first (when needed),
# if one schema has this and the other lacks it they will be harder to align.
import_source = PkGeneratingImportSource.wrap_if_needed(
import_source, repo
import_source = PkGeneratingImportSource.wrap_source_if_needed(
import_source,
repo,
dest_path=dest_path,
similarity_detection_limit=similarity_detection_limit,
)
import_source.schema = existing_ds.schema.align_to_self(
import_source.schema
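For context, the new hidden option would be supplied when re-importing over an existing dataset. A hypothetical invocation via click's test runner; the `sno.cli` entry point, the `GPKG:` source spelling, the table name, and the `--replace-existing` flag spelling are assumptions, only `--similarity-detection-limit` comes from this diff:

```python
# Hypothetical invocation sketch; module path and most arguments are assumed.
from click.testing import CliRunner

from sno.cli import cli  # assumed CLI entry point

runner = CliRunner()
result = runner.invoke(
    cli,
    [
        "import",
        "GPKG:updated-data.gpkg",  # assumed data source without a primary key
        "mytable",  # assumed table name
        "--replace-existing",  # assumed flag spelling for replace_existing
        "--similarity-detection-limit", "100",  # option added in this PR (default 50)
    ],
)
print(result.output)
```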