Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support patches that create or delete datasets #246

Merged
merged 2 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
## 0.6.0 (UNRELEASED)

* `apply` and `import` no longer create empty commits unless you specify `--allow-empty` [#243](https://github.com/koordinates/sno/issues/243), [#245](https://github.com/koordinates/sno/issues/245)
* Patches that create or delete datasets are now supported in Datasets V2 [#239](https://github.com/koordinates/sno/issues/239)

## 0.5.0

Expand Down
35 changes: 17 additions & 18 deletions sno/apply.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from datetime import datetime
from enum import Enum, auto

import click

Expand All @@ -20,22 +21,22 @@


V1_NO_META_UPDATE = (
"Sorry, patches that make meta changes are not supported until Datasets V2 (Sno 0.5)\n"
"Sorry, patches that make meta changes are not supported until Datasets V2\n"
"Use `sno upgrade`"
)
# TODO: support this for V2.
NO_DATASET_CREATE_DELETE = (
"Sorry, patches that create or delete datasets are not yet supported."
V1_NO_DATASET_CREATE_DELETE = (
"Sorry, patches that create or delete datasets are not supported until Datasets V2\n"
"Use `sno upgrade`"
)
NO_COMMIT_NO_DATASET_CREATE_DELETE = (
"Sorry, patches that create or delete datasets cannot be applied with --no-commit"
)


class MetaChangeType:
CREATE_DATASET = "+"
DELETE_DATASET = "-"
META_UPDATE = "+/-"
class MetaChangeType(Enum):
    """Kinds of meta-level change a patch can make to a dataset."""

    # The patch introduces a dataset that doesn't exist yet.
    CREATE_DATASET = auto()
    # The patch removes an existing dataset entirely.
    DELETE_DATASET = auto()
    # The patch modifies meta items of an existing dataset.
    META_UPDATE = auto()


def _meta_change_type(ds_diff_dict):
Expand All @@ -59,19 +60,17 @@ def check_change_supported(repo_version, dataset, ds_path, meta_change_type, com
else:
desc = f"Patch contains meta changes for dataset '{ds_path}'"

# TODO - support creates and deletes for datasets V2.
if meta_change_type in (
MetaChangeType.CREATE_DATASET,
MetaChangeType.DELETE_DATASET,
):
raise NotYetImplemented(f"{desc}\n{NO_DATASET_CREATE_DELETE}")

if repo_version < 2 and meta_change_type == MetaChangeType.META_UPDATE:
raise NotYetImplemented(f"{desc}\n{V1_NO_META_UPDATE}")
if repo_version < 2 and meta_change_type is not None:
message = (
V1_NO_META_UPDATE
if meta_change_type == MetaChangeType.META_UPDATE
else V1_NO_DATASET_CREATE_DELETE
)
raise NotYetImplemented(f"{desc}\n{message}")

if dataset is None and meta_change_type != MetaChangeType.CREATE_DATASET:
raise NotFound(
f"Patch contains dataset '{ds_path}' which is not in this repository",
f"Patch contains changes for dataset '{ds_path}' which is not in this repository",
exit_code=NO_TABLE,
)
if dataset is not None and meta_change_type == MetaChangeType.CREATE_DATASET:
Expand Down
12 changes: 4 additions & 8 deletions sno/dataset2.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,6 @@ def encode_legend(self, legend):
"""
return self.full_path(self.LEGEND_PATH + legend.hexhash()), legend.dumps()

@property
@functools.lru_cache(maxsize=1)
def schema(self):
    """Load the current schema from this dataset."""
    raw_schema = self.get_data_at(self.SCHEMA_PATH)
    return Schema.loads(raw_schema)

@property
@functools.lru_cache(maxsize=1)
def crs_identifier(self):
Expand Down Expand Up @@ -401,9 +395,11 @@ def apply_meta_diff(self, meta_diff, tree_builder):
if not meta_diff:
return

conflicts = False
# Applying diffs works even if there is no tree yet created for the dataset,
# as is the case when the dataset is first being created right now.
meta_tree = self.meta_tree if self.tree is not None else ()

meta_tree = self.meta_tree
conflicts = False
olsen232 marked this conversation as resolved.
Show resolved Hide resolved
with tree_builder.cd(self.META_PATH):
for delta in meta_diff.values():
name = delta.key
Expand Down
48 changes: 8 additions & 40 deletions sno/fast_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ def fast_import_tables(
extra_cmd_args - any extra args for the git-fast-import command.
"""

head_tree = None if replace_existing == ReplaceExisting.ALL else get_head_tree(repo)
head_tree = (
None
if replace_existing == ReplaceExisting.ALL
else git_util.get_head_tree(repo)
)

if not head_tree:
# Starting from an effectively empty repo. Write the blobs needed for this repo version.
Expand Down Expand Up @@ -95,20 +99,16 @@ def fast_import_tables(
import_branch = f'refs/heads/{uuid.uuid4()}'

# may be None, if head is detached
orig_branch = get_head_branch(repo)
orig_branch = git_util.get_head_branch(repo)
header = generate_header(repo, sources, message, import_branch)
else:
import_branch = None
orig_commit = get_head_commit(repo)
orig_commit = git_util.get_head_commit(repo)

if not quiet:
click.echo("Starting git-fast-import...")

p = subprocess.Popen(
cmd,
cwd=repo.path,
stdin=subprocess.PIPE,
)
p = subprocess.Popen(cmd, cwd=repo.path, stdin=subprocess.PIPE,)
try:
if replace_existing != ReplaceExisting.ALL:
header += f"from {orig_commit.oid}\n"
Expand Down Expand Up @@ -222,38 +222,6 @@ def fast_import_tables(
repo.references.delete(import_branch)


def get_head_tree(repo):
    """Return the tree at the current repo HEAD, or None if no commit exists yet."""
    if not repo.is_empty:
        try:
            return repo.head.peel(pygit2.Tree)
        except pygit2.GitError:
            # Repo is non-empty, but the current HEAD has no commits (unborn branch).
            pass
    return None


def get_head_commit(repo):
    """Return the commit that HEAD is currently on, or None if no commit exists yet."""
    if not repo.is_empty:
        try:
            return repo.head.peel(pygit2.Commit)
        except pygit2.GitError:
            # Repo is non-empty, but the current HEAD has no commits (unborn branch).
            pass
    return None


def get_head_branch(repo):
    """
    Returns the branch that HEAD is currently on.
    If HEAD is detached, returns None
    """
    if repo.head_is_detached:
        return None
    if repo.is_empty:
        # No commits yet - fall back to the default branch name.
        return "refs/heads/master"
    return repo.head.name


def write_blobs_to_stream(stream, blobs):
for i, (blob_path, blob_data) in enumerate(blobs):
stream.write(
Expand Down
32 changes: 32 additions & 0 deletions sno/git_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,38 @@
from .timestamps import tz_offset_to_minutes


def get_head_tree(repo):
    """Returns the tree at the current repo HEAD."""
    if not repo.is_empty:
        try:
            return repo.head.peel(pygit2.Tree)
        except pygit2.GitError:
            # Repo is non-empty, but the current HEAD has no commits (unborn branch).
            pass
    return None


def get_head_commit(repo):
    """Returns the commit at the current repo HEAD."""
    if not repo.is_empty:
        try:
            return repo.head.peel(pygit2.Commit)
        except pygit2.GitError:
            # Repo is non-empty, but the current HEAD has no commits (unborn branch).
            pass
    return None


def get_head_branch(repo):
    """
    Returns the branch that HEAD is currently on.
    If HEAD is detached, returns None
    """
    if repo.head_is_detached:
        return None
    # The symbolic HEAD reference targets the current branch name.
    head_ref = repo.references["HEAD"]
    return head_ref.target


_GIT_VAR_OUTPUT_RE = re.compile(
r"^(?P<name>.*) <(?P<email>[^>]*)> (?P<time>\d+) (?P<offset>[+-]?\d+)$"
)
Expand Down
16 changes: 14 additions & 2 deletions sno/rich_tree_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import pygit2

from .repository_version import get_repo_version, extra_blobs_for_version

# Hash for git's empty tree.
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'


Expand All @@ -13,18 +16,27 @@ class RichTreeBuilder:
Conflicts are not detected.
"""

def __init__(self, repo, initial_root_tree):
def __init__(self, repo, initial_root_tree, auto_include_version_blobs=True):
"""
The repo and an initial root tree which will be updated.
All paths are specified relative to this tree - the root tree at a particular commit is a good choice.
olsen232 marked this conversation as resolved.
Show resolved Hide resolved
"""
self.repo = repo
self.root_tree = initial_root_tree
self.root_tree = (
initial_root_tree
if initial_root_tree is not None
else repo.get(EMPTY_TREE_ID)
)

self.root_dict = {}
self.cur_path = []
self.path_stack = []

if auto_include_version_blobs and initial_root_tree is None:
extra_blobs = extra_blobs_for_version(get_repo_version(repo))
for path, blob in extra_blobs:
self.insert(path, blob)

def _resolve_path(self, path):
"""
Resolve the given a path relative to the current path.
Expand Down
67 changes: 51 additions & 16 deletions sno/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def __iter__(self):

def iter_at(self, tree):
""" Iterate over available datasets in this repository using a specified Tree """
if tree is None:
return

to_examine = deque([("", tree)])

dataset_version = self.version
Expand Down Expand Up @@ -194,18 +197,33 @@ def working_copy(self):
wc.delete()
del self._working_copy

def create_tree_from_diff(self, diff):
def create_tree_from_diff(self, repo_diff):
"""
Given a diff, returns a new tree created by applying the diff to self.tree -
Doesn't create any commits or modify the working copy at all.
"""
tree_builder = RichTreeBuilder(self.repo, self.tree)
dataset_class = DatasetStructure.for_version(self.version)

for ds_path, ds_diff in repo_diff.items():
schema_delta = ds_diff.recursive_get(["meta", "schema.json"])
if schema_delta and self.version < 2:
# This should have been handled already, but just to be safe.
raise NotYetImplemented(
"Meta changes are not supported until datasets V2"
)

if schema_delta and schema_delta.type == "delete":
tree_builder.remove(ds_path)
continue

if schema_delta and schema_delta.type == "insert":
dataset = dataset_class(tree=None, path=ds_path)
else:
dataset = self[ds_path]

for ds in self.iter_at(self.tree):
ds_diff = diff.get(ds.path)
if ds_diff:
ds.apply_diff(ds_diff, tree_builder)
tree_builder.flush()
dataset.apply_diff(ds_diff, tree_builder)
tree_builder.flush()

tree = tree_builder.flush()
L.info(f"Tree sha: {tree.oid}")
Expand All @@ -220,18 +238,24 @@ def commit(
NOTE: Doesn't update working-copy meta or tracking tables, this is the
responsibility of the caller.
"""
old_tree_oid = self.tree.oid if self.tree is not None else None
new_tree_oid = self.create_tree_from_diff(wcdiff)
if (not allow_empty) and new_tree_oid == self.tree.oid:
if (not allow_empty) and new_tree_oid == old_tree_oid:
raise NotFound("No changes to commit", exit_code=NO_CHANGES)

L.info("Committing...")

parent_commit = git_util.get_head_commit(self.repo)
parents = [parent_commit.oid] if parent_commit is not None else []

# this will also update the ref (branch) to point to the current commit
new_commit = self.repo.create_commit(
"HEAD", # reference_name
author or git_util.author_signature(self.repo),
committer or git_util.committer_signature(self.repo),
message, # message
new_tree_oid, # tree
[self.repo.head.target], # parents
message,
new_tree_oid,
parents,
)
L.info(f"Commit: {new_commit}")

Expand Down Expand Up @@ -367,6 +391,12 @@ def get_meta_item(self, name, missing_ok=False):
return None
raise KeyError(f"No meta-item found named {name}, type={type(leaf)}") from e

@property
@functools.lru_cache(maxsize=1)
def schema(self):
    """Convenience method for loading the schema.json into a Schema object"""
    column_dicts = self.get_meta_item("schema.json")
    return Schema.from_column_dicts(column_dicts)

@property
@functools.lru_cache(maxsize=1)
def has_geometry(self):
Expand All @@ -375,8 +405,8 @@ def has_geometry(self):
@property
@functools.lru_cache(maxsize=1)
def geom_column_name(self):
meta_geom = self.get_gpkg_meta_item("gpkg_geometry_columns")
return meta_geom["column_name"] if meta_geom else None
geom_columns = self.schema.geometry_columns
return geom_columns[0].name if geom_columns else None

def get_crs_definition(self, crs_name):
"""Return the CRS definition stored with the given name."""
Expand Down Expand Up @@ -663,7 +693,12 @@ def apply_feature_diff(self, feature_diff, tree_builder, *, schema=None):
if schema is not None:
encode_kwargs = {"schema": schema}

geom_column_name = self.geom_column_name
geom_columns = (schema or self.schema).geometry_columns
geom_column_name = geom_columns[0].name if geom_columns else None

# Applying diffs works even if there is no tree yet created for the dataset,
# as is the case when the dataset is first being created right now.
tree = self.tree or ()

conflicts = False
for delta in feature_diff.values():
Expand All @@ -677,21 +712,21 @@ def apply_feature_diff(self, feature_diff, tree_builder, *, schema=None):
)

# Conflict detection
if delta.type == "delete" and old_path not in self.tree:
if delta.type == "delete" and old_path not in tree:
conflicts = True
click.echo(
f"{self.path}: Trying to delete nonexistent feature: {old_key}"
)
continue

if delta.type == "insert" and new_path in self.tree:
if delta.type == "insert" and new_path in tree:
conflicts = True
click.echo(
f"{self.path}: Trying to create feature that already exists: {new_key}"
)
continue

if delta.type == "update" and old_path not in self.tree:
if delta.type == "update" and old_path not in tree:
conflicts = True
click.echo(
f"{self.path}: Trying to update nonexistent feature: {old_key}"
Expand Down
Loading