diff --git a/CHANGELOG.md b/CHANGELOG.md
index e568febf3..751857e11 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,14 @@ Please note that compatibility for 0.x releases (software or repositories) isn't
 
 _When adding new entries to the changelog, please include issue/PR numbers wherever possible._
 
+## 0.10.1
+
+#### Fix for `kart upgrade`
+Fixed `kart upgrade` so that it preserves more complicated (or yet-to-be-released) features of V2 repos as they are upgraded to V3. [#448](https://github.com/koordinates/kart/issues/448)
+
+Specifically:
+* `generated-pks.json` metadata - extra metadata found in datasets that have an automatically generated primary key and that are maintained by repeatedly importing from a primary-key-less datasource
+* attachments (which are not yet fully supported by Kart) - arbitrary files kept alongside datasets, such as license or readme files.
 
 ## 0.10.0
 
diff --git a/kart/dataset3.py b/kart/dataset3.py
index a91d8dd5f..b04b95bf2 100644
--- a/kart/dataset3.py
+++ b/kart/dataset3.py
@@ -332,6 +332,9 @@ def import_iter_meta_blobs(self, repo, source):
 
             yield full_path, content
 
+        for rel_path, content in source.attachment_items():
+            yield self.full_attachment_path(rel_path), content
+
     def iter_legend_blob_data(self):
         """
         Generates (full_path, blob_data) tuples for each legend in this dataset
diff --git a/kart/fast_import.py b/kart/fast_import.py
index 019aafe70..55a57123e 100644
--- a/kart/fast_import.py
+++ b/kart/fast_import.py
@@ -42,7 +42,7 @@ class _CommitMissing(Exception):
     pass
 
 
-def _safe_walk_repo(repo):
+def _safe_walk_repo(repo, from_commit):
     """
     Contextmanager. Walk the repo log, yielding each commit.
     If a commit isn't present, raises _CommitMissing.
     """
     do_raise = False
     try:
-        for commit in repo.walk(repo.head.target):
+        for commit in repo.walk(from_commit.id):
             try:
                 yield commit
             except KeyError:
@@ -65,7 +65,10 @@ def should_compare_imported_features_against_old_features(
-    repo, source, replacing_dataset
+    repo,
+    source,
+    replacing_dataset,
+    from_commit,
 ):
     """
     Returns True iff we should compare feature blobs to the previous feature blobs
@@ -91,7 +94,7 @@
     # Walk the log until we encounter a relevant schema change
     try:
-        for commit in _safe_walk_repo(repo):
+        for commit in _safe_walk_repo(repo, from_commit):
             datasets = repo.datasets(commit.oid)
             try:
                 old_dataset = datasets[replacing_dataset.path]
@@ -149,14 +152,22 @@ def fast_import_clear_trees(*, procs, replace_ids, replacing_dataset, source):
     if replacing_dataset is None:
         # nothing to do
         return
 
-    dest_inner_path = f"{source.dest_path}/{replacing_dataset.DATASET_DIRNAME}"
+    dest_path = source.dest_path
+    dest_inner_path = f"{dest_path}/{replacing_dataset.DATASET_DIRNAME}"
     for i, proc in enumerate(procs):
         if replace_ids is None:
             # Delete the existing dataset, before we re-import it.
             proc.stdin.write(f"D {source.dest_path}\n".encode("utf8"))
         else:
-            # delete and reimport meta/
+            # Delete and reimport any attachments at dest_path
+            attachment_names = [
+                obj.name for obj in replacing_dataset.tree if obj.type_str == "blob"
+            ]
+            for name in attachment_names:
+                proc.stdin.write(f"D {dest_path}/{name}\n".encode("utf8"))
+            # Delete and reimport /meta/
             proc.stdin.write(f"D {dest_inner_path}/meta\n".encode("utf8"))
+            # delete all features not pertaining to this process.
             # we also delete the features that *do*, but we do it further down
             # so that we don't have to iterate the IDs more than once.
@@ -174,20 +185,25 @@ def fast_import_clear_trees(*, procs, replace_ids, replacing_dataset, source):
         pass
 
 
+UNSPECIFIED = object()
+
+
 def fast_import_tables(
     repo,
     sources,
     *,
     verbosity=1,
     num_processes=4,
-    header=None,
     message=None,
     replace_existing=ReplaceExisting.DONT_REPLACE,
+    from_commit=UNSPECIFIED,
     replace_ids=None,
     allow_empty=False,
     limit=None,
     max_pack_size="2G",
     max_delta_depth=0,
+    # Advanced use - used by kart upgrade.
+    header=None,
     extra_cmd_args=(),
 ):
     """
@@ -200,13 +216,16 @@ fast_import_tables(
         1: basic status information
         2: full output of `git-fast-import --stats ...`
     num_processes: how many import processes to run in parallel
-    header - the commit-header to supply git-fast-import. Generated if not supplied - see generate_header.
     message - the commit-message used when generating the header. Generated if not supplied - see generate_message.
     replace_existing - See ReplaceExisting enum
+    from_commit - the commit to use as the starting point for the import.
     replace_ids - list of PK values to replace, or None
     limit - maximum number of features to import per source.
     max_pack_size - maximum size of pack files. Affects performance.
     max_delta_depth - maximum depth of delta-compression chains. Affects performance.
+
+    The following extra options are used by kart upgrade.
+    header - the commit-header to supply git-fast-import. Generated if not supplied - see generate_header.
     extra_cmd_args - any extra args for the git-fast-import command.
     """
 
@@ -220,28 +239,26 @@ fast_import_tables(
         # too many processes it won't be very even.
         raise ValueError(f"Can't import with more than {MAX_PROCESSES} processes")
 
-    # The tree this repo was at before this function was called.
-    # May be None (repo is empty)
-    orig_tree = repo.head_tree
-
-    # The tree we look at for considering what datasets already exist
-    # depends what we want to replace.
-    if replace_existing == ReplaceExisting.ALL:
-        starting_tree = None
+    # The commit that this import is using as the basis for the new commit.
+    # If we are replacing everything, we start from scratch, so from_commit is None.
+    if replace_existing is ReplaceExisting.ALL:
+        from_commit = None
     else:
-        starting_tree = repo.head_tree
+        if from_commit is UNSPECIFIED:
+            raise RuntimeError(
+                "Caller should specify from_commit when requesting an import that doesn't start from scratch"
+            )
 
-    if not starting_tree:
-        replace_existing = ReplaceExisting.ALL
+    from_tree = from_commit.peel(pygit2.Tree) if from_commit else repo.empty_tree
 
     assert repo.version in SUPPORTED_REPO_VERSIONS
-    extra_blobs = extra_blobs_for_version(repo.version) if not starting_tree else []
+    extra_blobs = extra_blobs_for_version(repo.version) if not from_commit else []
 
     ImportSource.check_valid(sources)
 
     if replace_existing == ReplaceExisting.DONT_REPLACE:
         for source in sources:
-            if source.dest_path in starting_tree:
+            if source.dest_path in from_tree:
                 raise InvalidOperation(
                     f"Cannot import to {source.dest_path}/ - already exists in repository"
                 )
@@ -260,7 +277,6 @@ def fast_import_tables(
     for arg in extra_cmd_args:
         cmd.append(arg)
 
-    orig_commit = repo.head_commit
     import_refs = []
 
     if verbosity >= 1:
@@ -291,11 +307,13 @@
             import_ref = f"refs/kart-import/{uuid.uuid4()}"
             import_refs.append(import_ref)
 
-            # may be None, if head is detached
+            # orig_branch may be None, if head is detached
+            # FIXME - this code relies upon the fact that we always either a) import at HEAD (import flow)
+            # or b) fix up the branch heads later (upgrade flow).
             orig_branch = repo.head_branch
-            proc_header = generate_header(repo, sources, message, import_ref)
-            if replace_existing != ReplaceExisting.ALL:
-                proc_header += f"from {orig_commit.oid}\n"
+            proc_header = generate_header(
+                repo, sources, message, import_ref, from_commit
+            )
 
             proc = stack.enter_context(_git_fast_import(repo, *cmd))
             procs.append(proc)
@@ -303,10 +321,7 @@
 
     # Write the extra blob that records the repo's version:
     for i, blob_path in write_blobs_to_stream(procs[0].stdin, extra_blobs):
-        if (
-            replace_existing != ReplaceExisting.ALL
-            and blob_path in starting_tree
-        ):
+        if replace_existing != ReplaceExisting.ALL and blob_path in from_tree:
            raise ValueError(f"{blob_path} already exists")
 
     if num_processes == 1:
@@ -326,6 +341,7 @@ def proc_for_feature_path(path):
                 repo,
                 source,
                 replace_existing,
+                from_commit,
                 procs,
                 proc_for_feature_path,
                 replace_ids,
@@ -363,7 +379,7 @@ def proc_for_feature_path(path):
         else:
             new_tree = trees[0]
         if not allow_empty:
-            if new_tree == orig_tree:
+            if new_tree == from_tree:
                 raise NotFound("No changes to commit", exit_code=NO_CHANGES)
 
         # use the existing commit details we already imported, but use the new tree
@@ -380,16 +396,14 @@ def proc_for_feature_path(path):
         # remove the import branches
         for b in import_refs:
             if b in repo.references:
-                try:
-                    repo.references.delete(b)
-                except KeyError:
-                    pass  # Nothing to delete, probably due to some earlier failure.
+                repo.references.delete(b)
 
 
 def _import_single_source(
     repo,
     source,
     replace_existing,
+    from_commit,
     procs,
     proc_for_feature_path,
     replace_ids,
@@ -400,6 +414,7 @@
     repo - the Kart repo to import into.
     source - an individual ImportSource
     replace_existing - See ReplaceExisting enum
+    from_commit - the commit to use as the starting point for the import.
     procs - all the processes to be used (for parallel imports)
     proc_for_feature_path - function, given a feature path returns the process to use to import it
     replace_ids - list of PK values to replace, or None
@@ -412,7 +427,7 @@ def _import_single_source(
     replacing_dataset = None
     if replace_existing == ReplaceExisting.GIVEN:
         try:
-            replacing_dataset = repo.datasets()[source.dest_path]
+            replacing_dataset = repo.datasets(refish=from_commit)[source.dest_path]
         except KeyError:
             # no such dataset; no problem
             replacing_dataset = None
@@ -478,7 +493,10 @@ def _ids():
                 )
             elif should_compare_imported_features_against_old_features(
-                repo, source, replacing_dataset
+                repo,
+                source,
+                replacing_dataset,
+                from_commit,
             ):
                 feature_blob_iter = dataset.import_iter_feature_blobs(
                     repo,
@@ -536,18 +554,21 @@ def copy_existing_blob_to_stream(stream, blob_path, blob_sha):
     stream.write(f"M 644 {blob_sha} {blob_path}\n".encode("utf8"))
 
 
-def generate_header(repo, sources, message, branch):
+def generate_header(repo, sources, message, branch, from_commit):
     if message is None:
         message = generate_message(sources)
 
     author = repo.author_signature()
     committer = repo.committer_signature()
-    return (
+    result = (
         f"commit {branch}\n"
         f"author {author.name} <{author.email}> {author.time} {minutes_to_tz_offset(author.offset)}\n"
         f"committer {committer.name} <{committer.email}> {committer.time} {minutes_to_tz_offset(committer.offset)}\n"
         f"data {len(message.encode('utf8'))}\n{message}\n"
     )
+    if from_commit:
+        result += f"from {from_commit.oid}\n"
+    return result
 
 
 def generate_message(sources):
diff --git a/kart/import_source.py b/kart/import_source.py
index b719488b3..b336d5252 100644
--- a/kart/import_source.py
+++ b/kart/import_source.py
@@ -101,6 +101,14 @@ def meta_items(self):
         """
         raise NotImplementedError()
 
+    def attachment_items(self):
+        """
+        Returns (name, content) tuples for all the attachment items that need to be imported.
+        These are files that will be imported verbatim to dest_path, but not hidden inside the dataset.
+        This could be a license or a readme.
+        """
+        return ()
+
     def crs_definitions(self):
         """
         Returns an {identifier: definition} dict containing every CRS definition.
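The `attachment_items()` hook added to `ImportSource` above is the same contract that `import_iter_meta_blobs` in `kart/dataset3.py` consumes, and that `UpgradeSourceDataset2` implements further down. As a rough illustration only: a custom import source that wants to carry attachments through an import would just override this hook. The `LicensedImportSource` class and its `attachment_paths` argument below are hypothetical (not part of this change), and the other `ImportSource` methods a real source must implement are omitted.

```python
from pathlib import Path

from kart.import_source import ImportSource


class LicensedImportSource(ImportSource):
    """Hypothetical import source that carries a LICENSE/README alongside the dataset."""

    def __init__(self, attachment_paths=()):
        # Files to import verbatim next to the dataset, eg ["LICENSE", "README.md"].
        self.attachment_paths = [Path(p) for p in attachment_paths]

    def attachment_items(self):
        # Yields the same (name, content) tuples that the fast-import machinery expects.
        for path in self.attachment_paths:
            yield path.name, path.read_bytes()
```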
diff --git a/kart/init.py b/kart/init.py
index 8e5832cce..a36c5112b 100644
--- a/kart/init.py
+++ b/kart/init.py
@@ -326,15 +326,17 @@ def import_(
         import_sources.append(import_source)
 
     ImportSource.check_valid(import_sources, param_hint="tables")
+    replace_existing_enum = (
+        ReplaceExisting.GIVEN if replace_existing else ReplaceExisting.DONT_REPLACE
+    )
     fast_import_tables(
         repo,
         import_sources,
         verbosity=ctx.obj.verbosity + 1,
         message=message,
         max_delta_depth=max_delta_depth,
-        replace_existing=ReplaceExisting.GIVEN
-        if replace_existing
-        else ReplaceExisting.DONT_REPLACE,
+        replace_existing=replace_existing_enum,
+        from_commit=repo.head_commit,
         replace_ids=replace_ids,
         allow_empty=allow_empty,
         num_processes=num_processes or get_default_num_processes(),
@@ -458,6 +460,7 @@ def init(
     fast_import_tables(
         repo,
         sources,
+        from_commit=None,
         message=message,
         max_delta_depth=max_delta_depth,
         num_processes=num_processes or get_default_num_processes(),
diff --git a/kart/pk_generation.py b/kart/pk_generation.py
index ce9c97541..666a40064 100644
--- a/kart/pk_generation.py
+++ b/kart/pk_generation.py
@@ -3,7 +3,7 @@
 from .import_source import ImportSource
 from .dataset2 import Dataset2
 from .dataset3 import Dataset3
-from .schema import ColumnSchema, Schema
+from .schema import ColumnSchema
 
 
 class PkGeneratingImportSource(ImportSource):
diff --git a/kart/rich_base_dataset.py b/kart/rich_base_dataset.py
index f01b25ef2..0e1532b96 100644
--- a/kart/rich_base_dataset.py
+++ b/kart/rich_base_dataset.py
@@ -201,9 +201,9 @@ def diff_feature(self, other, feature_filter=UNFILTERED, reverse=False):
         """
         feature_filter = feature_filter or UNFILTERED
 
-        params = {}
+        params = {"flags": pygit2.GIT_DIFF_SKIP_BINARY_CHECK}
         if reverse:
-            params = {"swap": True}
+            params["swap"] = True
 
         if other is None:
             diff_index = self.inner_tree.diff_to_tree(**params)
diff --git a/kart/upgrade/__init__.py b/kart/upgrade/__init__.py
index f7b1d23f0..943468bc6 100644
--- a/kart/upgrade/__init__.py
+++ b/kart/upgrade/__init__.py
@@ -10,7 +10,7 @@
 from kart.dataset2 import Dataset2
 from kart.exceptions import InvalidOperation, NotFound
 from kart.fast_import import fast_import_tables, ReplaceExisting
-from kart.repo import KartRepo
+from kart.repo import KartRepo, KartConfigKeys
 from kart.repo_version import DEFAULT_NEW_REPO_VERSION
 from kart.structure import RepoStructure
 from kart.timestamps import minutes_to_tz_offset
@@ -27,22 +27,29 @@ def dataset_class_for_legacy_version(version, in_place=False):
         return Dataset1
     elif version == 2:
         if in_place:
-            return InPlaceUpgradingDataset2
+            return InPlaceUpgradeSourceDataset2
         else:
-            return XmlUpgradingDataset2
+            return UpgradeSourceDataset2
     return None
 
 
-class XmlUpgradingDataset2(Dataset2):
-    """Variant of Dataset2 that extracts the metadata.xml out of dataset/metadata.json."""
+class UpgradeSourceDataset2(Dataset2):
+    """
+    Variant of Dataset2 that:
+    - preserves all meta_items, even non-standard ones
+    - preserves attachments
+    - upgrades dataset/metadata.json to metadata.xml
+    """
+
+    def meta_items(self):
+        # We also want to include hidden items like generated-pks.json, in the odd case where we have it.
+        return super().meta_items(only_standard_items=False)
 
     def get_meta_item(self, name, missing_ok=True):
         if name == "metadata/dataset.json":
             return None
-
         result = super().get_meta_item(name, missing_ok=missing_ok)
-
         if result is None and name == "metadata.xml":
             metadata_json = super().get_meta_item("metadata/dataset.json")
             if metadata_json:
@@ -53,8 +60,13 @@
                 return next(iter(metadata_xml))["text/xml"]
         return result
 
+    def attachment_items(self):
+        attachments = [obj for obj in self.tree if obj.type_str == "blob"]
+        for attachment in attachments:
+            yield attachment.name, attachment.data
 
-class InPlaceUpgradingDataset2(XmlUpgradingDataset2):
+
+class InPlaceUpgradeSourceDataset2(UpgradeSourceDataset2):
     @property
     def feature_blobs_already_written(self):
         return True
@@ -209,6 +221,11 @@ def upgrade(ctx, source, dest, in_place):
         subctx.obj.user_repo_path = str(dest)
         subctx.invoke(checkout.create_workingcopy)
 
+    if in_place:
+        dest_repo.config[KartConfigKeys.KART_REPOSTRUCTURE_VERSION] = str(
+            DEFAULT_NEW_REPO_VERSION
+        )
+
     click.secho("\nUpgrade complete", fg="green", bold=True)
 
 
@@ -218,7 +235,7 @@ def _upgrade_commit(
     source_repo,
     source_commit,
     source_dataset_class,
-    dest_parents,
+    dest_parent_ids,
     dest_repo,
     commit_map,
 ):
@@ -254,26 +271,28 @@ def _upgrade_commit(
         ds_path, ds_diff = sole_dataset_diff
         source_datasets = [source_rs.datasets[ds_path]]
         replace_existing = ReplaceExisting.GIVEN
-        from_parent = commit_map[source_commit.parents[0].hex]
-        merge_parents = [p for p in dest_parents if p != from_parent]
+        from_id = commit_map[source_commit.parents[0].hex]
+        from_commit = dest_repo[from_id]
+        merge_ids = [p for p in dest_parent_ids if p != from_id]
         replace_ids = list(ds_diff.get("feature", {}).keys())
         feature_count = len(replace_ids)
     else:
-        from_parent = None
-        merge_parents = dest_parents
+        from_id = from_commit = None
+        merge_ids = dest_parent_ids
        replace_existing = ReplaceExisting.ALL
         replace_ids = None
         feature_count = sum(s.feature_count for s in source_datasets)
 
-    if from_parent:
-        header += f"from {from_parent}\n"
-    header += "".join(f"merge {p}\n" for p in merge_parents)
+    if from_id:
+        header += f"from {from_id}\n"
+    header += "".join(f"merge {p}\n" for p in merge_ids)
 
     try:
         fast_import_tables(
             dest_repo,
             source_datasets,
             replace_existing=replace_existing,
+            from_commit=from_commit,
             replace_ids=replace_ids,
             verbosity=ctx.obj.verbosity,
             header=header,
diff --git a/tests/test_structure.py b/tests/test_structure.py
index e7a55f1c7..97a7ff6b1 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -1120,7 +1120,7 @@ def test_fast_import(data_archive, tmp_path, cli_runner, chdir):
             data / "nz-pa-points-topo-150k.gpkg", table=table
         )
 
-        fast_import.fast_import_tables(repo, [source])
+        fast_import.fast_import_tables(repo, [source], from_commit=None)
 
         assert not repo.is_empty
         assert repo.head.name == "refs/heads/main"
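Taken together, the `fast_import_tables` changes above replace its implicit reliance on `repo.head_commit` / `repo.head_tree` with an explicit `from_commit` argument. A minimal sketch of the two calling patterns, assuming `repo` is an existing `KartRepo` and `sources` is an already-prepared list of `ImportSource` objects (both placeholders here):

```python
from kart.fast_import import ReplaceExisting, fast_import_tables

# Normal import flow (as in kart/init.py): build the new commit on top of HEAD,
# replacing only the datasets being re-imported.
fast_import_tables(
    repo,
    sources,
    replace_existing=ReplaceExisting.GIVEN,
    from_commit=repo.head_commit,
)

# Replacing everything (the upgrade flow): there is no basis commit,
# so from_commit is None.
fast_import_tables(
    repo,
    sources,
    replace_existing=ReplaceExisting.ALL,
    from_commit=None,
)
```

Leaving `from_commit` at its `UNSPECIFIED` sentinel now raises a `RuntimeError` unless `replace_existing` is `ReplaceExisting.ALL`, so callers have to be explicit about which commit an import builds on.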