From c4932afbb9d8cb576645c1e32ee96f46bb8d7b93 Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:14:01 +0100 Subject: [PATCH 1/9] Accept layer has no geometry field - error raised in geometry_field when getting geom_field in the model, blocking layer with no geom_field being mapped --- django/contrib/gis/utils/layermapping.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index a4cd04dc057a..b1c13fbc6279 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -540,6 +540,11 @@ def geometry_field(self): Return the GeometryField instance associated with the geographic column. """ + + # Allow layer has no geometry field being mapped + if self.geom_field is None: + return None + # Use `get_field()` on the model's options so that we # get the correct field instance if there's model inheritance. opts = self.model._meta From c9cdc0bd0bff535ea01158069ddd2fbeac22113f Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:59:52 +0100 Subject: [PATCH 2/9] new method to bulk create the whole layer - utilize model.bulk_create - but heavy load to memory --- django/contrib/gis/utils/layermapping.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index b1c13fbc6279..78dbdcabbd92 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -20,6 +20,7 @@ OGRGeomType, SpatialReference, ) +from django.contrib.gis.gdal.feature import Feature from django.contrib.gis.gdal.field import ( OFTDate, OFTDateTime, @@ -739,3 +740,13 @@ def _save(feat_range=default_range, num_feat=0, num_saved=0): else: # Otherwise, just calling the previously defined _save() function. _save() + + def bulk_create_all(self, batch_size: int = 1000): + # Drawback: Load all features into memory at once + features = [ + self.model(**self.feature_kwargs(feature)) for feature in self.layer + ] + # The batch_size is only applied to bulk_create, but not overall handling + self.model.objects.using(self.using).bulk_create( + features, batch_size=batch_size + ) From ec588ffe49ddf9f8fd48f29812415898ad5b1d29 Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Sat, 11 Oct 2025 00:52:37 +0100 Subject: [PATCH 3/9] enable efficient fk verification - added faster_verify_fk flag for enabling --- django/contrib/gis/utils/layermapping.py | 100 +++++++++++++++++++++-- 1 file changed, 95 insertions(+), 5 deletions(-) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 78dbdcabbd92..819b5ae2b556 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -10,6 +10,7 @@ from decimal import Decimal from decimal import InvalidOperation as DecimalInvalidOperation from pathlib import Path +from typing import Optional from django.contrib.gis.db.models import GeometryField from django.contrib.gis.gdal import ( @@ -106,6 +107,7 @@ def __init__( transform=True, unique=None, using=None, + faster_verify_fk=False, ): """ A LayerMapping object is initialized using the given Model (not an @@ -128,10 +130,20 @@ def __init__( self.mapping = mapping self.model = model + # Flag to enable fetch required FK model pks in batch to avoid N+1 queries + self.faster_verify_fk = faster_verify_fk + if self.faster_verify_fk: + print("Faster foreign key verification is enabled") + # Checking the layer -- initialization of the object will fail if # things don't check out before hand. self.check_layer() + self.fk_field_names = self.get_model_fk_field_names() + + # TODO: Improve naming, it stores FK id to actual FK instance pk + self.fks_uid_pk_map = {} + # Getting the geometry column associated with the model (an # exception will be raised if there is no geometry column). if connection.features.supports_transform: @@ -341,6 +353,47 @@ def check_unique(self, unique): "Unique keyword argument must be set with a tuple, list, or string." ) + # TODO: assign the fk_field_names by other methods + def get_model_fk_field_names(self): + fk_field_names = [] + for field_name, ogr_name in self.mapping.items(): + model_field = self.fields[field_name] + if isinstance(model_field, models.base.ModelBase): + fk_field_names.append(field_name) + return fk_field_names + + # Implement after real batching + # def verify_fk_exists(self, field_name: str, fk_ids: list[int]): + # if field_name not in self.fk_field_names: + # raise Exception("Field name not found in fk_field_names") + + # related_model = self.fields[field_name] + # related_model_pks = related_model.objects.filter(pk__in=fk_ids).values_list( + # "pk", flat=True + # ) + # if missing_pks := set(fk_ids) - set(related_model_pks): + # raise Exception(f"Missing {field_name} foreign key ids: {missing_pks}") + + # TODO: Implement real batching + def batch_fetch_fk_pks(self, field_name: str, uids: Optional[list[int]] = None): + related_model = self.fields[field_name] + + uid_pk_map = {} + + for related_model_column_name, ogr_name_fk in self.mapping[field_name].items(): + queryset = related_model.objects.all() + if uids: + queryset = queryset.filter(**{f"{related_model_column_name}__in": uids}) + + result = queryset.values_list(related_model_column_name, "pk") + for uid, pk in result: + uid_pk_map[uid] = pk + return uid_pk_map + + def load_fks_uid_pk_map(self): + for field_name in self.fk_field_names: + self.fks_uid_pk_map[field_name] = self.batch_fetch_fk_pks(field_name) + # Keyword argument retrieval routines. def feature_kwargs(self, feat): """ @@ -364,7 +417,24 @@ def feature_kwargs(self, feat): elif isinstance(model_field, models.base.ModelBase): # The related _model_, not a field was passed in -- indicating # another mapping for the related Model. - val = self.verify_fk(feat, model_field, ogr_name) + if not self.faster_verify_fk: + val = self.verify_fk(feat, model_field, ogr_name) + else: + for rel_model_column_name, ogr_name_fk in ogr_name.items(): + # Seems not very helpful + fk_val = self.verify_ogr_field(feat[ogr_name_fk], model_field) + if fk_val not in self.fks_uid_pk_map[field_name]: + # TODO: batch matching to report all missing fk ids + raise Exception( + f"Missing {field_name} foreign key ids: {fk_val}" + ) + else: + rel_model_pk = self.fks_uid_pk_map[field_name][fk_val] + val = rel_model_pk + # TODO: Validate it is always correct + field_name = f"{field_name}_id" + # Should only have one key inside the dict + break else: # Otherwise, verify OGR Field type. val = self.verify_ogr_field(feat[ogr_name], model_field) @@ -461,6 +531,7 @@ def verify_ogr_field(self, ogr_field, model_field): val = ogr_field.value return val + # Will be removed def verify_fk(self, feat, rel_model, rel_mapping): """ Given an OGR Feature, the related model and its dictionary mapping, @@ -622,6 +693,9 @@ def save( else: progress_interval = progress + if self.faster_verify_fk: + self.load_fks_uid_pk_map() + def _save(feat_range=default_range, num_feat=0, num_saved=0): if feat_range: layer_iter = self.layer[feat_range] @@ -742,10 +816,26 @@ def _save(feat_range=default_range, num_feat=0, num_saved=0): _save() def bulk_create_all(self, batch_size: int = 1000): - # Drawback: Load all features into memory at once - features = [ - self.model(**self.feature_kwargs(feature)) for feature in self.layer - ] + if self.faster_verify_fk: + self.load_fks_uid_pk_map() + + all_features_kwargs = [ + self.feature_kwargs(feature) for feature in self.layer + ] + for fk_field_name in self.fk_field_names: + uids = [kwargs[fk_field_name + "_id"] for kwargs in all_features_kwargs] + if missing_uids := set(uids) - set( + self.fks_uid_pk_map[fk_field_name].values() + ): + raise Exception( + f"Missing {fk_field_name} foreign key ids: {missing_uids}" + ) + features = [self.model(**kwargs) for kwargs in all_features_kwargs] + else: + # Drawback: Load all features into memory at once + features = [ + self.model(**self.feature_kwargs(feature)) for feature in self.layer + ] # The batch_size is only applied to bulk_create, but not overall handling self.model.objects.using(self.using).bulk_create( features, batch_size=batch_size From ebcb51e87eb88f04354b66229ef9a00674e39674 Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Sat, 11 Oct 2025 00:54:26 +0100 Subject: [PATCH 4/9] verify_fk use lighter query - it is slow because N+1 query - fetch only pk is adequate and slightly speed up --- django/contrib/gis/utils/layermapping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 819b5ae2b556..037618cc2668 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -550,7 +550,8 @@ def verify_fk(self, feat, rel_model, rel_mapping): # Attempting to retrieve and return the related model. try: - return rel_model.objects.using(self.using).get(**fk_kwargs) + # Lighter query by only fetching pk + return rel_model.objects.using(self.using).only("pk").get(**fk_kwargs) except ObjectDoesNotExist: raise MissingForeignKey( "No ForeignKey %s model found with keyword arguments: %s" From 0a2c38af417c8f284fbab1f8891dc87a0a704e5d Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:01:28 +0100 Subject: [PATCH 5/9] add method to split layer in batches --- django/contrib/gis/utils/layermapping.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 037618cc2668..b504713e1a96 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -816,6 +816,19 @@ def _save(feat_range=default_range, num_feat=0, num_saved=0): # Otherwise, just calling the previously defined _save() function. _save() + def _split_layer(self, batch_size: int = 1000): + """ + Split the features in the layer into batches of the given size. + """ + current_batch = [] + for feature in self.layer: + current_batch.append(feature) + if len(current_batch) >= batch_size: + yield current_batch + current_batch = [] + if current_batch: + yield current_batch + def bulk_create_all(self, batch_size: int = 1000): if self.faster_verify_fk: self.load_fks_uid_pk_map() From 45d4857f8412539b877c95567a43842fb8e34c23 Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:23:07 +0100 Subject: [PATCH 6/9] batch handling a layer includes layer iteration - not loading the whole layer in a go now --- django/contrib/gis/utils/layermapping.py | 31 ++++++++++++++---------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index b504713e1a96..5c3255a11fc1 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -829,28 +829,33 @@ def _split_layer(self, batch_size: int = 1000): if current_batch: yield current_batch - def bulk_create_all(self, batch_size: int = 1000): + def _bulk_create_batch(self, features_batch: list[Feature]): + """ + Given a batch of features, bulk create these features. + """ if self.faster_verify_fk: - self.load_fks_uid_pk_map() - - all_features_kwargs = [ - self.feature_kwargs(feature) for feature in self.layer + features_kwargs = [ + self.feature_kwargs(feature) for feature in features_batch ] + # Verify FK existence for fk_field_name in self.fk_field_names: - uids = [kwargs[fk_field_name + "_id"] for kwargs in all_features_kwargs] + uids = [kwargs[fk_field_name + "_id"] for kwargs in features_kwargs] if missing_uids := set(uids) - set( self.fks_uid_pk_map[fk_field_name].values() ): raise Exception( f"Missing {fk_field_name} foreign key ids: {missing_uids}" ) - features = [self.model(**kwargs) for kwargs in all_features_kwargs] + features = [self.model(**kwargs) for kwargs in features_kwargs] else: - # Drawback: Load all features into memory at once features = [ - self.model(**self.feature_kwargs(feature)) for feature in self.layer + self.model(**self.feature_kwargs(feature)) for feature in features_batch ] - # The batch_size is only applied to bulk_create, but not overall handling - self.model.objects.using(self.using).bulk_create( - features, batch_size=batch_size - ) + self.model.objects.using(self.using).bulk_create(features) + + def bulk_create_all(self, batch_size: int = 1000): + if self.faster_verify_fk: + self.load_fks_uid_pk_map() + # TODO: optional transition.atomic + for features_batch in self._split_layer(batch_size): + self._bulk_create_batch(features_batch) From 02a31c2add380b9050b642781586cf32e7bc7bfb Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Sat, 11 Oct 2025 21:35:59 +0100 Subject: [PATCH 7/9] implement transaction.atomic() --- django/contrib/gis/utils/layermapping.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 5c3255a11fc1..2f0f28cf5423 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -7,6 +7,7 @@ https://docs.djangoproject.com/en/dev/ref/contrib/gis/layermapping/ """ import sys +from contextlib import nullcontext from decimal import Decimal from decimal import InvalidOperation as DecimalInvalidOperation from pathlib import Path @@ -856,6 +857,12 @@ def _bulk_create_batch(self, features_batch: list[Feature]): def bulk_create_all(self, batch_size: int = 1000): if self.faster_verify_fk: self.load_fks_uid_pk_map() - # TODO: optional transition.atomic - for features_batch in self._split_layer(batch_size): - self._bulk_create_batch(features_batch) + + context = ( + transaction.atomic() + if self.transaction_mode == "commit_on_success" + else nullcontext() + ) + with context: + for features_batch in self._split_layer(batch_size): + self._bulk_create_batch(features_batch) From 785f83689e9f43fd602e991024894c839648c77e Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Wed, 15 Oct 2025 19:31:10 +0100 Subject: [PATCH 8/9] bulk_create_all supports overwriting feature kwargs - also as a method to write fields not in DataSource --- django/contrib/gis/utils/layermapping.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 2f0f28cf5423..5814689f5247 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -11,7 +11,7 @@ from decimal import Decimal from decimal import InvalidOperation as DecimalInvalidOperation from pathlib import Path -from typing import Optional +from typing import Any, Optional from django.contrib.gis.db.models import GeometryField from django.contrib.gis.gdal import ( @@ -830,13 +830,22 @@ def _split_layer(self, batch_size: int = 1000): if current_batch: yield current_batch - def _bulk_create_batch(self, features_batch: list[Feature]): + def _bulk_create_batch( + self, + features_batch: list[Feature], + overwrite_kwargs: Optional[dict[str, Any]] = None, + ): """ Given a batch of features, bulk create these features. """ + + if not overwrite_kwargs: + overwrite_kwargs = {} + if self.faster_verify_fk: features_kwargs = [ - self.feature_kwargs(feature) for feature in features_batch + {**self.feature_kwargs(feature), **overwrite_kwargs} + for feature in features_batch ] # Verify FK existence for fk_field_name in self.fk_field_names: @@ -850,11 +859,14 @@ def _bulk_create_batch(self, features_batch: list[Feature]): features = [self.model(**kwargs) for kwargs in features_kwargs] else: features = [ - self.model(**self.feature_kwargs(feature)) for feature in features_batch + self.model(**{**self.feature_kwargs(feature), **overwrite_kwargs}) + for feature in features_batch ] self.model.objects.using(self.using).bulk_create(features) - def bulk_create_all(self, batch_size: int = 1000): + def bulk_create_all( + self, overwrite_kwargs: Optional[dict[str, Any]] = None, batch_size: int = 1000 + ): if self.faster_verify_fk: self.load_fks_uid_pk_map() @@ -865,4 +877,4 @@ def bulk_create_all(self, batch_size: int = 1000): ) with context: for features_batch in self._split_layer(batch_size): - self._bulk_create_batch(features_batch) + self._bulk_create_batch(features_batch, overwrite_kwargs) From 9cc4108df053a111e37dabac9e65133ea29d71e3 Mon Sep 17 00:00:00 2001 From: wongcht <16640500+wongcht@users.noreply.github.com> Date: Wed, 15 Oct 2025 20:01:59 +0100 Subject: [PATCH 9/9] Downgrade curved geom to linear so that it can be saved --- django/contrib/gis/utils/layermapping.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/django/contrib/gis/utils/layermapping.py b/django/contrib/gis/utils/layermapping.py index 5814689f5247..8ff9ca9ccde6 100644 --- a/django/contrib/gis/utils/layermapping.py +++ b/django/contrib/gis/utils/layermapping.py @@ -573,6 +573,10 @@ def verify_geom(self, geom, model_field): if self.coord_dim == 2 and geom.is_3d: geom.set_3d(False) + # Downgrade a curved geom to a linear one so that it can be saved + if geom.has_curve: + geom = geom.get_linear_geometry() + if self.make_multi(geom.geom_type, model_field): # Constructing a multi-geometry type to contain the single geometry multi_type = self.MULTI_TYPES[geom.geom_type.num]