Add a --slice-queryset option to the buildwatson management command.

With the flag set, rebuild_index_for_model() reads the objects to index in
batch_size_-sized queryset slices instead of calling .iterator().

Review notes:
  * The sliced path now orders the queryset by "pk" when it has no ordering
    of its own, so that consecutive slices neither repeat nor skip rows.
  * The per-object counter/verbosity output lives in the report() helper and
    is shared by both iteration strategies.
  * This patch was restored from a whitespace-collapsed copy: indentation and
    blank context lines were inferred from the code and the hunk line counts.
    If a hunk is rejected, retry with `git apply --ignore-space-change` and
    check the context lines against the target tree.
  * The post-image blob ids on the "index" lines predate the review changes.

diff --git a/tests/test_watson/tests.py b/tests/test_watson/tests.py
index 6d45704..940921f 100644
--- a/tests/test_watson/tests.py
+++ b/tests/test_watson/tests.py
@@ -259,6 +259,26 @@ def testFixesDuplicateSearchEntries(self):
         # Make sure that we have six again (including duplicates).
         self.assertEqual(search_entries.all().count(), 6)
 
+    def testSliceQuerysetBuildWatson(self):
+        # Delete and re-add
+        deleted, _ = SearchEntry.objects.filter(engine_slug="default").delete()
+        self.assertEqual(deleted, 6)
+        call_command("buildwatson", verbosity=0, slice_queryset=True)
+        self.assertEqual(SearchEntry.objects.filter(engine_slug="default").count(), 6)
+
+    def testFixesDuplicateSliceQuerysetBuildWatson(self):
+        search_entries = SearchEntry.objects.filter(engine_slug="default")
+        # Duplicate a couple of search entries.
+        for search_entry in search_entries.all()[:2]:
+            search_entry.id = None
+            search_entry.save()
+        # Make sure that we have eight (including duplicates).
+        self.assertEqual(search_entries.all().count(), 8)
+        # Run the rebuild command.
+        call_command("buildwatson", verbosity=0, slice_queryset=True)
+        # Make sure that we are back to six (duplicates removed).
+        self.assertEqual(search_entries.all().count(), 6)
+
     def testEmptyFilterGivesAllResults(self):
         for model in (WatsonTestModel1, WatsonTestModel2, WatsonTestModel3):
             self.assertEqual(watson.filter(model, "").count(), 2)
diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py
index 895abf9..514ac90 100644
--- a/watson/management/commands/buildwatson.py
+++ b/watson/management/commands/buildwatson.py
@@ -29,13 +29,26 @@ def get_engine(engine_slug_):
         raise CommandError("Search Engine \"%s\" is not registered!" % force_str(engine_slug_))
 
 
-def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch_size_=100, non_atomic_=False):
+def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch_size_=100, non_atomic_=False, slice_queryset_=False):
     """rebuilds index for a model"""
 
     search_engine_ = get_engine(engine_slug_)
 
     local_refreshed_model_count = [0]  # HACK: Allows assignment to outer scope.
 
+    def report(obj):
+        """counts obj as refreshed and prints it when verbosity_ >= 3"""
+        local_refreshed_model_count[0] += 1
+        if verbosity_ >= 3:
+            print(
+                "Refreshed search entry for {model} {obj} "
+                "in {engine_slug!r} search engine.".format(
+                    model=force_str(model_._meta.verbose_name),
+                    obj=force_str(obj),
+                    engine_slug=force_str(engine_slug_),
+                )
+            )
+
     def iter_search_entries():
         # Only index specified objects if slim_ is True
         if slim_ and search_engine_._registered_models[model_].get_live_queryset():
@@ -43,19 +56,23 @@ def iter_search_entries():
         else:
             obj_list = model_._default_manager.all()
 
-        for obj in obj_list.iterator():
-            for search_entry in search_engine_._update_obj_index_iter(obj):
-                yield search_entry
-            local_refreshed_model_count[0] += 1
-            if verbosity_ >= 3:
-                print(
-                    "Refreshed search entry for {model} {obj} "
-                    "in {engine_slug!r} search engine.".format(
-                        model=force_str(model_._meta.verbose_name),
-                        obj=force_str(obj),
-                        engine_slug=force_str(engine_slug_),
-                    )
-                )
+        if slice_queryset_:
+            # Slices are fetched with LIMIT/OFFSET; without a deterministic
+            # order the database may repeat or skip rows between slices.
+            if not obj_list.ordered:
+                obj_list = obj_list.order_by("pk")
+            count = obj_list.count()
+            for i in range(0, count, batch_size_):
+                for obj in obj_list[i:i + batch_size_]:
+                    for search_entry in search_engine_._update_obj_index_iter(obj):
+                        yield search_entry
+                    report(obj)
+        else:
+            for obj in obj_list.iterator():
+                for search_entry in search_engine_._update_obj_index_iter(obj):
+                    yield search_entry
+                report(obj)
+
         if verbosity_ == 2:
             print(
                 "Refreshed {local_refreshed_model_count} {model} search entry(s) "
@@ -107,6 +124,12 @@ def add_arguments(self, parser):
             type=int,
             help="The batchsize with which entries will be added to the index."
         )
+        parser.add_argument(
+            '--slice-queryset',
+            action='store_true',
+            default=False,
+            help="Uses slicing on QuerySet instead of .iterator()"
+        )
 
     def handle(self, *args, **options):
         """Runs the management command."""
@@ -125,6 +148,7 @@ def handle(self, *args, **options):
         slim = options.get("slim")
         batch_size = options.get("batch_size")
         non_atomic = options.get("non_atomic")
+        slice_queryset = options.get("slice_queryset")
 
         # work-around for legacy optparser hack in BaseCommand. In Django=1.10 the
         # args are collected in options['apps'], but in earlier versions they are
@@ -166,7 +190,8 @@ def handle(self, *args, **options):
                         verbosity,
                         slim_=slim,
                         batch_size_=batch_size,
-                        non_atomic_=non_atomic)
+                        non_atomic_=non_atomic,
+                        slice_queryset_=slice_queryset)
 
         else:  # full rebuild (for one or all search engines)
             if engine_selected:
@@ -188,7 +213,8 @@ def handle(self, *args, **options):
                         verbosity,
                         slim_=slim,
                         batch_size_=batch_size,
-                        non_atomic_=non_atomic)
+                        non_atomic_=non_atomic,
+                        slice_queryset_=slice_queryset)
 
             # Clean out any search entries that exist for stale content types.
             # Only do it during full rebuild