diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py index 3b4bedc6d2a..3fa66a1ddd8 100644 --- a/ckan/cli/search_index.py +++ b/ckan/cli/search_index.py @@ -41,7 +41,7 @@ def rebuild( force=force, refresh=refresh, defer_commit=(not commit_each), - quiet=quiet) + quiet=quiet and not verbose) except Exception as e: tk.error_shout(e) if not commit_each: diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 89b7da80775..f149380ffad 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -123,6 +123,7 @@ def notify(self, entity, operation): if (not isinstance(entity, model.Package) or not asbool(config.get('ckan.search.automatic_indexing', True))): return + if operation != model.domain_object.DomainObjectOperation.deleted: dispatch_by_operation( entity.__class__.__name__, @@ -169,8 +170,12 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, log.info('Indexing just package %r...', pkg_dict['name']) package_index.update_dict(pkg_dict, True) else: - package_ids = [r[0] for r in model.Session.query(model.Package.id). 
- filter(model.Package.state != 'deleted').all()] + packages = model.Session.query(model.Package.id) + if asbool(config.get('ckan.search.remove_deleted_packages')): + packages = packages.filter(model.Package.state != 'deleted') + + package_ids = [r[0] for r in packages.all()] + if only_missing: log.info('Indexing only missing packages...') package_query = query_for(model.Package) diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index e64045e8e85..68316a8d905 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -12,8 +12,7 @@ import six import pysolr -from ckan.common import config -from ckan.common import asbool +from ckan.common import asbool, config import six from six import text_type from six.moves import map @@ -137,9 +136,10 @@ def index_package(self, pkg_dict, defer_commit=False): if title: pkg_dict['title_string'] = title - # delete the package if there is no state, or the state is `deleted` - if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')): - return self.delete_package(pkg_dict) + if asbool(config.get('ckan.search.remove_deleted_packages')): + # delete the package if there is no state, or the state is `deleted` + if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')): + return self.delete_package(pkg_dict) index_fields = RESERVED_FIELDS + list(pkg_dict.keys()) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index a8d2ff23da5..6c77dd8fc23 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1721,6 +1721,10 @@ def package_search(context, data_dict): sysadmin will be returned all draft datasets. Optional, the default is ``False``. :type include_drafts: bool + :param include_deleted: if ``True``, deleted datasets will be included in the + results (site configuration "ckan.search.remove_deleted_packages" must + be set to False). Optional, the default is ``False``. 
+ :type include_deleted: bool :param include_private: if ``True``, private datasets will be included in the results. Only private datasets from the user's organizations will be returned and sysadmins will be returned all private datasets. @@ -1846,14 +1850,23 @@ def package_search(context, data_dict): else: data_dict['fl'] = ' '.join(result_fl) + data_dict.setdefault('fq', '') + # Remove before these hit solr FIXME: whitelist instead include_private = asbool(data_dict.pop('include_private', False)) include_drafts = asbool(data_dict.pop('include_drafts', False)) - data_dict.setdefault('fq', '') + include_deleted = asbool(data_dict.pop('include_deleted', False)) + if not include_private: data_dict['fq'] = '+capacity:public ' + data_dict['fq'] - if include_drafts: - data_dict['fq'] += ' +state:(active OR draft)' + + if '+state' not in data_dict['fq']: + states = ['active'] + if include_drafts: + states.append('draft') + if include_deleted: + states.append('deleted') + data_dict['fq'] += ' +state:({})'.format(' OR '.join(states)) # Pop these ones as Solr does not need them extras = data_dict.pop('extras', None) diff --git a/ckan/templates/admin/snippets/data_type.html b/ckan/templates/admin/snippets/data_type.html index b7004a182c2..e85e4926502 100644 --- a/ckan/templates/admin/snippets/data_type.html +++ b/ckan/templates/admin/snippets/data_type.html @@ -12,13 +12,18 @@ {% endif %} +{# entities list can be of different types #} +{% set items = [] %} + -
- {% set truncate = truncate or 180 %} +
+ {% set truncate = truncate or 180 %} {% set truncate_title = truncate_title or 80 %} - {% if entities.first() %} + {% if items|length > 0 %}
{% endif %} -
\ No newline at end of file +
diff --git a/ckan/tests/cli/test_search_index.py b/ckan/tests/cli/test_search_index.py index d00f8d02091..32427ef051c 100644 --- a/ckan/tests/cli/test_search_index.py +++ b/ckan/tests/cli/test_search_index.py @@ -28,6 +28,52 @@ def test_search_index_rebuild(self, cli): search_result = helpers.call_action(u'package_search', q=u"After") assert search_result[u'count'] == 1 + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True) + def test_no_index_deleted_package(self, cli): + """ Deleted packages should not be in search index. """ + factories.Dataset(title="Deleted package", id="deleted-pkg") + helpers.call_action("package_delete", id="deleted-pkg") + search_result = helpers.call_action('package_search', q="Deleted") + assert search_result[u'count'] == 0 + + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True) + def test_no_index_deleted_package_rebuild(self, cli): + """ Deleted packages should not be in search index after rebuild. """ + factories.Dataset(title="Deleted package", id="deleted-pkg") + helpers.call_action("package_delete", id="deleted-pkg") + result = cli.invoke(ckan, ['search-index', 'rebuild']) + assert not result.exit_code, result.output + search_result = helpers.call_action('package_search', q="Deleted") + assert search_result[u'count'] == 0 + + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False) + def test_index_deleted_package(self, cli): + """ Deleted packages should be in search index if ckan.search.remove_deleted_packages """ + dataset = factories.Dataset(title="Deleted package", id="deleted-pkg") + helpers.call_action("package_delete", id="deleted-pkg") + search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True) + assert search_result[u'count'] == 1 + assert search_result[u'results'][0]['id'] == dataset['id'] + # should be removed after purge + helpers.call_action("dataset_purge", id="deleted-pkg") + search_result = helpers.call_action('package_search', 
q="Deleted", include_deleted=True) + assert search_result[u'count'] == 0 + + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False) + def test_index_deleted_package_rebuild(self, cli): + """ Deleted packages should be in search index after rebuild if ckan.search.remove_deleted_packages """ + dataset = factories.Dataset(title="Deleted package", id="deleted-pkg") + helpers.call_action("package_delete", id="deleted-pkg") + result = cli.invoke(ckan, ['search-index', 'rebuild']) + assert not result.exit_code, result.output + search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True) + assert search_result[u'count'] == 1 + assert search_result[u'results'][0]['id'] == dataset['id'] + # should be removed after purge + helpers.call_action("dataset_purge", id="deleted-pkg") + search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True) + assert search_result[u'count'] == 0 + def test_test_main_operations(self, cli): """Create few datasets, clear index, rebuild it - make sure search results are always reflect correct state of index. 
diff --git a/ckan/tests/controllers/test_admin.py b/ckan/tests/controllers/test_admin.py index c0d0c91a031..6d7fd535eca 100644 --- a/ckan/tests/controllers/test_admin.py +++ b/ckan/tests/controllers/test_admin.py @@ -280,6 +280,7 @@ def test_trash_no_organizations(self, app, sysadmin_env): # no packages available to purge assert len(trash_org_list) == 0 + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True) def test_trash_with_deleted_datasets(self, app, sysadmin_env): """Getting the trash view with 'deleted' datasets should list the datasets.""" @@ -295,6 +296,22 @@ def test_trash_with_deleted_datasets(self, app, sysadmin_env): # Two packages in the list to purge assert len(trash_pkg_list) == 2 + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False) + def test_trash_with_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env): + """Getting the trash view with 'deleted' datasets should list the + datasets.""" + factories.Dataset(state="deleted") + factories.Dataset(state="deleted") + factories.Dataset() + + trash_url = url_for("admin.trash") + response = app.get(trash_url, extra_environ=sysadmin_env, status=200) + + response_html = BeautifulSoup(response.body) + trash_pkg_list = response_html.select("ul.package-list li") + # Two packages in the list to purge + assert len(trash_pkg_list) == 2 + def test_trash_with_deleted_groups(self, app, sysadmin_env): """Getting the trash view with "deleted" groups should list the groups.""" @@ -371,6 +388,7 @@ def test_trash_purge_custom_ds_type(self, app, sysadmin_env): pkgs_after_purge = model.Session.query(model.Package).count() assert pkgs_after_purge == 0 + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True) def test_trash_purge_deleted_datasets(self, app, sysadmin_env): """Posting the trash view with 'deleted' datasets, purges the datasets.""" @@ -397,6 +415,34 @@ def test_trash_purge_deleted_datasets(self, app, sysadmin_env): pkgs_after_purge = 
model.Session.query(model.Package).count() assert pkgs_after_purge == 1 + @pytest.mark.usefixtures("clean_index") + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False) + def test_trash_purge_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env): + """Posting the trash view with 'deleted' datasets, purges the + datasets.""" + factories.Dataset(state="deleted") + factories.Dataset(state="deleted") + factories.Dataset() + + # how many datasets before purge + pkgs_before_purge = model.Session.query(model.Package).count() + assert pkgs_before_purge == 3 + + trash_url = url_for("admin.trash") + response = app.post( + trash_url, + data={"action": "package"}, + extra_environ=sysadmin_env, + status=200 + ) + + # check for flash success msg + assert "datasets have been purged" in response.body + + # how many datasets after purge + pkgs_after_purge = model.Session.query(model.Package).count() + assert pkgs_after_purge == 1 + def test_trash_purge_deleted_groups(self, app, sysadmin_env): """Posting the trash view with 'deleted' groups, purges the groups.""" @@ -451,6 +497,7 @@ def test_trash_purge_deleted_organization(self, app, sysadmin_env): is_organization=True).count() assert orgs_after_purge == 1 + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True) def test_trash_purge_all(self, app, sysadmin_env): """Posting the trash view with 'deleted' entities and purge all button purges everything""" @@ -480,6 +527,37 @@ def test_trash_purge_all(self, app, sysadmin_env): orgs_and_grps_after_purge = model.Session.query(model.Group).count() assert pkgs_after_purge + orgs_and_grps_after_purge == 1 + @pytest.mark.usefixtures("clean_index") + @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False) + def test_trash_purge_all_no_remove_deleted_packages(self, app, sysadmin_env): + """Posting the trash view with 'deleted' entities and + purge all button purges everything""" + factories.Dataset(state="deleted", 
type="custom_dataset") + factories.Group(state="deleted") + factories.Organization(state="deleted") + factories.Organization(state="deleted", type="custom_org") + factories.Organization() + + # how many entities before purge + pkgs_before_purge = model.Session.query(model.Package).count() + orgs_and_grps_before_purge = model.Session.query(model.Group).count() + assert pkgs_before_purge + orgs_and_grps_before_purge == 5 + + trash_url = url_for("admin.trash") + response = app.post( + trash_url, + data={"action": "all"}, + extra_environ=sysadmin_env, + status=200 + ) + # check for flash success msg + assert "Massive purge complete" in response + + # how many entities after purge + pkgs_after_purge = model.Session.query(model.Package).count() + orgs_and_grps_after_purge = model.Session.query(model.Group).count() + assert pkgs_after_purge + orgs_and_grps_after_purge == 1 + def test_trash_cancel_purge(self, app, sysadmin_env): """Cancelling purge doesn't purge anything.""" factories.Organization(state="deleted") diff --git a/ckan/views/admin.py b/ckan/views/admin.py index 1b58adbb925..4ee01665c95 100644 --- a/ckan/views/admin.py +++ b/ckan/views/admin.py @@ -9,9 +9,10 @@ import ckan.lib.base as base import ckan.lib.helpers as h import ckan.lib.navl.dictization_functions as dict_fns +import ckan.lib.search as search import ckan.logic as logic import ckan.model as model -from ckan.common import g, _, config, request +from ckan.common import g, _, config, request, asbool from ckan.views.home import CACHE_PARAMETERS @@ -140,9 +141,9 @@ def post(self): class TrashView(MethodView): + def __init__(self): - self.deleted_packages = model.Session.query( - model.Package).filter_by(state=model.State.DELETED) + self.deleted_packages = self._get_deleted_datasets() self.deleted_orgs = model.Session.query(model.Group).filter_by( state=model.State.DELETED, is_organization=True) self.deleted_groups = model.Session.query(model.Group).filter_by( @@ -173,6 +174,34 @@ def __init__(self): } } 
+ def _get_deleted_datasets(self): + if asbool(config.get('ckan.search.remove_deleted_packages')): + return self._get_deleted_datasets_from_db() + else: + return self._get_deleted_datasets_from_search_index() + + def _get_deleted_datasets_from_db(self): + return model.Session.query( + model.Package + ).filter_by( + state=model.State.DELETED + ) + + def _get_deleted_datasets_from_search_index(self): + query = search.query_for(model.Package) + search_params = { + 'fq': '+state:deleted', + 'df': 'text', + 'fl': 'id name title dataset_type', + } + query.run(search_params) + + results = [] + for result in query.results: + result['type'] = result['dataset_type'] + results.append(result) + return results + def get(self): ent_type = request.args.get(u'name') @@ -207,21 +236,31 @@ def purge_all(self): ) for action, deleted_entities in zip(actions, entities): + if type(deleted_entities) == list: + def get_id(x): return x['id'] + else: + def get_id(x): return x.id + for entity in deleted_entities: logic.get_action(action)( - {u'user': g.user}, {u'id': entity.id} + {u'user': g.user}, {u'id': get_id(entity)} ) model.Session.remove() h.flash_success(_(u'Massive purge complete')) def purge_entity(self, ent_type): entities = self.deleted_entities[ent_type] - number = entities.count() + if type(entities) == list: + number = len(entities) + def get_id(x): return x['id'] + else: + number = entities.count() + def get_id(x): return x.id for ent in entities: logic.get_action(self._get_purge_action(ent_type))( {u'user': g.user}, - {u'id': ent.id} + {u'id': get_id(ent)} ) model.Session.remove() diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 290a7ab4f10..3eee1335f0c 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -1059,6 +1059,17 @@ to occur asynchronously, set this option to false. .. note:: This is equivalent to explicitly load the ``synchronous_search`` plugin. 
+ckan.search.remove_deleted_packages +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Example:: + + ckan.search.remove_deleted_packages = true + +Default value: |config:ckan.search.remove_deleted_packages| + +By default, deleted datasets are removed from the search index and are therefore no longer returned in search results. To keep them in the search index, set this option to ``False``. Doing so enables the ``include_deleted`` parameter of the :py:func:`ckan.logic.action.get.package_search` API action. + .. _ckan.search.solr_commit: ckan.search.solr_commit