diff --git a/ckan/cli/search_index.py b/ckan/cli/search_index.py
index 3b4bedc6d2a..3fa66a1ddd8 100644
--- a/ckan/cli/search_index.py
+++ b/ckan/cli/search_index.py
@@ -41,7 +41,7 @@ def rebuild(
force=force,
refresh=refresh,
defer_commit=(not commit_each),
- quiet=quiet)
+ quiet=quiet and not verbose)
except Exception as e:
tk.error_shout(e)
if not commit_each:
diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py
index 89b7da80775..f149380ffad 100644
--- a/ckan/lib/search/__init__.py
+++ b/ckan/lib/search/__init__.py
@@ -123,6 +123,7 @@ def notify(self, entity, operation):
if (not isinstance(entity, model.Package) or
not asbool(config.get('ckan.search.automatic_indexing', True))):
return
+
if operation != model.domain_object.DomainObjectOperation.deleted:
dispatch_by_operation(
entity.__class__.__name__,
@@ -169,8 +170,12 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False,
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.update_dict(pkg_dict, True)
else:
- package_ids = [r[0] for r in model.Session.query(model.Package.id).
- filter(model.Package.state != 'deleted').all()]
+ packages = model.Session.query(model.Package.id)
+ if asbool(config.get('ckan.search.remove_deleted_packages')):
+ packages = packages.filter(model.Package.state != 'deleted')
+
+ package_ids = [r[0] for r in packages.all()]
+
if only_missing:
log.info('Indexing only missing packages...')
package_query = query_for(model.Package)
diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py
index e64045e8e85..68316a8d905 100644
--- a/ckan/lib/search/index.py
+++ b/ckan/lib/search/index.py
@@ -12,8 +12,7 @@
import six
import pysolr
-from ckan.common import config
-from ckan.common import asbool
+from ckan.common import asbool, config
import six
from six import text_type
from six.moves import map
@@ -137,9 +136,10 @@ def index_package(self, pkg_dict, defer_commit=False):
if title:
pkg_dict['title_string'] = title
- # delete the package if there is no state, or the state is `deleted`
- if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
- return self.delete_package(pkg_dict)
+ if asbool(config.get('ckan.search.remove_deleted_packages')):
+ # delete the package if there is no state, or the state is `deleted`
+ if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
+ return self.delete_package(pkg_dict)
index_fields = RESERVED_FIELDS + list(pkg_dict.keys())
diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index a8d2ff23da5..6c77dd8fc23 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -1721,6 +1721,10 @@ def package_search(context, data_dict):
sysadmin will be returned all draft datasets. Optional, the default is
``False``.
:type include_drafts: bool
+ :param include_deleted: if ``True``, deleted datasets will be included in the
+ results (site configuration "ckan.search.remove_deleted_packages" must
+ be set to False). Optional, the default is ``False``.
+ :type include_deleted: bool
:param include_private: if ``True``, private datasets will be included in
the results. Only private datasets from the user's organizations will
be returned and sysadmins will be returned all private datasets.
@@ -1846,14 +1850,23 @@ def package_search(context, data_dict):
else:
data_dict['fl'] = ' '.join(result_fl)
+ data_dict.setdefault('fq', '')
+
# Remove before these hit solr FIXME: whitelist instead
include_private = asbool(data_dict.pop('include_private', False))
include_drafts = asbool(data_dict.pop('include_drafts', False))
- data_dict.setdefault('fq', '')
+ include_deleted = asbool(data_dict.pop('include_deleted', False))
+
if not include_private:
data_dict['fq'] = '+capacity:public ' + data_dict['fq']
- if include_drafts:
- data_dict['fq'] += ' +state:(active OR draft)'
+
+ if '+state' not in data_dict['fq']:
+ states = ['active']
+ if include_drafts:
+ states.append('draft')
+ if include_deleted:
+ states.append('deleted')
+ data_dict['fq'] += ' +state:({})'.format(' OR '.join(states))
# Pop these ones as Solr does not need them
extras = data_dict.pop('extras', None)
diff --git a/ckan/templates/admin/snippets/data_type.html b/ckan/templates/admin/snippets/data_type.html
index b7004a182c2..e85e4926502 100644
--- a/ckan/templates/admin/snippets/data_type.html
+++ b/ckan/templates/admin/snippets/data_type.html
@@ -12,13 +12,18 @@
{% endif %}
+{# entities list can be of different types #}
+{% set items = [] %}
+
-
- {% set truncate = truncate or 180 %}
+
+ {% set truncate = truncate or 180 %}
{% set truncate_title = truncate_title or 80 %}
{% for entity in entities %}
{% set title = entity.title or entity.name %}
+ {% do items.append(title) %}
-
{{ h.link_to(h.truncate(title, truncate_title), h.url_for(entity.type + '.read', id=entity.name)) }}
@@ -30,7 +35,7 @@
- {% if entities.first() %}
+ {% if items|length > 0 %}
{% endif %}
-
\ No newline at end of file
+
diff --git a/ckan/tests/cli/test_search_index.py b/ckan/tests/cli/test_search_index.py
index d00f8d02091..32427ef051c 100644
--- a/ckan/tests/cli/test_search_index.py
+++ b/ckan/tests/cli/test_search_index.py
@@ -28,6 +28,52 @@ def test_search_index_rebuild(self, cli):
search_result = helpers.call_action(u'package_search', q=u"After")
assert search_result[u'count'] == 1
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
+ def test_no_index_deleted_package(self, cli):
+ """ Deleted packages should not be in search index. """
+ factories.Dataset(title="Deleted package", id="deleted-pkg")
+ helpers.call_action("package_delete", id="deleted-pkg")
+ search_result = helpers.call_action('package_search', q="Deleted")
+ assert search_result[u'count'] == 0
+
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
+ def test_no_index_deleted_package_rebuild(self, cli):
+ """ Deleted packages should not be in search index after rebuild. """
+ factories.Dataset(title="Deleted package", id="deleted-pkg")
+ helpers.call_action("package_delete", id="deleted-pkg")
+ result = cli.invoke(ckan, ['search-index', 'rebuild'])
+ assert not result.exit_code, result.output
+ search_result = helpers.call_action('package_search', q="Deleted")
+ assert search_result[u'count'] == 0
+
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
+ def test_index_deleted_package(self, cli):
+ """ Deleted packages should be in search index if ckan.search.remove_deleted_packages """
+ dataset = factories.Dataset(title="Deleted package", id="deleted-pkg")
+ helpers.call_action("package_delete", id="deleted-pkg")
+ search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
+ assert search_result[u'count'] == 1
+ assert search_result[u'results'][0]['id'] == dataset['id']
+ # should be removed after purge
+ helpers.call_action("dataset_purge", id="deleted-pkg")
+ search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
+ assert search_result[u'count'] == 0
+
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
+ def test_index_deleted_package_rebuild(self, cli):
+ """ Deleted packages should be in search index after rebuild if ckan.search.remove_deleted_packages """
+ dataset = factories.Dataset(title="Deleted package", id="deleted-pkg")
+ helpers.call_action("package_delete", id="deleted-pkg")
+ result = cli.invoke(ckan, ['search-index', 'rebuild'])
+ assert not result.exit_code, result.output
+ search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
+ assert search_result[u'count'] == 1
+ assert search_result[u'results'][0]['id'] == dataset['id']
+ # should be removed after purge
+ helpers.call_action("dataset_purge", id="deleted-pkg")
+ search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
+ assert search_result[u'count'] == 0
+
def test_test_main_operations(self, cli):
"""Create few datasets, clear index, rebuild it - make sure search results
are always reflect correct state of index.
diff --git a/ckan/tests/controllers/test_admin.py b/ckan/tests/controllers/test_admin.py
index c0d0c91a031..6d7fd535eca 100644
--- a/ckan/tests/controllers/test_admin.py
+++ b/ckan/tests/controllers/test_admin.py
@@ -280,6 +280,7 @@ def test_trash_no_organizations(self, app, sysadmin_env):
# no packages available to purge
assert len(trash_org_list) == 0
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_with_deleted_datasets(self, app, sysadmin_env):
"""Getting the trash view with 'deleted' datasets should list the
datasets."""
@@ -295,6 +296,22 @@ def test_trash_with_deleted_datasets(self, app, sysadmin_env):
# Two packages in the list to purge
assert len(trash_pkg_list) == 2
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
+ def test_trash_with_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env):
+ """Getting the trash view with 'deleted' datasets should list the
+ datasets."""
+ factories.Dataset(state="deleted")
+ factories.Dataset(state="deleted")
+ factories.Dataset()
+
+ trash_url = url_for("admin.trash")
+ response = app.get(trash_url, extra_environ=sysadmin_env, status=200)
+
+ response_html = BeautifulSoup(response.body)
+ trash_pkg_list = response_html.select("ul.package-list li")
+ # Two packages in the list to purge
+ assert len(trash_pkg_list) == 2
+
def test_trash_with_deleted_groups(self, app, sysadmin_env):
"""Getting the trash view with "deleted" groups should list the
groups."""
@@ -371,6 +388,7 @@ def test_trash_purge_custom_ds_type(self, app, sysadmin_env):
pkgs_after_purge = model.Session.query(model.Package).count()
assert pkgs_after_purge == 0
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_purge_deleted_datasets(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' datasets, purges the
datasets."""
@@ -397,6 +415,34 @@ def test_trash_purge_deleted_datasets(self, app, sysadmin_env):
pkgs_after_purge = model.Session.query(model.Package).count()
assert pkgs_after_purge == 1
+ @pytest.mark.usefixtures("clean_index")
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
+ def test_trash_purge_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env):
+ """Posting the trash view with 'deleted' datasets, purges the
+ datasets."""
+ factories.Dataset(state="deleted")
+ factories.Dataset(state="deleted")
+ factories.Dataset()
+
+ # how many datasets before purge
+ pkgs_before_purge = model.Session.query(model.Package).count()
+ assert pkgs_before_purge == 3
+
+ trash_url = url_for("admin.trash")
+ response = app.post(
+ trash_url,
+ data={"action": "package"},
+ extra_environ=sysadmin_env,
+ status=200
+ )
+
+ # check for flash success msg
+ assert "datasets have been purged" in response.body
+
+ # how many datasets after purge
+ pkgs_after_purge = model.Session.query(model.Package).count()
+ assert pkgs_after_purge == 1
+
def test_trash_purge_deleted_groups(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' groups, purges the
groups."""
@@ -451,6 +497,7 @@ def test_trash_purge_deleted_organization(self, app, sysadmin_env):
is_organization=True).count()
assert orgs_after_purge == 1
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_purge_all(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' entities and
purge all button purges everything"""
@@ -480,6 +527,37 @@ def test_trash_purge_all(self, app, sysadmin_env):
orgs_and_grps_after_purge = model.Session.query(model.Group).count()
assert pkgs_after_purge + orgs_and_grps_after_purge == 1
+ @pytest.mark.usefixtures("clean_index")
+ @pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
+ def test_trash_purge_all_no_remove_deleted_packages(self, app, sysadmin_env):
+ """Posting the trash view with 'deleted' entities and
+ purge all button purges everything"""
+ factories.Dataset(state="deleted", type="custom_dataset")
+ factories.Group(state="deleted")
+ factories.Organization(state="deleted")
+ factories.Organization(state="deleted", type="custom_org")
+ factories.Organization()
+
+ # how many entities before purge
+ pkgs_before_purge = model.Session.query(model.Package).count()
+ orgs_and_grps_before_purge = model.Session.query(model.Group).count()
+ assert pkgs_before_purge + orgs_and_grps_before_purge == 5
+
+ trash_url = url_for("admin.trash")
+ response = app.post(
+ trash_url,
+ data={"action": "all"},
+ extra_environ=sysadmin_env,
+ status=200
+ )
+ # check for flash success msg
+ assert "Massive purge complete" in response
+
+ # how many entities after purge
+ pkgs_after_purge = model.Session.query(model.Package).count()
+ orgs_and_grps_after_purge = model.Session.query(model.Group).count()
+ assert pkgs_after_purge + orgs_and_grps_after_purge == 1
+
def test_trash_cancel_purge(self, app, sysadmin_env):
"""Cancelling purge doesn't purge anything."""
factories.Organization(state="deleted")
diff --git a/ckan/views/admin.py b/ckan/views/admin.py
index 1b58adbb925..4ee01665c95 100644
--- a/ckan/views/admin.py
+++ b/ckan/views/admin.py
@@ -9,9 +9,10 @@
import ckan.lib.base as base
import ckan.lib.helpers as h
import ckan.lib.navl.dictization_functions as dict_fns
+import ckan.lib.search as search
import ckan.logic as logic
import ckan.model as model
-from ckan.common import g, _, config, request
+from ckan.common import g, _, config, request, asbool
from ckan.views.home import CACHE_PARAMETERS
@@ -140,9 +141,9 @@ def post(self):
class TrashView(MethodView):
+
def __init__(self):
- self.deleted_packages = model.Session.query(
- model.Package).filter_by(state=model.State.DELETED)
+ self.deleted_packages = self._get_deleted_datasets()
self.deleted_orgs = model.Session.query(model.Group).filter_by(
state=model.State.DELETED, is_organization=True)
self.deleted_groups = model.Session.query(model.Group).filter_by(
@@ -173,6 +174,34 @@ def __init__(self):
}
}
+ def _get_deleted_datasets(self):
+ if asbool(config.get('ckan.search.remove_deleted_packages')):
+ return self._get_deleted_datasets_from_db()
+ else:
+ return self._get_deleted_datasets_from_search_index()
+
+ def _get_deleted_datasets_from_db(self):
+ return model.Session.query(
+ model.Package
+ ).filter_by(
+ state=model.State.DELETED
+ )
+
+ def _get_deleted_datasets_from_search_index(self):
+ query = search.query_for(model.Package)
+ search_params = {
+ 'fq': '+state:deleted',
+ 'df': 'text',
+ 'fl': 'id name title dataset_type',
+ }
+ query.run(search_params)
+
+ results = []
+ for result in query.results:
+ result['type'] = result['dataset_type']
+ results.append(result)
+ return results
+
def get(self):
ent_type = request.args.get(u'name')
@@ -207,21 +236,31 @@ def purge_all(self):
)
for action, deleted_entities in zip(actions, entities):
+ if type(deleted_entities) == list:
+ def get_id(x): return x['id']
+ else:
+ def get_id(x): return x.id
+
for entity in deleted_entities:
logic.get_action(action)(
- {u'user': g.user}, {u'id': entity.id}
+ {u'user': g.user}, {u'id': get_id(entity)}
)
model.Session.remove()
h.flash_success(_(u'Massive purge complete'))
def purge_entity(self, ent_type):
entities = self.deleted_entities[ent_type]
- number = entities.count()
+ if type(entities) == list:
+ number = len(entities)
+ def get_id(x): return x['id']
+ else:
+ number = entities.count()
+ def get_id(x): return x.id
for ent in entities:
logic.get_action(self._get_purge_action(ent_type))(
{u'user': g.user},
- {u'id': ent.id}
+ {u'id': get_id(ent)}
)
model.Session.remove()
diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst
index 290a7ab4f10..3eee1335f0c 100644
--- a/doc/maintaining/configuration.rst
+++ b/doc/maintaining/configuration.rst
@@ -1059,6 +1059,17 @@ to occur asynchronously, set this option to false.
.. note:: This is equivalent to explicitly load the ``synchronous_search`` plugin.
+ckan.search.remove_deleted_packages
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Example::
+
+ ckan.search.remove_deleted_packages = true
+
+Default value: |config:ckan.search.remove_deleted_packages|
+
+By default, deleted datasets are removed from the search index so are no longer available in searches. To keep them in the search index, set this setting to ``False``. This will enable the ``include_deleted`` parameter in the :py:func:`ckan.logic.action.get.package_search` API action.
+
.. _ckan.search.solr_commit:
ckan.search.solr_commit