Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow index deleted packages #2

Open
wants to merge 3 commits into
base: av-ckan-2.9.5
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ckan/cli/search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def rebuild(
force=force,
refresh=refresh,
defer_commit=(not commit_each),
quiet=quiet)
quiet=quiet and not verbose)
except Exception as e:
tk.error_shout(e)
if not commit_each:
Expand Down
9 changes: 7 additions & 2 deletions ckan/lib/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def notify(self, entity, operation):
if (not isinstance(entity, model.Package) or
not asbool(config.get('ckan.search.automatic_indexing', True))):
return

if operation != model.domain_object.DomainObjectOperation.deleted:
dispatch_by_operation(
entity.__class__.__name__,
Expand Down Expand Up @@ -169,8 +170,12 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False,
log.info('Indexing just package %r...', pkg_dict['name'])
package_index.update_dict(pkg_dict, True)
else:
package_ids = [r[0] for r in model.Session.query(model.Package.id).
filter(model.Package.state != 'deleted').all()]
packages = model.Session.query(model.Package.id)
if asbool(config.get('ckan.search.remove_deleted_packages')):
packages = packages.filter(model.Package.state != 'deleted')

package_ids = [r[0] for r in packages.all()]

if only_missing:
log.info('Indexing only missing packages...')
package_query = query_for(model.Package)
Expand Down
10 changes: 5 additions & 5 deletions ckan/lib/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@

import six
import pysolr
from ckan.common import config
from ckan.common import asbool
from ckan.common import asbool, config
import six
from six import text_type
from six.moves import map
Expand Down Expand Up @@ -137,9 +136,10 @@ def index_package(self, pkg_dict, defer_commit=False):
if title:
pkg_dict['title_string'] = title

# delete the package if there is no state, or the state is `deleted`
if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
return self.delete_package(pkg_dict)
if asbool(config.get('ckan.search.remove_deleted_packages')):
# delete the package if there is no state, or the state is `deleted`
if (not pkg_dict.get('state') or 'deleted' in pkg_dict.get('state')):
return self.delete_package(pkg_dict)

index_fields = RESERVED_FIELDS + list(pkg_dict.keys())

Expand Down
19 changes: 16 additions & 3 deletions ckan/logic/action/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -1721,6 +1721,10 @@ def package_search(context, data_dict):
sysadmin will be returned all draft datasets. Optional, the default is
``False``.
:type include_drafts: bool
:param include_deleted: if ``True``, deleted datasets will be included in the
results (site configuration "ckan.search.remove_deleted_packages" must
be set to False). Optional, the default is ``False``.
:type include_deleted: bool
:param include_private: if ``True``, private datasets will be included in
the results. Only private datasets from the user's organizations will
be returned and sysadmins will be returned all private datasets.
Expand Down Expand Up @@ -1846,14 +1850,23 @@ def package_search(context, data_dict):
else:
data_dict['fl'] = ' '.join(result_fl)

data_dict.setdefault('fq', '')

# Remove before these hit solr FIXME: whitelist instead
include_private = asbool(data_dict.pop('include_private', False))
include_drafts = asbool(data_dict.pop('include_drafts', False))
data_dict.setdefault('fq', '')
include_deleted = asbool(data_dict.pop('include_deleted', False))

if not include_private:
data_dict['fq'] = '+capacity:public ' + data_dict['fq']
if include_drafts:
data_dict['fq'] += ' +state:(active OR draft)'

if '+state' not in data_dict['fq']:
states = ['active']
if include_drafts:
states.append('draft')
if include_deleted:
states.append('deleted')
data_dict['fq'] += ' +state:({})'.format(' OR '.join(states))

# Pop these ones as Solr does not need them
extras = data_dict.pop('extras', None)
Expand Down
13 changes: 9 additions & 4 deletions ckan/templates/admin/snippets/data_type.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,18 @@
{% endif %}
</button>

{# entities list can be of different types #}
{% set items = [] %}

<!-- expanded by default to prevent problems with disabled js -->
<div id="{{ ent_type }}" class="entities collapse in">
{% set truncate = truncate or 180 %}
<div id="{{ ent_type }}" class="accordion-collapse collapse show p-2" aria-labelledby="heading-{{ ent_type }}"
data-bs-parent="#accordion-{{ ent_type }}">
{% set truncate = truncate or 180 %}
{% set truncate_title = truncate_title or 80 %}
<ul class="{{ ent_type }}-list">
{% for entity in entities %}
{% set title = entity.title or entity.name %}
{% do items.append(title) %}
<li>
{{ h.link_to(h.truncate(title, truncate_title), h.url_for(entity.type + '.read', id=entity.name)) }}
</li>
Expand All @@ -30,7 +35,7 @@
</ul>

<!-- show button only if there is entity to purge -->
{% if entities.first() %}
{% if items|length > 0 %}
<form method="POST" action="{{ h.url_for('admin.trash') }}" id="form-purge-{{ ent_type }}">
<input type="hidden" name="action" value="{{ent_type}}">
<a href="{{ h.url_for('admin.trash', name=ent_type) }}"
Expand All @@ -42,4 +47,4 @@
</a>
</form>
{% endif %}
</div>
</div>
46 changes: 46 additions & 0 deletions ckan/tests/cli/test_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,52 @@ def test_search_index_rebuild(self, cli):
search_result = helpers.call_action(u'package_search', q=u"After")
assert search_result[u'count'] == 1

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_no_index_deleted_package(self, cli):
""" Deleted packages should not be in search index. """
factories.Dataset(title="Deleted package", id="deleted-pkg")
helpers.call_action("package_delete", id="deleted-pkg")
search_result = helpers.call_action('package_search', q="Deleted")
assert search_result[u'count'] == 0

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_no_index_deleted_package_rebuild(self, cli):
""" Deleted packages should not be in search index after rebuild. """
factories.Dataset(title="Deleted package", id="deleted-pkg")
helpers.call_action("package_delete", id="deleted-pkg")
result = cli.invoke(ckan, ['search-index', 'rebuild'])
assert not result.exit_code, result.output
search_result = helpers.call_action('package_search', q="Deleted")
assert search_result[u'count'] == 0

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
def test_index_deleted_package(self, cli):
""" Deleted packages should be in search index if ckan.search.remove_deleted_packages """
dataset = factories.Dataset(title="Deleted package", id="deleted-pkg")
helpers.call_action("package_delete", id="deleted-pkg")
search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
assert search_result[u'count'] == 1
assert search_result[u'results'][0]['id'] == dataset['id']
# should be removed after purge
helpers.call_action("dataset_purge", id="deleted-pkg")
search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
assert search_result[u'count'] == 0

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
def test_index_deleted_package_rebuild(self, cli):
""" Deleted packages should be in search index after rebuild if ckan.search.remove_deleted_packages """
dataset = factories.Dataset(title="Deleted package", id="deleted-pkg")
helpers.call_action("package_delete", id="deleted-pkg")
result = cli.invoke(ckan, ['search-index', 'rebuild'])
assert not result.exit_code, result.output
search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
assert search_result[u'count'] == 1
assert search_result[u'results'][0]['id'] == dataset['id']
# should be removed after purge
helpers.call_action("dataset_purge", id="deleted-pkg")
search_result = helpers.call_action('package_search', q="Deleted", include_deleted=True)
assert search_result[u'count'] == 0

def test_test_main_operations(self, cli):
"""Create few datasets, clear index, rebuild it - make sure search results
are always reflect correct state of index.
Expand Down
78 changes: 78 additions & 0 deletions ckan/tests/controllers/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def test_trash_no_organizations(self, app, sysadmin_env):
# no packages available to purge
assert len(trash_org_list) == 0

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_with_deleted_datasets(self, app, sysadmin_env):
"""Getting the trash view with 'deleted' datasets should list the
datasets."""
Expand All @@ -295,6 +296,22 @@ def test_trash_with_deleted_datasets(self, app, sysadmin_env):
# Two packages in the list to purge
assert len(trash_pkg_list) == 2

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
def test_trash_with_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env):
"""Getting the trash view with 'deleted' datasets should list the
datasets."""
factories.Dataset(state="deleted")
factories.Dataset(state="deleted")
factories.Dataset()

trash_url = url_for("admin.trash")
response = app.get(trash_url, extra_environ=sysadmin_env, status=200)

response_html = BeautifulSoup(response.body)
trash_pkg_list = response_html.select("ul.package-list li")
# Two packages in the list to purge
assert len(trash_pkg_list) == 2

def test_trash_with_deleted_groups(self, app, sysadmin_env):
"""Getting the trash view with "deleted" groups should list the
groups."""
Expand Down Expand Up @@ -371,6 +388,7 @@ def test_trash_purge_custom_ds_type(self, app, sysadmin_env):
pkgs_after_purge = model.Session.query(model.Package).count()
assert pkgs_after_purge == 0

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_purge_deleted_datasets(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' datasets, purges the
datasets."""
Expand All @@ -397,6 +415,34 @@ def test_trash_purge_deleted_datasets(self, app, sysadmin_env):
pkgs_after_purge = model.Session.query(model.Package).count()
assert pkgs_after_purge == 1

@pytest.mark.usefixtures("clean_index")
@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
def test_trash_purge_deleted_datasets_no_remove_deleted_packages(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' datasets, purges the
datasets."""
factories.Dataset(state="deleted")
factories.Dataset(state="deleted")
factories.Dataset()

# how many datasets before purge
pkgs_before_purge = model.Session.query(model.Package).count()
assert pkgs_before_purge == 3

trash_url = url_for("admin.trash")
response = app.post(
trash_url,
data={"action": "package"},
extra_environ=sysadmin_env,
status=200
)

# check for flash success msg
assert "datasets have been purged" in response.body

# how many datasets after purge
pkgs_after_purge = model.Session.query(model.Package).count()
assert pkgs_after_purge == 1

def test_trash_purge_deleted_groups(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' groups, purges the
groups."""
Expand Down Expand Up @@ -451,6 +497,7 @@ def test_trash_purge_deleted_organization(self, app, sysadmin_env):
is_organization=True).count()
assert orgs_after_purge == 1

@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", True)
def test_trash_purge_all(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' entities and
purge all button purges everything"""
Expand Down Expand Up @@ -480,6 +527,37 @@ def test_trash_purge_all(self, app, sysadmin_env):
orgs_and_grps_after_purge = model.Session.query(model.Group).count()
assert pkgs_after_purge + orgs_and_grps_after_purge == 1

@pytest.mark.usefixtures("clean_index")
@pytest.mark.ckan_config("ckan.search.remove_deleted_packages", False)
def test_trash_purge_all_no_remove_deleted_packages(self, app, sysadmin_env):
"""Posting the trash view with 'deleted' entities and
purge all button purges everything"""
factories.Dataset(state="deleted", type="custom_dataset")
factories.Group(state="deleted")
factories.Organization(state="deleted")
factories.Organization(state="deleted", type="custom_org")
factories.Organization()

# how many entities before purge
pkgs_before_purge = model.Session.query(model.Package).count()
orgs_and_grps_before_purge = model.Session.query(model.Group).count()
assert pkgs_before_purge + orgs_and_grps_before_purge == 5

trash_url = url_for("admin.trash")
response = app.post(
trash_url,
data={"action": "all"},
extra_environ=sysadmin_env,
status=200
)
# check for flash success msg
assert "Massive purge complete" in response

# how many entities after purge
pkgs_after_purge = model.Session.query(model.Package).count()
orgs_and_grps_after_purge = model.Session.query(model.Group).count()
assert pkgs_after_purge + orgs_and_grps_after_purge == 1

def test_trash_cancel_purge(self, app, sysadmin_env):
"""Cancelling purge doesn't purge anything."""
factories.Organization(state="deleted")
Expand Down
51 changes: 45 additions & 6 deletions ckan/views/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
import ckan.lib.base as base
import ckan.lib.helpers as h
import ckan.lib.navl.dictization_functions as dict_fns
import ckan.lib.search as search
import ckan.logic as logic
import ckan.model as model
from ckan.common import g, _, config, request
from ckan.common import g, _, config, request, asbool
from ckan.views.home import CACHE_PARAMETERS


Expand Down Expand Up @@ -140,9 +141,9 @@ def post(self):


class TrashView(MethodView):

def __init__(self):
self.deleted_packages = model.Session.query(
model.Package).filter_by(state=model.State.DELETED)
self.deleted_packages = self._get_deleted_datasets()
self.deleted_orgs = model.Session.query(model.Group).filter_by(
state=model.State.DELETED, is_organization=True)
self.deleted_groups = model.Session.query(model.Group).filter_by(
Expand Down Expand Up @@ -173,6 +174,34 @@ def __init__(self):
}
}

def _get_deleted_datasets(self):
if asbool(config.get('ckan.search.remove_deleted_packages')):
return self._get_deleted_datasets_from_db()
else:
return self._get_deleted_datasets_from_search_index()

def _get_deleted_datasets_from_db(self):
return model.Session.query(
model.Package
).filter_by(
state=model.State.DELETED
)

def _get_deleted_datasets_from_search_index(self):
query = search.query_for(model.Package)
search_params = {
'fq': '+state:deleted',
'df': 'text',
'fl': 'id name title dataset_type',
}
query.run(search_params)

results = []
for result in query.results:
result['type'] = result['dataset_type']
results.append(result)
return results

def get(self):
ent_type = request.args.get(u'name')

Expand Down Expand Up @@ -207,21 +236,31 @@ def purge_all(self):
)

for action, deleted_entities in zip(actions, entities):
if type(deleted_entities) == list:
def get_id(x): return x['id']
else:
def get_id(x): return x.id

for entity in deleted_entities:
logic.get_action(action)(
{u'user': g.user}, {u'id': entity.id}
{u'user': g.user}, {u'id': get_id(entity)}
)
model.Session.remove()
h.flash_success(_(u'Massive purge complete'))

def purge_entity(self, ent_type):
entities = self.deleted_entities[ent_type]
number = entities.count()
if type(entities) == list:
number = len(entities)
def get_id(x): return x['id']
else:
number = entities.count()
def get_id(x): return x.id

for ent in entities:
logic.get_action(self._get_purge_action(ent_type))(
{u'user': g.user},
{u'id': ent.id}
{u'id': get_id(ent)}
)

model.Session.remove()
Expand Down
Loading