Skip to content

Commit

Permalink
moderation:added percolator and refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Nov 7, 2024
1 parent 1e7a087 commit e4a30b7
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 62 deletions.
1 change: 0 additions & 1 deletion site/tests/moderation/test_moderation_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from invenio_db import db
from invenio_search import current_search_client

from zenodo_rdm.moderation.models import ModerationQuery


Expand Down
34 changes: 14 additions & 20 deletions site/zenodo_rdm/moderation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@

"""Moderation config."""

from .rules import (
community_match_query_rule,
files_rule,
links_rule,
record_match_query_rule,
text_sanitization_rule,
verified_user_rule,
)
from .rules import (files_rule, links_rule, match_query_rule,
text_sanitization_rule, verified_user_rule)

MODERATION_BANNED_LINK_DOMAINS = []
"""Banned domains for links."""
Expand Down Expand Up @@ -48,29 +42,29 @@
MODERATION_SPAM_FILE_EXTS = {"jpg", "jpeg", "pdf", "png", "jfif", "docx", "webp"}
"""Frequent spam file extensions."""

MODERATION_RECORD_PERCOLATOR_INDEX = (
"moderation-queries-rdmrecords-records-record-v7.0.0"
)
"""Percolator index for moderation queries for records."""

MODERATION_COMMUNITY_PERCOLATOR_INDEX = (
"moderation-queries-communities-communities-v2.0.0"
)
"""Percolator index for moderation queries for communities."""

MODERATION_RECORD_SCORE_RULES = [
verified_user_rule,
links_rule,
files_rule,
text_sanitization_rule,
record_match_query_rule,
match_query_rule,
]
"""Scoring rules for record moderation."""

MODERATION_COMMUNITY_SCORE_RULES = [
links_rule,
text_sanitization_rule,
verified_user_rule,
community_match_query_rule,
match_query_rule,
]
"""Scoring rules for community moderation."""

MODERATION_PERCOLATOR_MAPPING = {
"properties": {
"query": {"type": "percolator"},
"score": {"type": "integer"},
"notes": {"type": "text"},
"active": {"type": "boolean"},
}
}
"""Properties for moderation percolator index."""
19 changes: 4 additions & 15 deletions site/zenodo_rdm/moderation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from invenio_db import db
from invenio_search import current_search_client

from .percolator import add_percolate_query


class ModerationQuery(db.Model):
"""Moderation queries model."""
Expand All @@ -33,24 +35,11 @@ class ModerationQuery(db.Model):
"""Indicates whether the moderation query is currently active."""

@classmethod
def create(cls, query_string, notes=None, score=0, active=True):
def create(cls, query_string, record=None, notes=None, score=0, active=True):
"""Create a new moderation query."""
query = cls(query_string=query_string, notes=notes, score=score, active=active)
db.session.add(query)

try:
current_search_client.index(
index="moderation-queries",
body={
"query": {"query_string": {"query": query_string}},
"active": active,
"score": score,
"notes": notes,
},
)
except Exception as e:
current_app.logger.exception(e)

add_percolate_query(query_string, active, score, notes)
return query

@classmethod
Expand Down
89 changes: 89 additions & 0 deletions site/zenodo_rdm/moderation/percolator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-

Check failure on line 1 in site/zenodo_rdm/moderation/percolator.py

View workflow job for this annotation

GitHub Actions / Python (site, 3.9, postgresql14, opensearch2)

Black format check --- /home/runner/work/zenodo-rdm/zenodo-rdm/site/zenodo_rdm/moderation/percolator.py 2024-11-07 16:33:24.912738+00:00 +++ /home/runner/work/zenodo-rdm/zenodo-rdm/site/zenodo_rdm/moderation/percolator.py 2024-11-07 16:42:02.490468+00:00 @@ -68,13 +68,10 @@ current_app.logger.info(f"Index {percolator_index} created successfully.") except Exception as e: current_app.logger.exception(e) - - - def add_percolate_query(query_string, active=True, score=1.0, notes=None): """Adds a percolate query to the moderation-queries index.""" try: current_search_client.index( index="moderation-queries",
#
# This file is part of Invenio.
# Copyright (C) 2017-2024 CERN.
# Copyright (C) 2022 Graz University of Technology.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.

"""Percolator."""


from flask import current_app
from invenio_search import current_search_client
from invenio_search.utils import build_alias_name, build_index_name


def create_percolator_index(index_name, record):
    """Create a percolator index that mirrors a record's search index.

    The new index reuses the mappings, the ``query.default_field`` setting and
    the ``analysis`` settings of the record's search index, and extends the
    mappings with the properties configured in
    ``MODERATION_PERCOLATOR_MAPPING``.

    Nothing is done when the percolator index already exists. Errors raised
    while creating the index are logged and not propagated.

    :param index_name: Base name of the percolator index; the record's index
        name is passed to ``build_index_name`` as the suffix.
    :param record: Object exposing ``index._name`` (presumably a record API
        class -- confirm against callers).
    :raises KeyError: If ``MODERATION_PERCOLATOR_MAPPING`` is not configured.
    """
    record_index_name = record.index._name
    percolator_index = build_index_name(
        index_name, suffix=record_index_name, app=current_app
    )

    # Check first: when the index is already there, the mapping and settings
    # lookups below would be wasted round trips to the search cluster.
    if current_search_client.indices.exists(index=percolator_index):
        return

    record_index = build_alias_name(record_index_name)

    # Both responses are dictionaries with one entry per matched index; the
    # first entry is used as the template.
    mapping_response = current_search_client.indices.get_mapping(index=record_index)
    percolator_mappings = next(iter(mapping_response.values()))["mappings"]
    percolator_mappings["properties"].update(
        current_app.config["MODERATION_PERCOLATOR_MAPPING"]["properties"]
    )

    settings_response = current_search_client.indices.get_settings(
        index=record_index
    )
    record_settings = next(iter(settings_response.values()))["settings"]["index"]

    # Only the default query fields and the analysis section are carried over
    # from the record index settings.
    default_field = record_settings.get("query", {}).get("default_field", [])
    percolator_settings = {
        "index": {"query": {"default_field": default_field}},
        "analysis": record_settings.get("analysis", {}),
    }

    try:
        current_search_client.indices.create(
            index=percolator_index,
            body={
                "settings": percolator_settings,
                "mappings": percolator_mappings,
            },
        )
    except Exception as e:
        # Best effort: a failed index creation is logged, not raised.
        current_app.logger.exception(e)
    else:
        current_app.logger.info(f"Index {percolator_index} created successfully.")





def add_percolate_query(
    query_string, active=True, score=1.0, notes=None, index="moderation-queries"
):
    """Index a moderation percolate query.

    The query string is wrapped in a ``query_string`` query and stored together
    with its ``active``, ``score`` and ``notes`` metadata. Errors raised by the
    search client are logged and not propagated.

    :param query_string: Query string to store as a percolate query.
    :param active: Value stored in the document's ``active`` field.
    :param score: Value stored in the document's ``score`` field.
    :param notes: Value stored in the document's ``notes`` field.
    :param index: Name of the index to write to. Defaults to
        ``"moderation-queries"``; pass the name of a per-record percolator
        index to make the query visible to searches against that index.
    """
    document = {
        "query": {"query_string": {"query": query_string}},
        "active": active,
        "score": score,
        "notes": notes,
    }
    try:
        current_search_client.index(index=index, body=document)
    except Exception as e:
        # Best effort: a failed indexing call is logged, not raised.
        current_app.logger.exception(e)
30 changes: 4 additions & 26 deletions site/zenodo_rdm/moderation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from flask import current_app
from invenio_search import current_search_client

from invenio_search.utils import build_alias_name
from zenodo_rdm.moderation.proxies import current_domain_tree

from .proxies import current_domain_tree, current_scores
Expand Down Expand Up @@ -126,15 +126,13 @@ def files_rule(identity, draft=None, record=None):
return score


def match_query_rule(identity, draft=None, record=None, index=None):
def match_query_rule(identity, draft=None, record=None):
"""Calculate a score from the percolate queries in the record's percolator index that match the given document."""
if not index:
raise ValueError("Index must be specified for matching query rule.")

document = record.dumps() if record else draft.dumps()

percolator_index = build_alias_name(f"moderation-queries-{record.index._name}")
matched_queries = current_search_client.search(
index=index,
index=percolator_index,
body={"query": {"percolate": {"field": "query", "document": document}}},
)

Expand All @@ -145,23 +143,3 @@ def match_query_rule(identity, draft=None, record=None, index=None):
score += query_score

return score


def record_match_query_rule(identity, draft=None, record=None):
"""Match query rule for records."""
return match_query_rule(
identity,
draft=draft,
record=record,
index=current_app.config.get("MODERATION_RECORD_PERCOLATOR_INDEX"),
)


def community_match_query_rule(identity, draft=None, record=None):
"""Match query rule for communities."""
return match_query_rule(
identity,
draft=draft,
record=record,
index=current_app.config.get("MODERATION_COMMUNITY_PERCOLATOR_INDEX"),
)

0 comments on commit e4a30b7

Please sign in to comment.