Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Support Elasticsearch 7.x and OpenSearch 1.x #1800

Open
wants to merge 1 commit into
base: qa/1.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions hack/docker-compose.opensearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Compose override file: adds a single-node OpenSearch service and points the
# Archivematica MCPClient and Dashboard search settings at it.
version: "2.1"

volumes:
  # Named volume mounted at the OpenSearch data directory (see the service's
  # `volumes` entry below). No driver options are set.
  opensearch_data:

services:
  opensearch:
    image: "opensearchproject/opensearch:1.3.0"
    environment:
      - "discovery.type=single-node"
      - "cluster.name=am-cluster"
      - "cluster.routing.allocation.disk.threshold_enabled=false"
      - "node.name=am-node"
      # Memory locking needs the unlimited `memlock` ulimit configured below.
      - "bootstrap.memory_lock=true"
      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
      # NOTE(review): these two switches presumably skip the image's demo
      # security setup and disable the security plugin, so the node is served
      # without TLS or authentication - confirm against the image docs.
      - "DISABLE_INSTALL_DEMO_CONFIG=true"
      - "DISABLE_SECURITY_PLUGIN=true"
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - "opensearch_data:/usr/share/opensearch/data"
    ports:
      # Published on the loopback interface only. Quoted like the port
      # mappings in hack/docker-compose.yml so they always load as strings.
      - "127.0.0.1:62092:9200"
      - "127.0.0.1:62096:9600"

  # The two services below only set one environment variable each; the rest of
  # their definition is presumably supplied by hack/docker-compose.yml when
  # both files are passed to docker-compose - TODO confirm.
  archivematica-mcp-client:
    environment:
      ARCHIVEMATICA_MCPCLIENT_MCPCLIENT_ELASTICSEARCHSERVER: "opensearch:9200"

  archivematica-dashboard:
    environment:
      ARCHIVEMATICA_DASHBOARD_DASHBOARD_ELASTICSEARCH_SERVER: "opensearch:9200"
4 changes: 3 additions & 1 deletion hack/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ services:
- "127.0.0.1:62001:3306"

elasticsearch:
image: "docker.elastic.co/elasticsearch/elasticsearch:6.5.4"
image: "docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2"
environment:
- "discovery.type=single-node"
- "cluster.name=am-cluster"
- "cluster.routing.allocation.disk.threshold_enabled=false"
- "node.name=am-node"
- "network.host=0.0.0.0"
- "bootstrap.memory_lock=true"
Expand Down
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ brotli==0.5.2
certifi==2021.5.30
# via
# -r requirements.txt
# elasticsearch
# requests
cffi==1.14.6
# via
Expand Down Expand Up @@ -77,7 +78,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d
# via -r requirements.txt
django-tastypie==0.13.2
# via -r requirements.txt
elasticsearch==6.8.2
elasticsearch==7.13.0
# via -r requirements.txt
filelock==3.3.1
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ django-extensions==1.7.9
django-forms-bootstrap>=3.0.0,<4.0.0
django-prometheus==1.0.15
django-tastypie==0.13.2
elasticsearch>=6.0.0,<7.0.0
elasticsearch==7.13.0
gearman3==0.2.1
gevent==1.3.6 # used by gunicorn's async workers
gunicorn==19.9.0
Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ bagit==1.7.0
brotli==0.5.2
# via -r requirements.in
certifi==2021.5.30
# via requests
# via
# elasticsearch
# requests
cffi==1.14.6
# via cryptography
charset-normalizer==2.0.1
Expand Down Expand Up @@ -56,7 +58,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d
# via -r requirements.in
django-tastypie==0.13.2
# via -r requirements.in
elasticsearch==6.8.2
elasticsearch==7.13.0
# via -r requirements.in
future==0.18.2
# via metsrw
Expand Down
18 changes: 15 additions & 3 deletions src/archivematicaCommon/lib/elasticSearchFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def create_indexes_if_needed(client, indexes):
# Call get index body functions below for each index
body = getattr(sys.modules[__name__], "_get_%s_index_body" % index)()
logger.info('Creating "%s" index ...', index)
client.indices.create(index, body=body, ignore=400)
client.indices.create(index, body=body, ignore=400, include_type_name=True)
logger.info("Index created.")


Expand Down Expand Up @@ -1212,7 +1212,13 @@ def search_all_results(client, body, index):
if isinstance(index, list):
index = ",".join(index)

results = client.search(body=body, index=index, size=MAX_QUERY_SIZE)
results = client.search(
body=body,
index=index,
size=MAX_QUERY_SIZE,
rest_total_hits_as_int=True,
track_total_hits=True,
)

if results["hits"]["total"] > MAX_QUERY_SIZE:
logger.warning(
Expand Down Expand Up @@ -1284,7 +1290,13 @@ def get_file_tags(client, uuid):
"""
query = {"query": {"term": {"fileuuid": uuid}}}

results = client.search(body=query, index=TRANSFER_FILES_INDEX, _source="tags")
results = client.search(
body=query,
index=TRANSFER_FILES_INDEX,
_source="tags",
rest_total_hits_as_int=True,
track_total_hits=True,
)

count = results["hits"]["total"]
if count == 0:
Expand Down
4 changes: 2 additions & 2 deletions src/archivematicaCommon/tests/fixtures/test_delete_aip.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
method: POST
uri: http://elasticsearch:9200/aips/_search?_source=uuid
response:
body: {string: !!python/unicode '{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.2876821,"hits":[{"_index":"aips","_type":"_doc","_id":"lBsZBWgBn49OAVhMXeO8","_score":0.2876821,"_source":{"uuid":"b34521a3-1c63-43dd-b901-584416f36c91"}}]}}'}
Expand All @@ -30,7 +30,7 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
method: POST
uri: http://elasticsearch:9200/aips/_search?_source=uuid
response:
body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
method: POST
uri: http://elasticsearch:9200/aipfiles/_search
response:
body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2,"max_score":0.2876821,"hits":[{"_index":"aipfiles","_type":"_doc","_id":"lRsZBWgBn49OAVhMXuMC","_score":0.2876821,"_source":{"origin":"1a14043f-68ef-4bfe-a129-e2e4cdbe391b","METS":{"dmdSec":{"ns0:xmlData_dict_list":[{"@xmlns:ns1":"http://www.loc.gov/premis/v3","@xmlns:ns0":"http://www.loc.gov/METS/","@xmlns:xsi":"http://www.w3.org/2001/XMLSchema-instance","ns1:object_dict_list":[{"ns1:originalName":"20181231153024-b34521a3-1c63-43dd-b901-584416f36c91","@version":"3.0","@xsi:type":"premis:intellectualEntity","ns1:objectIdentifier_dict_list":[{"ns1:objectIdentifierType":"UUID","ns1:objectIdentifierValue":"b34521a3-1c63-43dd-b901-584416f36c91"}],"@xsi:schemaLocation":"http://www.loc.gov/premis/v3
Expand Down Expand Up @@ -98,7 +98,7 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
method: POST
uri: http://elasticsearch:9200/aipfiles/_search
response:
body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags
method: POST
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
headers:
Expand Down
8 changes: 4 additions & 4 deletions src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?size=10000
method: POST
uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.6931472,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.6931472,"_source":{"accessionid":"","status":"backlog","sipuuid":"17b168b6-cbba-4f43-8838-a53360238acb","tags":[],"file_extension":"jpg","relative_path":"test-17b168b6-cbba-4f43-8838-a53360238acb/objects/Landing_zone.jpg","bulk_extractor_reports":[],"origin":"1a14043f-68ef-4bfe-a129-e2e4cdbe391b","size":1.2982568740844727,"modification_date":"2018-12-11","created":1546273029.7313669,"format":[],"ingestdate":"2018-12-31","filename":"Landing_zone.jpg","fileuuid":"268421a7-a986-4fa0-95c1-54176e508210"}}]}}'}
headers:
Expand All @@ -30,8 +30,8 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags
method: POST
uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.47000363,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.47000363,"_source":{"tags":["test"]}}]}}'}
headers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ interactions:
headers:
connection: [keep-alive]
content-type: [application/json]
method: GET
uri: http://elasticsearch:9200/transferfiles/_search?size=10000
method: POST
uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true
response:
body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'}
headers:
Expand Down
2 changes: 2 additions & 0 deletions src/dashboard/src/components/archival_storage/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ def search(request):
size=page_size,
sort=order_by + ":" + sort_direction if order_by else "",
_source=source,
rest_total_hits_as_int=True,
track_total_hits=True,
)

if file_mode:
Expand Down
2 changes: 2 additions & 0 deletions src/dashboard/src/components/backlog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ def search(request):
size=page_size,
sort=order_by + ":" + sort_direction if order_by else "",
_source=source,
rest_total_hits_as_int=True,
track_total_hits=True,
)
hit_count = hits["hits"]["total"]

Expand Down