Skip to content

Commit

Permalink
Test with "external" elasticsearch (#441)
Browse files Browse the repository at this point in the history
* Test with "external" elasticsearch

* Strip all ES config except "enabled" (for forwarding)
* Create ECK subscription no matter the observability_strategy
* Deploy ES from CI for events testing (Code copied/trimmed from STO)
* Default to use_redhat for CI

* Fixes from testing

* ephemeral volume
* wait for CRD to establish

* Adjust smoketest to always test events

* Test events to external ES in any observability_strategy mode
* We no longer need the smoketest to know the observability_strategy at all

* Update build/stf-run-ci/tasks/setup_elasticsearch.yml

Co-authored-by: Leif Madsen <lmadsen@redhat.com>

* Apply spelling/caps suggestions from code review

---------

Co-authored-by: Leif Madsen <lmadsen@redhat.com>
  • Loading branch information
csibbitt and leifmadsen authored Jul 13, 2023
1 parent 10bf140 commit 483f898
Show file tree
Hide file tree
Showing 10 changed files with 141 additions and 41 deletions.
4 changes: 3 additions & 1 deletion build/stf-run-ci/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ __service_telemetry_snmptraps_trap_oid_prefix: "1.3.6.1.4.1.50495.15"
__service_telemetry_snmptraps_trap_default_oid: "1.3.6.1.4.1.50495.15.1.2.1"
__service_telemetry_snmptraps_trap_default_severity: ""
__service_telemetry_logs_enabled: false
__service_telemetry_observability_strategy: use_hybrid
__service_telemetry_observability_strategy: use_redhat
__service_telemetry_transports_certificates_endpoint_cert_duration: 70080h
__service_telemetry_transports_certificates_ca_cert_duration: 70080h
__internal_registry_path: image-registry.openshift-image-registry.svc:5000
Expand All @@ -37,6 +37,8 @@ __smart_gateway_bundle_image_path:
default_operator_registry_image_base: registry.redhat.io/openshift4/ose-operator-registry
default_operator_registry_image_tag: v4.12

elasticsearch_version: 7.16.1

sgo_image_tag: latest
sto_image_tag: latest
sg_core_image_tag: latest
Expand Down
9 changes: 0 additions & 9 deletions build/stf-run-ci/tasks/deploy_stf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,6 @@
events:
elasticsearch:
enabled: {{ __service_telemetry_events_enabled }}
storage:
strategy: {{ "ephemeral" if __service_telemetry_storage_ephemeral_enabled else "persistent" }}
{% if __service_telemetry_storage_persistent_storage_class is defined %}
persistent:
storageClass: {{ __service_telemetry_storage_persistent_storage_class }}
{% endif %}
certificates:
endpointCertDuration: {{ __service_telemetry_events_certificates_endpoint_cert_duration }}
caCertDuration: {{ __service_telemetry_events_certificates_ca_cert_duration }}
metrics:
prometheus:
enabled: {{ __service_telemetry_metrics_enabled }}
Expand Down
5 changes: 4 additions & 1 deletion build/stf-run-ci/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@
tags:
- deploy

# Pull in the tasks from setup_elasticsearch.yml, which create the
# Elasticsearch instance used for events testing.
- name: Deploy ES for events testing
  include_tasks: setup_elasticsearch.yml

- name: Set default base dir if not provided
set_fact:
base_dir: "{{ playbook_dir }}"
Expand Down Expand Up @@ -89,7 +92,7 @@
- { name: prometheus-webhook-snmp, dockerfile_path: Dockerfile, image_reference_name: prometheus_webhook_snmp_image_path, working_build_dir: ./working/prometheus-webhook-snmp }

- debug:
var: build_list
var: build_list

- name: Create builds and artifacts
include_tasks: create_builds.yml
Expand Down
12 changes: 12 additions & 0 deletions build/stf-run-ci/tasks/pre-clean.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,15 @@
kind: Project
metadata:
name: openshift-cert-manager-operator

# Delete the `elasticsearch` Elasticsearch custom resource from the target
# namespace and wait for the deletion to complete. Failures are ignored so the
# rest of the clean-up continues (presumably to tolerate clusters where the
# resource or its CRD is not present - confirm).
- name: Remove Elasticsearch
  k8s:
    definition:
      apiVersion: elasticsearch.k8s.elastic.co/v1
      kind: Elasticsearch
      metadata:
        name: elasticsearch
        namespace: "{{ namespace }}"
    state: absent
    wait: true
  ignore_errors: true
12 changes: 10 additions & 2 deletions build/stf-run-ci/tasks/setup_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,16 @@
name: elasticsearch-eck-operator-certified
source: certified-operators
sourceNamespace: openshift-marketplace
when:
- __service_telemetry_observability_strategy in ['use_community', 'use_hybrid']

# Poll for the Elasticsearch CustomResourceDefinition until it both exists and
# reports the `Established` condition with status "True". Existence alone is
# not sufficient: a CRD object can be present before the API server starts
# serving it, and creating an Elasticsearch resource in that window fails.
# Polls up to 5 times, 30 seconds apart.
- name: Wait for Elasticsearch CRD to appear
  k8s_info:
    api_version: apiextensions.k8s.io/v1
    kind: CustomResourceDefinition
    name: elasticsearches.elasticsearch.k8s.elastic.co
  register: eckCRD
  # The first clause guards against an empty result list; the second checks
  # the CRD's status conditions (condition status values are strings).
  until: >-
    eckCRD.resources | default([]) | length > 0 and
    eckCRD.resources[0].status.conditions | default([])
    | selectattr('type', 'equalto', 'Established')
    | selectattr('status', 'equalto', 'True')
    | list | length > 0
  retries: 5
  delay: 30

- block:
# Upstream Source + Sub from https://github.com/rhobs/observability-operator/tree/main/hack/olm
Expand Down
32 changes: 32 additions & 0 deletions build/stf-run-ci/tasks/setup_elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Render manifest_elasticsearch.j2 and parse it from YAML into
# `elasticsearch_manifest`, unless that variable has already been defined
# (which lets a caller supply its own manifest).
- name: Set default ElasticSearch manifest
  when: elasticsearch_manifest is not defined
  set_fact:
    elasticsearch_manifest: >-
      {{ lookup('template', './manifest_elasticsearch.j2') | from_yaml }}

# Apply the manifest held in `elasticsearch_manifest` to the cluster.
- name: Create an instance of Elasticsearch
  k8s:
    definition: "{{ elasticsearch_manifest }}"
    state: present

# Poll for the `elasticsearch-es-http-certs-public` secret until it exists and
# carries a `ca.crt` entry, then keep the lookup result in
# `elasticsearch_certs`. Polls up to 5 times, 30 seconds apart.
- name: Look up the newly generated ES Certs
  k8s_info:
    api_version: v1
    kind: Secret
    name: elasticsearch-es-http-certs-public
    namespace: "{{ namespace }}"
  register: elasticsearch_certs
  # Check for a non-empty result explicitly instead of indexing resources[0]
  # on an empty list while the secret has not been created yet.
  until: >-
    elasticsearch_certs.resources | default([]) | length > 0 and
    elasticsearch_certs.resources[0].data['ca.crt'] is defined
  retries: 5
  delay: 30

# Create the `elasticsearch-es-cert` secret in the target namespace, carrying
# the `ca.crt` value read from the looked-up certs secret. The value is copied
# as-is from the source secret's `data` field.
- name: Copy the ES CA cert to our TLS secret
  k8s:
    state: present
    definition:
      apiVersion: v1
      kind: Secret
      metadata:
        name: elasticsearch-es-cert
        namespace: "{{ namespace }}"
      data:
        ca.crt: "{{ elasticsearch_certs.resources[0].data['ca.crt'] }}"
52 changes: 52 additions & 0 deletions build/stf-run-ci/templates/manifest_elasticsearch.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Elasticsearch custom resource used by CI for events testing.
#
# Template variables:
#   namespace              - namespace the resource is created in
#   elasticsearch_version  - Elasticsearch version to deploy
#
# Both values are quoted so they stay strings after the rendered template is
# parsed as YAML (an unquoted "8.10" would become the float 8.1, and an
# all-digit namespace would become an integer).
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
  name: elasticsearch
  namespace: "{{ namespace }}"
spec:
  auth: {}
  http:
    service:
      metadata: {}
      spec: {}
    tls:
      certificate: {}
  monitoring:
    logs: {}
    metrics: {}
  nodeSets:
    # A single node that holds the master, data and ingest roles.
    - count: 1
      name: default
      config:
        node.roles:
          - master
          - data
          - ingest
        node.store.allow_mmap: true
      podTemplate:
        metadata:
          labels:
            tuned.openshift.io/elasticsearch: elasticsearch
        spec:
          containers:
            - name: elasticsearch
              resources:
                limits:
                  cpu: "2"
                  memory: 4Gi
                requests:
                  cpu: "1"
                  memory: 4Gi
          volumes:
            # Ephemeral storage: the data volume is an emptyDir.
            - emptyDir: {}
              name: elasticsearch-data
  transport:
    service:
      metadata: {}
      spec: {}
    tls:
      certificate: {}
      certificateAuthorities: {}
  updateStrategy:
    changeBudget: {}
  version: "{{ elasticsearch_version }}"
7 changes: 7 additions & 0 deletions roles/servicetelemetry/tasks/component_elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# DEPRECATED
#
# This code in the servicetelemetry role is deprecated as of STF 1.5.3, after
# which only forwarding to an external Elasticsearch is supported.
#
# A trimmed copy of this code lives on in the stf-run-ci role for CI testing
# of the forwarding feature.
- name: Lookup template
debug:
msg: "{{ lookup('template', './manifest_elasticsearch.j2') | from_yaml }}"
Expand Down
12 changes: 4 additions & 8 deletions tests/smoketest/smoketest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ fi

CLEANUP=${CLEANUP:-true}

OBSERVABILITY_STRATEGY="${OBSERVABILITY_STRATEGY:-use_redhat}"

for ((i=1; i<=NUMCLOUDS; i++)); do
NAME="smoke${i}"
CLOUDNAMES+=("${NAME}")
Expand Down Expand Up @@ -66,7 +64,7 @@ oc create configmap stf-smoketest-ceilometer-entrypoint-script --from-file "${RE
echo "*** [INFO] Creating smoketest jobs..."
oc delete job -l app=stf-smoketest
for NAME in "${CLOUDNAMES[@]}"; do
oc create -f <(sed -e "s/<<CLOUDNAME>>/${NAME}/;s/<<ELASTICSEARCH_AUTH_PASS>>/${ELASTICSEARCH_AUTH_PASS}/;s/<<PROMETHEUS_AUTH_PASS>>/${PROMETHEUS_AUTH_PASS}/;s/<<OBSERVABILITY_STRATEGY>>/${OBSERVABILITY_STRATEGY}/" ${REL}/smoketest_job.yaml.template)
oc create -f <(sed -e "s/<<CLOUDNAME>>/${NAME}/;s/<<ELASTICSEARCH_AUTH_PASS>>/${ELASTICSEARCH_AUTH_PASS}/;s/<<PROMETHEUS_AUTH_PASS>>/${PROMETHEUS_AUTH_PASS}/" ${REL}/smoketest_job.yaml.template)
done

echo "*** [INFO] Triggering an alertmanager notification..."
Expand Down Expand Up @@ -154,11 +152,9 @@ echo "*** [INFO] Logs from prometheus..."
oc logs "$(oc get pod -l prometheus=default -o jsonpath='{.items[0].metadata.name}')" -c prometheus
echo

if [ "$OBSERVABILITY_STRATEGY" != "use_redhat" ]; then
echo "*** [INFO] Logs from elasticsearch..."
oc logs "$(oc get pod -l common.k8s.elastic.co/type=elasticsearch -o jsonpath='{.items[0].metadata.name}')"
echo
fi
echo "*** [INFO] Logs from elasticsearch..."
oc logs "$(oc get pod -l common.k8s.elastic.co/type=elasticsearch -o jsonpath='{.items[0].metadata.name}')"
echo

echo "*** [INFO] Logs from snmp webhook..."
oc logs "$(oc get pod -l app=default-snmp-webhook -o jsonpath='{.items[0].metadata.name}')"
Expand Down
37 changes: 17 additions & 20 deletions tests/smoketest/smoketest_collectd_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,31 +62,28 @@ grep -E '"result":\[{"metric":{"__name__":"sensubility_container_health_status",
metrics_result=$((metrics_result || $?))
echo; echo

if [ "$OBSERVABILITY_STRATEGY" != "use_redhat" ]; then
echo "*** [INFO] Get documents for this test from ElasticSearch..."
DOCUMENT_HITS=$(curl -sk -u "elastic:${ELASTICSEARCH_AUTH_PASS}" -X GET "${ELASTICSEARCH}/_search" -H 'Content-Type: application/json' -d'{
"query": {
"bool": {
"filter": [
{ "term" : { "labels.instance" : { "value" : "'${CLOUDNAME}'", "boost" : 1.0 } } },
{ "range" : { "generated" : { "gte" : "now-1m", "lt" : "now" } } }
]
}
echo "*** [INFO] Get documents for this test from Elasticsearch..."
DOCUMENT_HITS=$(curl -sk -u "elastic:${ELASTICSEARCH_AUTH_PASS}" -X GET "${ELASTICSEARCH}/_search" -H 'Content-Type: application/json' -d'{
"query": {
"bool": {
"filter": [
{ "term" : { "labels.instance" : { "value" : "'${CLOUDNAME}'", "boost" : 1.0 } } },
{ "range" : { "generated" : { "gte" : "now-1m", "lt" : "now" } } }
]
}
}' | python3 -c "import sys, json; parsed = json.load(sys.stdin); print(parsed['hits']['total']['value'])")
}
}' | python3 -c "import sys, json; parsed = json.load(sys.stdin); print(parsed['hits']['total']['value'])")

echo "*** [INFO] Found ${DOCUMENT_HITS} documents"
echo; echo
echo "*** [INFO] Found ${DOCUMENT_HITS} documents"
echo; echo

# check if we got documents back for this test
events_result=1
if [ "$DOCUMENT_HITS" -gt "0" ]; then
events_result=0
fi
else
events_result=0
# check if we got documents back for this test
events_result=1
if [ "$DOCUMENT_HITS" -gt "0" ]; then
events_result=0
fi


echo "[INFO] Verification exit codes (0 is passing, non-zero is a failure): events=${events_result} metrics=${metrics_result}"
echo; echo

Expand Down

0 comments on commit 483f898

Please sign in to comment.