Add data-dashboard-backend service

Includes KSQLDB transformer logic.

pvannierop committed May 29, 2024
1 parent 601e42c commit 48afbd4
Showing 13 changed files with 374 additions and 138 deletions.
247 changes: 118 additions & 129 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions bin/generate-secrets
100755 → 100644
@@ -68,6 +68,7 @@ insert_secret ".management_portal.oauth_clients.radar_redcap_integrator.client_s
insert_secret ".management_portal.oauth_clients.radar_fitbit_connector.client_secret"
insert_secret ".management_portal.oauth_clients.radar_appconfig.client_secret"
insert_secret ".management_portal.oauth_clients.radar_push_endpoint.client_secret"
insert_secret ".management_portal.oauth_clients.radar_data_dashboard_backend.client_secret"

insert_secret \
".radar_appserver_postgresql.global.postgresql.auth.postgresPassword" \
4 changes: 3 additions & 1 deletion etc/base-secrets.yaml
@@ -86,6 +86,8 @@ management_portal:
client_secret: secret
radar_push_endpoint:
client_secret: secret
radar_data_dashboard_backend:
client_secret: secret
smtp:
password: secret

@@ -120,7 +122,7 @@ oura_api_secret: "secret"
radar_rest_sources_backend:
postgres:
password: secret
# --------------------------------------------------------- 20-grafana.yaml ---------------------------------------------------------
# --------------------------------------------------------- 20-dashboard.yaml ---------------------------------------------------------
timescaledb_password: secret
grafana_password: secret
grafana_metrics_password: secret
16 changes: 13 additions & 3 deletions etc/base.yaml
@@ -270,11 +270,10 @@ radar_rest_sources_backend:
garmin:
enable: "false"

# --------------------------------------------------------- 20-grafana.yaml ---------------------------------------------------------
# --------------------------------------------------------- 20-dashboard.yaml ---------------------------------------------------------

timescaledb_username: postgres
timescaledb_db_name: grafana-metrics
grafana_metrics_username: postgres
timescaledb_db_name: data-dashboard

timescaledb:
_install: true
@@ -295,6 +294,8 @@ timescaledb:
# Uncomment when upgrading
#existingClaim: "data-timescaledb-postgresql-0"

grafana_metrics_username: postgres

radar_grafana:
_install: true
_chart_version: 6.26.8
@@ -303,6 +304,11 @@ radar_grafana:
env:
GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /var/lib/grafana/dashboards/allprojects/home.json

data_dashboard_backend:
_install: false
_chart_version: 0.1.0
replicaCount: 1

radar_jdbc_connector:
_install: true
_chart_version: 0.5.1
@@ -312,6 +318,10 @@ radar_jdbc_connector:
# Change the list of topics if you have dashboards that read other data or if you don't have certain topics available on your cluster.
topics: android_phone_relative_location, android_phone_battery_level, connect_fitbit_intraday_heart_rate, connect_fitbit_intraday_steps

kafka_data_transformer:
_install: false
_chart_version: 0.3.1

# --------------------------------------------------------- 20-ingestion.yaml ---------------------------------------------------------

radar_gateway:
14 changes: 10 additions & 4 deletions etc/base.yaml.gotmpl
@@ -27,9 +27,15 @@ radar_grafana:
# google_application_credentials: {{ readFile "../etc/radar-appserver/firebase-adminsdk.json" | quote }}
#*/}}

# Remove below Go comment to read the queries.sql and set the queries
# in the ksql_server
#ksql_server:
# If transformation of Kafka topic data is needed, remove the Go template comments and the YAML comments below.
# Make sure to reference ksql transformation files that contain the required transformation logic.
# The files below transform the data from the questionnaire_response and questionnaire_app_event topics to the
# ksql_observations topic, used by the data-dashboard-backend. If using the data-dashboard-backend, make sure
# to uncomment the relevant ksql transformer files.
# Note: never remove the _base_observations_stream.sql file.
# kafka_data_transformer:
# ksql:
# queries: |
# {{/*- readFile "cp-ksql-server/queries.sql" | nindent 8 */}}
# {{/* - readFile "../etc/cp-ksql-server/_base_observations_stream.sql" | nindent 8 */}}
# {{/* - readFile "../etc/cp-ksql-server/questionnaire_response_observations.sql" | nindent 8 */}}
# {{/* - readFile "../etc/cp-ksql-server/questionnaire_app_events_observations.sql" | nindent 8 */}}
41 changes: 41 additions & 0 deletions etc/cp-ksql-server/README.md
@@ -0,0 +1,41 @@
# Kafka data transformer (KSQLDB)

Reference: https://docs.ksqldb.io/

The KSQLDB Kafka data transformer registers consumers/producers with Kafka that transform the data in one topic and
publish the results to another topic.

The provided KSQLDB SQL files _questionnaire_response_observations.sql_ and _questionnaire_app_event_observations.sql_
transform, respectively, the _questionnaire_response_ and _questionnaire_app_event_ topics and publish the data to the
_ksql_observations_ topic. The _ksql_observations_ topic is consumed by the Kafka-JDBC-connector used by the
RADAR-base Data Dashboard backend service (see [20-dashboard.yaml](../../helmfile.d/20-dashboard.yaml)).

When transformation of other topics is required, new SQL files can be added to this directory. These new files should be
referenced in the _kafka_data_transformer -> ksql -> queries_ section of the `etc/base.yaml.gotmpl` file. New KSQLDB SQL
files should transform the data into the following format used by the _ksql_observations_ topic:

```
TOPIC KEY:
PROJECT: the project identifier
SOURCE: the source identifier
SUBJECT: the subject/study participant identifier
TOPIC VALUE:
TOPIC: the topic identifier
CATEGORY: the category identifier (optional)
VARIABLE: the variable identifier
DATE: the date of the observation
END_DATE: the end date of the observation (optional)
TYPE: the type of the observation (STRING, STRING_JSON, INTEGER, DOUBLE)
VALUE_TEXTUAL: the textual value of the observation (optional, must be set when VALUE_NUMERIC is NULL)
VALUE_NUMERIC: the numeric value of the observation (optional, must be set when VALUE_TEXTUAL is NULL)
```

New messages are added to the _ksql_observations_ topic by inserting into the _observations_ stream (see [_base_observations_stream.sql](_base_observations_stream.sql)):

```
INSERT INTO observations
SELECT
...
PARTITION BY q.projectId, q.userId, q.sourceId
EMIT CHANGES;
```
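
For reference, a complete minimal transformer could look as follows. This is an illustrative sketch only: the
_my_topic_ stream and its _time_ and _value_ fields are hypothetical, and the _observations_ stream from
[_base_observations_stream.sql](_base_observations_stream.sql) is assumed to be registered already.

```
-- Illustrative sketch: 'my_topic' and its fields are hypothetical.
CREATE STREAM my_topic (
    projectId VARCHAR KEY,  -- 'KEY' means that this field is part of the kafka message key
    userId VARCHAR KEY,
    sourceId VARCHAR KEY,
    time DOUBLE,            -- unix timestamp in seconds
    value DOUBLE
) WITH (
    kafka_topic = 'my_topic',
    partitions = 3,
    format = 'avro'
);

INSERT INTO observations
SELECT
    q.projectId AS PROJECT,
    q.userId AS SUBJECT,
    q.sourceId AS SOURCE,
    'my_topic' as `TOPIC`,
    CAST(NULL as VARCHAR) as CATEGORY,
    'my_variable' as VARIABLE,
    FROM_UNIXTIME(CAST(q.time * 1000 AS BIGINT)) as DATE,
    CAST(NULL as TIMESTAMP) as END_DATE,
    'DOUBLE' as TYPE,
    q.value as VALUE_NUMERIC,
    CAST(NULL as VARCHAR) as VALUE_TEXTUAL
FROM my_topic q
PARTITION BY q.projectId, q.userId, q.sourceId -- this sets the fields in the kafka message key
EMIT CHANGES;
```

The PARTITION BY clause must list the project, subject and source fields so that the kafka message key matches the
key columns of the _observations_ stream.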
20 changes: 20 additions & 0 deletions etc/cp-ksql-server/_base_observations_stream.sql
@@ -0,0 +1,20 @@
SET 'auto.offset.reset' = 'earliest';

-- Register the 'ksql_observations' topic (created when it does not exist).
CREATE STREAM observations (
PROJECT VARCHAR KEY, -- 'KEY' means that this field is part of the kafka message key
SUBJECT VARCHAR KEY,
SOURCE VARCHAR KEY,
`TOPIC` VARCHAR,
CATEGORY VARCHAR,
VARIABLE VARCHAR,
DATE TIMESTAMP,
END_DATE TIMESTAMP,
TYPE VARCHAR,
VALUE_NUMERIC DOUBLE,
VALUE_TEXTUAL VARCHAR
) WITH (
kafka_topic = 'ksql_observations',
partitions = 3,
format = 'avro'
);
Empty file removed etc/cp-ksql-server/queries.sql
Empty file.
30 changes: 30 additions & 0 deletions etc/cp-ksql-server/questionnaire_app_event_observations.sql
@@ -0,0 +1,30 @@
CREATE STREAM questionnaire_app_event (
projectId VARCHAR KEY, -- 'KEY' means that this field is part of the kafka message key
userId VARCHAR KEY,
sourceId VARCHAR KEY,
questionnaireName VARCHAR,
eventType VARCHAR,
time DOUBLE,
metadata MAP<VARCHAR, VARCHAR>
) WITH (
kafka_topic = 'questionnaire_app_event',
partitions = 3,
format = 'avro'
);

INSERT INTO observations
SELECT
q.projectId AS PROJECT,
q.userId AS SUBJECT,
q.sourceId AS SOURCE,
'questionnaire_app_event' as `TOPIC`,
CAST(NULL as VARCHAR) as CATEGORY,
q.questionnaireName as VARIABLE,
FROM_UNIXTIME(CAST(q.time * 1000 AS BIGINT)) as DATE,
CAST(NULL as TIMESTAMP) as END_DATE,
'STRING_JSON' as TYPE,
CAST(NULL as DOUBLE) as VALUE_NUMERIC,
TO_JSON_STRING(q.metadata) as VALUE_TEXTUAL
FROM questionnaire_app_event q
PARTITION BY q.projectId, q.userId, q.sourceId -- this sets the fields in the kafka message key
EMIT CHANGES;
82 changes: 82 additions & 0 deletions etc/cp-ksql-server/questionnaire_response_observations.sql
@@ -0,0 +1,82 @@
CREATE STREAM questionnaire_response (
projectId VARCHAR KEY, -- 'KEY' means that this field is part of the kafka message key
userId VARCHAR KEY,
sourceId VARCHAR KEY,
time DOUBLE,
timeCompleted DOUBLE,
timeNotification DOUBLE,
name VARCHAR,
version VARCHAR,
answers ARRAY<STRUCT<questionId VARCHAR, value STRUCT<int INT, string VARCHAR, double DOUBLE>, startTime DOUBLE, endTime DOUBLE>>
) WITH (
kafka_topic = 'questionnaire_response',
partitions = 3,
format = 'avro'
);

CREATE STREAM questionnaire_response_exploded
AS SELECT
EXPLODE(TRANSFORM(q.answers, a => a->questionId)) as VARIABLE,
FROM_UNIXTIME(CAST(q.time * 1000 AS BIGINT)) as DATE,
q.projectId,
q.userId,
q.sourceId,
'questionnaire_response' as `TOPIC`,
q.name as CATEGORY,
CAST(NULL as TIMESTAMP) as END_DATE,
    -- WARNING!!! The cast from VARCHAR (string) to DOUBLE will throw a Java exception if the string is not a number.
    -- This does not mean that the message is lost: the value will still be present in the VALUE_TEXTUAL_OPTIONAL field.
EXPLODE(TRANSFORM(q.answers, a => COALESCE(a->value->double, CAST(a->value->int as DOUBLE), CAST(a->value->string as DOUBLE)))) as VALUE_NUMERIC,
EXPLODE(TRANSFORM(q.answers, a => CASE
WHEN a->value->int IS NOT NULL THEN 'INTEGER'
WHEN a->value->double IS NOT NULL THEN 'DOUBLE'
ELSE NULL
END)) as TYPE,
    -- Note: When the cast to double succeeds for the string value, VALUE_TEXTUAL_OPTIONAL will also be set.
EXPLODE(TRANSFORM(q.answers, a => a->value->string)) as VALUE_TEXTUAL_OPTIONAL
FROM questionnaire_response q
EMIT CHANGES;

INSERT INTO observations
SELECT
q.projectId as PROJECT,
q.sourceId as SOURCE,
q.userId as SUBJECT,
`TOPIC`, CATEGORY, VARIABLE, DATE, END_DATE,
CASE
WHEN TYPE IS NULL AND VALUE_NUMERIC IS NOT NULL THEN 'DOUBLE' -- must have been derived from a string cast
WHEN TYPE IS NULL AND VALUE_NUMERIC IS NULL THEN 'STRING'
ELSE TYPE -- keep the original type when TYPE is not NULL
END as TYPE,
VALUE_NUMERIC,
CASE
    WHEN VALUE_NUMERIC IS NOT NULL THEN NULL -- When the cast to double succeeded for the string value, set VALUE_TEXTUAL to NULL.
ELSE VALUE_TEXTUAL_OPTIONAL
END as VALUE_TEXTUAL
FROM questionnaire_response_exploded q
PARTITION BY q.projectId, q.userId, q.sourceId -- this sets the fields in the kafka message key
EMIT CHANGES;

-- TODO: exploding the 'select:' questions is not yet fully designed.
-- The code is kept here for future reference.
-- Multi-select questionnaire questions are stored as a single 'value' string with the
-- names of the selected options separated by commas. Multi-select questions are prefixed
-- with 'select:' in the questionId.
-- When 'questionId' is like 'select:%', create a new stream with the select options.
-- The options in the value field are split on commas and added as separate VARIABLE records.
-- The VALUE_NUMERIC is set to 1 and VALUE_TEXTUAL is set to NULL.
-- INSERT INTO observations
-- SELECT
-- EXPLODE(SPLIT(VALUE_TEXTUAL, ',')) as VARIABLE,
-- PROJECT, SOURCE, SUBJECT, `TOPIC`, CATEGORY, DATE, END_DATE,
-- 'INTEGER' as TYPE,
-- CAST(1 as DOUBLE) VALUE_NUMERIC,
-- CAST(NULL as VARCHAR) as VALUE_TEXTUAL
-- FROM questionnaire_response_observations
-- WHERE
-- VARIABLE IS NOT NULL
-- AND VARIABLE LIKE 'select:%'
-- AND VALUE_TEXTUAL IS NOT NULL
-- AND VALUE_TEXTUAL != ''
-- PARTITION BY SUBJECT, PROJECT, SOURCE
-- EMIT CHANGES;
23 changes: 23 additions & 0 deletions etc/data-dashboard-backend/values.yaml
@@ -0,0 +1,23 @@
ingress:
enabled: true
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/enable-cors: "true"
className: nginx
hosts:
- host: localhost
tls:
secretName: radar-base-data-dashboard
hosts:
- localhost
path: /api
jdbc:
url: jdbc:postgresql://timescaledb-postgresql-hl:5432/data-dashboard
dialect: org.hibernate.dialect.PostgreSQLDialect
user: postgres
password: secret
managementPortal:
url: http://management-portal:8080/managementportal
clientId: radar_data_dashboard_backend
clientSecret: secret
jwtResourceName: res_DataDashboardAPI
2 changes: 1 addition & 1 deletion etc/timescaledb/values.yaml
@@ -39,7 +39,7 @@ auth:
postgresPassword: ""
## @param auth.database Name for a custom database to create
##
database: grafana-metrics
database: data-dashboard
## @param architecture PostgreSQL architecture (`standalone` or `replication`)
##
architecture: standalone
32 changes: 32 additions & 0 deletions helmfile.d/20-grafana.yaml → helmfile.d/20-dashboard.yaml
@@ -61,6 +61,38 @@ releases:
- name: "grafana\\.ini.auth\\.generic_oauth.token_url"
value: "https://{{ .Values.server_name }}/managementportal/oauth/token"

- name: data-dashboard-backend
chart: radar/data-dashboard-backend
version: {{ .Values.data_dashboard_backend._chart_version }}
installed: {{ .Values.data_dashboard_backend._install }}
values:
- "../etc/data-dashboard-backend/values.yaml"
- {{ .Values.data_dashboard_backend | toYaml | indent 8 | trim }}
set:
- name: serverName
value: {{ .Values.server_name }}
- name: ingress.hosts
values:
- data.{{ .Values.server_name }}
- name: jdbc.user
value: {{ .Values.timescaledb_username }}
- name: jdbc.password
value: {{ .Values.timescaledb_password }}
- name: jdbc.url
value: {{ dig "jdbc" "url" (printf "jdbc:postgresql://timescaledb-postgresql-hl:5432/%s" .Values.timescaledb_db_name) .Values.data_dashboard_backend }}
- name: managementPortal.clientSecret
value: {{ .Values.management_portal.oauth_clients.radar_data_dashboard_backend.client_secret }}

- name: kafka-data-transformer
chart: cp-radar/cp-ksql-server
version: {{ .Values.kafka_data_transformer._chart_version }}
timeout: {{ add .Values.base_timeout .Values.kafka_data_transformer._extra_timeout }}
wait: false
installed: {{ .Values.kafka_data_transformer._install }}
values:
- "../etc/cp-ksql-server/values.yaml"
- {{ .Values.kafka_data_transformer | toYaml | indent 8 | trim }}

- name: radar-jdbc-connector
chart: radar/radar-jdbc-connector
version: {{ .Values.radar_jdbc_connector._chart_version }}
