Skip to content

Commit

Permalink
Add harvest & xloader worker configuration
Browse files Browse the repository at this point in the history
- Update Dockerfiles
- Update start_ckan.sh with custom workers
- Update start_ckan_development.sh.override to remove automatic workers
- Add supervisor harvester.conf
- Add supervisor xloader.conf
- Update Scheming DCAT version to 2.1.0
  • Loading branch information
mjanez committed Mar 8, 2024
1 parent 0ed33f2 commit 68a9ce9
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 32 deletions.
5 changes: 3 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ CKAN_SITE_URL=http://localhost:81
CKAN__ROOT_PATH=/catalog/{{LANG}}
CKAN_PORT=5000
CKAN__FAVICON=/catalog/base/images/ckan.ico
CKAN__SITE_LOGO=/images/default/ckan-logo.png
CKAN___BEAKER__SESSION__SECRET=CHANGE_ME
# See https://docs.ckan.org/en/latest/maintaining/configuration.html#api-token-settings
CKAN___API_TOKEN__JWT__ENCODE__SECRET=string:CHANGE_ME
Expand Down Expand Up @@ -125,7 +126,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru
CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv"

# Extensions
CKAN__PLUGINS="envvars stats text_view image_view webpage_view recline_view resourcedictionary datastore xloader harvest ckan_harvester spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface scheming_dcat_datasets scheming_dcat_groups scheming_dcat_organizations scheming_dcat scheming_dcat_ckan_harvester scheming_dcat_csw_harvester pdf_view pages fluent"
CKAN__PLUGINS="envvars stats text_view image_view webpage_view recline_view resourcedictionary datastore xloader harvest spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface scheming_dcat_datasets scheming_dcat_groups scheming_dcat_organizations scheming_dcat scheming_dcat_ckan_harvester scheming_dcat_xls_harvester pdf_view pages fluent"

# ckanext-harvest
CKAN__HARVEST__MQ__TYPE=redis
Expand All @@ -137,7 +138,7 @@ CKAN__HARVEST__LOG_TIMEFRAME=40

# ckanext-xloader
CKANEXT__XLOADER__API_TOKEN=api_token
CKANEXT__XLOADER__JOBS__DB_URI=postgresql://ckan:ckan@db/ckan
CKANEXT__XLOADER__JOBS__DB_URI=postgresql://ckandbuser:ckandbpassword@db/ckandb

# ckanext-dcat
CKANEXT__DCAT__BASE_URI=${CKAN_URL}
Expand Down
10 changes: 5 additions & 5 deletions ckan/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ COPY req_fixes req_fixes
## Pages - v0.5.2 ##
## PDFView - 0.0.8 ##
## Fluent - v1.0.1 (Forked stable version) ##
## Scheming DCAT - v2.0.0 (GeoDCAT-AP/NTI-RISP extended version) ##
## Scheming DCAT - v2.1.0 (GeoDCAT-AP/NTI-RISP extended version) ##
RUN echo ${TZ} > /etc/timezone && \

Check warning on line 26 in ckan/Dockerfile

View workflow job for this annotation

GitHub Actions / runner/test-docker-pr:feature/harvester-worker

Pin versions in pip. Instead of `pip install <package>` use `pip install <package>==<version>` or `pip install --requirement <requirements file>`
if ! [ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then cp /usr/share/zoneinfo/${TZ} /etc/localtime; fi && \
# Remove apk cache
Expand Down Expand Up @@ -54,8 +54,8 @@ RUN echo ${TZ} > /etc/timezone && \
echo "mjanez/ckanext-fluent" && \
pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-fluent.git@v1.0.1#egg=ckanext-fluent && \
echo "mjanez/ckanext-scheming_dcat" && \
pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-scheming_dcat.git@v2.0.0#egg=ckanext_scheming_dcat && \
pip3 install --no-cache-dir -r https://raw.githubusercontent.com/mjanez/ckanext-scheming_dcat/v2.0.0/requirements.txt
pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-scheming_dcat.git@v2.1.0#egg=ckanext_scheming_dcat && \
pip3 install --no-cache-dir -r https://raw.githubusercontent.com/mjanez/ckanext-scheming_dcat/v2.1.0/requirements.txt

# Used to configure the container environment by setting environment variables, creating users, running initialization scripts, etc.
COPY docker-entrypoint.d/* /docker-entrypoint.d/
Expand All @@ -79,8 +79,8 @@ RUN for d in $APP_DIR/patches/*; do \
COPY setup/start_ckan.sh.override ${APP_DIR}/start_ckan.sh
RUN chmod +x ${APP_DIR}/start_ckan.sh

## Harvester
COPY setup/workers/harvester.conf /etc/supervisord.d/harvester.conf
## Load workers supervisor configuration
COPY setup/workers/* /etc/supervisord.d/

# Start CKAN
CMD ["/bin/sh", "-c", "$APP_DIR/start_ckan.sh"]
4 changes: 2 additions & 2 deletions ckan/Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ COPY setup/who.ini ./
COPY setup/start_ckan_development.sh.override ./start_ckan_development.sh
RUN chmod +x ./start_ckan_development.sh

## Harvester
COPY setup/workers/harvester.conf /etc/supervisord.d/harvester.conf
## Load workers supervisor configuration
COPY setup/workers/* /etc/supervisord.d/

# Apply any patches needed to CKAN core or any of the built extensions (not the
# runtime mounted ones)
Expand Down
9 changes: 9 additions & 0 deletions ckan/Dockerfile.ghcr
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,13 @@ RUN for d in $APP_DIR/patches/*; do \
fi ; \
done

# Workers
## Update start_ckan.sh with custom workers
COPY setup/start_ckan.sh.override ${APP_DIR}/start_ckan.sh
RUN chmod +x ${APP_DIR}/start_ckan.sh

## Load workers supervisor configuration
COPY setup/workers/* /etc/supervisord.d/

# Start CKAN
CMD ["/bin/sh", "-c", "$APP_DIR/start_ckan.sh"]
18 changes: 9 additions & 9 deletions ckan/setup/start_ckan.sh.override
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ if grep -E "beaker.session.secret ?= ?$" ckan.ini
then
echo "Setting beaker.session.secret in ini file"
ckan config-tool $CKAN_INI "beaker.session.secret=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
ckan config-tool $CKAN_INI "WTF_CSRF_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
JWT_SECRET=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')
ckan config-tool $CKAN_INI "api_token.jwt.encode.secret=${JWT_SECRET}"
ckan config-tool $CKAN_INI "api_token.jwt.decode.secret=${JWT_SECRET}"
Expand Down Expand Up @@ -51,17 +50,18 @@ UWSGI_OPTS="--plugins http,python \
if [ $? -eq 0 ]
then
# Start supervisord
echo "[prerun.workers] Loading the CKAN workers with supervisord..."
supervisord --configuration /etc/supervisord.conf &

# Workers
## Add harvester background processes to crontab
echo "[prerun.workers] Add harvester background procceses to crontab"
crontab -l | { cat; echo "*/15 * * * * /usr/bin/supervisorctl start ckan_harvester_run"; } | crontab -
## Clean-up mechanism for the harvest log table. 'ckan.harvest.log_timeframe'. The default time frame is 30 days
crontab -l | { cat; echo "0 5 */30 * * /usr/bin/supervisorctl start ckan_harvester_clean_log"; } | crontab -

# Start uwsgi
uwsgi $UWSGI_OPTS
else
echo "[prerun] failed...not starting CKAN."
fi
# Workers
## Start the Harvester worker
echo "[prerun.workers] Starting the CKAN Harvester worker"
ckan harvester run
## Add harvester to crontab
crontab -l | { cat; echo "0 */2 * * * ckan harvester run"; } | crontab -
## Clean-up mechanism for the harvest log table. 'ckan.harvest.log_timeframe'. The default time frame is 30 days
crontab -l | { cat; echo "0 5 * * * ckan harvester clean-harvest-log"; } | crontab -
19 changes: 8 additions & 11 deletions ckan/setup/start_ckan_development.sh.override
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ if grep -E "beaker.session.secret ?= ?$" ckan.ini
then
echo "Setting beaker.session.secret in ini file"
ckan config-tool $CKAN_INI "beaker.session.secret=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
ckan config-tool $CKAN_INI "WTF_CSRF_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
JWT_SECRET=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')
ckan config-tool $CKAN_INI "api_token.jwt.encode.secret=${JWT_SECRET}"
ckan config-tool $CKAN_INI "api_token.jwt.decode.secret=${JWT_SECRET}"
Expand Down Expand Up @@ -95,15 +94,13 @@ mkdir -p $CKAN_LOGS_PATH/xloader
chown -R ckan:ckan $CKAN_LOGS_PATH/xloader

# Start supervisord
supervisord --configuration /etc/supervisord.conf

# Workers
## Start the Harvester worker
echo "[prerun.workers] Starting the CKAN Harvester worker"
ckan harvester run

## Clean-up mechanism for the harvest log table
ckan harvester clean-harvest-log
#supervisord --configuration /etc/supervisord.conf &

# Start the development server as the ckan user with automatic reload
su ckan -c "/usr/bin/ckan -c $CKAN_INI run -H 0.0.0.0"
su ckan -c "/usr/bin/ckan -c $CKAN_INI run -H 0.0.0.0"

# Workers
# To start the Harvester worker
# ckan harvester run
# Clean-up mechanism for the harvest log table
# ckan harvester clean-harvest-log
38 changes: 35 additions & 3 deletions ckan/setup/workers/harvester.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,49 @@ command=ckan harvester gather-consumer
user=ckan
numprocs=1
stdout_logfile=/var/log/harvester/gather_consumer.log
stderr_logfile=/var/log/harvester/gather_consumer.err.log
stdout_logfile_maxbytes=50MB
stderr_logfile=/var/log/harvester/gather_consumer.log
stderr_logfile_maxbytes=50MB
autostart=true
autorestart=true
startsecs=10
priority=1

[program:ckan_fetch_consumer]
command=ckan harvester fetch-consumer
user=ckan
numprocs=1
stdout_logfile=/var/log/harvester/fetch_consumer.log
stderr_logfile=/var/log/harvester/fetch_consumer.err.log
stdout_logfile_maxbytes=50MB
stderr_logfile=/var/log/harvester/fetch_consumer.log
stderr_logfile_maxbytes=50MB
autostart=true
autorestart=true
startsecs=10
startsecs=10
priority=2

; Supervisor program that executes `ckan harvester run` as the ckan user.
; It is launched when supervisord starts (autostart=true) and is not restarted
; by supervisor after it exits (autorestart=false); further runs have to be
; triggered explicitly, e.g. `supervisorctl start ckan_harvester_run`.
[program:ckan_harvester_run]
command=ckan harvester run
user=ckan
numprocs=1
; stdout and stderr are both written to the same log file, each capped at 25MB.
stdout_logfile=/var/log/harvester/ckan_harvester.log
stdout_logfile_maxbytes=25MB
stderr_logfile=/var/log/harvester/ckan_harvester.log
stderr_logfile_maxbytes=25MB
autostart=true
autorestart=false
; NOTE(review): supervisor treats the start as successful only if the process
; stays up for startsecs seconds; if this command finishes in under 2 seconds
; it may be reported as a failed start and retried - confirm this is acceptable.
startsecs=2
; Priority 3: ordered after the programs in this directory with lower values.
priority=3

; Supervisor program that executes `ckan harvester clean-harvest-log` as the
; ckan user. It is never started automatically (autostart=false) nor restarted
; after it exits (autorestart=false); it only runs when started explicitly,
; e.g. `supervisorctl start ckan_harvester_clean_log`.
[program:ckan_harvester_clean_log]
command=ckan harvester clean-harvest-log
user=ckan
numprocs=1
; stdout and stderr are both written to the same log file, each capped at 25MB.
stdout_logfile=/var/log/harvester/ckan_harvester_clean_log.log
stdout_logfile_maxbytes=25MB
stderr_logfile=/var/log/harvester/ckan_harvester_clean_log.log
stderr_logfile_maxbytes=25MB
autostart=false
autorestart=false
; NOTE(review): supervisor treats the start as successful only if the process
; stays up for startsecs seconds; a clean-up that finishes faster may be
; reported as a failed start - confirm this is acceptable.
startsecs=2
priority=4
12 changes: 12 additions & 0 deletions ckan/setup/workers/xloader.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; Supervisor program that keeps a CKAN background-job worker running as the
; ckan user (`ckan jobs worker default`). It is started with supervisord
; (autostart=true) and restarted by supervisor when it exits (autorestart=true).
; presumably this worker is what processes the ckanext-xloader jobs - verify
; against the xloader configuration.
[program:ckan_xloader]
command=ckan jobs worker default
user=ckan
numprocs=1
; stdout and stderr are both written to the same log file, each capped at 100MB.
; NOTE(review): the log lives under /var/log/harvester/ although this is the
; xloader worker - confirm whether a dedicated xloader log directory was intended.
stdout_logfile=/var/log/harvester/ckan_xloader.log
stdout_logfile_maxbytes=100MB
stderr_logfile=/var/log/harvester/ckan_xloader.log
stderr_logfile_maxbytes=100MB
autostart=true
autorestart=true
; The process must stay up for 4 seconds before supervisor considers it started.
startsecs=4
priority=1

0 comments on commit 68a9ce9

Please sign in to comment.