diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..216332204 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,23 @@ +# build artifacts +*.pyc +*.egg-info +build +dist +docs/_build +MANIFEST + +# testing artifacts +tests/index.html +tests/results +**.cache +.coverage +.tox + +# test configurations +/default.cfg + +# git stuff +.git + +# pycharm ide +.idea diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4210ed43a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,99 @@ +# ================================================================= +# +# Authors: Ricardo Garcia Silva +# +# Copyright (c) 2017 Ricardo Garcia Silva +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= +FROM alpine:3.4 + +# There's a bug in libxml2 v2.9.4 that prevents using an XMLParser with a schema +# file. 
+# +# https://bugzilla.gnome.org/show_bug.cgi?id=766834 +# +# It seems to have been fixed upstream, but the fix has not been released into +# a new libxml2 version. As a workaround, we are sticking with the previous +# version, which works fine. +# This means that we need to use alpine's archives for version 3.1, which are +# the ones that contain the previous version of libxml2 +# +# Also, for some unknown reason, alpine 3.1 version of libxml2 depends on +# python2. We'd rather use python3 for pycsw, so we install it too. +RUN echo 'http://dl-cdn.alpinelinux.org/alpine/v3.1/main' >> /etc/apk/repositories \ + && apk add --no-cache \ + build-base \ + ca-certificates \ + postgresql-dev \ + python3 \ + python3-dev \ + libpq \ + libxslt-dev \ + 'libxml2<2.9.4' \ + 'libxml2-dev<2.9.4' \ + wget \ + && apk add --no-cache \ + --repository http://dl-3.alpinelinux.org/alpine/edge/testing/ \ + --allow-untrusted \ + geos \ + geos-dev + +RUN adduser -D -u 1000 pycsw + +WORKDIR /tmp/pycsw + +COPY . . + +ENV PYCSW_CONFIG=/etc/pycsw/pycsw.cfg + +RUN pip3 install --upgrade pip setuptools \ + && pip3 install --requirement requirements-standalone.txt \ + && pip3 install --requirement requirements-pg.txt \ + && pip3 install gunicorn \ + && pip3 install . 
\ + && mkdir /etc/pycsw \ + && mv docker/pycsw.cfg ${PYCSW_CONFIG} \ + && mkdir /var/lib/pycsw \ + && chown pycsw:pycsw /var/lib/pycsw \ + && cp docker/entrypoint.py /usr/local/bin/entrypoint.py \ + && chmod a+x /usr/local/bin/entrypoint.py \ + && cp -r tests /home/pycsw \ + && cp requirements.txt /home/pycsw \ + && cp requirements-standalone.txt /home/pycsw \ + && cp requirements-pg.txt /home/pycsw \ + && cp requirements-dev.txt /home/pycsw \ + && chown -R pycsw:pycsw /home/pycsw/* \ + && rm -rf * + +WORKDIR /home/pycsw + +USER pycsw + + +EXPOSE 8000 + +ENTRYPOINT [\ + "python3", \ + "/usr/local/bin/entrypoint.py" \ +] diff --git a/docker/docker-stack-pycsw.cfg b/docker/docker-stack-pycsw.cfg new file mode 100644 index 000000000..fab76af73 --- /dev/null +++ b/docker/docker-stack-pycsw.cfg @@ -0,0 +1,100 @@ +# ================================================================= +# +# Authors: Tom Kralidis +# Ricardo Garcia Silva +# +# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2017 Ricardo Garcia Silva +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= + +[server] +home=/home/pycsw +url=http://localhost/pycsw/csw.py +mimetype=application/xml; charset=UTF-8 +encoding=UTF-8 +language=en-US +maxrecords=10 +loglevel=DEBUG +logfile= +#ogc_schemas_base=http://foo +#federatedcatalogues=http://catalog.data.gov/csw +#pretty_print=true +#gzip_compresslevel=8 +#domainquerytype=range +#domaincounts=true +#spatial_ranking=true +profiles=apiso + +[manager] +transactions=false +allowed_ips=127.0.0.1 +#csw_harvest_pagesize=10 + +[metadata:main] +identification_title=pycsw Geospatial Catalogue +identification_abstract=pycsw is an OGC CSW server implementation written in Python +identification_keywords=catalogue,discovery,metadata +identification_keywords_type=theme +identification_fees=None +identification_accessconstraints=None +provider_name=Organization Name +provider_url=http://pycsw.org/ +contact_name=Lastname, Firstname +contact_position=Position Title +contact_address=Mailing Address +contact_city=City +contact_stateorprovince=Administrative Area +contact_postalcode=Zip or Postal Code +contact_country=Country +contact_phone=+xx-xxx-xxx-xxxx +contact_fax=+xx-xxx-xxx-xxxx +contact_email=Email Address +contact_url=Contact URL +contact_hours=Hours of Service +contact_instructions=During hours of service. Off on weekends. 
+contact_role=pointOfContact + +[repository] +# sqlite +#database=sqlite:////home/pycsw/tests/functionaltests/suites/cite/data/cite.db +# postgres +database=postgresql://postgres:mypass@db/pycsw +# mysql +#database=mysql://username:password@localhost/pycsw?charset=utf8 +#mappings=path/to/mappings.py +table=records +#filter=type = 'http://purl.org/dc/dcmitype/Dataset' + +[metadata:inspire] +enabled=true +languages_supported=eng,gre +default_language=eng +date=YYYY-MM-DD +gemet_keywords=Utility and governmental services +conformity_service=notEvaluated +contact_name=Organization Name +contact_email=Email Address +temp_extent=YYYY-MM-DD/YYYY-MM-DD + diff --git a/docker/docker-stack.yml b/docker/docker-stack.yml new file mode 100644 index 000000000..ea316c2ba --- /dev/null +++ b/docker/docker-stack.yml @@ -0,0 +1,63 @@ +# ================================================================= +# +# Authors: Ricardo Garcia Silva +# +# Copyright (c) 2017 Ricardo Garcia Silva +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= +# +# +# This docker-stack file demos how to use the pycsw docker image with a +# postgis database +# +# Use it with docker-compose or in a docker swarm: +# +# PYCSW_DOCKER_VERSION=2.1-dev docker-compose --file docker-stack.yml --project-name pycsw up +# +# PYCSW_DOCKER_VERSION=2.1-dev docker stack deploy --compose-file docker-stack.yml pycsw +# + +version: "3" + +services: + + db: + image: mdillon/postgis:9.6-alpine + environment: + POSTGRES_PASSWORD: mypass + POSTGRES_DB: pycsw + PGDATA: /var/lib/postgresql/data/pgdata + volumes: + - db-data:/var/lib/postgresql/data/pgdata + + pycsw: + image: geopython/pycsw:${PYCSW_DOCKER_VERSION} + ports: + - "8000:8000" + volumes: + - ./docker-stack-pycsw.cfg:/etc/pycsw/pycsw.cfg + + +volumes: + db-data: diff --git a/docker/entrypoint.py b/docker/entrypoint.py new file mode 100644 index 000000000..f7d789672 --- /dev/null +++ b/docker/entrypoint.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +# ================================================================= +# +# Authors: Ricardo Garcia Silva +# +# Copyright (c) 2017 Ricardo Garcia Silva +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# =================================================================
+
+"""Entrypoint script for docker containers.
+
+This module serves as the entrypoint for docker containers. Its main
+purpose is to set up the pycsw database so that newly generated
+containers may be useful soon after being launched, without requiring
+additional input.
+
+"""
+
+
+import argparse
+import errno
+import logging
+import os
+import sys
+from six.moves.configparser import SafeConfigParser
+from six.moves.configparser import NoOptionError
+from six.moves.configparser import NoSectionError
+from subprocess import call
+from time import sleep
+
+from sqlalchemy import create_engine
+from sqlalchemy import text
+from sqlalchemy.engine.url import make_url
+from sqlalchemy.exc import OperationalError
+from sqlalchemy.exc import ProgrammingError
+
+from pycsw.core import admin
+
+logger = logging.getLogger(__name__)
+
+
+def _hide_password(database_url):
+    """Return ``database_url`` rendered with its password masked.
+
+    Used for log and error messages so that the credentials stored in
+    the pycsw configuration do not end up in the container's output.
+
+    """
+    # the repr() of a SQLAlchemy URL replaces the password with '***'
+    return repr(make_url(database_url))
+
+
+def launch_pycsw(pycsw_config, gunicorn_workers=2):
+    """Prepare the data repository and run pycsw under gunicorn.
+
+    The repository backend is taken from the scheme of the ``database``
+    option of the ``[repository]`` section; a driver suffix such as
+    ``+psycopg2`` is ignored when selecting the backend.
+
+    :arg pycsw_config: Parsed pycsw configuration. The options
+        ``repository.database``, ``repository.table`` and
+        ``server.home`` are read from it
+    :arg gunicorn_workers: Number of gunicorn worker processes
+    :returns: The exit status of the gunicorn process
+    :raises RuntimeError: If the configured database is neither sqlite
+        nor postgresql
+
+    """
+    db_url = pycsw_config.get("repository", "database")
+    db = db_url.partition(":")[0].partition("+")[0]
+    db_handler = {
+        "sqlite": handle_sqlite_db,
+        "postgresql": handle_postgresql_db,
+    }.get(db)
+    if db_handler is None:
+        # fail with a clear message instead of "NoneType is not callable"
+        raise RuntimeError(
+            "Unsupported repository database {!r}. This entrypoint can "
+            "only set up sqlite and postgresql repositories".format(db)
+        )
+    logger.debug("Setting up pycsw's data repository...")
+    logger.debug("Repository URL: {}".format(_hide_password(db_url)))
+    db_handler(
+        db_url,
+        pycsw_config.get("repository", "table"),
+        pycsw_config.get("server", "home")
+    )
+    logger.debug("Launching pycsw...")
+    pycsw_server_command = [
+        "gunicorn",
+        "--bind=0.0.0.0:8000",
+        "--access-logfile=-",
+        "--error-logfile=-",
+        "--workers={}".format(gunicorn_workers),
+        "pycsw.wsgi",
+    ]
+    return call(pycsw_server_command)
+
+
+def handle_sqlite_db(database_url, table_name, pycsw_home):
+    """Create the sqlite repository if its database file is missing.
+
+    NOTE(review): the repository is assumed to be set up only when the
+    database file does not exist yet - confirm against the intended
+    behaviour for pre-populated database files.
+
+    :raises OSError: If the parent directory of the database file can
+        not be created
+
+    """
+    db_path = database_url.rpartition(":///")[-1]
+    if not os.path.isfile(db_path):
+        db_dir = os.path.dirname(db_path)
+        if db_dir:  # a bare file name has no directory to create
+            try:
+                os.makedirs(db_dir)
+            except OSError as exc:
+                # only an already existing directory is acceptable
+                if exc.errno != errno.EEXIST:
+                    raise
+        admin.setup_db(database=database_url, table=table_name,
+                       home=pycsw_home)
+
+
+def handle_postgresql_db(database_url, table_name, pycsw_home):
+    """Wait for the PostgreSQL server and set up the repository.
+
+    A ``ProgrammingError`` raised by the setup is taken to mean that
+    the database tables are already created and is not propagated.
+
+    :raises RuntimeError: If the database server never responds
+
+    """
+    _wait_for_postgresql_db(database_url)
+    try:
+        admin.setup_db(database=database_url, table=table_name,
+                       home=pycsw_home)
+    except ProgrammingError as exc:
+        # database tables are already created
+        logger.debug("Skipping repository setup: %s", exc)
+
+
+def _wait_for_postgresql_db(database_url, max_tries=10, wait_seconds=3):
+    """Block until the database answers a query.
+
+    :arg database_url: SQLAlchemy URL of the database
+    :arg max_tries: How many connection attempts to make
+    :arg wait_seconds: Pause between two consecutive attempts
+    :raises RuntimeError: If the database did not respond after
+        ``max_tries`` attempts
+
+    """
+    safe_url = _hide_password(database_url)
+    logger.debug("Waiting for {}...".format(safe_url))
+    engine = create_engine(database_url)
+    try:
+        for attempt in range(1, max_tries + 1):
+            try:
+                with engine.connect() as connection:
+                    connection.execute(text("SELECT version();"))
+            except OperationalError:
+                logger.debug("Database not responding yet ...")
+                if attempt < max_tries:  # no point in waiting to give up
+                    sleep(wait_seconds)
+            else:
+                logger.debug("Database is already up!")
+                return
+    finally:
+        # this engine is only used for probing, release its connections
+        engine.dispose()
+    raise RuntimeError(
+        "Database not responding at {} after {} tries. "
+        "Giving up".format(safe_url, max_tries)
+    )
+
+
+def main():
+    """Read the configuration named by PYCSW_CONFIG and launch pycsw."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--workers",
+        default=2,
+        type=int,
+        help="Number of workers to use by the gunicorn server. Defaults to 2."
+    )
+    args = parser.parse_args()
+    config_path = os.getenv("PYCSW_CONFIG")
+    if not config_path:
+        parser.error("the PYCSW_CONFIG environment variable is not set")
+    config = SafeConfigParser()
+    # read() returns the list of files it managed to parse
+    if not config.read(config_path):
+        parser.error(
+            "could not read pycsw configuration file "
+            "{!r}".format(config_path)
+        )
+    try:
+        level = config.get("server", "loglevel").upper()
+    except (NoSectionError, NoOptionError):
+        level = "WARNING"
+    # unknown level names fall back to WARNING instead of crashing
+    logging.basicConfig(level=getattr(logging, level, logging.WARNING))
+    # propagate gunicorn's exit status to the container runtime
+    sys.exit(launch_pycsw(config, gunicorn_workers=args.workers))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docker/pycsw.cfg b/docker/pycsw.cfg
new file mode 100644
index 000000000..c303df7bf
--- /dev/null
+++ b/docker/pycsw.cfg
@@ -0,0 +1,100 @@
+# =================================================================
+#
+# Authors: Tom Kralidis
+# Ricardo Garcia Silva
+#
+# Copyright (c) 2015 Tom Kralidis
+# Copyright (c) 2017 Ricardo Garcia Silva
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# +# ================================================================= + +[server] +home=/home/pycsw +url=http://localhost/pycsw/csw.py +mimetype=application/xml; charset=UTF-8 +encoding=UTF-8 +language=en-US +maxrecords=10 +loglevel=DEBUG +logfile= +#ogc_schemas_base=http://foo +#federatedcatalogues=http://catalog.data.gov/csw +#pretty_print=true +#gzip_compresslevel=8 +#domainquerytype=range +#domaincounts=true +#spatial_ranking=true +profiles=apiso + +[manager] +transactions=false +allowed_ips=127.0.0.1 +#csw_harvest_pagesize=10 + +[metadata:main] +identification_title=pycsw Geospatial Catalogue +identification_abstract=pycsw is an OGC CSW server implementation written in Python +identification_keywords=catalogue,discovery,metadata +identification_keywords_type=theme +identification_fees=None +identification_accessconstraints=None +provider_name=Organization Name +provider_url=http://pycsw.org/ +contact_name=Lastname, Firstname +contact_position=Position Title +contact_address=Mailing Address +contact_city=City +contact_stateorprovince=Administrative Area +contact_postalcode=Zip or Postal Code +contact_country=Country +contact_phone=+xx-xxx-xxx-xxxx +contact_fax=+xx-xxx-xxx-xxxx +contact_email=Email Address +contact_url=Contact URL +contact_hours=Hours of Service +contact_instructions=During hours of service. Off on weekends. 
+contact_role=pointOfContact + +[repository] +# sqlite +database=sqlite:////home/pycsw/tests/functionaltests/suites/cite/data/cite.db +# postgres +#database=postgresql://username:password@localhost/pycsw +# mysql +#database=mysql://username:password@localhost/pycsw?charset=utf8 +#mappings=path/to/mappings.py +table=records +#filter=type = 'http://purl.org/dc/dcmitype/Dataset' + +[metadata:inspire] +enabled=true +languages_supported=eng,gre +default_language=eng +date=YYYY-MM-DD +gemet_keywords=Utility and governmental services +conformity_service=notEvaluated +contact_name=Organization Name +contact_email=Email Address +temp_extent=YYYY-MM-DD/YYYY-MM-DD +