diff --git a/README.md b/README.md
index 911c1ae46d7..4ca5538a1e5 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,8 @@ Integration tests require all of the haskell services (brig, galley, cannon, gun
     - SNS
     - S3
     - DynamoDB
+- Required additional software:
+    - netcat (in order to allow the services being tested to talk to the dependencies above)
 
 Setting up these real, but in-memory internal and "fake" external dependencies is done easiest using [`docker-compose`](https://docs.docker.com/compose/install/). Run the following in a separate terminal (it will block that terminal, C-c to shut all these docker images down again):
 
diff --git a/deploy/docker-ephemeral/build/Makefile b/deploy/docker-ephemeral/build/Makefile
new file mode 100644
index 00000000000..c0d254bd185
--- /dev/null
+++ b/deploy/docker-ephemeral/build/Makefile
@@ -0,0 +1,281 @@
+# use DOCKER_ so we allow users to pass in values without conflicting with USERNAME, EMAIL, or somesuch already in their environments.
+DOCKER_USERNAME ?= wireserver
+DOCKER_REALNAME ?= Wire
+DOCKER_EMAIL ?= backend@wire.com
+TAGNAME ?= :0.0.9
+
+# shorten the variable names above, to make the make rules below a little clearer to read.
+USERNAME := $(DOCKER_USERNAME)
+REALNAME := $(DOCKER_REALNAME)
+EMAIL := $(DOCKER_EMAIL)
+
+# the distribution we're going to build for. this can be either DEBIAN or ALPINE.
+DIST ?= DEBIAN
+
+# these are docker architecture names, not debian.
+STRETCHARCHES := arm32v5 arm32v7 386 amd64 arm64v8 ppc64le s390x
+JESSIEARCHES := arm32v5 arm32v7 386 amd64
+# the arches that our images based on debian support.
+# note that we only care about the pi, the 386, and amd64 for now.
+DEBARCHES := arm32v5 arm32v7 386 amd64
+
+# the names of the docker images we're building that are based on debian jessie.
+JESSIENAMES := airdock_fakesqs airdock_rvm airdock_base smtp
+# the names of the docker images we're building that are based on debian stretch.
+STRETCHNAMES := dynamodb_local cassandra
+# the names of the docker images that we're building that are based on debian.
+DEBNAMES := $(JESSIENAMES) $(STRETCHNAMES)
+
+# the arches that we build for alpine.
+ALPINEARCHES := amd64 386 arm32v6
+# images we build that are based on alpine.
+ALPINENAMES := elasticsearch java_maven_node_python localstack minio
+
+# dependencies between docker images, written as <target>-<dependency>: the image named first is built on top of the image named second.
+PREBUILDS := airdock_rvm-airdock_base airdock_fakesqs-airdock_rvm localstack-java_maven_node_python
+
+# manifest files don't work for these when they are finding the image they are based on.
+# by adding the name of the docker image here, we use the image:tag-<arch> format, instead of <arch>/image:tag.
+NOMANIFEST := airdock_rvm airdock_fakesqs localstack
+
+# convert from debian architecture string to docker architecture string.
+dockerarch=$(patsubst i%,%,$(patsubst armel,arm32v5,$(patsubst armhf,arm32v7,$(patsubst arm64,arm64v8,$(1)))))
+
+# the local architecture, in debian format. (i386, amd64, armel, armhf, arm64, ..) left empty when dpkg is not installed; 'command -v' probes for it without printing anything.
+LOCALDEBARCH := $(shell command -v dpkg >/dev/null 2>&1 && dpkg --print-architecture)
+# the local architecture, in docker format. (386, amd64, arm32v5, arm32v7, arm64v8, ...)
+LOCALARCH ?= $(call dockerarch,$(LOCALDEBARCH))
+
+ifeq ($(LOCALARCH),)
+  $(error LOCALARCH is empty, you may need to supply it.)
+endif
+
+# FIXME: make this a section that depends on LOCALARCH, so we can allow these images to be built on native arm32.
+# FIXME: what's up with dynamodb?
+# note that qemu's x86_64 support is not strong enough to cross-build most things on i386.
+# these <name>-<arch> targets won't build on the system emulators for these arches. working with the qemu team to fix. they think it might be https://bugs.launchpad.net/qemu/+bug/1813398 .
+BADARCHSIM := localstack-arm32v6 java_maven_node_python-arm32v6 dynamodb_local-386
+
+# set the targets, depending on the distro base specified. this is so that the debian images are built for all of the debian arches, and the alpine images for its arches.
+ifeq ($(DIST),DEBIAN)
+  ARCHES ?= $(DEBARCHES)
+  NAMES ?= $(DEBNAMES)
+endif
+ifeq ($(DIST),ALPINE)
+  ARCHES ?= $(ALPINEARCHES)
+  NAMES ?= $(ALPINENAMES)
+endif
+
+# which sed to use. GNU-SED for macs.
+SED ?= sed
+
+# turn on experimental features in docker.
+export DOCKER_CLI_EXPERIMENTAL=enabled
+
+# allow for us to (ab)use $$* in dependencies of rules.
+.SECONDEXPANSION:
+
+# disable make's default builtin rules, to make debugging output cleaner.
+MAKEFLAGS += --no-builtin-rules
+
+# make sure we use bash. for proper quoting when inserting JVM_OPTIONS snippet.
+SHELL = bash
+
+# empty out the default suffix list, to make debugging output cleaner.
+.SUFFIXES:
+
+# too much haskell. returns first or second from <first>-<second>, respectively.
+fst=$(word 1, $(subst -, ,$(1)))
+snd=$(word 2, $(subst -, ,$(1)))
+
+# filter the list of architectures, removing architectures that we know do not work for a given docker image.
+goodarches=$(filter-out $(call snd,$(foreach arch,$(ARCHES),$(filter $(1)-$(arch),$(BADARCHSIM)))),$(ARCHES))
+# filter the list of names, returning only names that have no pre-dependencies.
+nodeps=$(filter-out $(foreach target,$(NAMES),$(call snd,$(foreach dependency,$(NAMES),$(filter $(target)-$(dependency),$(PREBUILDS))))),$(NAMES))
+
+# the three entry points we expect users to use. all by default, to create and upload either debian or alpine images, build-<name>, to build a single image (for all arches, but without the manifest), push-<name> to build a single image, push the image, build its manifest, and push it to dockerhub.
+all: $(foreach image,$(nodeps),manifest-push-$(image))
+
+# build-<name>: build one image for every architecture that works for it, by requesting create-<arch>-<name> for each.
+build-%: $$(foreach arch,$$(call goodarches,%),create-$$(arch)-$$*)
+	@echo -n
+
+.PHONY: all build-all
+build-all: $(foreach image,$(nodeps),build-$(image))
+
+# push-<name>: alias for manifest-push-<name>.
+push-%: manifest-push-%
+	@echo -n
+
+.PHONY: push-all
+push-all: $(foreach image,$(nodeps),manifest-push-$(image))
+
+# manifests use a slightly different form of architecture name than docker itself. arm instead of arm32, and a separate variant field.
+maniarch=$(patsubst %32,%,$(call fst,$(subst v, ,$(1))))
+# separate and use the variant, if it is part of the architecture name.
+manivariant=$(foreach variant,$(word 2, $(subst v, ,$(1))), --variant $(variant))
+
+# manifest-push-<name>
+manifest-push-%: $$(foreach arch,$$(call goodarches,$$*), manifest-annotate-$$(arch)-$$*)
+	docker manifest push $(USERNAME)/$*$(TAGNAME)
+
+# manifest-annotate-<arch>-<name>
+manifest-annotate-%: manifest-create-$$(call snd,$$*)
+	docker manifest annotate $(USERNAME)/$(call snd,$*)$(TAGNAME) $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) --arch $(call maniarch,$(call fst,$*)) $(call manivariant,$(call fst,$*))
+
+# manifest-create-<name>
+manifest-create-%: $$(foreach arch,$$(call goodarches,%), upload-$$(arch)-$$*)
+	docker manifest create $(USERNAME)/$*$(TAGNAME) $(patsubst %,$(USERNAME)/$*$(TAGNAME)-%,$(call goodarches,$*)) --amend
+
+# upload-<arch>-<name>. docker's output is piped through cat; pipefail (recipes run under bash) keeps a failed docker command from being hidden behind cat's exit status.
+upload-%: create-% $$(foreach predep,$$(filter $$(call snd,%)-%,$$(PREBUILDS)), dep-upload-$$(call fst,$$*)-$$(call snd,$$(predep)))
+	set -o pipefail; docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat
+
+dep-upload-%: create-% $$(foreach predep,$$(filter $$(call snd,%)-%,$$(PREBUILDS)), dep-subupload-$$(call fst,$$*)-$$(call snd,$$(predep)))
+	set -o pipefail; docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat
+
+dep-subupload-%: create-%
+	set -o pipefail; docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat
+
+# create-<arch>-<name>. needs the generated Dockerfile-<arch> for the image, plus the images listed in PREBUILDS as its dependencies. pipefail as above, so a failed docker build fails the rule.
+create-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$* $$(foreach predep,$$(filter $$(call snd,%)-%,$(PREBUILDS)), depend-create-$$(call fst,$$*)-$$(call snd,$$(predep)))
+	set -o pipefail; cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . | cat
+
+depend-create-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$* $$(foreach predep,$$(filter $$(call snd,%)-%,$(PREBUILDS)), depend-subcreate-$$(call fst,$$*)-$$(call snd,$$(predep)))
+	set -o pipefail; cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . | cat
+
+depend-subcreate-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$*
+	set -o pipefail; cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . | cat
+
+# with a broken manifest(our images, either docker or local), we have to use a postfix to request docker images other than the one for our native architecture.
+archpostfix=$(foreach arch,$(filter-out $(filter-out $(word 3, $(subst -, ,$(filter $(call snd,$(1))-%-$(call fst,$(1)),$(foreach prebuild,$(PREBUILDS),$(prebuild)-$(call fst,$(1)))))),$(LOCALARCH)),$(call fst,$(1))),-$(arch))
+# with working manifest (official images from docker built correctly), we have to use a path when requesting docker images other than the one for our native architecture.
+archpath=$(foreach arch,$(patsubst 386,i386,$(filter-out $(LOCALARCH),$(1))),$(arch)/)
+
+# handle cases where a manifest file is not being respected, and we have to use <image>:<tag>-<arch> format.
+# Dockerfile-NOMANIFEST-<arch>-<name>: write <name>/Dockerfile-<arch>, replacing the MAINTAINER line and appending the arch postfix to the FROM image. one sed with two expressions, so a sed failure fails the rule.
+Dockerfile-NOMANIFEST-%: $$(call snd,%)/Dockerfile
+	cd $(call snd,$*) && ${SED} -e "s/^\(MAINTAINER\).*/\1 $(REALNAME) \"$(EMAIL)\"/" -e "s=^\(FROM \)\(.*\)$$=\1\2$(call archpostfix,$*)=" Dockerfile > Dockerfile-$(call fst,$*)
+
+# handle situations where a manifest is present in upstream, and available as <arch>/<image>:<tag>
+# Dockerfile-<arch>-<name>: write <name>/Dockerfile-<arch>, replacing the MAINTAINER line and prefixing the FROM image with the arch path.
+Dockerfile-%: $$(call snd,%)/Dockerfile
+	cd $(call snd,$*) && ${SED} -e "s/^\(MAINTAINER\).*/\1 $(REALNAME) \"$(EMAIL)\"/" -e "s=^\(FROM \)\(.*\)$$=\1$(call archpath,$(call fst,$*))\2=" Dockerfile > Dockerfile-$(call fst,$*)
+
+# real files, finally! each <name>/Dockerfile target below clones an upstream repository, resets it to the commit pinned here, and patches the result with sed.
+
+# define commit IDs for the versions we're using. all of them can be overridden from the environment or the command line.
+SMTP_COMMIT ?= 8ad8b849855be2cb6a11d97d332d27ba3e47483f
+DYNAMODB_COMMIT ?= c1eabc28e6d08c91672ff3f1973791bca2e08918
+ELASTICSEARCH_COMMIT ?= 06779bd8db7ab81d6706c8ede9981d815e143ea3
+AIRDOCKBASE_COMMIT ?= 692625c9da3639129361dc6ec4eacf73f444e98d
+AIRDOCKRVM_COMMIT ?= cdc506d68b92fa4ffcc7c32a1fc7560c838b1da9
+AIRDOCKFAKESQS_COMMIT ?= 9547ca5e5b6d7c1b79af53e541f8940df09a495d
+JAVAMAVENNODEPYTHON_COMMIT ?= 645af21162fffd736c93ab0047ae736dc6881959
+LOCALSTACK_COMMIT ?= 645af21162fffd736c93ab0047ae736dc6881959
+MINIO_COMMIT ?= 118270d76fc90f1e54cd9510cee9688bd717250b
+CASSANDRA_COMMIT ?= 064fb4e2682bf9c1909e4cb27225fa74862c9086
+
+smtp/Dockerfile:
+	git clone https://github.com/namshi/docker-smtp.git smtp
+	cd smtp && git reset --hard $(SMTP_COMMIT)
+
+dynamodb_local/Dockerfile:
+	git clone https://github.com/cnadiminti/docker-dynamodb-local.git dynamodb_local
+	cd dynamodb_local && git reset --hard $(DYNAMODB_COMMIT)
+
+elasticsearch/Dockerfile:
+	git clone https://github.com/blacktop/docker-elasticsearch-alpine.git elasticsearch-all
+	cd elasticsearch-all && git reset --hard $(ELASTICSEARCH_COMMIT)
+	cp -R elasticsearch-all/5.6/ elasticsearch
+	# add a block to the entrypoint script to interpret JVM_OPTIONS_ES, modifying the jvm.options before launching elasticsearch.
+	# first, add a marker to be replaced after the last fi.
+	${SED} -i.bak -r ':a;$$!{N;ba};s/^(.*)(\n?)fi/\2\1fi\nREPLACEME/' elasticsearch/elastic-entrypoint.sh
+	# next, load our variables.
+	${SED} -i.bak 's@REPLACEME@MY_APP_CONFIG="/usr/share/elasticsearch/config/"\n&@' elasticsearch/elastic-entrypoint.sh
+	# add our parser and replacer.
+	${SED} -i.bak $$'s@REPLACEME@if [ ! -z "$${JVM_OPTIONS_ES}" ]; then\\nfor x in $${JVM_OPTIONS_ES}; do { l="$${x%%=*}"; r=""; e=""; [ "$$x" != "$${x/=//}" ] \&\& e="=" \&\& r="$${x##*=}"; [ "$$x" != "$${x##-Xm?}" ] \&\& r="$${x##-Xm?}" \&\& l="$${x%%$$r}"; echo $$l $$e $$r; sed -i.bak -r \'s/^[# ]?(\'"$$l$$e"\').*/\\\\1\'"$$r"\'/\' "$$MY_APP_CONFIG/jvm.options"; diff "$$MY_APP_CONFIG/jvm.options.bak" "$$MY_APP_CONFIG/jvm.options" \&\& echo "no difference"; } done;\\nfi\\n&@' elasticsearch/elastic-entrypoint.sh
+	# remove the marker we added earlier.
+	${SED} -i.bak 's@REPLACEME@@' elasticsearch/elastic-entrypoint.sh
+
+airdock_base/Dockerfile:
+	git clone https://github.com/airdock-io/docker-base.git airdock_base-all
+	cd airdock_base-all && git reset --hard $(AIRDOCKBASE_COMMIT)
+	cp -R airdock_base-all/jessie airdock_base
+	# work around go compiler bug by using newer version of GOSU. https://bugs.launchpad.net/qemu/+bug/1696353
+	${SED} -i.bak "s/GOSU_VERSION=.* /GOSU_VERSION=1.11 /" $@
+	# work around missing architecture specific binaries in earlier versions of tini.
+	${SED} -i.bak "s/TINI_VERSION=.*/TINI_VERSION=v0.16.1/" $@
+	# work around the lack of architecture usage when downloading tini binaries. https://github.com/airdock-io/docker-base/issues/8
+	${SED} -i.bak 's/tini\(.asc\|\)"/tini-\$$dpkgArch\1"/' $@
+
+airdock_rvm/Dockerfile:
+	git clone https://github.com/airdock-io/docker-rvm.git airdock_rvm-all
+	cd airdock_rvm-all && git reset --hard $(AIRDOCKRVM_COMMIT)
+	cp -R airdock_rvm-all/jessie-rvm airdock_rvm
+	${SED} -i.bak "s=airdock/base:jessie=$(USERNAME)/airdock_base$(TAGNAME)=" $@
+	# add a second key used to sign ruby to the dockerfile. https://github.com/airdock-io/docker-rvm/issues/1
+	${SED} -i.bak "s=\(409B6B1796C275462A1703113804BB82D39DC0E3\)=\1 7D2BAF1CF37B13E2069D6956105BD0E739499BDB=" $@
+
+airdock_fakesqs/Dockerfile:
+	git clone https://github.com/airdock-io/docker-fake-sqs.git airdock_fakesqs-all
+	cd airdock_fakesqs-all && git reset --hard $(AIRDOCKFAKESQS_COMMIT)
+	cp -R airdock_fakesqs-all/0.3.1 airdock_fakesqs
+	${SED} -i.bak "s=airdock/rvm:latest=$(USERNAME)/airdock_rvm$(TAGNAME)=" $@
+	# add a workdir declaration to the final switch to root.
+	${SED} -i.bak "s=^USER root=USER root\nWORKDIR /=" $@
+	# break directory creation into two pieces, one run by root.
+	${SED} -i.bak "s=^USER ruby=USER root=" $@
+	${SED} -i.bak "s=cd /srv/ruby/fake-sqs.*=chown ruby.ruby /srv/ruby/fake-sqs\nUSER ruby\nWORKDIR /srv/ruby/fake-sqs\nRUN cd /srv/ruby/fake-sqs \&\& \\\\=" $@
+
+java_maven_node_python/Dockerfile:
+	git clone https://github.com/localstack/localstack.git java_maven_node_python
+	cd java_maven_node_python && git reset --hard $(JAVAMAVENNODEPYTHON_COMMIT)
+	cd java_maven_node_python && mv bin/Dockerfile.base Dockerfile
+	# disable installing docker-ce. not available on many architectures in binary form.
+	${SED} -i.bak "/.*install Docker.*/{N;N;N;N;N;d}" $@
+
+localstack/Dockerfile:
+	git clone https://github.com/localstack/localstack.git localstack
+	cd localstack && git reset --hard $(LOCALSTACK_COMMIT)
+	${SED} -i.bak "s=localstack/java-maven-node-python=$(USERNAME)/java_maven_node_python$(TAGNAME)=" $@
+	# skip tests. they take too long.
+	${SED} -i.bak "s=make lint.*=make lint=" localstack/Makefile
+	${SED} -i.bak "s=\(.*lambda.*\)=#\1=" localstack/Makefile
+
+minio/Dockerfile:
+	git clone https://github.com/minio/minio.git minio
+	cd minio && git reset --hard $(MINIO_COMMIT)
+
+cassandra/Dockerfile:
+	git clone https://github.com/docker-library/cassandra.git cassandra-all
+	cd cassandra-all && git reset --hard $(CASSANDRA_COMMIT)
+	cp -R cassandra-all/3.11 cassandra
+	# work around go compiler bug by using newer version of GOSU. https://bugs.launchpad.net/qemu/+bug/1696353
+	${SED} -i.bak "s/GOSU_VERSION .*/GOSU_VERSION 1.11/" $@
+	# add a block to the entrypoint script to interpret CS_JVM_OPTIONS, modifying the jvm.options before launching cassandra.
+	# first, add a marker to be replaced before the last fi.
+	${SED} -i.bak -r ':a;$$!{N;ba};s/^(.*)(\n?)fi/\2\1REPLACEME\nfi/' cassandra/docker-entrypoint.sh
+	# next, load our variables.
+	${SED} -i.bak 's/REPLACEME/\nAPP_CONFIG="$$CASSANDRA_CONFIG"\n&/' cassandra/docker-entrypoint.sh
+	${SED} -i.bak 's/REPLACEME/JVM_OPTIONS="$$CS_JVM_OPTIONS"\n&/' cassandra/docker-entrypoint.sh
+	# add our parser and replacer.
+	${SED} -i.bak $$'s@REPLACEME@if [ ! -z "$${JVM_OPTIONS}" ]; then\\nfor x in $${JVM_OPTIONS}; do { l="$${x%%=*}"; r=""; e=""; [ "$$x" != "$${x/=//}" ] \&\& e="=" \&\& r="$${x##*=}"; [ "$$x" != "$${x##-Xm?}" ] \&\& r="$${x##-Xm?}" \&\& l="$${x%%$$r}"; echo $$l $$e $$r; _sed-in-place "$$APP_CONFIG/jvm.options" -r \'s/^[# ]*(\'"$$l$$e"\').*/\\\\1\'"$$r"\'/\'; } done\\nfi\\n&@' cassandra/docker-entrypoint.sh
+	# remove the marker we added earlier.
+	${SED} -i.bak 's@REPLACEME@@' cassandra/docker-entrypoint.sh
+
+# cleanup. remove the directories we set up for building, as well as the git repos we download.
+.PHONY: clean
+clean:
+	rm -rf elasticsearch-all airdock_base-all airdock_rvm-all airdock_fakesqs-all cassandra-all $(DEBNAMES) $(ALPINENAMES)
+
+.PHONY: cleandocker names
+cleandocker: # WARNING: removes every container and every image the local docker daemon lists, not only the ones built here. failures are ignored.
+	docker rm $$(docker ps -a -q) || true
+	docker rmi $$(docker images -q) --force || true
+
+names: # print the image names this Makefile knows about, grouped by base distribution.
+	@echo Debian based images:
+	@echo $(DEBNAMES)
+	@echo Alpine based images:
+	@echo $(ALPINENAMES)
diff --git a/deploy/docker-ephemeral/build/README.md b/deploy/docker-ephemeral/build/README.md
new file mode 100644
index 00000000000..0ce32305265
--- /dev/null
+++ b/deploy/docker-ephemeral/build/README.md
@@ -0,0 +1,54 @@
+A makefile that uses docker.io, and qemu-user-static to build dependencies for our integration tests.
+
+Builds and uploads docker images for multiple architectures. Allows for '-j' to build multiple images at once. Uploads assume the hub.docker.com docker registry.
+
+# Setup
+
+## Docker
+
+Follow the instructions in [our dependencies file](../../../docs/developer/dependencies.md) to ensure you have docker installed, and logged in.
+
+## qemu
+
+### Debian
+
+
+```bash
+apt-get install qemu-user-static
+sudo service binfmt-support start
+```
+
+### Fedora
+
+'sudo dnf install -y qemu-user-static'
+
+# Using
+
+Assuming you have docker, and have followed the above instructions, "make build-all" should work. This builds all of the images, and places them in docker on the local machine.
+To build an individual image (and the images it depends on), run "make build-<name>". To see a list of images that are buildable, run "make names".
+
+## Using with Dockerhub
+
+If you want to upload images to dockerhub, you must go to dockerhub, and create repositories under your user with the names of the images you want to upload. Again, to get the list of names buildable with this Makefile, type 'make names'.
+
+If you don't want to change the Makefile, add the DOCKER_USERNAME, DOCKER_EMAIL, and DOCKER_REALNAME environment variables.
+ +For instance, when I want to build all debian images, and upload them to dockerhub, I use: +```bash +make DIST=DEBIAN DOCKER_USERNAME=julialongtin DOCKER_EMAIL=julia.longtin@wire.com DOCKER_REALNAME='Julia Longtin' push-all +``` + +You can also push a single image (and its dependencies) with "make push-<name>". + +If you want your builds to go faster, and are good with having more garbled output, use the '-j' argument to make, to parallelize the builds. + +By default this makefile builds and uploads the debian based images. Use the 'DIST=ALPINE' environment variable to build the alpine based images instead. + +# Troubleshooting: +## binfmt support: + +examine the following file, and ensure the 'flags:' line has an "F" flag on it: +cat /proc/sys/fs/binfmt_misc/qemu-arm | grep flags + +if it doesn't, try re-starting binfmt-support on debian. + diff --git a/deploy/docker-ephemeral/docker-compose.yaml b/deploy/docker-ephemeral/docker-compose.yaml index c15e885aae3..76d6f04aca3 100644 --- a/deploy/docker-ephemeral/docker-compose.yaml +++ b/deploy/docker-ephemeral/docker-compose.yaml @@ -6,7 +6,8 @@ networks: services: fake_dynamodb: container_name: demo_wire_dynamodb - image: cnadiminti/dynamodb-local:2018-04-11 +# image: cnadiminti/dynamodb-local:2018-04-11 + image: julialongtin/dynamodb_local:0.0.9 ports: - 127.0.0.1:4567:8000 networks: @@ -14,7 +15,8 @@ services: fake_sqs: container_name: demo_wire_sqs - image: airdock/fake-sqs:0.3.1 +# image: airdock/fake-sqs:0.3.1 + image: julialongtin/airdock_fakesqs:0.0.9 ports: - 127.0.0.1:4568:4568 networks: @@ -22,7 +24,8 @@ services: fake_localstack: container_name: demo_wire_localstack - image: localstack/localstack:0.8.0 # NB: this is younger than 0.8.6! +# image: localstack/localstack:0.8.0 # NB: this is younger than 0.8.6!
+ image: julialongtin/localstack:0.0.9 ports: - 127.0.0.1:4569:4579 # ses # needed for local integration tests - 127.0.0.1:4575:4575 # sns @@ -35,8 +38,8 @@ services: basic_smtp: # needed for demo setup container_name: demo_wire_smtp - # https://github.com/namshi/docker-smtp - image: namshi/smtp +# image: namshi/smtp + image: julialongtin/smtp:0.0.9 ports: - 127.0.0.1:2500:25 networks: @@ -44,7 +47,8 @@ services: fake_s3: container_name: demo_wire_s3 - image: minio/minio:RELEASE.2018-05-25T19-49-13Z +# image: minio/minio:RELEASE.2018-05-25T19-49-13Z + image: julialongtin/minio:0.0.9 ports: - "127.0.0.1:4570:9000" environment: @@ -59,6 +63,7 @@ services: # ports: # - "61613:61613" + # FIXME: replace redis image with one we build. redis: container_name: demo_wire_redis image: redis:3.0.7-alpine @@ -69,20 +74,33 @@ services: elasticsearch: container_name: demo_wire_elasticsearch - image: elasticsearch:5.6 + #image: elasticsearch:5.6 + image: julialongtin/elasticsearch:0.0.9-amd64 # https://hub.docker.com/_/elastic is deprecated, but 6.2.4 did not work without further changes. # image: docker.elastic.co/elasticsearch/elasticsearch:6.2.4 ports: - "127.0.0.1:9200:9200" - "127.0.0.1:9300:9300" + environment: + - "bootstrap.system_call_filter=false" +# ES_JVM_OPTIONS is reserved, so... +# what's present in the jvm.options file by default. +# - "JVM_OPTIONS_ES=-Xmx2g -Xms2g" + - "JVM_OPTIONS_ES=-Xmx512m -Xms512m" + - "discovery.type=single-node" networks: - demo_wire cassandra: container_name: demo_wire_cassandra - image: cassandra:3.11.2 + #image: cassandra:3.11.2 + image: julialongtin/cassandra:0.0.9 ports: - "127.0.0.1:9042:9042" + environment: +# what's present in the jvm.options file by default. +# - "CS_JVM_OPTIONS=-Xmx1024M -Xms1024M -Xmn200M" + - "CS_JVM_OPTIONS=-Xmx128M -Xms128M -Xmn50M" networks: - demo_wire @@ -146,6 +164,7 @@ services: networks: - demo_wire + # FIXME: replace aws_cli with an image that we build.
aws_cli: image: mesosphere/aws-cli:1.14.5 depends_on: diff --git a/docs/developer/dependencies.md b/docs/developer/dependencies.md index db7c06072ee..7917d74ab08 100644 --- a/docs/developer/dependencies.md +++ b/docs/developer/dependencies.md @@ -149,6 +149,13 @@ _Note_: While it is possible to use non-docker solutions to set up and configure sudo apt install docker.io docker-compose ``` +After installing docker.io, add your user to the docker group, and restart your shell (usually involving a restart of your graphical environment). + +Once you've logged in again, if you would like to upload any docker images (optional): +```bash +docker login --username=<your dockerhub username> +``` + ### Generic: * [Install docker](https://docker.com) diff --git a/docs/reference/make-docker-and-qemu.md b/docs/reference/make-docker-and-qemu.md new file mode 100644 index 00000000000..c4ab362d27c --- /dev/null +++ b/docs/reference/make-docker-and-qemu.md @@ -0,0 +1,1072 @@ +# About this document: +This document is written with the goal of explaining https://github.com/wireapp/wire-server/pull/622 well enough that someone can honestly review it. :) + +In this document, we're going to rapidly bounce back and forth between GNU make, bash, GNU sed, Docker, and QEMU. + +# What does this Makefile do? Why was it created? + +To answer that, we're going to have to go back to Wire-Server, specifically, our integration tests. Integration tests are run locally on all of our machines, in order to ensure that changes we make to the Wire backend do not break currently existing functionality. In order to simulate the components that wire's backend depends on (s3, cassandra, redis, etc.), we use a series of docker images. These docker images are downloaded from dockerhub, are maintained (or not maintained) by outside parties, and are built by those parties.
+ +When a docker image is built, even if the docker image is something like a java app, or a pile of perl/node/etc, the interpreters (openjdk, node, perl) are embedded into the image. Those interpreters are compiled for a specific processor architecture, and only run on that architecture (and supersets of it). For instance, an AMD64 image will run on only an AMD64 system, but a 386 image will run on AMD64 since AMD64 is a superset of 386. Neither of those images will run on an ARM, like a Raspberry pi. + +This Makefile contains rules that allow our Mac users to build all of the docker images locally on their machine, with some minor improvements, which will save us about 2.5G of ram during integration tests. Additionally, it contains rules for uploading these images to dockerhub for others to use, and support for linux users to build images for arm32v5, arm32v7, 386, and AMD64, despite not being on these architectures. + +It builds non-AMD64 images on linux by using QEMU, a system emulator, to allow docker to run images that are not built for the architecture the system is currently running on. This is full system emulation, like many video game engines you're probably familiar with. You know how you have to throw gobs of hardware at a machine, to play a game written for a gaming system 20 years ago? This is similarly slow. To work around this, the Makefile is written in a manner that allows us to build many docker images at once, to take advantage of the fact that most of us have many processor cores lying around doing not-all-much. + +# What does this get us? + +To start with, the resulting docker images allow us to tune the JVM settings on cassandra and elasticsearch, resulting in lower memory consumption, and faster integration tests that don't impact our systems as much. Additionally, it allows us more control of the docker images we're depending on, so that another leftpad incident on docker doesn't impact us. 
As things stand, any of the developers of these docker images can upload a new docker image that does Very Bad Things(tm), and we'll gladly download and run it many times a day. Building these images ourselves from known good GIT revisions prevents this. Additionally, the multi-architecture approach allows us to be one step closer to running the backend on more esoteric systems, like a Raspberry pi, or an AWS A instance, both of which are built on the ARM architecture. Or, if rumour is to be believed, the next release of MacBook Pros. :) + +# Breaking it down: + +## Docker: + +To start with, we're going to have to get a bit into some docker architecture. We all have used docker, and pretty much understand the following workflow: + +I build a docker image from a Dockerfile and maybe some additions, I upload it to dockerhub, and other people can download and use the image. I can use the locally built image directly, without downloading it from dockerhub, and I can share the Dockerfile and additions via git, on github, and allow others to build the image. + +While this workflow works well for working with a single architecture, we're going to have to introduce some new concepts in order to support the multiple architecture way of building docker images. + +### Manifest files. + +Manifest files are generated by docker and contain references to multiple docker images, one for each architecture a given docker image has been built for. Each image in the manifest file is tagged with the architecture that the image is built for. + +Docker contains just enough built-in logic to interpret a manifest file on dockerhub, and download an image that matches the architecture that docker was built for. When using a manifest file, this is how docker determines what image to download. + +### A Manifest centric Workflow: + +If you're building a docker image for multiple architectures, you want a Manifest, so that docker automatically grabs the right image for the user's machine.
This changes our workflow from earlier quite a bit: + +I build a docker image from a Dockerfile, and I build other images from slightly different versions of this Dockerfile (more on this later). I tag these images with a suffix, so that I can tell them apart. I upload the images to dockerhub, retaining the tags that differentiate the different versions from each other. I create a manifest file, referring to the images that have been pushed to DockerHub, and upload the manifest file to DockerHub. People can download and use the image from dockerhub by referring to the tag of the manifest file. I can share the Dockerfile and additions via git, on github, and others can build their own images from it. + +#### What does this look like? + +All of us on the team are using AMD64 based machines, so in this example, we're going to build one image for AMD64, and one for its predecessor architecture, I386. We're going to build the SMTP server image we depend on, from https://hub.docker.com/r/namshi/smtp. We're going to use a known safe git revision, and use some minor GNU sed to generate architecture dependent Dockerfiles from the Dockerfile in git. Everyone should be able to do this on their laptops. + +```bash +$ git clone https://github.com/namshi/docker-smtp.git smtp +Cloning into 'smtp'... +remote: Enumerating objects: 4, done. +remote: Counting objects: 100% (4/4), done. +remote: Compressing objects: 100% (4/4), done. +remote: Total 126 (delta 0), reused 0 (delta 0), pack-reused 122 +Receiving objects: 100% (126/126), 26.57 KiB | 269.00 KiB/s, done. +Resolving deltas: 100% (61/61), done.
+$ cd smtp +$ git reset --hard 8ad8b849855be2cb6a11d97d332d27ba3e47483f +HEAD is now at 8ad8b84 Merge pull request #48 from zzzsochi/master +$ cat Dockerfile | sed "s/^\(MAINTAINER\).*/\1 Julia Longtin \"julia.longtin@wire.com\"/" | sed "s=^\(FROM \)\(.*\)$=\1i386/\2=" > Dockerfile-386 +$ cat Dockerfile | sed "s/^\(MAINTAINER\).*/\1 Julia Longtin \"julia.longtin@wire.com\"/" | sed "s=^\(FROM \)\(.*\)$=\1\2=" > Dockerfile-amd64 +$ docker build -t julialongtin/smtp:0.0.9-amd64 -f Dockerfile-amd64 . + +$ docker build -t julialongtin/smtp:0.0.9-386 -f Dockerfile-386 . + +$ docker push julialongtin/smtp:0.0.9-amd64 + +$ docker push julialongtin/smtp:0.0.9-386 +] 271.46K --.-KB/s in 0.07s + +2019-03-06 14:27:39 (3.65 MB/s) - ‘sash_3.8-5_armel.deb’ saved [277976/277976] +$ +``` + +This deb will not install on our machine, so we're going to manually take it apart, to get the sash binary out of it. + +```bash +$ mkdir tmp +$ cd tmp +$ ar x ../sash_3.8-5_armel.deb +$ ls + control.tar.xz data.tar.xz +$ tar -xf data.tar.xz +$ ls -la bin/sash +-rwxr-xr-x 1 demo demo 685348 Jun 9 2018 bin/sash +``` + +To verify what architecture this binary is built for, use the 'file' command. +```bash +$ file bin/sash +bin/sash: ELF 32-bit LSB executable, ARM, EABI5 version 1 (SYSV), statically linked, for GNU/Linux 3.2.0, BuildID[sha1]=20641a8ca21b2c320ea7e6079ec88b857c7cbcfb, stripped +$ +``` + +Now we can run this, and even run AMD64 programs that are on our own machine using it. +```bash +$ bin/sash +Stand-alone shell (version 3.8) +> file bin/sash +bin/sash: ELF 32-bit LSB executable, ARM, EABI5 version 1 (SYSV), statically linked, for GNU/Linux 3.2.0, BuildID[sha1]=20641a8ca21b2c320ea7e6079ec88b857c7cbcfb, stripped +> ls +bin usr +> uname -a +Linux boxtop 4.9.0-8-amd64 #1 SMP Debian 4.9.144-3 (2019-02-02) x86_64 GNU/Linux +> whoami +demo +> +``` + +## QEMU, BinFmt, and Docker (Oh my!)
+ +After following the directions in the last two sections, you've created two docker images (one for i386, one for AMD64), created a manifest referring to them, set up linux to load qemu and use it, and launched a binary for another architecture. + +Creating non-native docker images can now be done very similarly to how i386 was done earlier. + +Because you are using a system emulator, your docker builds for non-x86 will be slower. Additionally, the emulators are not perfect, so some images won't build. Finally, code is just less tested on machines that are not an AMD64 machine, so there are generally more bugs. + +### Arm Complications: +The 32 bit version of arm is actually divided into versions, and not all linux distributions are available for all versions. arm32v5 and arm32v7 are supported by debian, while arm32v6 is supported by alpine. This variant must be specified during manifest construction, so to continue with our current example, these are the commands for tagging the docker images for our arm32v5 and arm32v7 builds of smtp: +```bash +$ docker manifest annotate julialongtin/smtp:0.0.9 julialongtin/smtp:0.0.9-arm32v5 --arch arm --variant 5 +$ docker manifest annotate julialongtin/smtp:0.0.9 julialongtin/smtp:0.0.9-arm32v7 --arch arm --variant 7 +``` + + +# Into the GNU Make Abyss + +Now that we've done all of the above, we should be capable of working with docker images independent of the architecture we're targeting. Now, into the rabbit hole we go, automating everything with GNU Make. + +## Why Make? +GNU make is designed to build targets by looking at the environment it's in, and executing a number of rules depending on what it sees, and what it has been requested to do. The Makefile we're going to look through does all of the above, along with making some minor changes to the docker images. It does this in parallel, calling as many of the commands at once as possible, in order to take advantage of idle cores.
+ +## Using the Makefile + +Before we take the Makefile apart, let's go over using it. + +This Makefile is meant to be used in four ways: building a set of images, pushing (and building) a set of images, building a single image, or pushing (and building) a single image. It follows the manifest workflow we documented earlier. + +By default, running 'make' in the same directory as the Makefile (assuming you've set all of the above up correctly) will attempt to build and push all of the docker images the makefile knows about to dockerhub. If you want this to work, you need to create a dockerhub account, use 'docker login' to log your local instance of docker in to dockerhub, then you need to create a repository for each docker image. + +To get a list of the names of the docker images this Makefile knows about, run 'make names'. +```bash +$ make names +Debian based images: +airdock_fakesqs airdock_rvm airdock_base smtp dynamodb_local cassandra +Alpine based images: +elasticsearch java_maven_node_python localstack minio +$ +``` + +The list of names is divided into two groups. one group is for images based on debian, and the other is for images based on alpine. This makefile can only build for one of these two distributions at once. + +Since no-one wants to click through dockerhub to create repositories, let's just build docker images locally, for now. + +Make looks at its environment in order to decide what to do, so here are some environment variables that we're going to use. all of these variables have default values, so we're only going to provide a few of them. + +- `ARCHES`: the list of architectures we're going to attempt docker builds for. Mac users should supply "386 amd64" to this, as they have no binfmt support. +- `DIST`: the distribution we're going to build for. this can be either DEBIAN or ALPINE. +- `DOCKER_USERNAME`: our username on dockerhub. +- `DOCKER_EMAIL`: Our email address, as far as dockerhub is concerned. +- `DOCKER_REALNAME`: again, our name string that will be displayed in DockerHub.
+- `SED`: which sed binary to use. Mac users should install GSED, and pass the path to it in this variable. + +To build all of the debian based images locally on my machine, I run +```bash +make DIST=DEBIAN DOCKER_USERNAME=julialongtin DOCKER_EMAIL=julia.longtin@wire.com DOCKER_REALNAME='Julia Longtin' build-all -j +``` + +What's the -j for? adding a '-j' to the command line causes make to execute in parallel. That's to say, it will try to build ALL of the images at once, taking care to build images that are dependencies of other images before building the images that depend on them. + +Note that since we are building the images without pushing them to DockerHub, no manifest files are generated. + +If we want to use these images in our docker compose, we can edit the docker compose file, and refer to the image we want with its architecture suffix attached. This will make docker-compose use the local copy, instead of hitting DockerHub, grabbing the manifest, and using an image from there. for instance, to use the local cassandra image I just built, I would edit the docker-compose.yaml file in our wire-server repo, and make the cassandra section look like the following: + +``` + cassandra: + container_name: demo_wire_cassandra + #image: cassandra:3.11.2 + image: julialongtin/cassandra:0.0.9-amd64 + ports: + - "127.0.0.1:9042:9042" + environment: +# what's present in the jvm.options file by default. +# - "CS_JAVA_OPTIONS=-Xmx1024M -Xms1024M -Xmn200M" + - "CS_JVM_OPTIONS=-Xmx128M -Xms128M -Xmn50M" + networks: + - demo_wire +``` + +To remove all of the git repositories containing the Dockerfiles we download to build these images, we can run `make clean`. There is also the option to run `make cleandocker` to REMOVE ALL OF THE DOCKER IMAGES ON YOUR MACHINE. careful with that one.
Note that docker makes good use of caching, so running 'make clean' and the same make command you used to build the images will complete really fast, as docker does not actually need to rebuild the images. + +## Reading through the Makefile + +OK, now that we have a handle on what it does, and how to use it, let's get into the Makefile itsself. + +A Makefile is a series of rules for performing tasks, variables used when creating those tasks, and some minimal functions and conditional structures. Rules are implemented as groups of bash commands, where each line is handled by a new bash interpreter. Personally, I think it 'feels functiony', only without a type system and with lots of side effects. Like if bash tried to be functional. + +### Variables + +#### Overrideable Variables +the make language has multiple types of variables and variable assignments. To begin with, let's look at the variables we used in the last step. +```bash +$ cat Makefile | grep "?=" +DOCKER_USERNAME ?= wireserver +DOCKER_REALNAME ?= Wire +DOCKER_EMAIL ?= backend@wire.com +TAGNAME ?= :0.0.9 +DIST ?= DEBIAN +LOCALARCH ?= $(call dockerarch,$(LOCALDEBARCH)) + ARCHES ?= $(DEBARCHES) + NAMES ?= $(DEBNAMES) + ARCHES ?= $(ALPINEARCHES) + NAMES ?= $(ALPINENAMES) +SED ?= sed +SMTP_COMMIT ?= 8ad8b849855be2cb6a11d97d332d27ba3e47483f +DYNAMODB_COMMIT ?= c1eabc28e6d08c91672ff3f1973791bca2e08918 +ELASTICSEARCH_COMMIT ?= 06779bd8db7ab81d6706c8ede9981d815e143ea3 +AIRDOCKBASE_COMMIT ?= 692625c9da3639129361dc6ec4eacf73f444e98d +AIRDOCKRVM_COMMIT ?= cdc506d68b92fa4ffcc7c32a1fc7560c838b1da9 +AIRDOCKFAKESQS_COMMIT ?= 9547ca5e5b6d7c1b79af53e541f8940df09a495d +JAVAMAVENNODEPYTHON_COMMIT ?= 645af21162fffd736c93ab0047ae736dc6881959 +LOCALSTACK_COMMIT ?= 645af21162fffd736c93ab0047ae736dc6881959 +MINIO_COMMIT ?= 118270d76fc90f1e54cd9510cee9688bd717250b +CASSANDRA_COMMIT ?= 064fb4e2682bf9c1909e4cb27225fa74862c9086 +``` + +The '?=' assignment operator is used to provide a default value. 
When earlier, we ran make as "make DIST=DEBIAN DOCKER_USERNAME=julialongtin DOCKER_EMAIL=julia.longtin@wire.com DOCKER_REALNAME='Julia Longtin' build-all -j", we were overriding those values. the Make interpreter will use values provided on the command line, or values we have used 'export' to place into our shell environment. + +LOCALARCH and the assignments for ARCHES and NAMES are a bit different. LOCALARCH is a function call, and the ARCHES and NAMES are embedded in conditional statements. We'll cover those later. + +Note the block of COMMIT IDs. This is in case we want to experiment with newer releases of each of the docker images we're using. Fixing what we're using to a commit ID makes it much harder for an upstream source to send us malicious code. + +#### Non-Overrideable Variables +The following group of variables use a different assignment operator, that tells make not to look in the environment first. +```bash +$ cat Makefile | grep ":=" +USERNAME := $(DOCKER_USERNAME) +REALNAME := $(DOCKER_REALNAME) +EMAIL := $(DOCKER_EMAIL) +STRETCHARCHES := arm32v5 arm32v7 386 amd64 arm64v8 ppc64le s390x +JESSIEARCHES := arm32v5 arm32v7 386 amd64 +DEBARCHES := arm32v5 arm32v7 386 amd64 +JESSIENAMES := airdock_fakesqs airdock_rvm airdock_base smtp +STRETCHNAMES := dynamodb_local cassandra +DEBNAMES := $(JESSIENAMES) $(STRETCHNAMES) +ALPINEARCHES := amd64 386 arm32v6 +ALPINENAMES := elasticsearch java_maven_node_python localstack minio +PREBUILDS := airdock_rvm-airdock_base airdock_fakesqs-airdock_rvm localstack-java_maven_node_python +NOMANIFEST := airdock_rvm airdock_fakesqs localstack +LOCALDEBARCH := $(shell [ ! -z `which dpkg` ] && dpkg --print-architecture) +BADARCHSIM := localstack-arm32v6 java_maven_node_python-arm32v6 dynamodb_local-386 +$ +``` + +The first three variable assignments are referring to other variables. These basically exist as aliases, to make our make rules denser later.
+ +STRETCHARCHES and JESSIEARCHES contain the list of architectures that dockerhub's debian stretch and jessie images provide. DEBARCHES defines what architectures we're going to build, for our debian targets. STRETCHARCHES and JESSIEARCHES only exist to make it visible to readers of the Makefile which images CAN be built for which architectures. + +JESSIENAMES and STRETCHNAMES are used similarly, only they are actually referred to by DEBNAMES, to provide the list of debian based images that can be built. + +ALPINEARCHES and ALPINENAMES work similarly, and are used when we've provided "DIST=ALPINE". We do not divide into separate variables quite the same way as debian, because all of our alpine images are based on alpine 3.7. + +PREBUILDS contains our dependency map. essentially, this is a set of pairs of image names, where the first image mentioned depends on the second image. so, airdock_rvm depends on airdock_base, where airdock_fakesqs depends on airdock_rvm, etc. this means that our docker image names may not contain `-`s. Dockerhub allows it, but this makefile needed a separator... and that's the one I picked. + +BADARCHSIM is similar, pairing the name of an image with the architecture it fails to build on. This is so I can blacklist things that don't work yet. + +LOCALDEBARCH is a variable set by executing a small snippet of bash. The snippet makes sure dpkg is installed (the debian package manager), and uses dpkg to determine what the architecture of your local machine is. As you remember from when we were building docker images by hand, docker will automatically fetch an image that is compiled for your current architecture, so we use LOCALDEBARCH (by way of LOCALARCH) later to decide what architectures we need to fetch with a prefix or postfix, and which we can fetch normally. + +NOMANIFEST lists images that need a work-around for fetching image dependencies for specific architectures. You know how we added the name of the architecture BEFORE the image name in the dockerfiles?
well, in the case of the dependencies of the images listed here, dockerhub isn't supporting that. DockerHub is supporting that form only for 'official' docker images, like alpine, debian, etc. as a result, in order to fetch an architecture specific version of the dependencies of these images, we need to add a - suffix. like -386 -arm32v7, etc. + +### Conditionals +We don't make much use of conditionals, but there are three total uses in this Makefile. let's take a look at them. + +In order to look at our conditionals (and many other sections of this Makefile later), we're going to abuse sed. If you're not comfortable with the sed shown here, or are having problems getting it to work, you can instead just open the Makefile in your favorite text editor, and search around. I abuse sed here for both brevity, and to encourage the reader to understand complicated sed commands, for when we are using them later IN the Makefile. + +SED ABUSE: +to get our list of conditionals out of the Makefile, we're going to use some multiline sed. specifically, we're going to look for a line starting with 'ifeq', lines starting with two spaces, then the line following. + +```bash +$ cat Makefile | sed -n '/ifeq/{:n;N;s/\n /\n /;tn;p}' +ifeq ($(LOCALARCH),) + $(error LOCALARCH is empty, you may need to supply it.) + endif +ifeq ($(DIST),DEBIAN) + ARCHES ?= $(DEBARCHES) + NAMES ?= $(DEBNAMES) +endif +ifeq ($(DIST),ALPINE) + ARCHES ?= $(ALPINEARCHES) + NAMES ?= $(ALPINENAMES) +endif +$ +``` + +There's a lot to unpack there, so let's start with the simple part, the conditionals. +The conditionals are checking for equality, in all cases. +First, we check to see if LOCALARCH is empty. This can happen if dpkg was unavailable, and the user did not supply a value on the make command line or in the user's bash environment. if that happens, we use make's built in error function to display an error, and break out of the Makefile. 
+The second and third conditionals decide on the values of ARCHES and NAMES. Earlier, we determined the default selection for DIST was DEBIAN, so this pair just allows the user to select ALPINE instead. note that the variable assignments in the conditionals are using the overrideable form, so the end user can override these on make's command line or in the user's environment. mac users will want to do this, since they don't have QEMU available in the same form, and are limited to building X86 and AMD64 architecture. + +Note that conditionals are evaluated when the file is read, once. This means that we don't have the ability to use them in our rules, or in our functions, and have to abuse other operations in 'functionalish' manners... + +Now, back to our sed abuse. +SED is a stream editor, and quite a powerful one. In this case, we're using it for a multi-line search. we're supplying the -n option, which squashes all output, except what sed is specifically told to print with a command. +Let's look at each of the commands in that statement separately. +```sed +# find a line that has 'ifeq' in it. +/ifeq/ +# begin a block of commands. every command in the block should be separated by a semicolon. +{ +# create an anchor, that is to say, a point that can be branched to. +:n; +# Append the next line into the pattern space. so now, for the first block, the pattern space would include "ifeq ($(LOCALARCH),)\n $(error LOCALARCH is empty, you may need to supply it.)". +N; +# Replace the two spaces in the pattern space with one space. +s/\n /\n /; +# If the previous 's' command found something, and changed something, go to our label. +tn; +# print the contents of the pattern space. +p +# close the block of commands. +} +``` +... Simple, right? + +note that the contents above can be stored to a file, and run with sed's "-f" option, for more complicated sed scripts. Sed is turing complete, so... things like tetris have been written in it.
My longest sed scripts do things like sanity check OS install procedures, or change binaryish protocols into xmlish forms. + +### Functions +Make has a concept of functions, and the first two functions we use are a bit haskell inspired. + +SED ABUSE: +To get a list of the functions in our makefile, we're going to use a bit more traditional sed. specifically, we're going to look for lines that start with a number of lowercase characters that are immediately followed by an '=' sign. + +```bash +$ cat Makefile | sed -n '/^[a-z]*=/p' +dockerarch=$(patsubst i%,%,$(patsubst armel,arm32v5,$(patsubst armhf,arm32v7,$(patsubst arm64,arm64v8,$(1))))) +fst=$(word 1, $(subst -, ,$(1))) +snd=$(word 2, $(subst -, ,$(1))) +goodarches=$(filter-out $(call snd,$(foreach arch,$(ARCHES),$(filter $(1)-$(arch),$(BADARCHSIM)))),$(ARCHES)) +nodeps=$(filter-out $(foreach target,$(NAMES),$(call snd,$(foreach dependency,$(NAMES),$(filter $(target)-$(dependency),$(PREBUILDS))))),$(NAMES)) +maniarch=$(patsubst %32,%,$(call fst,$(subst v, ,$(1)))) +manivariant=$(foreach variant,$(word 2, $(subst v, ,$(1))), --variant $(variant)) +archpostfix=$(foreach arch,$(filter-out $(filter-out $(word 3, $(subst -, ,$(filter $(call snd,$(1))-%-$(call fst,$(1)),$(foreach prebuild,$(PREBUILDS),$(prebuild)-$(call fst,$(1)))))),$(LOCALARCH)),$(call fst,$(1))),-$(arch)) +archpath=$(foreach arch,$(patsubst 386,i386,$(filter-out $(LOCALARCH),$(1))),$(arch)/) +$ +``` + +These are going to be a bit hard to explain in order, especially since we haven't covered where they are being called from. Let's take them from simplest to hardest, which happens to co-incide with shortest, to longest. + +The fst and snd functions are what happens when a haskell programmer is writing make. You remember all of the pairs of values earlier, that were seperated by a single '-' character? these functions return either the first, or the second item in the pair. Let's unpack 'fst'. 
+fst uses the 'word' function of make to retrieve the first word from "$(subst -, ,$(1))". the 'subst' function replaces each dash with a single space. this separates a dash-joined pair into a space separated string. $(1) is the first argument passed to this function. +snd works similarly, retrieving the second item from our pair. + +The next easiest to explain function is 'maniarch'. It returns the architecture string that we use when annotating a docker image. When we refer to an architecture, we use a string like 'amd64' or 'arm32v6', but docker manifest wants just 'arm' 'amd64' or '386'. +maniarch first uses the 'patsubst' command to replace "anystring32" with "anystring". this removes the 32 from arm32. It's given the result of $(call fst,$(subst v, ,$(1)))) as a string to work with. +$(call fst,$(subst v, ,$(1)))) calls our 'fst' function, giving it the result of us replacing 'v' with ' ' in the passed in argument. in the case of arm32v6, it separates the string into "arm32 6". Note that instead of calling fst, we could have just used 'word 1' like we did in fst. This is a mistake on my part, but it works regardless, because of the way fst is built. as before, $(1) is the argument passed into our function. + +manivariant has a similar function to maniarch. Its job is to take an architecture name (amd64, arm32v5, etc...), and if it has a 'v', to return the '--variant ' command line option for our 'docker manifest annotate'. +manivariant starts by using make's 'foreach' function. this works by breaking its second argument into words, storing them into the variable name given in the first argument, and then generating text using the third option. this is a bit abusive, as we're really just using it as "if there is a variant, add --variant " structure. +The first argument of foreach is the name of a variable. we used 'variant' here. the second argument in this case properly uses word, and subst to return only the content after a 'v' in our passed in argument, or emptystring.
the third option is ' --variant $(variant)', using the variable defined in the first parameter of foreach to create " --variant 5" if this is passed "arm32v5", for instance. + +archpath is similar in structure to manivariant. In order to find a version of a docker image that is appropriate for our non-native architectures, we have to add the 'archname/' string to the path to the image we're deriving from, in our Dockerfile. This function returns that string. We start by using foreach in a similar method as manivariant, to only return a string if the second argument to foreach evaluates to content. In our second argument, we begin by performing a patsubst, replacing a '386' with an 'i386' if it's found in the patsubst argument. This is because on dockerhub, official images of different architectures are actually stored in a series of machine maintained accounts, and an account name can't start with a number. therefore, 386 images are stored under a user called 'i386'. As an argument to the patsubst, we're providing our first usage of filter-out. it's used here so that if the local architecture was supplied to this function, the string will return empty in section 2 of our foreach, and therefore the third section won't even be evaluated. + +our next function to explain is 'goodarches'. This function is passed an image name, and returns all of the arches from our architecture list that that image can be built for. It basically searches BADARCHSIM from earlier, and removes an architecture from the returned copy of ARCHES if a - pair for that architecture exists. We use filter-out to remove anything returned from its first argument from the ARCHES list we provide as its second argument. The first argument to filter-out uses snd to separate the architecture name from a string found, and uses foreach and filter to search BADARCHSIM for all possible combinations between the passed in image name, and all of the architectures.
+ +dockerarch is a bit simpler than the last few. it takes the debian architecture name, replacing it with the docker architecture name, using a series of nested patsubst substitutions. + +Unlike our earlier functions, nodeps does not require an argument. Its function is to return a list of images from NAMES that do not have any dependencies. To do this, we start with a filter-out of NAMES, then use a pair of nested foreach statements, both searching through NAMES, and constructing all combinations of -. This value is looked for in PREBUILDS, and if a combination is found, we use snd to return the dependency to filter-out. this is probably overly complicated, and can likely be shortened by the use of patsubst. "it works, ship it." + +Finally, we get to archpostfix. archpostfix has a similar function to archpath, only it provides a - for the end of the image path if the DEPENDENCY of this image is not an official image, and therefore can not be found by adding an 'arch/' prefix. This is long, and probably also a candidate for shortening. Reading your way through this one is an exercise for when the reader wants a reverse polish notation headache. + + +To summarize the Make functions we've (ab)used in this section: +``` +$(word 1,string of words) # return the Nth word in a space separated string. +$(subst -, ,string-of-words) # replace occurrences of '-' with ' ' in string. +$(patsubst string%,%string) # replace a pattern with another pattern, using % as a single wildcard. +$(call function,argument) # function calling. +$(foreach var,string,$(var)) # iterate on a space separated string, evaluating the last argument with var set to each word in string. +$(filter word,word list) # return word if it is found in word list. +$(filter-out word, word list) # return word list without word. +``` + +Now after all of that, let's go through the SED command we last used. Remember that?
+```bash +$ cat Makefile | sed -n '/^[a-z]*=/p' +``` +Again, we're going to use sed in '-n' mode, suppressing all output except the output we are searching for. /PATTERN/ searches the lines of the input for a pattern, and if it's found, the command afterward is executed, which is a 'p' for print, in this case. the pattern given is '^[a-z]*='. The '^' at the beginning means 'look for this pattern at the beginning of the line', and the '=' at the end is the equal sign we were looking for. '[a-z]*' is us using a character class. character classes are sedspeak for sets of characters. they can be individually listed, or in this case, be a character range. the '*' after the character class just means "look for these characters any number of times". technically, that means a line starting in '=' would work (since zero is any number of times), but luckily, our file doesn't contain lines starting with =, as this is not valid make syntax. + +### Rules. + +Traditionally, makefiles are pretty simple. they are used to build a piece of software on your local machine, so you don't have to memorize all of the steps, and can type 'make', and have it just done. A simple Makefile looks like the following: +```make +CC=gcc +CFLAGS=-I. +DEPS = hellomake.h + +%.o: %.c $(DEPS) + $(CC) -c -o $@ $< $(CFLAGS) + +hellomake: hellomake.o hellofunc.o + $(CC) -o hellomake hellomake.o hellofunc.o + +clean: + rm hellomake hellomake.o hellofunc.o +``` +This example Makefile has some variables, and rules, that are used to build a C program into an executable, using GCC. + +Our Makefile is much more advanced, necessitating this document, to ensure maintainability.
+ + +A single make rule is divided into three sections: what you want to build, what you need to build first, and the commands you run to build the thing in question: +```make +my_thing: things I need first + bash commands to build it + +target: prerequisites + recipe line 1 +``` + +The commands to build a thing (recipe lines) are prefaced with a tab character, and not spaces. Each line is executed in a separate shell instance. + + +#### The roots of the trees + +In the section where we showed you how to use our Makefile, we were calling 'make' with an option, such as push-all, build-smtp, names, or clean. We're now going to show you the rules that implement these options. + +SED ABUSE: +This time, we're going to add the -E option to sed. this kicks sed into the 'extended regex' mode, meaning for our purposes, we don't have to put a \ before a ( or a ) in our regex. we're then going to use a pattern grouping, to specify that we want either the clean or names rules. we're also going to swap the tabs for spaces, to prevent our substitution command from always matching, and not even visibly change the output. total cheating. +```bash +$ cat Makefile | sed -n -E '/^(clean|names)/{:n;N;s/\n\t/\n /;tn;p}' +clean: + rm -rf elasticsearch-all airdock_base-all airdock_rvm-all airdock_fakesqs-all cassandra-all $(DEBNAMES) $(ALPINENAMES) + +cleandocker: + docker rm $$(docker ps -a -q) || true + docker rmi $$(docker images -q) --force || true + +names: + @echo Debian based images: + @echo $(DEBNAMES) + @echo Alpine based images: + @echo $(ALPINENAMES) +``` + +Most Makefiles change their environment. Having changed their environment, most users want a quick way to set the environment back to default, so they can make changes, and build again. to enable this, as a convention, most Makefiles have a 'clean' rule. Ours removes the git repos that we build the docker images from.
note the hardcoded list of '-all' directories: these are the git repos for images where the git repo does not simply have a Dockerfile at the root of the repo. In those cases, our rules that check out the repos check them out to -all, then do Things(tm) to create a /Dockerfile. + +cleandocker is a rule I use on my machine, when docker images have gotten out of control. it removes all of the docker images on my machine, and is not meant to be regularly run. + +names displays the names of the images this Makefile knows about. It uses a single @ symbol at the beginning of each recipe line. this tells make that it should NOT display the command that make is running, when make runs it. + +OK, that covers the simple make rules, that have no dependencies, or parameters. Now let's take a look at our build and push rules. these are the 'top' of a dependency tree, which is to say they depend on things, that depend on things... that do the thing we've asked for. + +```bash +$ cat Makefile | sed -n -E '/^(build|push|all)/{:n;N;s/\n\t/\n /;tn;p}' +all: $(foreach image,$(nodeps),manifest-push-$(image)) + +build-%: $$(foreach arch,$$(call goodarches,%),create-$$(arch)-$$*) + @echo -n + +build-all: $(foreach image,$(nodeps),build-$(image)) + +push-%: manifest-push-% + @echo -n + +push-all: $(foreach image,$(nodeps),manifest-push-$(image)) +$ +``` + +Let's take these simplest to most complex. + +push-% is the rule called when we run 'make push-'. It depends on manifest-push-%, meaning that make will take whatever you've placed after the 'push-', look for a rule called manifest-push-, and make sure that rule completes, before trying to execute this rule. Executing this rule just executes nothing, and in reality, the '@echo -n' exists to allow the push-% rule to be executed. By default, make considers wildcard rules as phony, meaning they cannot be called from the command line, and must be called from a rule with no wildcarding.
+ +push-all is allowed to have no commands, because its name contains no wildcard operator. In its dependency list, we're using a foreach loop to go through our list of images that have no dependencies, and ask for manifest-push- to be built. + +all is identical to push-all. I could have just depended on push-all, and saved some characters here. + +build-all operates similarly to push-all, only it asks for build- to be built for all of the no-dependency images. + +build-% combines the approach of push-% and build-all. It uses foreach to request the build of create--, which builds one docker image for each architecture that we know this image will build on. This is our first exposure to $$ structures, so let's look at those a bit. + +By default, make allows for one % in the build-target, and one % in the dependencies. it takes what it matches the % against in the build-target, and substitutes the first % found in the dependency list with that content. so, what do you do if you need to have the thing that was matched twice in the dependency list? enter .SECONDEXPANSION. + +```bash +$ cat Makefile | sed -n -E '/^(.SECOND)/{:n;N;s/\n\t/\n /;tn;p}' | less +.SECONDEXPANSION: + +``` + +.SECONDEXPANSION looks like a rule, but really, it's a flag to make, indicating that dependency lists in this Makefile should be expanded twice. During the first expansion, things will proceed as normal, and everything with two dollar signs will be ignored. during the second expansion things that were delayed by using two dollar signs are run, AND a set of variables that is normally only available in the 'recipe' section becomes available. In the case we're looking at, this means that during the first expansion, only the "%" character will be interpreted. during the second expansion the foreach and call will actually be executed, and the $$* will be expanded the same way as $* will be in the recipe section, namely, exactly identical to the % expansion in the first expansion.
This effectively gives us two instances of %, the one expanded in the first expansion, and $$* expanded in the second expansion. + +build-% also uses the same 'fake recipe' trick as push-%, that is, having a recipe that does nothing, to trick make into letting you run this. + +#### One Level Deeper + +The rules you've seen so far were intended for user interaction. they are all rules that the end user of this Makefile picks between, when deciding what they want this makefile to do. Let's look at the rules that these depend on. + +```bash +$ cat Makefile | sed -n -E '/^(manifest-push)/{:n;N;s/\n\t/\n /;tn;p}' +manifest-push-%: $$(foreach arch,$$(call goodarches,$$*), manifest-annotate-$$(arch)-$$*) + docker manifest push $(USERNAME)/$*$(TAGNAME) + +$ +``` + +manifest-push-% should be relatively simple for you now. the only thing new here, is you get to see $* used in the construction of our docker manifest push command line. Let's follow the manifest creation down a few more steps. + +```bash +$ cat Makefile | sed -n -E '/^(manifest-ann|manifest-crea)/{:n;N;s/\n\t/\n /;tn;p}' +manifest-annotate-%: manifest-create-$$(call snd,$$*) + docker manifest annotate $(USERNAME)/$(call snd,$*)$(TAGNAME) $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) --arch $(call maniarch,$(call fst,$*)) $(call manivariant,$(call fst,$*)) + +manifest-create-%: $$(foreach arch,$$(call goodarches,%), upload-$$(arch)-$$*) + docker manifest create $(USERNAME)/$*$(TAGNAME) $(patsubst %,$(USERNAME)/$*$(TAGNAME)-%,$(call goodarches,$*)) --amend + +``` + +manifest-push depends on manifest-annotate, which depends on manifest-create, that depends on upload-... so when make tries to push a manifest, it makes sure an image has been uploaded, then creates a manifest, then annotates the manifest. We're basically writing rules for each step of our manifest, only backwards. continuing this pattern, the last thing we will depend on will be the rules that actually download the dockerfiles from git. 
+ +#### Dependency Resolving + +We've covered the entry points of this Makefile, and the chained dependencies that create, annotate, and upload a manifest file. now, we get into two seriously complicated sets of rules, the upload rules and the create rules. These accomplish their tasks of uploading and building docker containers, but at the same time, they accomplish our dependency resolution. Let's take a look. + +```bash +$ cat Makefile | sed -n -E '/^(upload|create|my-|dep)/{:n;N;s/\n\t/\n /;tn;p}' + +upload-%: create-% $$(foreach predep,$$(filter $$(call snd,%)-%,$$(PREBUILDS)), dep-upload-$$(call fst,$$*)-$$(call snd,$$(predep))) + docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat + +dep-upload-%: create-% $$(foreach predep,$$(filter $$(call snd,%)-%,$$(PREBUILDS)), dep-subupload-$$(call fst,$$*)-$$(call snd,$$(predep))) + docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat + +dep-subupload-%: create-% + docker push $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) | cat + +create-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$* $$(foreach predep,$$(filter $$(call snd,%)-%,$(PREBUILDS)), depend-create-$$(call fst,$$*)-$$(call snd,$$(predep))) + cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . | cat + +depend-create-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$* $$(foreach predep,$$(filter $$(call snd,%)-%,$(PREBUILDS)), depend-subcreate-$$(call fst,$$*)-$$(call snd,$$(predep))) + cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . | cat + +depend-subcreate-%: Dockerfile-$$(foreach target,$$(filter $$(call snd,$$*),$(NOMANIFEST)),NOMANIFEST-)$$* + cd $(call snd,$*) && docker build -t $(USERNAME)/$(call snd,$*)$(TAGNAME)-$(call fst,$*) -f Dockerfile-$(call fst,$*) . 
| cat + +$ +``` + +First, let's tackle the roles of these rules. the *upload* rules are responsible for running docker push, while the *create* rules are responsible for running docker build. All of the upload rules depend on the first create rule, to ensure what they want to run has been built. + +These rules are set up in groups of three: + +upload-% and create-% form the root of these groups. upload-% depends on create-%, and create-% depends on the creation of a Dockerfile for this image, which is the bottom of our dependency tree. + +upload-%/create-% depend on two rules: dep-upload-%/depend-create-%, which handle the upload/create for the image that THIS image depends on. There are also dep-subupload-% and depend-subcreate-% rules, to handle the dependency of the dependency of this image. + +This dependency-of, and dependency-of-dependency logic is necessary because Make will not let us run a recursive rule: no rule can be in one branch of the dependency graph more than once. so instead, the root of our dependency tree either starts with a single image, or with a list of images that are the root of their own dependency graphs. + + +Now let's look at the rules themselves. +upload-% has a dependency on create-%, to ensure what it wants to upload already exists. additionally, it has a dependency that uses foreach and filter to look through the list of PREBUILDS, and depend on `dep-upload-<arch>-<image>` for any images this image depends on. + +dep-upload-% is virtually identical to upload-%, also searching through PREBUILDS for possible dependencies, and depending on dep-subupload to build them. + +dep-subupload does no dependency search, but has an identical docker push recipe to upload, and dep-upload. + +create-%, depend-create-%, and depend-subcreate-% work similarly to the upload rules, calling docker build instead of a docker push, and depending on the Dockerfile having been created.
When depending on the Dockerfile, we look through the NOMANIFEST list, and insert "NOMANIFEST-" in the name of the dependency on the dockerfile. This is so that we depend on the NOMANIFEST variant if the image we are building requires us to use a postfix on the image name to access a version for a specified architecture. otherwise, we run the Dockerfile-% rule that uses a prefix (i386/, amd64/, etc) to access the docker image we are building from. + +It's worth noting that for all of these *create* and *upload* rules, we pipe the output of docker to cat, which causes docker to stop trying to draw progress bars. This seriously cleans up the build output. + + +#### Building Dockerfiles. + +There are two rules for creating Dockerfiles, and we decide in the *create* rules which of these to use by looking at the NOMANIFEST variable, and adding NOMANIFEST- in the name of the rule we depend on for dockerfile creation. + +The rules are relatively simple: +```bash +$ cat Makefile | sed -n -E '/^(Dock)/{:n;N;s/\n\t/\n /;tn;p}' +Dockerfile-NOMANIFEST-%: $$(call snd,%)/Dockerfile + cd $(call snd,$*) && cat Dockerfile | ${SED} "s/^\(MAINTAINER\).*/\1 $(REALNAME) \"$(EMAIL)\"/" | ${SED} "s=^\(FROM \)\(.*\)$$=\1\2$(call archpostfix,$*)=" > Dockerfile-$(call fst,$*) + +Dockerfile-%: $$(call snd,%)/Dockerfile + cd $(call snd,$*) && cat Dockerfile | ${SED} "s/^\(MAINTAINER\).*/\1 $(REALNAME) \"$(EMAIL)\"/" | ${SED} "s=^\(FROM \)\(.*\)$$=\1$(call archpath,$(call fst,$*))\2=" > Dockerfile-$(call fst,$*) +$ +``` + +These two rules depend on the checkout of the git repos containing the Dockerfiles. they do this by depending on `<image>/Dockerfile`. The rules are responsible for the creation of individual architecture specific derivatives of the Dockerfile that is downloaded. additionally, the rules set the MAINTAINER of the docker image to be us.
Most of the heavy lifting of these rules is being done in the archpostfix, and archpath functions, which are being used in a sed expression to either postfix or prefix the image that this image is built from. + + +Let's take a look at that sed with a simpler example: + +SED ABUSE: +```bash +$ echo "FROM image-version" | sed "s=^\(FROM \)\(.*\)$=\1i386/\2=" +FROM i386/image-version +$ +``` + +Unlike our previous sed commands, which have all been forms of "look for this thing, and display it", with the 's' command basically being abused as a test, this one intentionally is making a change. + +'s' commands are immediately followed by a character, that is used to separate and terminate two blocks of text: the part we're looking for (match section), and the part we're replacing it with (substitution section). Previously, we've used '/' as the character following a 's' command, but since we're using '/' in the text we're placing into the file, we're going to use the '=' character instead. We've covered the '^' character at the beginning of the pattern being an anchor for "this pattern should be found only at the beginning of the line". In the match section of this command, we're introducing "$" as the opposite anchor: $ means "end of line.". we're not using a -E on the command line, so are forced to use "\" before our parentheses for our matching functions. this is a purely stylistic decision. the .* in the second matching section stands for 'any character, any number of times', which will definitely match against our dependent image name. + +The match section of this sed command basically translates to "at the beginning of the line, look for "FROM ", store it, and store anything else you find up to the end of the line.". These two store operations get placed in sed variables, named \1, and \2. a SED command can have up to nine variables, which we are using in the substitution section.
+ +The substitution section of this sed command uses the \1 and \2 variable references to wrap the string "i386/". this effectively places i386/ in front of the image name. + +Because we are using that sed command in a Makefile, we have to double up the "$" symbol, to prevent make from interpreting it as a variable. In the first sed command in these rules, we're also doing some minor escaping, adding a '\' in front of some quotes, so that our substitution of the maintainer has quotes around the email address. + +#### Downloading Dockerfiles + +Finally, we are at the bottom of our dependency tree. We've followed this in reverse order, but when we actually ask for things to be pushed, or to be built, these rules are the first ones run. + +There are a lot of these, of various complexities, so let's start with the simple ones first. + +##### Simple Checkout + +```bash +$ cat Makefile | sed -n -E '/^(smtp|dynamo|minio)/{:n;N;s/\n\t/\n /;tn;p}' +smtp/Dockerfile: + git clone https://github.com/namshi/docker-smtp.git smtp + cd smtp && git reset --hard $(SMTP_COMMIT) + +dynamodb_local/Dockerfile: + git clone https://github.com/cnadiminti/docker-dynamodb-local.git dynamodb_local + cd dynamodb_local && git reset --hard $(DYNAMODB_COMMIT) + +minio/Dockerfile: + git clone https://github.com/minio/minio.git minio + cd minio && git reset --hard $(MINIO_COMMIT) + +``` + +These rules are simple. They git clone a repo, then reset the repo to a known good revision. This isolates us from potential breakage from upstreams, and protects us against someone stealing git credentials for our upstreams, and using those credentials to publish a malicious version.
+ +##### Checkout with Modifications + +A bit more complex rule is localstack/Dockerfile: +```bash +$ cat Makefile | sed -n -E '/^(localsta)/{:n;N;s/\n\t/\n /;tn;p}' +localstack/Dockerfile: + git clone https://github.com/localstack/localstack.git localstack + cd localstack && git reset --hard $(LOCALSTACK_COMMIT) + ${SED} -i.bak "s=localstack/java-maven-node-python=$(USERNAME)/java_maven_node_python$(TAGNAME)=" $@ + # skip tests. they take too long. + ${SED} -i.bak "s=make lint.*=make lint=" localstack/Makefile + ${SED} -i.bak "s=\(.*lambda.*\)=#\1=" localstack/Makefile + +$ +``` + +This rule makes some minor modifications to localstack's Dockerfile, and the Makefile that localstack's build process places in the docker image. It changes the Dockerfile such that instead of depending on upstream's version of the java-maven-node-python docker image, we depend on the version we are building. additionally, we disable the test cases for localstack, because they take a long time, and have a timing issues on emulators. It's worth noting that we use the make "$@" variable here, which evaluates to the build target, AKA, everything to the left of the ":" on the first line of our rule. + +SED ABUSE: +These have a little bit of new sed, for us. We're using the '-i' option to sed, to perform sed operations in place, which is to say, we tell sed to edit the file, and store a backup of the file before it edited it as .bak. Other than that, these are standard substitutions, like we covered in our previous SED ABUSE section. + +In the same approximate category is the java_maven_node_python/Dockerfile rule: +```bash +$ cat Makefile | sed -n -E '/^(java)/{:n;N;s/\n\t/\n /;tn;p}' +java_maven_node_python/Dockerfile: + git clone https://github.com/localstack/localstack.git java_maven_node_python + cd java_maven_node_python && git reset --hard $(JAVAMAVENNODEPYTHON_COMMIT) + cd java_maven_node_python && mv bin/Dockerfile.base Dockerfile + # disable installing docker-ce. 
not available on many architectures in binary form. + ${SED} -i.bak "/.*install Docker.*/{N;N;N;N;N;d}" $@ +``` + +This rule does a checkout like the localstack rule, but the Dockerfile is stored somewhere other than the root of the repo. we move the Dockerfile, then we disable the installation of docker-ce in the environment. we don't use it, and it's got problems with not being ported to all architectures. + +SED ABUSE: +To disable the installation of docker here, we do something a bit hacky. we find the line with 'install Docker' on it, we pull the next 5 lines into the pattern buffer, then delete all six lines (the matching line plus the five that follow it). This is effectively just a multiline delete. we use the -i.bak command line option, just like the last sed abuse. neat and simple. + + +##### Checkout, Copy, Modify + +Some of the git repositories that we depend on do not store the Dockerfile in the root of the repository. instead, they have one large repository, with many directories containing many docker images. In these cases, we use git to check out the repository into a directory with the name of the image followed by '-all', then copy the directory we want out of the tree. + +```bash +$ cat Makefile | sed -n -E '/^(airdock)/{:n;N;s/\n\t/\n /;tn;p}' +airdock_base/Dockerfile: + git clone https://github.com/airdock-io/docker-base.git airdock_base-all + cd airdock_base-all && git reset --hard $(AIRDOCKBASE_COMMIT) + cp -R airdock_base-all/jessie airdock_base + # work around go compiler bug by using newer version of GOSU. https://bugs.launchpad.net/qemu/+bug/1696353 + ${SED} -i.bak "s/GOSU_VERSION=.* /GOSU_VERSION=1.11 /" $@ + # work around missing architecture specific binaries in earlier versions of tini. + ${SED} -i.bak "s/TINI_VERSION=.*/TINI_VERSION=v0.16.1/" $@ + # work around the lack of architecture usage when downloading tini binaries.
https://github.com/airdock-io/docker-base/issues/8 + ${SED} -i.bak 's/tini\(.asc\|\)"/tini-\$$dpkgArch\1"/' $@ + +airdock_rvm/Dockerfile: + git clone https://github.com/airdock-io/docker-rvm.git airdock_rvm-all + cd airdock_rvm-all && git reset --hard $(AIRDOCKRVM_COMMIT) + cp -R airdock_rvm-all/jessie-rvm airdock_rvm + ${SED} -i.bak "s=airdock/base:jessie=$(USERNAME)/airdock_base$(TAGNAME)=" $@ + # add a second key used to sign ruby to the dockerfile. https://github.com/airdock-io/docker-rvm/issues/1 + ${SED} -i.bak "s=\(409B6B1796C275462A1703113804BB82D39DC0E3\)=\1 7D2BAF1CF37B13E2069D6956105BD0E739499BDB=" $@ + +airdock_fakesqs/Dockerfile: + git clone https://github.com/airdock-io/docker-fake-sqs.git airdock_fakesqs-all + cd airdock_fakesqs-all && git reset --hard $(AIRDOCKFAKESQS_COMMIT) + cp -R airdock_fakesqs-all/0.3.1 airdock_fakesqs + ${SED} -i.bak "s=airdock/rvm:latest=$(USERNAME)/airdock_rvm$(TAGNAME)=" $@ + # add a workdir declaration to the final switch to root. + ${SED} -i.bak "s=^USER root=USER root\nWORKDIR /=" $@ + # break directory creation into two pieces, one run by root. + ${SED} -i.bak "s=^USER ruby=USER root=" $@ + ${SED} -i.bak "s=cd /srv/ruby/fake-sqs.*=chown ruby.ruby /srv/ruby/fake-sqs\nUSER ruby\nWORKDIR /srv/ruby/fake-sqs\nRUN cd /srv/ruby/fake-sqs \&\& \\\\=" $@ +``` + +In airdock_base/Dockefile, we do a clone, set it to the revision we are expecting, then copy out one directory from that repo, creating an airdock_base/ directory containing a Dockerfile, like we expect. We then change out some version numbers in the Dockerfile to work around some known bugs, and do a minor modification to two commands to allow airdock_base to be built for non-amd64 architectures. + +SED ABUSE: +The sed in the airdock_base/Dockerfile rule is relatively standard fare for us now, with the exception of the last command. in it, we use a match against "\(.asc\|\)", meaning either a .asc, or empty string. 
This lets this sed command modify both the line that contains the path to the signature for tini, and the path to the tini package. Since we want a '$' in the dockerfile, so that when the dockerfile is run, it looks at its internal '$dpkgArch' variable, we have to escape it with a $ to prevent make from eating it, and with a \ to prevent SED from trying to interpret it. + +In airdock_rvm/Dockerfile, we do the same clone, reset hard, copy routine as we did in airdock_base/Dockerfile. Since airdock_rvm depends on airdock_base, we change the image this image derives from to point to our airdock_base image. Additionally, to work around the image using an old signature to verify its ruby download, we add another key to the gpg import line in the Dockerfile. Technically both keys are in use by the project now, so we did not remove the old one. + +airdock_fakesqs had a bit more modification that was required. we follow the same routine as in airdock_rvm/Dockerfile, doing our clone, reset, copy, and dependent image change, then we have to make some modifications to the WORKDIR and USERs in this Dockerfile. I don't know how they successfully build it, but it looks to me like their original file is using a different docker file interpreter, with a different permissions model. when we tried to run the Dockerfile, it would give us permissions errors. These changes make it function, by being a bit more explicit about creating things with the right permissions. + +SED ABUSE: +Let's take a look at the effect of these sed commands, before we dig into the commands themselves. + +```bash +$ diff -u airdock_fakesqs-all/0.3.1/Dockerfile airdock_fakesqs/Dockerfile +--- airdock_fakesqs-all/0.3.1/Dockerfile 2019-03-11 16:47:40.367319559 +0000 ++++ airdock_fakesqs/Dockerfile 2019-03-11 16:47:40.419320902 +0000 +@@ -4,15 +4,19 @@ + # TO_BUILD: docker build --rm -t airdock/fake-sqs .
+ # SOURCE: https://github.com/airdock-io/docker-fake-sqs + +-FROM airdock/rvm:latest ++FROM julialongtin/airdock_rvm:0.0.9 + MAINTAINER Jerome Guibert + ARG FAKE_SQS_VERSION=0.3.1 +-USER ruby ++USER root + +-RUN mkdir -p /srv/ruby/fake-sqs && cd /srv/ruby/fake-sqs && \ ++RUN mkdir -p /srv/ruby/fake-sqs && chown ruby.ruby /srv/ruby/fake-sqs ++USER ruby ++WORKDIR /srv/ruby/fake-sqs ++RUN cd /srv/ruby/fake-sqs && \ + rvm ruby-2.3 do gem install fake_sqs -v ${FAKE_SQS_VERSION} --no-ri --no-rdoc + + USER root ++WORKDIR / + + EXPOSE 4568 +``` + +The first change is our path change, to use the airdock_rvm image we're managing, instead of upstream's latest. +The second and third change happens at the place in this file where it fails. On my machine, the mkdir fails, as the ruby user cannot create this directory. to solve this, we perform the directory creation an root, THEN do our rvm work. + +Now, let's look through the sed that did that. +The first sed command in this rule changed the path on the FROM line, just like the similar sed statement in the last make rule we were looking at. +The second sed command added a 'WORKDIR /' to the bottom of the Dockerfile, after the USER root. +The third SED command changes the USER line at the top of the file to using the root user to run the next command, instead of the ruby user. +Finally, the fourth SED command changes the first RUN command into two run commands. one creates the directory and makes sure we have permissions to it, while the second runs our command. the sed command also inserts commands to change user to ruby, and change working directories to the directory created in the first RUN command. + +Structurally, the first, second, and third sed command are all pretty standard things we've seen before. The fourth command looks a little different, but really, it's the same sort of substitution, only it adds several lines. At the end of the statement is some tricky escaping. 
+'&' characters must be escaped, because in sed, an '&' character is shorthand for 'the entire patern that we matched'. That will be important, later. the single '\' character has to be escaped into '\\\\'. + +Note that when we wrote our 'clean' rule, we added these '-all' directories manually, to make sure they would get deleted. + +##### Checkout, Copy, Modify Multiline + +elasticsearch and cassandra's checkouts are complicated, as they do a bit of injection of code into the docker entrypoint script. The entrypoint script is the script that is launched when you run a docker image. It's responsible for reading in environment variables, setting up the service that the docker image is supposed to run, and then running the service. For both elasticsearch and cassandra, we do a multiline insert, and we do it with multiple chained commands. + +Let's look at elasticsearch, as these two rules are almost identical. + +```bash +$ cat Makefile | sed -n -E '/^(ela)/{:n;N;s/\n\t/\n /;tn;p}' +elasticsearch/Dockerfile: + git clone https://github.com/blacktop/docker-elasticsearch-alpine.git elasticsearch-all + cd elasticsearch-all && git reset --hard $(ELASTICSEARCH_COMMIT) + cp -R elasticsearch-all/5.6/ elasticsearch + # add a block to the entrypoint script to interpret CS_JVM_OPTIONS, modifying the jvm.options before launching elasticsearch. + # first, add a marker to be replaced before the last if. + ${SED} -i.bak -r ':a;$$!{N;ba};s/^(.*)(\n?)fi/\2\1fi\nREPLACEME/' elasticsearch/elastic-entrypoint.sh + # next, load our variables. + ${SED} -i.bak 's@REPLACEME@MY_APP_CONFIG="/usr/share/elasticsearch/config/"\n&@' elasticsearch/elastic-entrypoint.sh + # add our parser and replacer. + ${SED} -i.bak $$'s@REPLACEME@if [ ! 
-z "$${JVM_OPTIONS_ES}" ]; then\\nfor x in $${JVM_OPTIONS_ES}; do { l="$${x%%=*}"; r=""; e=""; [ "$$x" != "$${x/=//}" ] \&\& e="=" \&\& r="$${x##*=}"; [ "$$x" != "$${x##-Xm?}" ] \&\& r="$${x##-Xm?}" \&\& l="$${x%%$$r}"; echo $$l $$e $$r; sed -i.bak -r \'s/^[# ]?(\'"$$l$$e"\').*/\\\\1\'"$$r"\'/\' "$$MY_APP_CONFIG/jvm.options"; diff "$$MY_APP_CONFIG/jvm.options.bak" "$$MY_APP_CONFIG/jvm.options" \&\& echo "no difference"; } done;\\nfi\\n&@' elasticsearch/elastic-entrypoint.sh + # remove the marker we added earlier. + ${SED} -i.bak 's@REPLACEME@@' elasticsearch/elastic-entrypoint.sh + +$ +``` + +In this rule, we're checking out the git tree, and copying one directory that contains our Dockerfile, and our entrypoint for elasticsearch. Following that, we have four sed commands, one of which inserts some very complicated bash. + +SED ABUSE: +Our first sed command in this rule uses a new trick. We're using -i to edit in place, and -r to enable extended regular expressions (which is why the parentheses in this command need no backslashes). Instead of starting with a match (/.../) or a substitution (s/thing/otherthing/), we immediately start with a label. let's break down this command. + +```sed +:a; # an anchor, we can loop back to. +$!{ # enter here only if there is content to be read from the file. note that to get this "$" past make, we had to escape it, by replacing it with $$. +N; # pull in the next line of content into the pattern space +ba # branch to the 'a' label. +}; +s/^(.*)(\n?)fi/\2\1fi\nREPLACEME/ #match everything up to the last 'fi' and replace it with a 'fi', a new line, and REPLACEME +``` + +What does that effectively do? the source file contains a lot of lines with 'fi' in them, by inserting REPLACEME after the last one, this gives us an anchor point, that we can safely run simpler sed commands against. + +for instance, our next sed command: +```sed +s@REPLACEME@MY_APP_CONFIG="/usr/share/elasticsearch/config/"\n&@ +``` + +the 's' on this command is using '@' symbols to separate the pattern from the replacement.
it operates by finding the 'REPLACEME' that we inserted with the last command. As we touched on earlier, the unescaped '&' at the end of this replacement repeats back the patern, in the replacement. This effectively means that this line replaces REPLACEME with a new line of code, and puts the REPLACEME after the line it inserted. + +BASH ABUSE: +The next sed command works similarly, however it inserts an extremely complicated pile of bash on one line. Let's take a look at it. I'm going to remove some semicolons, remove some of the escaping, and insert line breaks and comments, to make this a bit more readable. +```bash +if [ ! -z "$${JVM_OPTIONS_ES}" ]; then # only if JVM_OPTIONS_ES was set when docker was run + for x in $${JVM_OPTIONS_ES} + do { + # set l to everything to the left of an equal sign. + l="${x%%=*}" + # clear out r and e. + r="" + e="" + # if there was an equal sign, set e to an equal sign, and set r to everything after the equal sign. + [ "$x" != "${x/=//}" ] && e="=" && r="$${x##*=}" + # if there was an '-Xm' (a java memory option), set r to the content after the (-XM), and set l to the -XM + [ "$x" != "${x##-Xm?}" ] && r="$${x##-Xm?}" && l="${x%%$r}" + # debugging code. echo what we saw. + echo $l $e $r + # perform a substitution, uncommenting a line found that starts with $l$e, and replacing it with $l$e$r. + sed -i.bak -r 's/^[# ]?('"$l$e"').*/\1'"$r"'/' "$MY_APP_CONFIG/jvm.options" + # show that a change was done with diff, or say there was no difference. + diff "$$MY_APP_CONFIG/jvm.options.bak" "$MY_APP_CONFIG/jvm.options" && echo "no difference"; + } done; +fi +``` + +What this bash script is doing is, it looks for a JVM_OPTIONS_ES environment variable, and if it finds it, it rewrites the jvm.options file, uncommenting and replacing the values for java options. This allows us to change the memory pool settings, and possibly other settings, by setting a variable in the docker compose file that starts up our integration test. 
+ +This bash script is inserted by a sed command and CONTAINS a sed command, and lots of special characters. The quoting of this is handled a bit differently: instead of just surrounding our sed command in '' characters, we use $'', which is bash for "use C style escaping here". + +SED ABUSE: +the bash script above uses a relatively normal sed command, but intersperses it with ' and " characters, in order to pass the sed command in groups of '' characters, while using "" around the sections that we have variables in. bash will substitute variables in double quotes, but will not substitute them in single quotes. +This substitution command uses slashes as its separators. it starts by anchoring to the beginning of the line, and matching against either a single '#' character, or a single space. it does this by grouping the space and # in a character class ([ and ]), then using a question mark to indicate "maybe one of these.". the substitution continues by matching the bash variables $l and $e, saving them in \1, matching (and therefore removing) anything else on the line, and replacing the line with \1, followed immediately by the contents of the bash variable $r. + +The cassandra/Dockerfile rule is almost identical to this last rule, only substituting out the name of the variable we expect from docker to CS_JVM_OPTIONS, and changing the path to the jvm.options file. + +# Pitfalls I fell into writing this. + +The first large mistake I made when writing this, is that the root of the makefile's dependency tree contained both images that had dependencies, and the dependent images themselves. This had me writing methods to keep the image build process from stepping on itself. what was happening is that, in the case of the airdock-* and localstack images, when trying to build all of the images at once, make would race all the way down to the git clone steps, and run the git clone multiple times at the same time, where it just needs to be run once.
+ +The second was that I didn't really understand that manifest files refer to dockerhub only, not to the local machine. This was giving me similar race conditions, where an image build for architecture A would complete, and try to build the manifest when architecture B was still building. + +The third was writing really complicated SED and BASH and MAKE. ;p \ No newline at end of file