From 2ff56204cbfdf38fd6fc5602c01ff3b418b4549e Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 20 Mar 2019 08:47:34 +0000 Subject: [PATCH 1/3] pga-create: add Dockerfile Signed-off-by: Manuel Carmona --- PublicGitArchive/pga-create/Dockerfile | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 PublicGitArchive/pga-create/Dockerfile diff --git a/PublicGitArchive/pga-create/Dockerfile b/PublicGitArchive/pga-create/Dockerfile new file mode 100644 index 00000000..463b4b2b --- /dev/null +++ b/PublicGitArchive/pga-create/Dockerfile @@ -0,0 +1,58 @@ +#========================================================================== +# Stage 1: build pga-create and create select-repos and index-repos scripts +#========================================================================== +FROM golang:1.12.0-alpine3.9 AS builder + +# build pga-create +ENV PGA_CREATE_REPO=github.com/src-d/datasets/PublicGitArchive/pga-create +ENV PGA_CREATE_PATH=$GOPATH/src/$PGA_CREATE_REPO +COPY . 
${PGA_CREATE_PATH} +RUN go build -tags norwfs -o /bin/pga-create ${PGA_CREATE_PATH}/cmd/pga-create + +# select-repos command +RUN echo -e '#!/bin/sh \n\ + pga-create discover && \ + pga-create select -m $STARS >/pga/data/pga.list'>/bin/select-repos && \ + chmod +x /bin/select-repos + +# index-repos command +RUN echo -e '#!/bin/sh \n\ + CONFIG_ROOT_REPOSITORIES_DIR=/pga/root-repositories CONFIG_CLEAN_TEMP_DIR=true CONFIG_BUCKETSIZE=$BUCKET_SIZE \ + pga-create index --debug --repos-file=/pga/data/pga.list && \ + pga-create set-forks -f /pga/data/index.csv -o /pga/data/index_$PGA_VERSION.csv &&\ + tar -czf /pga/root-repositories/index_$PGA_VERSION.tar.gz -C /pga/data/ index_$PGA_VERSION.csv'>/bin/index-repos && \ + chmod +x /bin/index-repos + +RUN wget -q -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 && \ + chmod +x /usr/local/bin/dumb-init + +#===================================================== +# Stage 2: copy binaries and set environment variables +#===================================================== +FROM alpine:3.9.2 + +COPY --from=builder /bin/pga-create /bin/*-repos /usr/local/bin/dumb-init /bin/ + +# volume where the data generated by select-repos will persist to be used by index-repos and borges producer +VOLUME ["/pga/data"] +# volume where borges consumer will download the siva files to be analyzed by index-repos. 
Also, +# the final index.tar.gz will be placed here +VOLUME ["/pga/root-repositories"] + +# core-retrieval database configuration, default: postgres://testing:testing@0.0.0.0:5432/testing?sslmode=disable&connect_timeout=30 +ENV CONFIG_DBUSER=testing +ENV CONFIG_DBPASS=testing +ENV CONFIG_DBHOST=0.0.0.0 +ENV CONFIG_DBPORT=5432 +ENV CONFIG_DBNAME=testing +ENV CONFIG_DBSSLMODE=disable +ENV CONFIG_DBTIMEOUT=30s + +# pga-create configuration (BUCKET_SIZE must be the same value used by borges consumer) +ENV PGA_VERSION=version-undefined +ENV STARS=50 +ENV BUCKET_SIZE=2 + +WORKDIR /pga +ENTRYPOINT ["/bin/dumb-init", "--"] + From 8df69e681cfa91a09f037c25ee11acfee40c56a0 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 20 Mar 2019 08:48:39 +0000 Subject: [PATCH 2/3] *: add an after_deploy step to .travis.yml to push a pga-create docker image Signed-off-by: Manuel Carmona --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 0c6f8481..5bcd08ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -65,3 +65,6 @@ jobs: notifications: email: false + +after_deploy: + - cd $GOPATH/src/github.com/src-d/datasets/PublicGitArchive/pga-create/ && make docker-push-latest-release From 95632e41fb7311d25bc3310522cf78e4318cb479 Mon Sep 17 00:00:00 2001 From: Manuel Carmona Date: Wed, 20 Mar 2019 11:02:06 +0000 Subject: [PATCH 3/3] pga-create: add the select-repos.sh and index-repos.sh scripts to the repository to be copied into the docker image Signed-off-by: Manuel Carmona --- PublicGitArchive/pga-create/Dockerfile | 21 +++++---------------- PublicGitArchive/pga-create/index-repos.sh | 18 ++++++++++++++++++ PublicGitArchive/pga-create/select-repos.sh | 12 ++++++++++++ 3 files changed, 35 insertions(+), 16 deletions(-) create mode 100644 PublicGitArchive/pga-create/index-repos.sh create mode 100644 PublicGitArchive/pga-create/select-repos.sh diff --git a/PublicGitArchive/pga-create/Dockerfile b/PublicGitArchive/pga-create/Dockerfile index 
463b4b2b..11e2be46 100644 --- a/PublicGitArchive/pga-create/Dockerfile +++ b/PublicGitArchive/pga-create/Dockerfile @@ -1,6 +1,6 @@ -#========================================================================== -# Stage 1: build pga-create and create select-repos and index-repos scripts -#========================================================================== +#========================== +# Stage 1: build pga-create +#========================== FROM golang:1.12.0-alpine3.9 AS builder # build pga-create @@ -9,19 +9,8 @@ ENV PGA_CREATE_PATH=$GOPATH/src/$PGA_CREATE_REPO COPY . ${PGA_CREATE_PATH} RUN go build -tags norwfs -o /bin/pga-create ${PGA_CREATE_PATH}/cmd/pga-create -# select-repos command -RUN echo -e '#!/bin/sh \n\ - pga-create discover && \ - pga-create select -m $STARS >/pga/data/pga.list'>/bin/select-repos && \ - chmod +x /bin/select-repos - -# index-repos command -RUN echo -e '#!/bin/sh \n\ - CONFIG_ROOT_REPOSITORIES_DIR=/pga/root-repositories CONFIG_CLEAN_TEMP_DIR=true CONFIG_BUCKETSIZE=$BUCKET_SIZE \ - pga-create index --debug --repos-file=/pga/data/pga.list && \ - pga-create set-forks -f /pga/data/index.csv -o /pga/data/index_$PGA_VERSION.csv &&\ - tar -czf /pga/root-repositories/index_$PGA_VERSION.tar.gz -C /pga/data/ index_$PGA_VERSION.csv'>/bin/index-repos && \ - chmod +x /bin/index-repos +RUN cp ${PGA_CREATE_PATH}/select-repos.sh /bin/select-repos && chmod +x /bin/select-repos +RUN cp ${PGA_CREATE_PATH}/index-repos.sh /bin/index-repos && chmod +x /bin/index-repos RUN wget -q -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 && \ chmod +x /usr/local/bin/dumb-init diff --git a/PublicGitArchive/pga-create/index-repos.sh b/PublicGitArchive/pga-create/index-repos.sh new file mode 100644 index 00000000..a1e2234d --- /dev/null +++ b/PublicGitArchive/pga-create/index-repos.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# This script is intended to be used inside the docker image for +# 
https://github.com/src-d/datasets/tree/master/PublicGitArchive/pga-create +# The absolute paths refer to mounted volumes inside the docker container. +# See the Dockerfile for more information. + +set -e + +CONFIG_ROOT_REPOSITORIES_DIR=/pga/root-repositories \ +CONFIG_CLEAN_TEMP_DIR=true \ +CONFIG_BUCKETSIZE=$BUCKET_SIZE \ +pga-create index --debug --repos-file=/pga/data/pga.list + +pga-create set-forks -f /pga/data/index.csv -o "/pga/data/index_${PGA_VERSION}.csv" + +tar -czf "/pga/root-repositories/index_${PGA_VERSION}.tar.gz" -C /pga/data/ "index_${PGA_VERSION}.csv" + diff --git a/PublicGitArchive/pga-create/select-repos.sh b/PublicGitArchive/pga-create/select-repos.sh new file mode 100644 index 00000000..bcfdf51c --- /dev/null +++ b/PublicGitArchive/pga-create/select-repos.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +# This script is intended to be used inside the docker image for +# https://github.com/src-d/datasets/tree/master/PublicGitArchive/pga-create +# The absolute paths refer to mounted volumes inside the docker container. +# See the Dockerfile for more information. + +set -e + +pga-create discover +pga-create select -m "$STARS" >/pga/data/pga.list +