diff --git a/.travis.yml b/.travis.yml
index 0c6f8481..5bcd08ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -65,3 +65,6 @@ jobs:
 
 notifications:
   email: false
+
+after_deploy:
+  - cd $GOPATH/src/github.com/src-d/datasets/PublicGitArchive/pga-create/ && make docker-push-latest-release
diff --git a/PublicGitArchive/pga-create/Dockerfile b/PublicGitArchive/pga-create/Dockerfile
new file mode 100644
index 00000000..11e2be46
--- /dev/null
+++ b/PublicGitArchive/pga-create/Dockerfile
@@ -0,0 +1,47 @@
+#==========================
+# Stage 1: build pga-create
+#==========================
+FROM golang:1.12.0-alpine3.9 AS builder
+
+# build pga-create
+ENV PGA_CREATE_REPO=github.com/src-d/datasets/PublicGitArchive/pga-create
+ENV PGA_CREATE_PATH=$GOPATH/src/$PGA_CREATE_REPO
+COPY . ${PGA_CREATE_PATH}
+RUN go build -tags norwfs -o /bin/pga-create ${PGA_CREATE_PATH}/cmd/pga-create
+
+RUN cp ${PGA_CREATE_PATH}/select-repos.sh /bin/select-repos && chmod +x /bin/select-repos
+RUN cp ${PGA_CREATE_PATH}/index-repos.sh /bin/index-repos && chmod +x /bin/index-repos
+
+RUN wget -q -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 && \
+    chmod +x /usr/local/bin/dumb-init
+
+#=====================================================
+# Stage 2: copy binaries and set environment variables
+#=====================================================
+FROM alpine:3.9.2
+
+COPY --from=builder /bin/pga-create /bin/*-repos /usr/local/bin/dumb-init /bin/
+
+# volume where the data generated by select-repos will persist to be used by index-repos and borges producer
+VOLUME ["/pga/data"]
+# volume where borges consumer will download the siva files to be analyzed by index-repos. Also,
+# the final index.tar.gz will be placed here
+VOLUME ["/pga/root-repositories"]
+
+# core-retrieval database configuration, default: postgres://testing:testing@0.0.0.0:5432/testing?sslmode=disable&connect_timeout=30
+ENV CONFIG_DBUSER=testing
+ENV CONFIG_DBPASS=testing
+ENV CONFIG_DBHOST=0.0.0.0
+ENV CONFIG_DBPORT=5432
+ENV CONFIG_DBNAME=testing
+ENV CONFIG_DBSSLMODE=disable
+ENV CONFIG_DBTIMEOUT=30s
+
+# pga-create configuration (BUCKET_SIZE must be the same value used by borges consumer)
+ENV PGA_VERSION=version-undefined
+ENV STARS=50
+ENV BUCKET_SIZE=2
+
+WORKDIR /pga
+ENTRYPOINT ["/bin/dumb-init", "--"]
+
diff --git a/PublicGitArchive/pga-create/index-repos.sh b/PublicGitArchive/pga-create/index-repos.sh
new file mode 100644
index 00000000..a1e2234d
--- /dev/null
+++ b/PublicGitArchive/pga-create/index-repos.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+# This script is intended to be used inside the docker image for
+# https://github.com/src-d/datasets/tree/master/PublicGitArchive/pga-create
+# The absolute paths refer to mounted volumes inside the docker container.
+# See the Dockerfile for more information.
+
+set -e
+
+CONFIG_ROOT_REPOSITORIES_DIR=/pga/root-repositories \
+CONFIG_CLEAN_TEMP_DIR=true \
+CONFIG_BUCKETSIZE="$BUCKET_SIZE" \
+pga-create index --debug --repos-file=/pga/data/pga.list
+
+pga-create set-forks -f /pga/data/index.csv -o "/pga/data/index_${PGA_VERSION}.csv"
+
+tar -czf "/pga/root-repositories/index_${PGA_VERSION}.tar.gz" -C /pga/data/ "index_${PGA_VERSION}.csv"
+
diff --git a/PublicGitArchive/pga-create/select-repos.sh b/PublicGitArchive/pga-create/select-repos.sh
new file mode 100644
index 00000000..bcfdf51c
--- /dev/null
+++ b/PublicGitArchive/pga-create/select-repos.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# This script is intended to be used inside the docker image for
+# https://github.com/src-d/datasets/tree/master/PublicGitArchive/pga-create
+# The absolute paths refer to mounted volumes inside the docker container.
+# See the Dockerfile for more information.
+
+set -e
+
+pga-create discover
+pga-create select -m "$STARS" >/pga/data/pga.list
+