-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathDockerfile-processing
80 lines (66 loc) · 2.64 KB
/
Dockerfile-processing
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
FROM ubuntu:18.04
MAINTAINER Social Feed Manager <sfm@gwu.edu>
ARG DEBIAN_FRONTEND=noninteractive
# This pre-fetches the most recent requirements.apt.
ADD requirements/processing_requirements.apt /opt/sfm-setup/
RUN apt-get update
# Necessary to install Python 3.8 on Ubuntu 18
RUN apt-get install -y software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN cat /opt/sfm-setup/processing_requirements.apt | tr "\n" " " | xargs apt-get install -y
# Symlinks for python
RUN ln -s /usr/bin/python3.8 /usr/bin/python
RUN ln -s /usr/bin/pip3 /usr/bin/pip
#Upgrade pip
#Calling pip via Python to make sure the correct version is used
RUN python -m pip install --upgrade pip
#Avoid the warning of https
RUN python -m pip install --upgrade ndg-httpsclient
# Twitter
RUN git clone https://github.com/gwu-libraries/sfm-twitter-harvester.git /opt/sfm-twitter-harvester
WORKDIR /opt/sfm-twitter-harvester
RUN git checkout 3.0.0
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install .
# Flickr
RUN git clone https://github.com/gwu-libraries/sfm-flickr-harvester.git /opt/sfm-flickr-harvester
WORKDIR /opt/sfm-flickr-harvester
RUN git checkout 3.0.0
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install .
#Weibo
RUN git clone https://github.com/gwu-libraries/sfm-weibo-harvester.git /opt/sfm-weibo-harvester
WORKDIR /opt/sfm-weibo-harvester
RUN git checkout 3.0.0
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install .
# Tumblr
RUN git clone https://github.com/gwu-libraries/sfm-tumblr-harvester.git /opt/sfm-tumblr-harvester
WORKDIR /opt/sfm-tumblr-harvester
RUN git checkout 3.0.0
RUN python -m pip install -r requirements/common.txt -r requirements/release.txt
RUN python -m pip install .
# Clone twarc so that have utilities
RUN git clone https://github.com/docnow/twarc.git /opt/twarc
WORKDIR /opt/twarc
RUN git checkout v1.12.1
RUN python -m pip install -r requirements/python3.txt
RUN python -m pip install .
# Add warctools
RUN pip install warctools
# Add JWAT-tools
ADD https://repo1.maven.org/maven2/org/jwat/jwat-tools/0.6.2/jwat-tools-0.6.2.zip /tmp/
RUN unzip /tmp/jwat-tools-0.6.2.zip -d /tmp/
RUN mv /tmp/jwat-tools-0.6.2 /opt/jwat-tools
# Add CSVKit
RUN python -m pip install csvkit
# Add gron
ADD https://github.com/tomnomnom/gron/releases/download/v0.5.2/gron-linux-amd64-0.5.2.tgz /tmp/
WORKDIR /tmp
RUN tar xzf gron-linux-amd64-0.5.2.tgz
RUN mv gron /usr/local/bin/
# Add sync scripts
ADD docker/processing/*.sh /opt/processing/
RUN chmod +x /opt/processing/*.sh
WORKDIR /sfm-processing
CMD /bin/bash