Skip to content

Commit 66baf1e

Browse files
authored
HADOOP-18682. Move hadoop docker scripts under the main source code (#6483). Contributed by Christos Bisias.
1 parent e4789a2 commit 66baf1e

File tree

8 files changed

+322
-0
lines changed

8 files changed

+322
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
<!---
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. See accompanying LICENSE file.
13+
-->
14+
15+
## Hadoop Docker
16+
17+
### Running from existing setups
18+
19+
The Apache Hadoop repository contains dedicated branches for running Hadoop in Docker.
20+
21+
The `docker-hadoop-runner*` branches contain scripts that set up base images that can be used for running any Hadoop version.
22+
23+
* [docker-hadoop-runner-latest](https://github.com/apache/hadoop/tree/docker-hadoop-runner-latest)
24+
* [docker-hadoop-runner-jdk11](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk11)
25+
* [docker-hadoop-runner-jdk8](https://github.com/apache/hadoop/tree/docker-hadoop-runner-jdk8)
26+
* [docker-hadoop-runner](https://github.com/apache/hadoop/tree/docker-hadoop-runner)
27+
28+
The `docker-hadoop*` branches can be used for running a specific version.
29+
30+
* [docker-hadoop-3](https://github.com/apache/hadoop/tree/docker-hadoop-3)
31+
* `hadoop-3.3.6`
32+
* [docker-hadoop-2](https://github.com/apache/hadoop/tree/docker-hadoop-2)
33+
* `hadoop-2.10.2`
34+
35+
### Running from the source code
36+
37+
There is a setup under `hadoop-dist` that contains Docker Compose definitions
38+
for running the current version of Hadoop in a multi-node docker environment.
39+
40+
This is meant for testing code changes locally and debugging.
41+
42+
The base image used by the Docker setup is built as part of the maven lifecycle.
43+
The distribution files generated while building the project with the `-Pdist` profile enabled
44+
are used to run Hadoop inside the containers.
45+
46+
To start the Docker environment, do the following:
47+
* Build the project, using the `-Pdist` profile
48+
```shell
49+
> mvn clean install -Dmaven.javadoc.skip=true -DskipTests -DskipShade -Pdist,src
50+
```
51+
* From the project root, navigate to the Docker Compose directory inside the generated dist directory
52+
```shell
53+
> cd hadoop-dist/target/hadoop-<current-version>/compose/hadoop
54+
```
55+
* Start the docker environment
56+
```shell
57+
> docker-compose up -d --scale datanode=3
58+
```
59+
* Connect to a container to execute commands
60+
```shell
61+
> docker exec -it hadoop_datanode_1 bash
62+
bash-4.2$ hdfs dfs -mkdir /test
63+
```
64+
65+
### Config files
66+
67+
To add or remove properties from the `core-site.xml`, `hdfs-site.xml`, etc. files used in the docker environment,
68+
simply edit the `config` file before starting the containers. The changes are applied to the Docker environment when the containers start.

hadoop-common-project/hadoop-common/src/site/markdown/SingleCluster.md.vm

+6
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,9 @@ Fully-Distributed Operation
236236
---------------------------
237237

238238
For information on setting up fully-distributed, non-trivial clusters see [Cluster Setup](./ClusterSetup.html).
239+
240+
Hadoop in Docker containers
241+
---------------------------
242+
243+
For information on setting up Hadoop in Docker, using either official releases or the main source code,
244+
see [Hadoop Docker](./HadoopDocker.html).

hadoop-dist/pom.xml

+94
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@
2929
<name>Apache Hadoop Distribution</name>
3030
<packaging>jar</packaging>
3131

32+
<properties>
33+
<file.encoding>UTF-8</file.encoding>
34+
<downloadSources>true</downloadSources>
35+
<docker.hadoop-runner.version>docker-hadoop-runner</docker.hadoop-runner.version>
36+
<maven.test.skip>true</maven.test.skip>
37+
</properties>
38+
3239
<!-- Using dependencies to ensure this module is the last one -->
3340
<dependencies>
3441
<dependency>
@@ -151,6 +158,43 @@
151158
</execution>
152159
</executions>
153160
</plugin>
161+
<plugin>
162+
<artifactId>maven-resources-plugin</artifactId>
163+
<executions>
164+
<execution>
165+
<id>copy-compose-files</id>
166+
<phase>package</phase>
167+
<goals>
168+
<goal>copy-resources</goal>
169+
</goals>
170+
<configuration>
171+
<outputDirectory>${project.build.directory}/hadoop-${project.version}/compose</outputDirectory>
172+
<resources>
173+
<resource>
174+
<directory>src/main/compose</directory>
175+
<filtering>true</filtering>
176+
</resource>
177+
</resources>
178+
</configuration>
179+
</execution>
180+
<execution>
181+
<id>copy-and-filter-dockerfile</id>
182+
<phase>package</phase>
183+
<goals>
184+
<goal>copy-resources</goal>
185+
</goals>
186+
<configuration>
187+
<outputDirectory>${project.build.directory}/hadoop-${project.version}</outputDirectory>
188+
<resources>
189+
<resource>
190+
<directory>src/main/docker</directory>
191+
<filtering>true</filtering>
192+
</resource>
193+
</resources>
194+
</configuration>
195+
</execution>
196+
</executions>
197+
</plugin>
154198
</plugins>
155199
</build>
156200

@@ -230,6 +274,56 @@
230274
</plugins>
231275
</build>
232276
</profile>
277+
<profile>
278+
<id>docker-build</id>
279+
<build>
280+
<plugins>
281+
<plugin>
282+
<groupId>io.fabric8</groupId>
283+
<artifactId>docker-maven-plugin</artifactId>
284+
<executions>
285+
<execution>
286+
<goals>
287+
<goal>build</goal>
288+
</goals>
289+
<phase>package</phase>
290+
</execution>
291+
</executions>
292+
<configuration>
293+
<images>
294+
<image>
295+
<name>${docker.image}</name>
296+
<build>
297+
<dockerFileDir>
298+
${project.build.directory}/hadoop-${project.version}
299+
</dockerFileDir>
300+
</build>
301+
</image>
302+
</images>
303+
</configuration>
304+
</plugin>
305+
</plugins>
306+
</build>
307+
</profile>
308+
<profile>
309+
<id>docker-push</id>
310+
<build>
311+
<plugins>
312+
<plugin>
313+
<groupId>io.fabric8</groupId>
314+
<artifactId>docker-maven-plugin</artifactId>
315+
<executions>
316+
<execution>
317+
<goals>
318+
<goal>push</goal>
319+
</goals>
320+
<phase>package</phase>
321+
</execution>
322+
</executions>
323+
</plugin>
324+
</plugins>
325+
</build>
326+
</profile>
233327
</profiles>
234328

235329
</project>
+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
HADOOP_IMAGE=apache/hadoop
17+
HADOOP_RUNNER_VERSION=${docker.hadoop-runner.version}
18+
HADOOP_RUNNER_IMAGE=apache/hadoop-runner
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
CORE-SITE.XML_fs.default.name=hdfs://namenode
17+
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
18+
19+
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
20+
HDFS-SITE.XML_dfs.replication=1
21+
22+
MAPRED-SITE.XML_mapreduce.framework.name=yarn
23+
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
24+
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
25+
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
26+
27+
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
28+
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
29+
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
30+
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
31+
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
32+
33+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
34+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
35+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
36+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
37+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
38+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
39+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
40+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
41+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
42+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
43+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
44+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
45+
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
46+
47+
LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout
48+
LOG4J.PROPERTIES_log4j.appender.stdout=org.apache.log4j.ConsoleAppender
49+
LOG4J.PROPERTIES_log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
50+
LOG4J.PROPERTIES_log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
version: "3.8"
17+
18+
x-common-config:
19+
&common-config
20+
image: ${HADOOP_RUNNER_IMAGE}:${HADOOP_RUNNER_VERSION}
21+
volumes:
22+
- ../..:/opt/hadoop
23+
env_file:
24+
- ./config
25+
26+
services:
27+
namenode:
28+
<<: *common-config
29+
hostname: namenode
30+
command: ["hdfs", "namenode"]
31+
ports:
32+
- 9870:9870
33+
environment:
34+
ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
35+
datanode:
36+
<<: *common-config
37+
command: ["hdfs", "datanode"]
38+
resourcemanager:
39+
<<: *common-config
40+
hostname: resourcemanager
41+
command: ["yarn", "resourcemanager"]
42+
ports:
43+
- 8088:8088
44+
nodemanager:
45+
<<: *common-config
46+
command: ["yarn", "nodemanager"]
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
FROM apache/hadoop-runner:@docker.hadoop-runner.version@
17+
18+
COPY . /opt/hadoop
19+
20+
WORKDIR /opt/hadoop
21+
22+
USER root
23+
24+
RUN chown -R hadoop:users /opt/hadoop
25+
26+
USER hadoop

pom.xml

+14
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
8282
<!-- required as child projects with different version can't use ${project.version} -->
8383
<hadoop.version>3.5.0-SNAPSHOT</hadoop.version>
8484

85+
<docker.image>apache/hadoop:${project.version}</docker.image>
86+
8587
<distMgmtSnapshotsId>apache.snapshots.https</distMgmtSnapshotsId>
8688
<distMgmtSnapshotsName>Apache Development Snapshot Repository</distMgmtSnapshotsName>
8789
<distMgmtSnapshotsUrl>https://repository.apache.org/content/repositories/snapshots</distMgmtSnapshotsUrl>
@@ -119,6 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
119121
<jsonschema2pojo-maven-plugin.version>1.1.1</jsonschema2pojo-maven-plugin.version>
120122
<maven-compiler-plugin.version>3.10.1</maven-compiler-plugin.version>
121123
<cyclonedx.version>2.7.10</cyclonedx.version>
124+
<docker-maven-plugin.version>0.29.0</docker-maven-plugin.version>
122125

123126
<shell-executable>bash</shell-executable>
124127

@@ -150,6 +153,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
150153
<artifactId>maven-dependency-plugin</artifactId>
151154
<version>${maven-dependency-plugin.version}</version>
152155
</plugin>
156+
<plugin>
157+
<groupId>io.fabric8</groupId>
158+
<artifactId>docker-maven-plugin</artifactId>
159+
<version>${docker-maven-plugin.version}</version>
160+
</plugin>
153161
<plugin>
154162
<groupId>org.apache.maven.plugins</groupId>
155163
<artifactId>maven-enforcer-plugin</artifactId>
@@ -892,5 +900,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x
892900
</activation>
893901
</profile>
894902

903+
<profile>
904+
<id>docker-build</id>
905+
<properties>
906+
<docker.image>${user.name}/hadoop:${project.version}</docker.image>
907+
</properties>
908+
</profile>
895909
</profiles>
896910
</project>

0 commit comments

Comments
 (0)