
Commit 4849cf1

Merge branch 'apache:trunk' into YARN-11484
2 parents f931294 + 5b215f2

File tree: 26 files changed, +702 −114 lines

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-.DS_Store
+*.DS_Store
 *.iml
 *.ipr
 *.iws

dev-support/bin/hadoop.sh

Lines changed: 40 additions & 27 deletions
@@ -21,18 +21,20 @@
 personality_plugins "all,-ant,-gradle,-scalac,-scaladoc"
 
 # These flags are needed to run Yetus against Hadoop on Windows.
-WINDOWS_FLAGS="-Pnative-win
--Dhttps.protocols=TLSv1.2
--Drequire.openssl
--Drequire.test.libhadoop
--Dshell-executable=${BASH_EXECUTABLE}
--Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}
--Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}
--Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}
--Dwindows.cmake.build.type=RelWithDebInfo
--Dwindows.build.hdfspp.dll=off
--Dwindows.no.sasl=on
--Duse.platformToolsetVersion=v142"
+WINDOWS_FLAGS=(
+  "-Pnative-win"
+  "-Dhttps.protocols=TLSv1.2"
+  "-Drequire.openssl"
+  "-Drequire.test.libhadoop"
+  "-Dshell-executable=${BASH_EXECUTABLE}"
+  "-Dopenssl.prefix=${VCPKG_INSTALLED_PACKAGES}"
+  "-Dcmake.prefix.path=${VCPKG_INSTALLED_PACKAGES}"
+  "-Dwindows.cmake.toolchain.file=${CMAKE_TOOLCHAIN_FILE}"
+  "-Dwindows.cmake.build.type=RelWithDebInfo"
+  "-Dwindows.build.hdfspp.dll=off"
+  "-Dwindows.no.sasl=on"
+  "-Duse.platformToolsetVersion=v142"
+)
 
 ## @description Globals specific to this personality
 ## @audience private
@@ -292,7 +294,7 @@ function hadoop_native_flags
         -Drequire.snappy \
         -Pdist \
         -Dtar \
-        "${WINDOWS_FLAGS}"
+        "${WINDOWS_FLAGS[@]}"
       ;;
     *)
       echo \
@@ -436,7 +438,7 @@ function personality_modules
   fi
 
   if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
-    extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS} ${extra}"
+    extra="-Ptest-patch -Pdist -Dtar ${WINDOWS_FLAGS[*]} ${extra}"
   fi
 
   for module in $(hadoop_order ${ordering}); do
@@ -557,14 +559,6 @@ function shadedclient_rebuild
   declare module
   declare -a modules=()
 
-  if [[ ${OSTYPE} = Windows_NT ||
-    ${OSTYPE} =~ ^CYGWIN.* ||
-    ${OSTYPE} =~ ^MINGW32.* ||
-    ${OSTYPE} =~ ^MSYS.* ]]; then
-    echo "hadoop personality: building on windows, skipping check of client artifacts."
-    return 0
-  fi
-
   yetus_debug "hadoop personality: seeing if we need the test of client artifacts."
   for module in hadoop-client-modules/hadoop-client-check-invariants \
                 hadoop-client-modules/hadoop-client-check-test-invariants \
@@ -581,28 +575,47 @@ function shadedclient_rebuild
 
   big_console_header "Checking client artifacts on ${repostatus} with shaded clients"
 
-  extra="-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true"
+  extra=(
+    "-Dtest=NoUnitTests"
+    "-Dmaven.javadoc.skip=true"
+    "-Dcheckstyle.skip=true"
+    "-Dspotbugs.skip=true"
+  )
 
   if [[ "$IS_WINDOWS" && "$IS_WINDOWS" == 1 ]]; then
+    # shellcheck disable=SC2206
+    extra+=(${WINDOWS_FLAGS[*]})
+
+    # The shaded client integration tests require the Hadoop jars that were just built to be
+    # installed in the local maven repository.
+    # shellcheck disable=SC2086
+    echo_and_redirect "${logfile}" \
+      "${MAVEN}" "${MAVEN_ARGS[@]}" install -fae --batch-mode \
+      -DskipTests -DskipDocs -Pdist -Dtar ${extra[*]}
+
+    # The shaded client integration tests spawn a MiniDFS and MiniYARN cluster for testing. Both of
+    # them require winutils.exe to be found in the PATH and HADOOP_HOME to be set.
     if load_hadoop_version; then
       export HADOOP_HOME="${SOURCEDIR}/hadoop-dist/target/hadoop-${HADOOP_VERSION}-SNAPSHOT"
+      WIN_HADOOP_HOME=$(cygpath -w -a "${HADOOP_HOME}")
+      export PATH="${PATH};${WIN_HADOOP_HOME}\bin"
     else
      yetus_error "[WARNING] Unable to extract the Hadoop version and thus HADOOP_HOME is not set. Some tests may fail."
    fi
-
-    extra="${WINDOWS_FLAGS} ${extra}"
  fi
 
+  # shellcheck disable=SC2086
  echo_and_redirect "${logfile}" \
-    "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" "${extra}"
+    "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am "${modules[@]}" ${extra[*]}
 
  big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients"
 
+  # shellcheck disable=SC2086
  echo_and_redirect "${logfile}" \
    "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
      "${modules[@]}" \
      -DskipShade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true \
-      -Dspotbugs.skip=true "${extra}"
+      -Dspotbugs.skip=true ${extra[*]}
 
  count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
  if [[ ${count} -gt 0 ]]; then
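Note on the shell change above: converting WINDOWS_FLAGS from a whitespace-joined string to a bash array is what lets flags whose values contain spaces survive word-splitting. A minimal sketch of the difference (the variable names flags_str and flags_arr are illustrative, not from the patch):

#!/usr/bin/env bash
flags_str="-Dshell-executable=C:\Program Files\Git\bin\bash.exe"
flags_arr=("-Dshell-executable=C:\Program Files\Git\bin\bash.exe")

# Unquoted string expansion splits on the space inside the path:
printf '<%s>\n' ${flags_str}      # <-Dshell-executable=C:\Program> <Files\Git\bin\bash.exe>

# Quoted array expansion keeps each flag as a single argument:
printf '<%s>\n' "${flags_arr[@]}" # <-Dshell-executable=C:\Program Files\Git\bin\bash.exe>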

dev-support/docker/Dockerfile_windows_10

Lines changed: 24 additions & 8 deletions
@@ -38,8 +38,8 @@ RUN curl -SL --output vs_buildtools.exe https://aka.ms/vs/16/release/vs_buildtoo
     && del /q vs_buildtools.exe
 
 # Install Chocolatey.
+ENV chocolateyVersion=1.4.0
 RUN powershell -NoProfile -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
-RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
 
 # Install git.
 RUN choco install git.install -y
@@ -55,24 +55,18 @@ RUN powershell .\vcpkg\vcpkg.exe install boost:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install protobuf:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install openssl:x64-windows
 RUN powershell .\vcpkg\vcpkg.exe install zlib:x64-windows
-ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
 
 # Install Azul Java 8 JDK.
 RUN powershell Invoke-WebRequest -URI https://cdn.azul.com/zulu/bin/zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -OutFile $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -DestinationPath "C:\Java"
-ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
-RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
 
 # Install Apache Maven.
 RUN powershell Invoke-WebRequest -URI https://archive.apache.org/dist/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.zip -OutFile $Env:TEMP\apache-maven-3.8.6-bin.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\apache-maven-3.8.6-bin.zip -DestinationPath "C:\Maven"
-RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
-ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
 
 # Install CMake 3.19.0.
 RUN powershell Invoke-WebRequest -URI https://cmake.org/files/v3.19/cmake-3.19.0-win64-x64.zip -OutFile $Env:TEMP\cmake-3.19.0-win64-x64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\cmake-3.19.0-win64-x64.zip -DestinationPath "C:\CMake"
-RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
 
 # Install zstd 1.5.4.
 RUN powershell Invoke-WebRequest -Uri https://github.com/facebook/zstd/releases/download/v1.5.4/zstd-v1.5.4-win64.zip -OutFile $Env:TEMP\zstd-v1.5.4-win64.zip
@@ -112,13 +106,35 @@ RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` Fi
 RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip
 RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3"
 RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe"
+
+# Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows.
+RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires
+
+# Grant the privilege to create symbolic links to HadoopBuilder.
+RUN powershell secedit /export /cfg "C:\secpol.cfg"
+RUN powershell "(Get-Content C:\secpol.cfg).Replace('SeCreateSymbolicLinkPrivilege = ', 'SeCreateSymbolicLinkPrivilege = HadoopBuilder,') | Out-File C:\secpol.cfg"
+RUN powershell secedit /configure /db "C:\windows\security\local.sdb" /cfg "C:\secpol.cfg"
+RUN powershell Remove-Item -Force "C:\secpol.cfg" -Confirm:$false
+
+# Login as HadoopBuilder and set the necessary environment and PATH variables.
+USER HadoopBuilder
+ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
+ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
+ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
+RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
+RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
+RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
+RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
+RUN setx PATH "%PATH%;C:\ZStd"
 RUN setx path "%PATH%;C:\Python3"
+RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
 
 # We get strange Javadoc errors without this.
 RUN setx classpath ""
 
+# Setting Git configurations.
+RUN git config --global core.autocrlf true
 RUN git config --global core.longpaths true
-RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
 
 # Define the entry point for the docker container.
 ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat", "&&", "cmd.exe"]
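A quick smoke test for the privilege grant above (hypothetical; the image tag is a placeholder and this check is not part of the Dockerfile): whoami /priv lists the privileges held by the current user, so running it as HadoopBuilder should show SeCreateSymbolicLinkPrivilege.

# Run against the Windows container image built from this Dockerfile
docker run --rm --entrypoint cmd hadoop-windows-10-builder:latest \
  /c "whoami /priv | findstr /i SeCreateSymbolicLinkPrivilege"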

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Lines changed: 2 additions & 2 deletions
@@ -1724,14 +1724,14 @@
   <name>fs.s3a.encryption.algorithm</name>
   <description>Specify a server-side encryption or client-side
     encryption algorithm for s3a: file system. Unset by default. It supports the
-    following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'SSE-C', and 'CSE-KMS'
+    following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'DSSE-KMS', 'SSE-C', and 'CSE-KMS'
   </description>
 </property>
 
 <property>
   <name>fs.s3a.encryption.key</name>
   <description>Specific encryption key to use if fs.s3a.encryption.algorithm
-    has been set to 'SSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
+    has been set to 'SSE-KMS', 'DSSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
     , the value of this property should be the Base64 encoded key. If you are
     using SSE-KMS and leave this property empty, you'll be using your default's
     S3 KMS key, otherwise you should set this property to the specific KMS key
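For context, DSSE-KMS is S3's dual-layer server-side encryption with KMS keys. A hedged usage sketch of the two properties together (the bucket name and key ARN are placeholders), passed as generic options on the command line:

hadoop fs \
  -D fs.s3a.encryption.algorithm=DSSE-KMS \
  -D fs.s3a.encryption.key=arn:aws:kms:us-east-1:111122223333:key/EXAMPLE-KEY-ID \
  -put data.bin s3a://example-bucket/data.bin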

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java

Lines changed: 3 additions & 1 deletion
@@ -63,6 +63,7 @@
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.Daemon;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -661,7 +662,8 @@ public static boolean isValidName(String src) {
     for (int i = 0; i < components.length; i++) {
       String element = components[i];
       if (element.equals(".") ||
-          (element.contains(":")) ||
+          // For Windows, we must allow the : in the drive letter.
+          (!Shell.WINDOWS && i == 1 && element.contains(":")) ||
           (element.contains("/"))) {
         return false;
       }

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java

Lines changed: 28 additions & 3 deletions
@@ -955,7 +955,10 @@ public static class MapOutputBuffer<K extends Object, V extends Object>
         new ArrayList<SpillRecord>();
     private int totalIndexCacheMemory;
     private int indexCacheMemoryLimit;
+    private int spillFilesCountLimit;
     private static final int INDEX_CACHE_MEMORY_LIMIT_DEFAULT = 1024 * 1024;
+    private static final int SPILL_FILES_COUNT_LIMIT_DEFAULT = -1;
+    private static final int SPILL_FILES_COUNT_UNBOUNDED_LIMIT_VALUE = -1;
 
     private MapTask mapTask;
     private MapOutputFile mapOutputFile;
@@ -984,10 +987,17 @@ public void init(MapOutputCollector.Context context
           MRJobConfig.DEFAULT_IO_SORT_MB);
       indexCacheMemoryLimit = job.getInt(JobContext.INDEX_CACHE_MEMORY_LIMIT,
           INDEX_CACHE_MEMORY_LIMIT_DEFAULT);
+      spillFilesCountLimit = job.getInt(JobContext.SPILL_FILES_COUNT_LIMIT,
+          SPILL_FILES_COUNT_LIMIT_DEFAULT);
       if (spillper > (float)1.0 || spillper <= (float)0.0) {
         throw new IOException("Invalid \"" + JobContext.MAP_SORT_SPILL_PERCENT +
             "\": " + spillper);
       }
+      if(spillFilesCountLimit != SPILL_FILES_COUNT_UNBOUNDED_LIMIT_VALUE
+          && spillFilesCountLimit < 0) {
+        throw new IOException("Invalid value for \"" + JobContext.SPILL_FILES_COUNT_LIMIT + "\", " +
+            "current value: " + spillFilesCountLimit);
+      }
       if ((sortmb & 0x7FF) != sortmb) {
         throw new IOException(
             "Invalid \"" + JobContext.IO_SORT_MB + "\": " + sortmb);
@@ -1698,7 +1708,7 @@ private void sortAndSpill() throws IOException, ClassNotFoundException,
             spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
       }
       LOG.info("Finished spill " + numSpills);
-      ++numSpills;
+      incrementNumSpills();
     } finally {
       if (out != null) out.close();
       if (partitionOut != null) {
@@ -1774,7 +1784,7 @@ private void spillSingleRecord(final K key, final V value,
         totalIndexCacheMemory +=
             spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
       }
-      ++numSpills;
+      incrementNumSpills();
     } finally {
       if (out != null) out.close();
       if (partitionOut != null) {
@@ -2022,14 +2032,29 @@ private void sameVolRename(Path srcPath,
       if (!dst.getParentFile().exists()) {
         if (!dst.getParentFile().mkdirs()) {
           throw new IOException("Unable to rename " + src + " to "
-              + dst + ": couldn't create parent directory");
+            + dst + ": couldn't create parent directory");
         }
       }
 
       if (!src.renameTo(dst)) {
         throw new IOException("Unable to rename " + src + " to " + dst);
       }
     }
+
+    /**
+     * Increments numSpills local counter by taking into consideration
+     * the max limit on spill files being generated by the job.
+     * If limit is reached, this function throws an IOException.
+     */
+    private void incrementNumSpills() throws IOException {
+      ++numSpills;
+      if(spillFilesCountLimit != SPILL_FILES_COUNT_UNBOUNDED_LIMIT_VALUE
+          && numSpills > spillFilesCountLimit) {
+        throw new IOException("Too many spill files got created, control it with " +
+            "mapreduce.task.spill.files.count.limit, current value: " + spillFilesCountLimit +
+            ", current spill count: " + numSpills);
+      }
+    }
   } // MapOutputBuffer
 
   /**

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

Lines changed: 1 addition & 0 deletions
@@ -323,6 +323,7 @@ public interface MRJobConfig {
   public static final int DEFAULT_IO_SORT_MB = 100;
 
   public static final String INDEX_CACHE_MEMORY_LIMIT = "mapreduce.task.index.cache.limit.bytes";
+  String SPILL_FILES_COUNT_LIMIT = "mapreduce.task.spill.files.count.limit";
 
   public static final String PRESERVE_FAILED_TASK_FILES = "mapreduce.task.files.preserve.failedtasks";
 
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

Lines changed: 9 additions & 0 deletions
@@ -62,6 +62,15 @@
     set to less than .5</description>
 </property>
 
+<property>
+  <name>mapreduce.task.spill.files.count.limit</name>
+  <value>-1</value>
+  <description>Number of spill files that can be created by a MapTask.
+  After breaching this, task will fail. Default value for this config is -1
+  which indicates that there is no limit on number of spill files being
+  created</description>
+</property>
+
 <property>
   <name>mapreduce.job.local-fs.single-disk-limit.bytes</name>
   <value>-1</value>
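Taken together with the MapTask change above, a map task now fails fast with an IOException once its spill count exceeds this limit. A hedged usage sketch (the example jar, job, paths, and limit value are illustrative): the property can be set per job through the standard generic options.

# Fail any map task that creates more than 100 spill files
hadoop jar hadoop-mapreduce-examples.jar wordcount \
  -D mapreduce.task.spill.files.count.limit=100 \
  /input /output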
