Browse Source

HADOOP-18135. Produce Windows binaries of Hadoop (#6673)

This PR enables one to create the Hadoop
release tarball on Windows, complete with
the native binaries (including winutils.exe).
This PR contains the following changes -

* Prevents splitting during array element
  expansion - this is needed since we need
  to pass the arguments correctly to maven.
* Installs Python 3.11.8 and pip in the
  Windows Docker image used for building
  Hadoop.
* Changes the pom file so that Maven invokes
  the releasedocmaker script through
  bash.exe on Windows.
Gautham B A 1 year ago
parent
commit
f7bb4f1595

+ 14 - 0
BUILDING.txt

@@ -653,6 +653,20 @@ container.
     -Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo^
     -Dwindows.build.hdfspp.dll=off -Dwindows.no.sasl=on -Duse.platformToolsetVersion=v142
 
+Building the release tarball:
+Assuming that we're still running in the Docker container hadoop-windows-10-builder, run the
+following commands to create the Apache Hadoop release tarball -
+
+> set IS_WINDOWS=1
+> set MVN_ARGS="-Dshell-executable=C:\Git\bin\bash.exe -Dhttps.protocols=TLSv1.2 -Pnative-win -Drequire.openssl -Dopenssl.prefix=C:\vcpkg\installed\x64-windows -Dcmake.prefix.path=C:\vcpkg\installed\x64-windows -Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo -Dwindows.build.hdfspp.dll=off -Duse.platformToolsetVersion=v142 -Dwindows.no.sasl=on -DskipTests -DskipDocs -Drequire.test.libhadoop"
+> C:\Git\bin\bash.exe C:\hadoop\dev-support\bin\create-release --mvnargs=%MVN_ARGS%
+
+Note:
+If the build fails due to an issue with long paths, rename the Hadoop root directory to a
+single letter (like 'h') and rebuild -
+
+> C:\Git\bin\bash.exe C:\h\dev-support\bin\create-release --mvnargs=%MVN_ARGS%
+
 ----------------------------------------------------------------------------------
 Building distributions:
 

+ 12 - 7
dev-support/bin/create-release

@@ -418,7 +418,8 @@ function option_parse
     fi
   fi
   if [ -n "$MVNEXTRAARGS" ]; then
-    MVN_ARGS+=("$MVNEXTRAARGS")
+    # shellcheck disable=SC2206
+    MVN_ARGS+=(${MVNEXTRAARGS[*]})
   fi
 
   if [[ "${SECURITYRELEASE}" = true ]]; then
@@ -552,10 +553,12 @@ function makearelease
   mkdir -p "${LOGDIR}"
 
   # Install the Hadoop maven plugins first
-  run_and_redirect "${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" "${MVN_ARGS[@]}" -pl hadoop-maven-plugins -am clean install
+  # shellcheck disable=SC2086
+  run_and_redirect "${LOGDIR}/mvn_install_maven_plugins.log" "${MVN}" ${MVN_ARGS[*]} -pl hadoop-maven-plugins -am clean install
 
   # mvn clean for sanity
-  run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" "${MVN_ARGS[@]}" clean
+  # shellcheck disable=SC2086
+  run_and_redirect "${LOGDIR}/mvn_clean.log" "${MVN}" ${MVN_ARGS[*]} clean
 
   # Create staging dir for release artifacts
   run mkdir -p "${ARTIFACTS_DIR}"
@@ -563,7 +566,8 @@ function makearelease
   big_console_header "Apache RAT Check"
 
   # Create RAT report
-  run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" "${MVN_ARGS[@]}" apache-rat:check
+  # shellcheck disable=SC2086
+  run_and_redirect "${LOGDIR}/mvn_apache_rat.log" "${MVN}" ${MVN_ARGS[*]} apache-rat:check
 
   big_console_header "Maven Build and Install"
 
@@ -577,9 +581,9 @@ function makearelease
   fi
 
   # Create SRC and BIN tarballs for release,
-  # shellcheck disable=SC2046
+  # shellcheck disable=SC2046,SC2086
   run_and_redirect "${LOGDIR}/mvn_${target}.log" \
-    "${MVN}" "${MVN_ARGS[@]}" ${target} \
+    "${MVN}" ${MVN_ARGS[*]} ${target} \
       -Pdist,src,yarn-ui \
       "${signflags[@]}" \
       -DskipTests -Dtar $(hadoop_native_flags)
@@ -608,8 +612,9 @@ function makearelease
   # we need to do install again so that jdiff and
   # a few other things get registered in the maven
   # universe correctly
+  # shellcheck disable=SC2206,SC2086
   run_and_redirect "${LOGDIR}/mvn_site.log" \
-    "${MVN}" "${MVN_ARGS[@]}" install \
+    "${MVN}" ${MVN_ARGS[*]} install \
       site site:stage \
       -DskipTests \
       -DskipShade \

+ 16 - 5
dev-support/docker/Dockerfile_windows_10

@@ -102,10 +102,21 @@ RUN powershell Copy-Item -Path "C:\LibXXHash\usr\bin\*.dll" -Destination "C:\Pro
 RUN powershell Copy-Item -Path "C:\LibZStd\usr\bin\*.dll" -Destination "C:\Program` Files\Git\usr\bin"
 RUN powershell Copy-Item -Path "C:\RSync\usr\bin\*" -Destination "C:\Program` Files\Git\usr\bin"
 
-# Install Python 3.10.11.
-RUN powershell Invoke-WebRequest -Uri https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip -OutFile $Env:TEMP\python-3.10.11-embed-amd64.zip
-RUN powershell Expand-Archive -Path $Env:TEMP\python-3.10.11-embed-amd64.zip -DestinationPath "C:\Python3"
-RUN powershell New-Item -ItemType HardLink -Value "C:\Python3\python.exe" -Path "C:\Python3\python3.exe"
+COPY pkg-resolver pkg-resolver
+
+## Install Python 3.11.8.
+# The Python installation steps below are derived from -
+# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
+ENV PYTHONIOENCODING UTF-8
+ENV PYTHON_VERSION 3.11.8
+ENV PYTHON_PIP_VERSION 24.0
+ENV PYTHON_SETUPTOOLS_VERSION 65.5.1
+ENV PYTHON_GET_PIP_URL https://github.com/pypa/get-pip/raw/dbf0c85f76fb6e1ab42aa672ffca6f0a675d9ee4/public/get-pip.py
+ENV PYTHON_GET_PIP_SHA256 dfe9fd5c28dc98b5ac17979a953ea550cec37ae1b47a5116007395bfacff2ab9
+RUN powershell Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+RUN powershell pkg-resolver\install-python.ps1
+RUN powershell pkg-resolver\install-pip.ps1
+RUN powershell pip install python-dateutil
 
 # Create a user HadoopBuilder with basic privileges and use it for building Hadoop on Windows.
 RUN powershell New-LocalUser -Name 'HadoopBuilder' -Description 'User account for building Apache Hadoop' -Password ([securestring]::new()) -AccountNeverExpires -PasswordNeverExpires
@@ -121,12 +132,12 @@ USER HadoopBuilder
 ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
 ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
 ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
+ENV IS_WINDOWS 1
 RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
 RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
 RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
 RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
 RUN setx PATH "%PATH%;C:\ZStd"
-RUN setx path "%PATH%;C:\Python3"
 RUN setx PATH "%PATH%;C:\Program Files\Git\usr\bin"
 
 # We get strange Javadoc errors without this.

+ 44 - 0
dev-support/docker/pkg-resolver/install-pip.ps1

@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The code lines below are derived from -
+# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
+
+Write-Host ('Downloading get-pip.py ({0}) ...' -f $Env:PYTHON_GET_PIP_URL)
+[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
+Invoke-WebRequest -Uri $Env:PYTHON_GET_PIP_URL -OutFile 'get-pip.py'
+Write-Host ('Verifying sha256 ({0}) ...' -f $Env:PYTHON_GET_PIP_SHA256)
+if ((Get-FileHash 'get-pip.py' -Algorithm sha256).Hash -ne $Env:PYTHON_GET_PIP_SHA256) {
+    Write-Host 'FAILED!'
+    exit 1
+}
+
+$Env:PYTHONDONTWRITEBYTECODE = '1'
+
+Write-Host ('Installing pip=={0} ...' -f $Env:PYTHON_PIP_VERSION)
+python get-pip.py `
+    --disable-pip-version-check `
+    --no-cache-dir `
+    --no-compile `
+('pip=={0}' -f $Env:PYTHON_PIP_VERSION) `
+('setuptools=={0}' -f $Env:PYTHON_SETUPTOOLS_VERSION)
+
+Remove-Item get-pip.py -Force
+
+Write-Host 'Verifying pip install ...'
+pip --version
+
+Write-Host 'Complete.'

+ 54 - 0
dev-support/docker/pkg-resolver/install-python.ps1

@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The code lines below are derived from -
+# https://github.com/docker-library/python/blob/105d6f34e7d70aad6f8c3e249b8208efa591916a/3.11/windows/windowsservercore-ltsc2022/Dockerfile
+
+$url = ('https://www.python.org/ftp/python/{0}/python-{1}-amd64.exe' -f ($Env:PYTHON_VERSION -replace '[a-z]+[0-9]*$', ''), $Env:PYTHON_VERSION)
+Write-Host ('Downloading {0} ...' -f $url)
+[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
+Invoke-WebRequest -Uri $url -OutFile 'python.exe'
+
+Write-Host 'Installing ...'
+$exitCode = (Start-Process python.exe -Wait -NoNewWindow -PassThru `
+        -ArgumentList @(
+            '/quiet',
+            'InstallAllUsers=1',
+            'TargetDir=C:\Python',
+            'PrependPath=1',
+            'Shortcuts=0',
+            'Include_doc=0',
+            'Include_pip=0',
+            'Include_test=0'
+        )
+).ExitCode
+if ($exitCode -ne 0) {
+    Write-Host ('Running python installer failed with exit code: {0}' -f $exitCode)
+    Get-ChildItem $Env:TEMP | Sort-Object -Descending -Property LastWriteTime | Select-Object -First 1 | Get-Content
+    exit $exitCode
+}
+
+# the installer updated PATH, so we should refresh our local value
+$Env:PATH = [Environment]::GetEnvironmentVariable('PATH', [EnvironmentVariableTarget]::Machine)
+
+Write-Host 'Verifying install ...'
+Write-Host "python --version $(python --version)"
+
+Write-Host 'Removing ...'
+Remove-Item python.exe -Force
+Remove-Item $Env:TEMP\Python*.log -Force
+
+Write-Host 'Complete.'

+ 2 - 1
hadoop-common-project/hadoop-common/pom.xml

@@ -1095,8 +1095,9 @@
                         <goal>exec</goal>
                     </goals>
                     <configuration>
-                        <executable>${basedir}/../../dev-support/bin/releasedocmaker</executable>
+                        <executable>${shell-executable}</executable>
                         <arguments>
+                            <argument>${basedir}/../../dev-support/bin/releasedocmaker</argument>
                             <argument>--index</argument>
                             <argument>--license</argument>
                             <argument>--outputdir</argument>