Explorar o código

HADOOP-19209. Update and optimize hadoop-runner

Doroszlai, Attila hai 10 meses
pai
achega
5bb6f45034
Modificáronse 6 ficheiros con 113 adicións e 52 borrados
  1. 4 0
      .dockerignore
  2. 1 0
      .gitignore
  3. 79 26
      Dockerfile
  4. 29 8
      build.sh
  5. 0 0
      krb5.conf
  6. 0 18
      scripts/.bashrc

+ 4 - 0
.dockerignore

@@ -0,0 +1,4 @@
+build
+build.sh
+LICENSE
+README.md

+ 1 - 0
.gitignore

@@ -1 +1,2 @@
+build
 __pycache__

+ 79 - 26
Dockerfile

@@ -14,33 +14,86 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM centos
-RUN rpm -Uvh https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-RUN yum install -y sudo python2-pip wget nmap-ncat jq java-11-openjdk
-RUN pip install robotframework
-RUN wget -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64
-RUN chmod +x /usr/local/bin/dumb-init
-RUN mkdir -p /etc/security/keytabs && chmod -R a+wr /etc/security/keytabs 
-ADD https://repo.maven.apache.org/maven2/org/jboss/byteman/byteman/4.0.4/byteman-4.0.4.jar /opt/byteman.jar
-RUN chmod o+r /opt/byteman.jar
-RUN mkdir -p /opt/profiler && \
-    cd /opt/profiler && \
-    curl -L https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.5/async-profiler-1.5-linux-x64.tar.gz | tar xvz
-ENV JAVA_HOME=/usr/lib/jvm/jre/
-ENV PATH $PATH:/opt/hadoop/bin
-
-RUN groupadd --gid 1000 hadoop
-RUN useradd --uid 1000 hadoop --gid 100 --home /opt/hadoop
-RUN chmod 755 /opt/hadoop
-RUN echo "hadoop ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
-RUN chown hadoop /opt
-ADD scripts /opt/
-ADD scripts/krb5.conf /etc/
-RUN yum install -y krb5-workstation
-RUN mkdir -p /etc/hadoop && mkdir -p /var/log/hadoop && chmod 1777 /etc/hadoop && chmod 1777 /var/log/hadoop
-ENV HADOOP_LOG_DIR=/var/log/hadoop
+ARG JAVA_VERSION=11
+
+# Ubuntu 22.04 LTS
+FROM eclipse-temurin:${JAVA_VERSION}-jammy
+
+# OS packages; apt-get has a stable CLI (apt does not), and the package lists are dropped in the same layer
+RUN apt-get update -q && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      jq \
+      krb5-user \
+      ncat \
+      python3-pip \
+      python-is-python3 \
+      sudo \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Robot Framework for testing; --no-cache-dir keeps pip's cache out of the
+# layer, so no follow-up rm of ~/.cache/pip is needed
+RUN pip install --no-cache-dir robotframework
+
+# dumb-init as PID 1 (see ENTRYPOINT); the download is checked against a pinned SHA-256 per architecture
+RUN set -eux; \
+    ARCH="$(arch)"; \
+    v=1.2.5 ; \
+    url="https://github.com/Yelp/dumb-init/releases/download/v${v}/dumb-init_${v}_${ARCH}"; \
+    case "${ARCH}" in \
+      x86_64) \
+        sha256='e874b55f3279ca41415d290c512a7ba9d08f98041b28ae7c2acb19a545f1c4df'; \
+        ;; \
+      aarch64) \
+        sha256='b7d648f97154a99c539b63c55979cd29f005f88430fb383007fe3458340b795e'; \
+        ;; \
+      *) echo "Unsupported architecture: ${ARCH}"; exit 1 ;; \
+    esac \
+    && curl -fsSL "${url}" -o dumb-init \
+    && echo "${sha256} *dumb-init" | sha256sum -c - \
+    && chmod +x dumb-init \
+    && mv dumb-init /usr/local/bin/dumb-init
+
+# Byteman jar for development; -f makes curl fail on an HTTP error instead of saving the error page as the jar
+RUN curl -fsSLo /opt/byteman.jar https://repo.maven.apache.org/maven2/org/jboss/byteman/byteman/4.0.23/byteman-4.0.23.jar \
+    && chmod o+r /opt/byteman.jar
+
+# async-profiler for development profiling. The tarball is saved to a file before extraction because
+# RUN's default shell is not started with pipefail, so "curl | tar" would not report a failed curl by itself.
+RUN set -eux; \
+    ARCH="$(arch)"; \
+    v=2.8.3; \
+    case "${ARCH}" in \
+      x86_64) platform=x64 ;; \
+      aarch64) platform=arm64 ;; \
+      *) echo "Unsupported architecture: ${ARCH}"; exit 1 ;; \
+    esac; \
+    url="https://github.com/jvm-profiling-tools/async-profiler/releases/download/v${v}/async-profiler-${v}-linux-${platform}.tar.gz"; \
+    curl -fsSL "${url}" -o /tmp/profiler.tar.gz; \
+    tar -xzf /tmp/profiler.tar.gz; \
+    rm /tmp/profiler.tar.gz; \
+    mv async-profiler-* /opt/profiler; \
+    chmod -R go+rX /opt/profiler
+
+RUN mkdir -p /etc/security/keytabs \
+    && chmod -R a+wr /etc/security/keytabs
+
+RUN groupadd --gid 1000 hadoop \
+    && useradd --uid 1000 hadoop --gid 100 --home /opt/hadoop \
+    && mkdir -p /opt/hadoop \
+    && chown hadoop:users /opt/hadoop \
+    && echo "hadoop ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
+
+RUN mkdir -p /etc/hadoop \
+    && chmod 1777 /etc/hadoop \
+    && mkdir -p /var/log/hadoop \
+    && chmod 1777 /var/log/hadoop
+
 ENV HADOOP_CONF_DIR=/etc/hadoop
+ENV HADOOP_LOG_DIR=/var/log/hadoop
+ENV PATH=$PATH:/opt/hadoop/bin
+
+COPY --chown=hadoop --chmod=755 scripts /opt/
+COPY --chmod=644 krb5.conf /etc/
+
 WORKDIR /opt/hadoop
-RUN mkdir /data && chmod 1777 /data
 USER hadoop
 ENTRYPOINT ["/usr/local/bin/dumb-init", "--", "/opt/starter.sh"]

+ 29 - 8
build.sh

@@ -15,13 +15,34 @@
 # limitations under the License.
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-set -e
+
+set -eu
+
 mkdir -p build
-if [ ! -d "$DIR/build/apache-rat-0.12" ]; then
-   wget 'https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=creadur/apache-rat-0.13/apache-rat-0.13-bin.tar.gz' -O $DIR/build/apache-rat.tar.gz
-	cd $DIR/build
-	tar zvxf apache-rat.tar.gz
-	cd -
+
+rat_version=0.16.1
+
+if [ ! -d "$DIR/build/apache-rat-${rat_version}" ]; then
+  url="https://dlcdn.apache.org/creadur/apache-rat-${rat_version}/apache-rat-${rat_version}-bin.tar.gz"
+  output="$DIR/build/apache-rat.tar.gz"
+  mkdir -p "$DIR/build"  # download target must exist even when the script is invoked from another directory
+  if command -v wget > /dev/null 2>&1; then
+    wget -O "$output" "$url"
+  elif command -v curl > /dev/null 2>&1; then
+    curl -fLSs -o "$output" "$url"
+  else
+    echo "Neither wget nor curl found; cannot download Apache RAT" >&2
+    exit 1
+  fi
+  tar zvxf "$output" -C "$DIR/build"  # -C extracts in place, no cd / cd - round trip
+fi
+
+java -jar "$DIR/build/apache-rat-${rat_version}/apache-rat-${rat_version}.jar" "$DIR" -e .dockerignore -e public -e "apache-rat-${rat_version}" -e .git -e .gitignore
+
+if [[ $# -ge 1 ]]; then
+  for v in "$@"; do  # one image per requested JDK version; context is the script's directory, not the caller's cwd
+    docker build --progress plain --build-arg "JAVA_VERSION=$v" -t "apache/hadoop-runner:jdk${v}-dev" "$DIR"
+  done
+else
+  docker build --progress plain -t apache/hadoop-runner:dev "$DIR"
 fi
-java -jar $DIR/build/apache-rat-0.13/apache-rat-0.13.jar $DIR -e public -e apache-rat-0.12 -e .git -e .gitignore
-docker build -t apache/hadoop-runner .

+ 0 - 0
scripts/krb5.conf → krb5.conf


+ 0 - 18
scripts/.bashrc

@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-PS1="\u@\h: \w> "