Browse Source

HDFS-14640. [Dynamometer] Fix TestDynamometerInfra failure. Contributed by Erik Krogen.

Erik Krogen 5 years ago
parent
commit
32925d04d9

+ 10 - 0
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml

@@ -128,6 +128,16 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <forkedProcessTimeoutInSeconds>1800</forkedProcessTimeoutInSeconds>
+          <environmentVariables>
+            <JAVA_HOME>${java.home}</JAVA_HOME>
+          </environmentVariables>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

+ 1 - 1
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/bash/create-slim-hadoop-tar.sh

@@ -36,7 +36,7 @@ hadoopTarTmp="$hadoopTar.temporary"
 mkdir -p "$hadoopTarTmp"
 
 tar xzf "$hadoopTar" -C "$hadoopTarTmp"
-baseDir="$(find -H "$hadoopTarTmp" -depth 1 -type d | head -n 1)" # Should only be one subdir
+baseDir="$(find -H "$hadoopTarTmp" -maxdepth 1 -mindepth 1 -type d | head -n 1)" # Should only be one subdir
 hadoopShare="$baseDir/share/hadoop"
 
 # Remove unnecessary files

+ 2 - 2
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/bash/upload-fsimage.sh

@@ -38,12 +38,12 @@ else
   name_dir="$(pwd)"
 fi
 
-image_file_count="$(find -H "${name_dir}" -depth 1 -name "fsimage_*$image_txid" -type f | wc -l)"
+image_file_count="$(find -H "${name_dir}" -maxdepth 1 -mindepth 1 -name "fsimage_*$image_txid" -type f | wc -l)"
 if [[ "$image_file_count" != 1 ]]; then
   echo "Error; found $image_file_count matching fsimage files."
   exit 1
 fi
-image_file="$(find -H "${name_dir}" -depth 1 -name "fsimage_*$image_txid" -type f)"
+image_file="$(find -H "${name_dir}" -maxdepth 1 -mindepth 1 -name "fsimage_*$image_txid" -type f)"
 image_file_name="$(basename "${image_file}")"
 echo "Using fsimage: $image_file_name"
 image_file_md5="${image_file}.md5"

+ 3 - 6
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/resources/start-component.sh

@@ -75,7 +75,7 @@ chmod 755 "$baseDir"
 chmod 700 "$pidDir"
 
 # Set Hadoop variables for component
-hadoopHome="$(find -H "$(pwd)/hadoopBinary" -depth 1 -type d | head -n 1)"
+hadoopHome="$(find -H "$(pwd)/hadoopBinary" -maxdepth 1 -mindepth 1 -type d | head -n 1)"
 # Save real environment for later
 hadoopConfOriginal=${HADOOP_CONF_DIR:-$confDir}
 hadoopHomeOriginal=${HADOOP_HOME:-$hadoopHome}
@@ -252,11 +252,8 @@ EOF
   rm -rf "$nameDir" "$editsDir" "$checkpointDir"
   mkdir -p "$nameDir/current" "$editsDir/current" "$checkpointDir"
   chmod -R 700 "$nameDir" "$editsDir" "$checkpointDir"
-  fsImageFile="$(find "$(pwd)" -depth 1 -name "fsimage_*" | tail -n 1)"
-  fsImageMD5File="$(find "$(pwd)" -depth 1 -name "fsimage_*.md5" | tail -n 1)"
-  ln -snf "$fsImageFile" "$nameDir/current/$(basename "$fsImageFile")"
-  ln -snf "$fsImageMD5File" "$nameDir/current/$(basename "$fsImageMD5File")"
-  ln -snf "$(pwd)/VERSION" "$nameDir/current/VERSION"
+  # Link all of the fsimage files (and the VERSION file) into the name dir
+  find "$(pwd)" -maxdepth 1 -mindepth 1 \( -name "fsimage_*" -or -name "VERSION" \) -execdir ln -snf "$(pwd)/{}" "$nameDir/current/{}" \;
   chmod 700 "$nameDir"/current/*
 
   namenodeConfigs=(

+ 18 - 3
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/test/java/org/apache/hadoop/tools/dynamometer/TestDynamometerInfra.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.tools.dynamometer;
 import com.google.common.collect.Sets;
 import java.util.Optional;
 import java.util.concurrent.TimeoutException;
+import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
 import org.apache.hadoop.test.PlatformAssumptions;
 import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditLogDirectParser;
@@ -315,16 +316,30 @@ public class TestDynamometerInfra {
 
     awaitApplicationStartup();
 
-    Supplier<Boolean> falseSupplier = () -> false;
+    long startTime = System.currentTimeMillis();
+    long maxWaitTimeMs = TimeUnit.MINUTES.toMillis(10);
+    Supplier<Boolean> exitCheckSupplier = () -> {
+      if (System.currentTimeMillis() - startTime > maxWaitTimeMs) {
+        // Wait at most 10 minutes for the NameNode to start and be ready
+        return true;
+      }
+      try {
+        // Exit immediately if the YARN app fails
+        return yarnClient.getApplicationReport(infraAppId)
+            .getYarnApplicationState() == YarnApplicationState.FAILED;
+      } catch (IOException | YarnException e) {
+        return true;
+      }
+    };
     Optional<Properties> namenodeProperties = DynoInfraUtils
-        .waitForAndGetNameNodeProperties(falseSupplier, localConf,
+        .waitForAndGetNameNodeProperties(exitCheckSupplier, localConf,
             client.getNameNodeInfoPath(), LOG);
     if (!namenodeProperties.isPresent()) {
       fail("Unable to fetch NameNode properties");
     }
 
     DynoInfraUtils.waitForNameNodeReadiness(namenodeProperties.get(), 3, false,
-        falseSupplier, localConf, LOG);
+        exitCheckSupplier, localConf, LOG);
 
     assertClusterIsFunctional(localConf, namenodeProperties.get());
 

+ 2 - 2
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/bash/parse-start-timestamp.sh

@@ -38,12 +38,12 @@ else
   edits_dir="$(pwd)"
 fi
 
-edits_file_count="$(find -H "${edits_dir}" -depth 1 -type f -name "edits_*-*$image_txid" | wc -l)"
+edits_file_count="$(find -H "${edits_dir}" -maxdepth 1 -type f -name "edits_*-*$image_txid" | wc -l)"
 if [[ "$edits_file_count" != 1 ]]; then
   echo "Error; found $edits_file_count matching edit files."
   exit 1
 fi
-edits_file="$(find -H "${edits_dir}" -depth 1 -type f -name "edits_*-*$image_txid")"
+edits_file="$(find -H "${edits_dir}" -maxdepth 1 -type f -name "edits_*-*$image_txid")"
 
 # Shellcheck complains about the $ in the single-quote because it won't expand, but this is intentional
 # shellcheck disable=SC2016