# hadoop-validate-setup.sh (4.8 KB)
  1. #!/usr/bin/env bash
  2. # Licensed to the Apache Software Foundation (ASF) under one or more
  3. # contributor license agreements. See the NOTICE file distributed with
  4. # this work for additional information regarding copyright ownership.
  5. # The ASF licenses this file to You under the Apache License, Version 2.0
  6. # (the "License"); you may not use this file except in compliance with
  7. # the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. ###############################################################################
  17. # Run the following jobs to validate a hadoop cluster
  18. ## teragen
  19. ## terasort
  20. ## teravalidate
  21. # If they all pass 0 will be returned and 1 otherwise
  22. # The test will work for both secure and unsecure deploys. If the kerberos-realm
  23. # is passed we will assume that the deploy is secure and proceed with a kinit before
  24. # running the validation jobs.
  25. ################################################################################
  26. bin=`dirname "$0"`
  27. bin=`cd "$bin"; pwd`
  28. DEFAULT_LIBEXEC_DIR="$bin"/../libexec
  29. HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  30. . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
  31. usage() {
  32. echo "
  33. usage: $0 <parameters>
  34. Optional parameters:
  35. -h Display this message
  36. --user=hdfs
  37. --user_keytab=/home/hdfs/hdfs.keytab
  38. --kerberos-realm=KERBEROS.EXAMPLE.COM Set Kerberos realm
  39. "
  40. exit 1
  41. }
  42. OPTS=$(getopt \
  43. -n $0 \
  44. -o '' \
  45. -l 'user:' \
  46. -l 'user-keytab:' \
  47. -l 'kerberos-realm:' \
  48. -o 'h' \
  49. -- "$@")
  50. if [ $? != 0 ] ; then
  51. usage
  52. fi
  53. eval set -- "${OPTS}"
  54. while true ; do
  55. case "$1" in
  56. --user)
  57. TEST_USER=$2; shift 2
  58. AUTOMATED=1
  59. ;;
  60. --user-keytab)
  61. USER_KEYTAB_FILE=$2; shift 2
  62. AUTOMATED=1
  63. ;;
  64. --kerberos-realm)
  65. KERBEROS_REALM=$2; shift 2
  66. AUTOMATED=1
  67. ;;
  68. --)
  69. shift ; break
  70. ;;
  71. *)
  72. echo "Unknown option: $1"
  73. usage
  74. exit 1
  75. ;;
  76. esac
  77. done
  78. #set the hadoop command and the path to the hadoop examples jar
  79. HADOOP_CMD="${HADOOP_PREFIX}/bin/hadoop --config $HADOOP_CONF_DIR"
  80. #find the hadoop examples jar
  81. HADOOP_EXAMPLES_JAR=''
  82. #find under HADOOP_PREFIX (tar ball install)
  83. HADOOP_EXAMPLES_JAR=`find ${HADOOP_PREFIX} -name 'hadoop-examples-*.jar' | head -n1`
  84. #if its not found look under /usr/share/hadoop (rpm/deb installs)
  85. if [ "$HADOOP_EXAMPLES_JAR" == '' ]
  86. then
  87. HADOOP_EXAMPLES_JAR=`find /usr/share/hadoop -name 'hadoop-examples-*.jar' | head -n1`
  88. fi
  89. #if it is still empty then dont run the tests
  90. if [ "$HADOOP_EXAMPLES_JAR" == '' ]
  91. then
  92. echo "Did not find hadoop-examples-*.jar under '${HADOOP_PREFIX} or '/usr/share/hadoop'"
  93. exit 1
  94. fi
  95. # do a kinit if secure
  96. if [ "${KERBEROS_REALM}" != "" ]; then
  97. # Determine kerberos location base on Linux distro.
  98. if [ -e /etc/lsb-release ]; then
  99. KERBEROS_BIN=/usr/bin
  100. else
  101. KERBEROS_BIN=/usr/kerberos/bin
  102. fi
  103. kinit_cmd="su -c '${KERBEROS_BIN}/kinit -kt ${USER_KEYTAB_FILE} ${TEST_USER}' ${TEST_USER}"
  104. echo $kinit_cmd
  105. eval $kinit_cmd
  106. if [ $? -ne 0 ]
  107. then
  108. echo "kinit command did not run successfully."
  109. exit 1
  110. fi
  111. fi
  112. #dir where to store the data on hdfs. The data is relative of the users home dir on hdfs.
  113. PARENT_DIR="validate_deploy_`date +%s`"
  114. TERA_GEN_OUTPUT_DIR="${PARENT_DIR}/tera_gen_data"
  115. TERA_SORT_OUTPUT_DIR="${PARENT_DIR}/tera_sort_data"
  116. TERA_VALIDATE_OUTPUT_DIR="${PARENT_DIR}/tera_validate_data"
  117. #tera gen cmd
  118. TERA_GEN_CMD="su -c '$HADOOP_CMD jar $HADOOP_EXAMPLES_JAR teragen 10000 $TERA_GEN_OUTPUT_DIR' $TEST_USER"
  119. #tera sort cmd
  120. TERA_SORT_CMD="su -c '$HADOOP_CMD jar $HADOOP_EXAMPLES_JAR terasort $TERA_GEN_OUTPUT_DIR $TERA_SORT_OUTPUT_DIR' $TEST_USER"
  121. #tera validate cmd
  122. TERA_VALIDATE_CMD="su -c '$HADOOP_CMD jar $HADOOP_EXAMPLES_JAR teravalidate $TERA_SORT_OUTPUT_DIR $TERA_VALIDATE_OUTPUT_DIR' $TEST_USER"
  123. echo "Starting teragen...."
  124. #run tera gen
  125. echo $TERA_GEN_CMD
  126. eval $TERA_GEN_CMD
  127. if [ $? -ne 0 ]; then
  128. echo "tera gen failed."
  129. exit 1
  130. fi
  131. echo "Teragen passed starting terasort...."
  132. #run tera sort
  133. echo $TERA_SORT_CMD
  134. eval $TERA_SORT_CMD
  135. if [ $? -ne 0 ]; then
  136. echo "tera sort failed."
  137. exit 1
  138. fi
  139. echo "Terasort passed starting teravalidate...."
  140. #run tera validate
  141. echo $TERA_VALIDATE_CMD
  142. eval $TERA_VALIDATE_CMD
  143. if [ $? -ne 0 ]; then
  144. echo "tera validate failed."
  145. exit 1
  146. fi
  147. echo "teragen, terasort, teravalidate passed."
  148. echo "Cleaning the data created by tests: $PARENT_DIR"
  149. CLEANUP_CMD="su -c '$HADOOP_CMD dfs -rmr -skipTrash $PARENT_DIR' $TEST_USER"
  150. echo $CLEANUP_CMD
  151. eval $CLEANUP_CMD
  152. exit 0