#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is used for fetching the standard Hadoop metrics which the
# Dynamometer NameNode generates during its execution.
# Those metrics are uploaded onto HDFS when the Dynamometer application completes.
# This script will download them locally and parse out the specified metric for
# the given time period. This is useful to, for example, isolate only the metrics
# produced during the workload replay portion of a job. For this, specify startTimeMs
# as the start time of the workload job (which it logs during execution) and
# periodMinutes as the period (in minutes) of the replay.
  24. if [ $# -lt 5 ]; then
  25. echo "Usage:"
  26. echo "./parse-metrics.sh applicationID outputFileName startTimeMs periodMinutes metricName [ context ] [ isCounter ]"
  27. echo "If no file namenode_metrics_{applicationID} is present in the working directory,"
  28. echo "attempts to download one from HDFS for applicationID. Filters values"
  29. echo "for the specified metric, during the range"
  30. echo "(startTimeMs, startTimeMs + periodMinutes) optionally filtering on the context as well"
  31. echo "(which is just applied as a regex search across the metric line output)"
  32. echo "and outputs CSV pairs of (seconds_since_start_time,value)."
  33. echo "If isCounter is true, treats the metrics as a counter and outputs per-second rate values."
  34. exit 1
  35. fi
  36. appId="$1"
  37. output="$2"
  38. start_ts="$3"
  39. period_minutes="$4"
  40. metric="$5"
  41. context="$6"
  42. is_counter="$7"
  43. localFile="namenode_metrics_$appId"
  44. if [ ! -f "$localFile" ]; then
  45. remoteFile=".dynamometer/$appId/namenode_metrics"
  46. echo "Downloading file from HDFS: $remoteFile"
  47. if ! hdfs dfs -copyToLocal "$remoteFile" "$localFile"; then
  48. exit 1
  49. fi
  50. fi
  51. read -d '' -r awk_script <<'EOF'
  52. BEGIN {
  53. metric_regex="[[:space:]]"metric"=([[:digit:].E]+)";
  54. end_ts=start_ts+(period_minutes*60*1000)
  55. last_val=0
  56. last_ts=start_ts
  57. }
  58. "true" ~ is_counter && $0 ~ metric_regex && $0 ~ context && $1 < start_ts {
  59. match($0, metric_regex, val_arr);
  60. last_val=val_arr[1]
  61. last_ts=$1
  62. }
  63. $0 ~ metric_regex && $0 ~ context && $1 >= start_ts && $1 <= end_ts {
  64. match($0, metric_regex, val_arr);
  65. val=val_arr[1]
  66. if (is_counter == "true") {
  67. tmp=val
  68. val=val-last_val
  69. val=val/(($1-last_ts)/1000)
  70. last_ts=$1
  71. last_val=tmp
  72. }
  73. printf("%.0f,%.6f\n", ($0-start_ts)/1000, val)
  74. }
  75. EOF
  76. gawk -v metric="$metric" -v context="$context" -v start_ts="$start_ts" \
  77. -v period_minutes="$period_minutes" -v is_counter="$is_counter" -v OFS="," "$awk_script" "$localFile" > "$output"