#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script is used for fetching the standard Hadoop metrics which the
# Dynamometer NameNode generates during its execution. These metrics are
# uploaded onto HDFS when the Dynamometer application completes. This script
# will download them locally and parse out the specified metric for the given
# time period. This is useful to, for example, isolate only the metrics
# produced during the workload replay portion of a job. For this, specify
# startTimeMs as the start time of the workload job (which it logs during
# execution) and periodMinutes as the period (in minutes) of the replay.
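
# Example invocation (the application ID and timestamp shown are hypothetical;
# substitute the values from your own Dynamometer run):
#
#   ./parse-metrics.sh application_1234567890123_0001 files-created.csv \
#       1583360400000 30 FilesCreated NameNodeActivity true
#
# This would write CSV pairs of (seconds_since_start,per_second_rate) for the
# FilesCreated counter over the 30 minutes following startTimeMs.
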
if [ $# -lt 5 ]; then
  echo "Usage:"
  echo "./parse-metrics.sh applicationID outputFileName startTimeMs periodMinutes metricName [ context ] [ isCounter ]"
  echo "If no file namenode_metrics_{applicationID} is present in the working directory,"
  echo "attempts to download one from HDFS for applicationID. Filters values"
  echo "for the specified metric during the range"
  echo "(startTimeMs, startTimeMs + periodMinutes), optionally filtering on the context as well"
  echo "(which is just applied as a regex search across the metric line output),"
  echo "and outputs CSV pairs of (seconds_since_start_time,value)."
  echo "If isCounter is true, treats the metric as a counter and outputs per-second rate values."
  exit 1
fi

appId="$1"
output="$2"
start_ts="$3"
period_minutes="$4"
metric="$5"
context="$6"
is_counter="$7"

localFile="namenode_metrics_$appId"
if [ ! -f "$localFile" ]; then
  remoteFile=".dynamometer/$appId/namenode_metrics"
  echo "Downloading file from HDFS: $remoteFile"
  if ! hdfs dfs -copyToLocal "$remoteFile" "$localFile"; then
    echo "Unable to download metrics file from HDFS; exiting" >&2
    exit 1
  fi
fi
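
# If the download fails, the metrics file may not have been uploaded for this
# run. A quick way to check what the application staged on HDFS, for example:
#
#   hdfs dfs -ls ".dynamometer/$appId"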

# The awk script below scans each metrics line, whose first field is a
# millisecond timestamp, for an occurrence of " <metric>=<value>".
read -d '' -r awk_script <<'EOF'
BEGIN {
  # Match " metric=value" where value may use scientific notation (e.g. 1.5E7)
  metric_regex="[[:space:]]"metric"=([[:digit:].E]+)";
  end_ts=start_ts+(period_minutes*60*1000)
  last_val=0
  last_ts=start_ts
}
# Before the window starts, keep tracking the most recent value so the first
# in-window rate for a counter has a baseline to diff against.
is_counter == "true" && $0 ~ metric_regex && $0 ~ context && $1 < start_ts {
  match($0, metric_regex, val_arr);
  last_val=val_arr[1]
  last_ts=$1
}
$0 ~ metric_regex && $0 ~ context && $1 >= start_ts && $1 <= end_ts {
  # gawk's three-argument match() captures the value into val_arr[1]
  match($0, metric_regex, val_arr);
  val=val_arr[1]
  if (is_counter == "true") {
    # Convert the monotonic counter into a per-second rate over the interval
    # since the previous sample.
    tmp=val
    val=val-last_val
    val=val/(($1-last_ts)/1000)
    last_ts=$1
    last_val=tmp
  }
  printf("%.0f,%.6f\n", ($1-start_ts)/1000, val)
}
EOF
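
# As an illustration (the exact line format is an assumption based on the
# standard Hadoop file metrics sink; fields vary with configuration), an input
# line such as
#
#   1583360405000 namenode.NameNodeActivity: Context=dfs, ..., FilesCreated=42
#
# falling 5 seconds into the window would yield the CSV row "5,42.000000", or
# a per-second rate in place of the raw value when isCounter is true.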

gawk -v metric="$metric" -v context="$context" -v start_ts="$start_ts" \
  -v period_minutes="$period_minutes" -v is_counter="$is_counter" -v OFS="," \
  "$awk_script" "$localFile" > "$output"