|
@@ -0,0 +1,182 @@
|
|
|
+/**
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
+ * distributed with this work for additional information
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
+ * limitations under the License.
|
|
|
+ */
|
|
|
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.LinkedList;
|
|
|
+
|
|
|
+/**
|
|
|
+ * File size distribution visitor.
|
|
|
+ *
|
|
|
+ * <h3>Description.</h3>
|
|
|
+ * This is the tool for analyzing file sizes in the namespace image.
|
|
|
+ * In order to run the tool one should define a range of integers
|
|
|
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
|
|
|
+ * The range of integers is divided into segments of size <tt>step</tt>:
|
|
|
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
|
|
|
+ * and the visitor calculates how many files in the system fall into
|
|
|
+ * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
|
|
|
+ * Note that files larger than <tt>maxSize</tt> always fall into
|
|
|
+ * the very last segment.
|
|
|
+ *
|
|
|
+ * <h3>Input.</h3>
|
|
|
+ * <ul>
|
|
|
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
|
|
|
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
|
|
|
+ * sizes considered by the visitor;</li>
|
|
|
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
|
|
|
+ * </ul>
|
|
|
+ *
|
|
|
+ * <h3>Output.</h3>
|
|
|
+ * The output file is formatted as a tab separated two column table:
|
|
|
+ * Size and NumFiles. Where Size represents the start of the segment,
|
|
|
+ * and numFiles is the number of files form the image which size falls in
|
|
|
+ * this segment.
|
|
|
+ */
|
|
|
+class FileDistributionVisitor extends TextWriterImageVisitor {
|
|
|
+ final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
|
|
|
+
|
|
|
+ private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
|
|
|
+ private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
|
|
|
+
|
|
|
+ private int[] distribution;
|
|
|
+ private long maxSize;
|
|
|
+ private int step;
|
|
|
+
|
|
|
+ private int totalFiles;
|
|
|
+ private int totalDirectories;
|
|
|
+ private int totalBlocks;
|
|
|
+ private long totalSpace;
|
|
|
+ private long maxFileSize;
|
|
|
+
|
|
|
+ private FileContext current;
|
|
|
+
|
|
|
+ private boolean inInode = false;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * File or directory information.
|
|
|
+ */
|
|
|
+ private static class FileContext {
|
|
|
+ String path;
|
|
|
+ long fileSize;
|
|
|
+ int numBlocks;
|
|
|
+ int replication;
|
|
|
+ }
|
|
|
+
|
|
|
+ public FileDistributionVisitor(String filename,
|
|
|
+ long maxSize,
|
|
|
+ int step) throws IOException {
|
|
|
+ super(filename, false);
|
|
|
+ this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
|
|
|
+ this.step = (step == 0 ? INTERVAL_DEFAULT : step);
|
|
|
+ long numIntervals = this.maxSize / this.step;
|
|
|
+ if(numIntervals >= Integer.MAX_VALUE)
|
|
|
+ throw new IOException("Too many distribution intervals " + numIntervals);
|
|
|
+ this.distribution = new int[1 + (int)(numIntervals)];
|
|
|
+ this.totalFiles = 0;
|
|
|
+ this.totalDirectories = 0;
|
|
|
+ this.totalBlocks = 0;
|
|
|
+ this.totalSpace = 0;
|
|
|
+ this.maxFileSize = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void start() throws IOException {}
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void finish() throws IOException {
|
|
|
+ // write the distribution into the output file
|
|
|
+ write("Size\tNumFiles\n");
|
|
|
+ for(int i = 0; i < distribution.length; i++)
|
|
|
+ write(((long)i * step) + "\t" + distribution[i] + "\n");
|
|
|
+ System.out.println("totalFiles = " + totalFiles);
|
|
|
+ System.out.println("totalDirectories = " + totalDirectories);
|
|
|
+ System.out.println("totalBlocks = " + totalBlocks);
|
|
|
+ System.out.println("totalSpace = " + totalSpace);
|
|
|
+ System.out.println("maxFileSize = " + maxFileSize);
|
|
|
+ super.finish();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void leaveEnclosingElement() throws IOException {
|
|
|
+ ImageElement elem = elemS.pop();
|
|
|
+
|
|
|
+ if(elem != ImageElement.Inode &&
|
|
|
+ elem != ImageElement.INodeUnderConstruction)
|
|
|
+ return;
|
|
|
+ inInode = false;
|
|
|
+ if(current.numBlocks < 0) {
|
|
|
+ totalDirectories ++;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ totalFiles++;
|
|
|
+ totalBlocks += current.numBlocks;
|
|
|
+ totalSpace += current.fileSize * current.replication;
|
|
|
+ if(maxFileSize < current.fileSize)
|
|
|
+ maxFileSize = current.fileSize;
|
|
|
+ int high;
|
|
|
+ if(current.fileSize > maxSize)
|
|
|
+ high = distribution.length-1;
|
|
|
+ else
|
|
|
+ high = (int)Math.ceil((double)current.fileSize / step);
|
|
|
+ distribution[high]++;
|
|
|
+ if(totalFiles % 1000000 == 1)
|
|
|
+ System.out.println("Files processed: " + totalFiles
|
|
|
+ + " Current: " + current.path);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void visit(ImageElement element, String value) throws IOException {
|
|
|
+ if(inInode) {
|
|
|
+ switch(element) {
|
|
|
+ case INodePath:
|
|
|
+ current.path = (value.equals("") ? "/" : value);
|
|
|
+ break;
|
|
|
+ case Replication:
|
|
|
+ current.replication = Integer.valueOf(value);
|
|
|
+ break;
|
|
|
+ case NumBytes:
|
|
|
+ current.fileSize += Long.valueOf(value);
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void visitEnclosingElement(ImageElement element) throws IOException {
|
|
|
+ elemS.push(element);
|
|
|
+ if(element == ImageElement.Inode ||
|
|
|
+ element == ImageElement.INodeUnderConstruction) {
|
|
|
+ current = new FileContext();
|
|
|
+ inInode = true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ void visitEnclosingElement(ImageElement element,
|
|
|
+ ImageElement key, String value) throws IOException {
|
|
|
+ elemS.push(element);
|
|
|
+ if(element == ImageElement.Inode ||
|
|
|
+ element == ImageElement.INodeUnderConstruction)
|
|
|
+ inInode = true;
|
|
|
+ else if(element == ImageElement.Blocks)
|
|
|
+ current.numBlocks = Integer.parseInt(value);
|
|
|
+ }
|
|
|
+}
|