|
@@ -17,17 +17,19 @@
|
|
|
*/
|
|
|
package org.apache.hadoop.hdfs.server.datanode;
|
|
|
|
|
|
-import com.google.common.annotations.VisibleForTesting;
|
|
|
import java.io.File;
|
|
|
import java.io.FilenameFilter;
|
|
|
import java.io.IOException;
|
|
|
+import java.util.ArrayList;
|
|
|
import java.util.Arrays;
|
|
|
+import java.util.Collection;
|
|
|
import java.util.Collections;
|
|
|
import java.util.HashMap;
|
|
|
+import java.util.HashSet;
|
|
|
import java.util.LinkedList;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
-import java.util.Map.Entry;
|
|
|
+import java.util.Set;
|
|
|
import java.util.concurrent.Callable;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
import java.util.concurrent.Executors;
|
|
@@ -36,23 +38,27 @@ import java.util.concurrent.ScheduledExecutorService;
|
|
|
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
|
|
import java.util.concurrent.ThreadLocalRandom;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
+import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
|
|
-import org.apache.commons.lang3.time.FastDateFormat;
|
|
|
-import org.slf4j.Logger;
|
|
|
-import org.slf4j.LoggerFactory;
|
|
|
+import org.apache.commons.collections.CollectionUtils;
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
-import org.apache.hadoop.util.AutoCloseableLock;
|
|
|
import org.apache.hadoop.fs.StorageType;
|
|
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|
|
import org.apache.hadoop.hdfs.protocol.Block;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi.ScanInfo;
|
|
|
+import org.apache.hadoop.util.AutoCloseableLock;
|
|
|
import org.apache.hadoop.util.Daemon;
|
|
|
import org.apache.hadoop.util.StopWatch;
|
|
|
-import org.apache.hadoop.util.Time;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+
|
|
|
+import com.google.common.annotations.VisibleForTesting;
|
|
|
+import com.google.common.collect.ArrayListMultimap;
|
|
|
+import com.google.common.collect.ListMultimap;
|
|
|
|
|
|
/**
|
|
|
* Periodically scans the data directories for block and block metadata files.
|
|
@@ -62,48 +68,48 @@ import org.apache.hadoop.util.Time;
|
|
|
public class DirectoryScanner implements Runnable {
|
|
|
private static final Logger LOG =
|
|
|
LoggerFactory.getLogger(DirectoryScanner.class);
|
|
|
- private static final int MILLIS_PER_SECOND = 1000;
|
|
|
- private static final String START_MESSAGE =
|
|
|
- "Periodic Directory Tree Verification scan"
|
|
|
- + " starting at %s with interval of %dms";
|
|
|
- private static final String START_MESSAGE_WITH_THROTTLE = START_MESSAGE
|
|
|
- + " and throttle limit of %dms/s";
|
|
|
+
|
|
|
+ private static final int DEFAULT_MAP_SIZE = 32768;
|
|
|
|
|
|
private final FsDatasetSpi<?> dataset;
|
|
|
private final ExecutorService reportCompileThreadPool;
|
|
|
private final ScheduledExecutorService masterThread;
|
|
|
private final long scanPeriodMsecs;
|
|
|
- private final int throttleLimitMsPerSec;
|
|
|
- private volatile boolean shouldRun = false;
|
|
|
+ private final long throttleLimitMsPerSec;
|
|
|
+ private final AtomicBoolean shouldRun = new AtomicBoolean();
|
|
|
+
|
|
|
private boolean retainDiffs = false;
|
|
|
- private final DataNode datanode;
|
|
|
|
|
|
/**
|
|
|
* Total combined wall clock time (in milliseconds) spent by the report
|
|
|
- * compiler threads executing. Used for testing purposes.
|
|
|
+ * compiler threads executing. Used for testing purposes.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
final AtomicLong timeRunningMs = new AtomicLong(0L);
|
|
|
+
|
|
|
/**
|
|
|
* Total combined wall clock time (in milliseconds) spent by the report
|
|
|
- * compiler threads blocked by the throttle. Used for testing purposes.
|
|
|
+ * compiler threads blocked by the throttle. Used for testing purposes.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
final AtomicLong timeWaitingMs = new AtomicLong(0L);
|
|
|
+
|
|
|
/**
|
|
|
* The complete list of block differences indexed by block pool ID.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
- final ScanInfoPerBlockPool diffs = new ScanInfoPerBlockPool();
|
|
|
+ final BlockPoolReport diffs = new BlockPoolReport();
|
|
|
+
|
|
|
/**
|
|
|
- * Statistics about the block differences in each blockpool, indexed by
|
|
|
- * block pool ID.
|
|
|
+ * Statistics about the block differences in each blockpool, indexed by block
|
|
|
+ * pool ID.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
- final Map<String, Stats> stats = new HashMap<String, Stats>();
|
|
|
-
|
|
|
+ final Map<String, Stats> stats;
|
|
|
+
|
|
|
/**
|
|
|
- * Allow retaining diffs for unit test and analysis. Defaults to false (off)
|
|
|
+ * Allow retaining diffs for unit test and analysis. Defaults to false (off).
|
|
|
+ *
|
|
|
* @param b whether to retain diffs
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
@@ -123,92 +129,157 @@ public class DirectoryScanner implements Runnable {
|
|
|
long missingMemoryBlocks = 0;
|
|
|
long mismatchBlocks = 0;
|
|
|
long duplicateBlocks = 0;
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* Create a new Stats object for the given blockpool ID.
|
|
|
+ *
|
|
|
* @param bpid blockpool ID
|
|
|
*/
|
|
|
public Stats(String bpid) {
|
|
|
this.bpid = bpid;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
@Override
|
|
|
public String toString() {
|
|
|
- return "BlockPool " + bpid
|
|
|
- + " Total blocks: " + totalBlocks + ", missing metadata files:"
|
|
|
- + missingMetaFile + ", missing block files:" + missingBlockFile
|
|
|
- + ", missing blocks in memory:" + missingMemoryBlocks
|
|
|
- + ", mismatched blocks:" + mismatchBlocks;
|
|
|
+ return "BlockPool " + bpid + " Total blocks: " + totalBlocks
|
|
|
+ + ", missing metadata files: " + missingMetaFile
|
|
|
+ + ", missing block files: " + missingBlockFile
|
|
|
+ + ", missing blocks in memory: " + missingMemoryBlocks
|
|
|
+ + ", mismatched blocks: " + mismatchBlocks;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Helper class for compiling block info reports from report compiler threads.
|
|
|
+ * Contains a volume, a set of block pool IDs, and a collection of ScanInfo
|
|
|
+ * objects. If a block pool exists but has no ScanInfo objects associated with
|
|
|
+ * it, there will be no mapping for that particular block pool.
|
|
|
*/
|
|
|
- static class ScanInfoPerBlockPool extends
|
|
|
- HashMap<String, LinkedList<ScanInfo>> {
|
|
|
-
|
|
|
+ @VisibleForTesting
|
|
|
+ public static class ScanInfoVolumeReport {
|
|
|
+
|
|
|
+ @SuppressWarnings("unused")
|
|
|
private static final long serialVersionUID = 1L;
|
|
|
|
|
|
+ private final FsVolumeSpi volume;
|
|
|
+
|
|
|
+ private final BlockPoolReport blockPoolReport;
|
|
|
+
|
|
|
/**
|
|
|
* Create a new info list.
|
|
|
+ *
|
|
|
+ * @param volume
|
|
|
*/
|
|
|
- ScanInfoPerBlockPool() {super();}
|
|
|
+ ScanInfoVolumeReport(final FsVolumeSpi volume) {
|
|
|
+ this.volume = volume;
|
|
|
+ this.blockPoolReport = new BlockPoolReport();
|
|
|
+ }
|
|
|
|
|
|
/**
|
|
|
* Create a new info list initialized to the given expected size.
|
|
|
- * See {@link java.util.HashMap#HashMap(int)}.
|
|
|
*
|
|
|
* @param sz initial expected size
|
|
|
*/
|
|
|
- ScanInfoPerBlockPool(int sz) {super(sz);}
|
|
|
-
|
|
|
+ ScanInfoVolumeReport(final FsVolumeSpi volume,
|
|
|
+ final Collection<String> blockPools) {
|
|
|
+ this.volume = volume;
|
|
|
+ this.blockPoolReport = new BlockPoolReport(blockPools);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void addAll(final String bpid,
|
|
|
+ final Collection<ScanInfo> scanInfos) {
|
|
|
+ this.blockPoolReport.addAll(bpid, scanInfos);
|
|
|
+ }
|
|
|
+
|
|
|
+ public Set<String> getBlockPoolIds() {
|
|
|
+ return this.blockPoolReport.getBlockPoolIds();
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<ScanInfo> getScanInfo(final String bpid) {
|
|
|
+ return this.blockPoolReport.getScanInfo(bpid);
|
|
|
+ }
|
|
|
+
|
|
|
+ public FsVolumeSpi getVolume() {
|
|
|
+ return volume;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public String toString() {
|
|
|
+ return "ScanInfoVolumeReport [volume=" + volume + ", blockPoolReport="
|
|
|
+ + blockPoolReport + "]";
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Helper class for compiling block info reports per block pool.
|
|
|
+ */
|
|
|
+ @VisibleForTesting
|
|
|
+ public static class BlockPoolReport {
|
|
|
+
|
|
|
+ @SuppressWarnings("unused")
|
|
|
+ private static final long serialVersionUID = 1L;
|
|
|
+
|
|
|
+ private final Set<String> blockPools;
|
|
|
+
|
|
|
+ private final ListMultimap<String, ScanInfo> map;
|
|
|
+
|
|
|
/**
|
|
|
- * Merges {@code that} ScanInfoPerBlockPool into this one
|
|
|
+ * Create a block pool report.
|
|
|
*
|
|
|
- * @param that ScanInfoPerBlockPool to merge
|
|
|
+ * @param volume
|
|
|
*/
|
|
|
- public void addAll(ScanInfoPerBlockPool that) {
|
|
|
- if (that == null) return;
|
|
|
-
|
|
|
- for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {
|
|
|
- String bpid = entry.getKey();
|
|
|
- LinkedList<ScanInfo> list = entry.getValue();
|
|
|
-
|
|
|
- if (this.containsKey(bpid)) {
|
|
|
- //merge that per-bpid linked list with this one
|
|
|
- this.get(bpid).addAll(list);
|
|
|
- } else {
|
|
|
- //add that new bpid and its linked list to this
|
|
|
- this.put(bpid, list);
|
|
|
- }
|
|
|
- }
|
|
|
+ BlockPoolReport() {
|
|
|
+ this.blockPools = new HashSet<>(2);
|
|
|
+ this.map = ArrayListMultimap.create(2, DEFAULT_MAP_SIZE);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
- * Convert all the LinkedList values in this ScanInfoPerBlockPool map
|
|
|
- * into sorted arrays, and return a new map of these arrays per blockpool
|
|
|
+ * Create a new block pool report initialized to the given expected size.
|
|
|
*
|
|
|
- * @return a map of ScanInfo arrays per blockpool
|
|
|
+ * @param blockPools initial list of known block pools
|
|
|
*/
|
|
|
- public Map<String, ScanInfo[]> toSortedArrays() {
|
|
|
- Map<String, ScanInfo[]> result =
|
|
|
- new HashMap<String, ScanInfo[]>(this.size());
|
|
|
-
|
|
|
- for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {
|
|
|
- String bpid = entry.getKey();
|
|
|
- LinkedList<ScanInfo> list = entry.getValue();
|
|
|
-
|
|
|
- // convert list to array
|
|
|
- ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);
|
|
|
+ BlockPoolReport(final Collection<String> blockPools) {
|
|
|
+ this.blockPools = new HashSet<>(blockPools);
|
|
|
+ this.map = ArrayListMultimap.create(blockPools.size(), DEFAULT_MAP_SIZE);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public void addAll(final String bpid,
|
|
|
+ final Collection<ScanInfo> scanInfos) {
|
|
|
+ this.blockPools.add(bpid);
|
|
|
+ this.map.putAll(bpid, scanInfos);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void sortBlocks() {
|
|
|
+ for (final String bpid : this.map.keySet()) {
|
|
|
+ final List<ScanInfo> list = this.map.get(bpid);
|
|
|
// Sort array based on blockId
|
|
|
- Arrays.sort(record);
|
|
|
- result.put(bpid, record);
|
|
|
+ Collections.sort(list);
|
|
|
}
|
|
|
- return result;
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
+ public Set<String> getBlockPoolIds() {
|
|
|
+ return Collections.unmodifiableSet(this.blockPools);
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<ScanInfo> getScanInfo(final String bpid) {
|
|
|
+ return this.map.get(bpid);
|
|
|
+ }
|
|
|
+
|
|
|
+ public Collection<Map.Entry<String, ScanInfo>> getEntries() {
|
|
|
+ return Collections.unmodifiableCollection(this.map.entries());
|
|
|
+ }
|
|
|
+
|
|
|
+ public void clear() {
|
|
|
+ this.map.clear();
|
|
|
+ this.blockPools.clear();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public String toString() {
|
|
|
+ return "BlockPoolReport [blockPools=" + blockPools + ", map=" + map + "]";
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
/**
|
|
|
* Create a new directory scanner, but don't cycle it running yet.
|
|
@@ -217,75 +288,58 @@ public class DirectoryScanner implements Runnable {
|
|
|
* @param dataset the dataset to scan
|
|
|
* @param conf the Configuration object
|
|
|
*/
|
|
|
- public DirectoryScanner(DataNode datanode, FsDatasetSpi<?> dataset,
|
|
|
- Configuration conf) {
|
|
|
- this.datanode = datanode;
|
|
|
+ public DirectoryScanner(FsDatasetSpi<?> dataset, Configuration conf) {
|
|
|
this.dataset = dataset;
|
|
|
+ this.stats = new HashMap<>(DEFAULT_MAP_SIZE);
|
|
|
int interval = (int) conf.getTimeDuration(
|
|
|
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
|
|
|
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT,
|
|
|
TimeUnit.SECONDS);
|
|
|
- scanPeriodMsecs = interval * MILLIS_PER_SECOND; //msec
|
|
|
|
|
|
- int throttle =
|
|
|
- conf.getInt(
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY,
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT);
|
|
|
+ scanPeriodMsecs = TimeUnit.SECONDS.toMillis(interval);
|
|
|
|
|
|
- if ((throttle > MILLIS_PER_SECOND) || (throttle <= 0)) {
|
|
|
- if (throttle > MILLIS_PER_SECOND) {
|
|
|
- LOG.error(
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY
|
|
|
- + " set to value above 1000 ms/sec. Assuming default value of " +
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT);
|
|
|
- } else {
|
|
|
- LOG.error(
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY
|
|
|
- + " set to value below 1 ms/sec. Assuming default value of " +
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT);
|
|
|
- }
|
|
|
+ int throttle = conf.getInt(
|
|
|
+ DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY,
|
|
|
+ DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT);
|
|
|
|
|
|
- throttleLimitMsPerSec =
|
|
|
+ if (throttle >= TimeUnit.SECONDS.toMillis(1)) {
|
|
|
+ LOG.warn(
|
|
|
+ "{} set to value above 1000 ms/sec. Assuming default value of {}",
|
|
|
+ DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY,
|
|
|
+ DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT);
|
|
|
+ throttle =
|
|
|
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT;
|
|
|
- } else {
|
|
|
- throttleLimitMsPerSec = throttle;
|
|
|
}
|
|
|
|
|
|
- int threads =
|
|
|
+ throttleLimitMsPerSec = throttle;
|
|
|
+
|
|
|
+ int threads =
|
|
|
conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
|
|
|
- DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);
|
|
|
+ DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);
|
|
|
|
|
|
- reportCompileThreadPool = Executors.newFixedThreadPool(threads,
|
|
|
- new Daemon.DaemonFactory());
|
|
|
- masterThread = new ScheduledThreadPoolExecutor(1,
|
|
|
- new Daemon.DaemonFactory());
|
|
|
+ reportCompileThreadPool =
|
|
|
+ Executors.newFixedThreadPool(threads, new Daemon.DaemonFactory());
|
|
|
+
|
|
|
+ masterThread =
|
|
|
+ new ScheduledThreadPoolExecutor(1, new Daemon.DaemonFactory());
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Start the scanner. The scanner will run every
|
|
|
+ * Start the scanner. The scanner will run every
|
|
|
* {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY} seconds.
|
|
|
*/
|
|
|
void start() {
|
|
|
- shouldRun = true;
|
|
|
- long offset = ThreadLocalRandom.current().nextInt(
|
|
|
- (int) (scanPeriodMsecs/MILLIS_PER_SECOND)) * MILLIS_PER_SECOND; //msec
|
|
|
- long firstScanTime = Time.now() + offset;
|
|
|
- String logMsg;
|
|
|
-
|
|
|
- if (throttleLimitMsPerSec < MILLIS_PER_SECOND) {
|
|
|
- logMsg = String.format(START_MESSAGE_WITH_THROTTLE,
|
|
|
- FastDateFormat.getInstance().format(firstScanTime), scanPeriodMsecs,
|
|
|
- throttleLimitMsPerSec);
|
|
|
- } else {
|
|
|
- logMsg = String.format(START_MESSAGE,
|
|
|
- FastDateFormat.getInstance().format(firstScanTime), scanPeriodMsecs);
|
|
|
- }
|
|
|
-
|
|
|
- LOG.info(logMsg);
|
|
|
- masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,
|
|
|
- TimeUnit.MILLISECONDS);
|
|
|
+ shouldRun.set(true);
|
|
|
+ long firstScanTime = ThreadLocalRandom.current().nextLong(scanPeriodMsecs);
|
|
|
+
|
|
|
+ LOG.info(
|
|
|
+ "Periodic Directory Tree Verification scan starting in {}ms with interval of {}ms and throttle limit of {}ms/s",
|
|
|
+ firstScanTime, scanPeriodMsecs, throttleLimitMsPerSec);
|
|
|
+
|
|
|
+ masterThread.scheduleAtFixedRate(this, firstScanTime, scanPeriodMsecs,
|
|
|
+ TimeUnit.MILLISECONDS);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* Return whether the scanner has been started.
|
|
|
*
|
|
@@ -293,7 +347,7 @@ public class DirectoryScanner implements Runnable {
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
boolean getRunStatus() {
|
|
|
- return shouldRun;
|
|
|
+ return shouldRun.get();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -305,67 +359,69 @@ public class DirectoryScanner implements Runnable {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Main program loop for DirectoryScanner. Runs {@link reconcile()}
|
|
|
- * and handles any exceptions.
|
|
|
+ * Main program loop for DirectoryScanner. Runs {@link reconcile()} and
|
|
|
+ * handles any exceptions.
|
|
|
*/
|
|
|
@Override
|
|
|
public void run() {
|
|
|
+ if (!shouldRun.get()) {
|
|
|
+ // shutdown has been activated
|
|
|
+ LOG.warn(
|
|
|
+ "This cycle terminating immediately because 'shouldRun' has been deactivated");
|
|
|
+ return;
|
|
|
+ }
|
|
|
try {
|
|
|
- if (!shouldRun) {
|
|
|
- //shutdown has been activated
|
|
|
- LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");
|
|
|
- return;
|
|
|
- }
|
|
|
-
|
|
|
- //We're are okay to run - do it
|
|
|
- reconcile();
|
|
|
-
|
|
|
+ reconcile();
|
|
|
} catch (Exception e) {
|
|
|
- //Log and continue - allows Executor to run again next cycle
|
|
|
- LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);
|
|
|
+ // Log and continue - allows Executor to run again next cycle
|
|
|
+ LOG.error(
|
|
|
+ "Exception during DirectoryScanner execution - will continue next cycle",
|
|
|
+ e);
|
|
|
} catch (Error er) {
|
|
|
- //Non-recoverable error - re-throw after logging the problem
|
|
|
- LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);
|
|
|
+ // Non-recoverable error - re-throw after logging the problem
|
|
|
+ LOG.error(
|
|
|
+ "System Error during DirectoryScanner execution - permanently terminating periodic scanner",
|
|
|
+ er);
|
|
|
throw er;
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
- * Stops the directory scanner. This method will wait for 1 minute for the
|
|
|
+ * Stops the directory scanner. This method will wait for 1 minute for the
|
|
|
* main thread to exit and an additional 1 minute for the report compilation
|
|
|
- * threads to exit. If a thread does not exit in that time period, it is
|
|
|
- * left running, and an error is logged.
|
|
|
+ * threads to exit. If a thread does not exit in that time period, it is left
|
|
|
+ * running, and an error is logged.
|
|
|
*/
|
|
|
void shutdown() {
|
|
|
- if (!shouldRun) {
|
|
|
- LOG.warn("DirectoryScanner: shutdown has been called, but periodic scanner not started");
|
|
|
- } else {
|
|
|
- LOG.warn("DirectoryScanner: shutdown has been called");
|
|
|
+ LOG.info("Shutdown has been called");
|
|
|
+ if (!shouldRun.getAndSet(false)) {
|
|
|
+ LOG.warn("Shutdown has been called, but periodic scanner not started");
|
|
|
+ }
|
|
|
+ if (masterThread != null) {
|
|
|
+ masterThread.shutdown();
|
|
|
}
|
|
|
- shouldRun = false;
|
|
|
- if (masterThread != null) masterThread.shutdown();
|
|
|
-
|
|
|
if (reportCompileThreadPool != null) {
|
|
|
reportCompileThreadPool.shutdownNow();
|
|
|
}
|
|
|
-
|
|
|
if (masterThread != null) {
|
|
|
try {
|
|
|
masterThread.awaitTermination(1, TimeUnit.MINUTES);
|
|
|
} catch (InterruptedException e) {
|
|
|
- LOG.error("interrupted while waiting for masterThread to " +
|
|
|
- "terminate", e);
|
|
|
+ LOG.error(
|
|
|
+ "interrupted while waiting for masterThread to " + "terminate", e);
|
|
|
}
|
|
|
}
|
|
|
if (reportCompileThreadPool != null) {
|
|
|
try {
|
|
|
reportCompileThreadPool.awaitTermination(1, TimeUnit.MINUTES);
|
|
|
} catch (InterruptedException e) {
|
|
|
- LOG.error("interrupted while waiting for reportCompileThreadPool to " +
|
|
|
- "terminate", e);
|
|
|
+ LOG.error("interrupted while waiting for reportCompileThreadPool to "
|
|
|
+ + "terminate", e);
|
|
|
}
|
|
|
}
|
|
|
- if (!retainDiffs) clear();
|
|
|
+ if (!retainDiffs) {
|
|
|
+ clear();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -374,45 +430,54 @@ public class DirectoryScanner implements Runnable {
|
|
|
@VisibleForTesting
|
|
|
public void reconcile() throws IOException {
|
|
|
scan();
|
|
|
- for (Entry<String, LinkedList<ScanInfo>> entry : diffs.entrySet()) {
|
|
|
- String bpid = entry.getKey();
|
|
|
- LinkedList<ScanInfo> diff = entry.getValue();
|
|
|
-
|
|
|
- for (ScanInfo info : diff) {
|
|
|
- dataset.checkAndUpdate(bpid, info);
|
|
|
- }
|
|
|
+
|
|
|
+ for (final Map.Entry<String, ScanInfo> entry : diffs.getEntries()) {
|
|
|
+ dataset.checkAndUpdate(entry.getKey(), entry.getValue());
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!retainDiffs) {
|
|
|
+ clear();
|
|
|
}
|
|
|
- if (!retainDiffs) clear();
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Scan for the differences between disk and in-memory blocks
|
|
|
- * Scan only the "finalized blocks" lists of both disk and memory.
|
|
|
+ * Scan for the differences between disk and in-memory blocks Scan only the
|
|
|
+ * "finalized blocks" lists of both disk and memory.
|
|
|
*/
|
|
|
private void scan() {
|
|
|
+ BlockPoolReport blockPoolReport = new BlockPoolReport();
|
|
|
+
|
|
|
clear();
|
|
|
- Map<String, ScanInfo[]> diskReport = getDiskReport();
|
|
|
+
|
|
|
+ Collection<ScanInfoVolumeReport> volumeReports = getVolumeReports();
|
|
|
+ for (ScanInfoVolumeReport volumeReport : volumeReports) {
|
|
|
+ for (String blockPoolId : volumeReport.getBlockPoolIds()) {
|
|
|
+ List<ScanInfo> scanInfos = volumeReport.getScanInfo(blockPoolId);
|
|
|
+ blockPoolReport.addAll(blockPoolId, scanInfos);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Pre-sort the reports outside of the lock
|
|
|
+ blockPoolReport.sortBlocks();
|
|
|
|
|
|
// Hold FSDataset lock to prevent further changes to the block map
|
|
|
- try(AutoCloseableLock lock = dataset.acquireDatasetLock()) {
|
|
|
- for (Entry<String, ScanInfo[]> entry : diskReport.entrySet()) {
|
|
|
- String bpid = entry.getKey();
|
|
|
- ScanInfo[] blockpoolReport = entry.getValue();
|
|
|
-
|
|
|
+ try (AutoCloseableLock lock = dataset.acquireDatasetLock()) {
|
|
|
+ for (final String bpid : blockPoolReport.getBlockPoolIds()) {
|
|
|
+ List<ScanInfo> blockpoolReport = blockPoolReport.getScanInfo(bpid);
|
|
|
+
|
|
|
Stats statsRecord = new Stats(bpid);
|
|
|
stats.put(bpid, statsRecord);
|
|
|
- LinkedList<ScanInfo> diffRecord = new LinkedList<ScanInfo>();
|
|
|
- diffs.put(bpid, diffRecord);
|
|
|
-
|
|
|
- statsRecord.totalBlocks = blockpoolReport.length;
|
|
|
+ Collection<ScanInfo> diffRecord = new ArrayList<>();
|
|
|
+
|
|
|
+ statsRecord.totalBlocks = blockpoolReport.size();
|
|
|
final List<ReplicaInfo> bl = dataset.getFinalizedBlocks(bpid);
|
|
|
Collections.sort(bl); // Sort based on blockId
|
|
|
-
|
|
|
+
|
|
|
int d = 0; // index for blockpoolReport
|
|
|
int m = 0; // index for memReprot
|
|
|
- while (m < bl.size() && d < blockpoolReport.length) {
|
|
|
+ while (m < bl.size() && d < blockpoolReport.size()) {
|
|
|
ReplicaInfo memBlock = bl.get(m);
|
|
|
- ScanInfo info = blockpoolReport[d];
|
|
|
+ ScanInfo info = blockpoolReport.get(d);
|
|
|
if (info.getBlockId() < memBlock.getBlockId()) {
|
|
|
if (!dataset.isDeletingBlock(bpid, info.getBlockId())) {
|
|
|
// Block is missing in memory
|
|
@@ -424,15 +489,15 @@ public class DirectoryScanner implements Runnable {
|
|
|
}
|
|
|
if (info.getBlockId() > memBlock.getBlockId()) {
|
|
|
// Block is missing on the disk
|
|
|
- addDifference(diffRecord, statsRecord,
|
|
|
- memBlock.getBlockId(), info.getVolume());
|
|
|
+ addDifference(diffRecord, statsRecord, memBlock.getBlockId(),
|
|
|
+ info.getVolume());
|
|
|
m++;
|
|
|
continue;
|
|
|
}
|
|
|
// Block file and/or metadata file exists on the disk
|
|
|
// Block exists in memory
|
|
|
- if (info.getVolume().getStorageType() != StorageType.PROVIDED &&
|
|
|
- info.getBlockFile() == null) {
|
|
|
+ if (info.getVolume().getStorageType() != StorageType.PROVIDED
|
|
|
+ && info.getBlockFile() == null) {
|
|
|
// Block metadata file exits and block file is missing
|
|
|
addDifference(diffRecord, statsRecord, info);
|
|
|
} else if (info.getGenStamp() != memBlock.getGenerationStamp()
|
|
@@ -442,16 +507,16 @@ public class DirectoryScanner implements Runnable {
|
|
|
statsRecord.mismatchBlocks++;
|
|
|
addDifference(diffRecord, statsRecord, info);
|
|
|
} else if (memBlock.compareWith(info) != 0) {
|
|
|
- // volumeMap record and on-disk files don't match.
|
|
|
+ // volumeMap record and on-disk files do not match.
|
|
|
statsRecord.duplicateBlocks++;
|
|
|
addDifference(diffRecord, statsRecord, info);
|
|
|
}
|
|
|
d++;
|
|
|
|
|
|
- if (d < blockpoolReport.length) {
|
|
|
- // There may be multiple on-disk records for the same block, don't increment
|
|
|
- // the memory record pointer if so.
|
|
|
- ScanInfo nextInfo = blockpoolReport[d];
|
|
|
+ if (d < blockpoolReport.size()) {
|
|
|
+ // There may be multiple on-disk records for the same block, do not
|
|
|
+ // increment the memory record pointer if so.
|
|
|
+ ScanInfo nextInfo = blockpoolReport.get(d);
|
|
|
if (nextInfo.getBlockId() != info.getBlockId()) {
|
|
|
++m;
|
|
|
}
|
|
@@ -461,132 +526,108 @@ public class DirectoryScanner implements Runnable {
|
|
|
}
|
|
|
while (m < bl.size()) {
|
|
|
ReplicaInfo current = bl.get(m++);
|
|
|
- addDifference(diffRecord, statsRecord,
|
|
|
- current.getBlockId(), current.getVolume());
|
|
|
+ addDifference(diffRecord, statsRecord, current.getBlockId(),
|
|
|
+ current.getVolume());
|
|
|
}
|
|
|
- while (d < blockpoolReport.length) {
|
|
|
- if (!dataset.isDeletingBlock(bpid, blockpoolReport[d].getBlockId())) {
|
|
|
+ while (d < blockpoolReport.size()) {
|
|
|
+ if (!dataset.isDeletingBlock(bpid,
|
|
|
+ blockpoolReport.get(d).getBlockId())) {
|
|
|
statsRecord.missingMemoryBlocks++;
|
|
|
- addDifference(diffRecord, statsRecord, blockpoolReport[d]);
|
|
|
+ addDifference(diffRecord, statsRecord, blockpoolReport.get(d));
|
|
|
}
|
|
|
d++;
|
|
|
}
|
|
|
- LOG.info(statsRecord.toString());
|
|
|
- } //end for
|
|
|
- } //end synchronized
|
|
|
+ diffs.addAll(bpid, diffRecord);
|
|
|
+ LOG.info("Scan Results: {}", statsRecord);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Add the ScanInfo object to the list of differences and adjust the stats
|
|
|
- * accordingly. This method is called when a block is found on the disk,
|
|
|
- * but the in-memory block is missing or does not match the block on the disk.
|
|
|
+ * accordingly. This method is called when a block is found on the disk, but
|
|
|
+ * the in-memory block is missing or does not match the block on the disk.
|
|
|
*
|
|
|
- * @param diffRecord the list to which to add the info
|
|
|
+ * @param diffRecord the collection to which to add the info
|
|
|
* @param statsRecord the stats to update
|
|
|
* @param info the differing info
|
|
|
*/
|
|
|
- private void addDifference(LinkedList<ScanInfo> diffRecord,
|
|
|
- Stats statsRecord, ScanInfo info) {
|
|
|
+ private void addDifference(Collection<ScanInfo> diffRecord, Stats statsRecord,
|
|
|
+ ScanInfo info) {
|
|
|
statsRecord.missingMetaFile += info.getMetaFile() == null ? 1 : 0;
|
|
|
statsRecord.missingBlockFile += info.getBlockFile() == null ? 1 : 0;
|
|
|
diffRecord.add(info);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Add a new ScanInfo object to the list of differences and adjust the stats
|
|
|
- * accordingly. This method is called when a block is not found on the disk.
|
|
|
+ * Add a new ScanInfo object to the collection of differences and adjust the
|
|
|
+ * stats accordingly. This method is called when a block is not found on the
|
|
|
+ * disk.
|
|
|
*
|
|
|
- * @param diffRecord the list to which to add the info
|
|
|
+ * @param diffRecord the collection to which to add the info
|
|
|
* @param statsRecord the stats to update
|
|
|
* @param blockId the id of the missing block
|
|
|
* @param vol the volume that contains the missing block
|
|
|
*/
|
|
|
- private void addDifference(LinkedList<ScanInfo> diffRecord,
|
|
|
- Stats statsRecord, long blockId,
|
|
|
- FsVolumeSpi vol) {
|
|
|
+ private void addDifference(Collection<ScanInfo> diffRecord, Stats statsRecord,
|
|
|
+ long blockId, FsVolumeSpi vol) {
|
|
|
statsRecord.missingBlockFile++;
|
|
|
statsRecord.missingMetaFile++;
|
|
|
diffRecord.add(new ScanInfo(blockId, null, null, vol));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Get the lists of blocks on the disks in the dataset, sorted by blockId.
|
|
|
- * The returned map contains one entry per blockpool, keyed by the blockpool
|
|
|
- * ID.
|
|
|
- *
|
|
|
- * @return a map of sorted arrays of block information
|
|
|
+ * Get the lists of blocks on the disks in the data set.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
- public Map<String, ScanInfo[]> getDiskReport() {
|
|
|
- ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();
|
|
|
- ScanInfoPerBlockPool[] dirReports = null;
|
|
|
+ public Collection<ScanInfoVolumeReport> getVolumeReports() {
|
|
|
+ List<ScanInfoVolumeReport> volReports = new ArrayList<>();
|
|
|
+ List<Future<ScanInfoVolumeReport>> compilersInProgress = new ArrayList<>();
|
|
|
+
|
|
|
// First get list of data directories
|
|
|
try (FsDatasetSpi.FsVolumeReferences volumes =
|
|
|
dataset.getFsVolumeReferences()) {
|
|
|
|
|
|
- // Use an array since the threads may return out of order and
|
|
|
- // compilersInProgress#keySet may return out of order as well.
|
|
|
- dirReports = new ScanInfoPerBlockPool[volumes.size()];
|
|
|
-
|
|
|
- Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =
|
|
|
- new HashMap<Integer, Future<ScanInfoPerBlockPool>>();
|
|
|
-
|
|
|
- for (int i = 0; i < volumes.size(); i++) {
|
|
|
- if (volumes.get(i).getStorageType() == StorageType.PROVIDED) {
|
|
|
- // Disable scanning PROVIDED volumes to keep overhead low
|
|
|
- continue;
|
|
|
+ for (final FsVolumeSpi volume : volumes) {
|
|
|
+ // Disable scanning PROVIDED volumes to keep overhead low
|
|
|
+ if (volume.getStorageType() != StorageType.PROVIDED) {
|
|
|
+ ReportCompiler reportCompiler = new ReportCompiler(volume);
|
|
|
+ Future<ScanInfoVolumeReport> result =
|
|
|
+ reportCompileThreadPool.submit(reportCompiler);
|
|
|
+ compilersInProgress.add(result);
|
|
|
}
|
|
|
- ReportCompiler reportCompiler =
|
|
|
- new ReportCompiler(datanode, volumes.get(i));
|
|
|
- Future<ScanInfoPerBlockPool> result =
|
|
|
- reportCompileThreadPool.submit(reportCompiler);
|
|
|
- compilersInProgress.put(i, result);
|
|
|
}
|
|
|
|
|
|
- for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :
|
|
|
- compilersInProgress.entrySet()) {
|
|
|
- Integer index = report.getKey();
|
|
|
+ for (Future<ScanInfoVolumeReport> future : compilersInProgress) {
|
|
|
try {
|
|
|
- dirReports[index] = report.getValue().get();
|
|
|
-
|
|
|
- // If our compiler threads were interrupted, give up on this run
|
|
|
- if (dirReports[index] == null) {
|
|
|
- dirReports = null;
|
|
|
+ final ScanInfoVolumeReport result = future.get();
|
|
|
+ if (!CollectionUtils.addIgnoreNull(volReports, result)) {
|
|
|
+ // This compiler thread were interrupted, give up on this run
|
|
|
+ volReports.clear();
|
|
|
break;
|
|
|
}
|
|
|
} catch (Exception ex) {
|
|
|
- FsVolumeSpi fsVolumeSpi = volumes.get(index);
|
|
|
- LOG.error("Error compiling report for the volume, StorageId: "
|
|
|
- + fsVolumeSpi.getStorageID(), ex);
|
|
|
- // Continue scanning the other volumes
|
|
|
+ LOG.warn("Error compiling report. Continuing.", ex);
|
|
|
}
|
|
|
}
|
|
|
} catch (IOException e) {
|
|
|
LOG.error("Unexpected IOException by closing FsVolumeReference", e);
|
|
|
}
|
|
|
- if (dirReports != null) {
|
|
|
- // Compile consolidated report for all the volumes
|
|
|
- for (ScanInfoPerBlockPool report : dirReports) {
|
|
|
- if(report != null){
|
|
|
- list.addAll(report);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return list.toSortedArrays();
|
|
|
+
|
|
|
+ return volReports;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* The ReportCompiler class encapsulates the process of searching a datanode's
|
|
|
- * disks for block information. It operates by performing a DFS of the
|
|
|
- * volume to discover block information.
|
|
|
+ * disks for block information. It operates by performing a DFS of the volume
|
|
|
+ * to discover block information.
|
|
|
*
|
|
|
* When the ReportCompiler discovers block information, it create a new
|
|
|
- * ScanInfo object for it and adds that object to its report list. The report
|
|
|
+ * ScanInfo object for it and adds that object to its report list. The report
|
|
|
* list is returned by the {@link #call()} method.
|
|
|
*/
|
|
|
- public class ReportCompiler implements Callable<ScanInfoPerBlockPool> {
|
|
|
+ public class ReportCompiler implements Callable<ScanInfoVolumeReport> {
|
|
|
private final FsVolumeSpi volume;
|
|
|
- private final DataNode datanode;
|
|
|
// Variable for tracking time spent running for throttling purposes
|
|
|
private final StopWatch throttleTimer = new StopWatch();
|
|
|
// Variable for tracking time spent running and waiting for testing
|
|
@@ -594,13 +635,11 @@ public class DirectoryScanner implements Runnable {
|
|
|
private final StopWatch perfTimer = new StopWatch();
|
|
|
|
|
|
/**
|
|
|
- * Create a report compiler for the given volume on the given datanode.
|
|
|
+ * Create a report compiler for the given volume.
|
|
|
*
|
|
|
- * @param datanode the target datanode
|
|
|
* @param volume the target volume
|
|
|
*/
|
|
|
- public ReportCompiler(DataNode datanode, FsVolumeSpi volume) {
|
|
|
- this.datanode = datanode;
|
|
|
+ public ReportCompiler(FsVolumeSpi volume) {
|
|
|
this.volume = volume;
|
|
|
}
|
|
|
|
|
@@ -608,12 +647,13 @@ public class DirectoryScanner implements Runnable {
|
|
|
* Run this report compiler thread.
|
|
|
*
|
|
|
* @return the block info report list
|
|
|
- * @throws IOException if the block pool isn't found
|
|
|
+ * @throws IOException if the block pool is not found
|
|
|
*/
|
|
|
@Override
|
|
|
- public ScanInfoPerBlockPool call() throws IOException {
|
|
|
+ public ScanInfoVolumeReport call() throws IOException {
|
|
|
String[] bpList = volume.getBlockPoolList();
|
|
|
- ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);
|
|
|
+ ScanInfoVolumeReport result =
|
|
|
+ new ScanInfoVolumeReport(volume, Arrays.asList(bpList));
|
|
|
perfTimer.start();
|
|
|
throttleTimer.start();
|
|
|
for (String bpid : bpList) {
|
|
@@ -623,33 +663,45 @@ public class DirectoryScanner implements Runnable {
|
|
|
throttleTimer.reset().start();
|
|
|
|
|
|
try {
|
|
|
- result.put(bpid, volume.compileReport(bpid, report, this));
|
|
|
+ // ScanInfos are added directly to 'report' list
|
|
|
+ volume.compileReport(bpid, report, this);
|
|
|
+ result.addAll(bpid, report);
|
|
|
} catch (InterruptedException ex) {
|
|
|
// Exit quickly and flag the scanner to do the same
|
|
|
result = null;
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
+ LOG.trace("Scanner volume report: {}", result);
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Called by the thread before each potential disk scan so that a pause
|
|
|
- * can be optionally inserted to limit the number of scans per second.
|
|
|
- * The limit is controlled by
|
|
|
+ * Called by the thread before each potential disk scan so that a pause can
|
|
|
+ * be optionally inserted to limit the number of scans per second. The limit
|
|
|
+ * is controlled by
|
|
|
* {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY}.
|
|
|
*/
|
|
|
public void throttle() throws InterruptedException {
|
|
|
accumulateTimeRunning();
|
|
|
|
|
|
- if ((throttleLimitMsPerSec < 1000) &&
|
|
|
- (throttleTimer.now(TimeUnit.MILLISECONDS) > throttleLimitMsPerSec)) {
|
|
|
-
|
|
|
- Thread.sleep(MILLIS_PER_SECOND - throttleLimitMsPerSec);
|
|
|
- throttleTimer.reset().start();
|
|
|
+ if (throttleLimitMsPerSec > 0L) {
|
|
|
+ final long runningTime = throttleTimer.now(TimeUnit.MILLISECONDS);
|
|
|
+ if (runningTime >= throttleLimitMsPerSec) {
|
|
|
+ final long sleepTime;
|
|
|
+ if (runningTime >= 1000L) {
|
|
|
+ LOG.warn("Unable to throttle within the second. Blocking for 1s.");
|
|
|
+ sleepTime = 1000L;
|
|
|
+ } else {
|
|
|
+ // Sleep for the expected time plus any time processing ran over
|
|
|
+ final long overTime = runningTime - throttleLimitMsPerSec;
|
|
|
+ sleepTime = (1000L - throttleLimitMsPerSec) + overTime;
|
|
|
+ }
|
|
|
+ Thread.sleep(sleepTime);
|
|
|
+ throttleTimer.reset().start();
|
|
|
+ }
|
|
|
+ accumulateTimeWaiting();
|
|
|
}
|
|
|
-
|
|
|
- accumulateTimeWaiting();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -679,4 +731,5 @@ public class DirectoryScanner implements Runnable {
|
|
|
|| name.startsWith(Block.BLOCK_FILE_PREFIX);
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
}
|