|
@@ -19,9 +19,11 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
|
|
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
import java.util.Iterator;
|
|
import java.util.Iterator;
|
|
|
|
+import java.util.LinkedList;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
-import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
|
|
|
|
|
|
+
|
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
|
|
+import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
|
|
|
|
|
|
/**
|
|
/**
|
|
* Keep prioritized queues of under replicated blocks.
|
|
* Keep prioritized queues of under replicated blocks.
|
|
@@ -34,7 +36,11 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
*
|
|
*
|
|
* <p/>
|
|
* <p/>
|
|
* The policy for choosing which priority to give added blocks
|
|
* The policy for choosing which priority to give added blocks
|
|
|
|
+<<<<<<< HEAD
|
|
* is implemented in {@link #getPriority(int, int, int)}.
|
|
* is implemented in {@link #getPriority(int, int, int)}.
|
|
|
|
+=======
|
|
|
|
+ * is implemented in {@link #getPriority(BlockInfo, int, int, int, int)}.
|
|
|
|
+>>>>>>> 5411dc5... HDFS-9205. Do not schedule corrupt blocks for replication. (szetszwo)
|
|
* </p>
|
|
* </p>
|
|
* <p>The queue order is as follows:</p>
|
|
* <p>The queue order is as follows:</p>
|
|
* <ol>
|
|
* <ol>
|
|
@@ -146,6 +152,7 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
* @return the priority for the blocks, between 0 and ({@link #LEVEL}-1)
|
|
* @return the priority for the blocks, between 0 and ({@link #LEVEL}-1)
|
|
*/
|
|
*/
|
|
private int getPriority(int curReplicas,
|
|
private int getPriority(int curReplicas,
|
|
|
|
+ int readOnlyReplicas,
|
|
int decommissionedReplicas,
|
|
int decommissionedReplicas,
|
|
int expectedReplicas) {
|
|
int expectedReplicas) {
|
|
assert curReplicas >= 0 : "Negative replicas!";
|
|
assert curReplicas >= 0 : "Negative replicas!";
|
|
@@ -158,6 +165,11 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
if (decommissionedReplicas > 0) {
|
|
if (decommissionedReplicas > 0) {
|
|
return QUEUE_HIGHEST_PRIORITY;
|
|
return QUEUE_HIGHEST_PRIORITY;
|
|
}
|
|
}
|
|
|
|
+ if (readOnlyReplicas > 0) {
|
|
|
|
+ // only has read-only replicas, highest risk
|
|
|
|
+ // since the read-only replicas may go down all together.
|
|
|
|
+ return QUEUE_HIGHEST_PRIORITY;
|
|
|
|
+ }
|
|
//all we have are corrupt blocks
|
|
//all we have are corrupt blocks
|
|
return QUEUE_WITH_CORRUPT_BLOCKS;
|
|
return QUEUE_WITH_CORRUPT_BLOCKS;
|
|
} else if (curReplicas == 1) {
|
|
} else if (curReplicas == 1) {
|
|
@@ -183,11 +195,12 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
*/
|
|
*/
|
|
synchronized boolean add(BlockInfo block,
|
|
synchronized boolean add(BlockInfo block,
|
|
int curReplicas,
|
|
int curReplicas,
|
|
|
|
+ int readOnlyReplicas,
|
|
int decomissionedReplicas,
|
|
int decomissionedReplicas,
|
|
int expectedReplicas) {
|
|
int expectedReplicas) {
|
|
assert curReplicas >= 0 : "Negative replicas!";
|
|
assert curReplicas >= 0 : "Negative replicas!";
|
|
- int priLevel = getPriority(curReplicas, decomissionedReplicas,
|
|
|
|
- expectedReplicas);
|
|
|
|
|
|
+ final int priLevel = getPriority(curReplicas, readOnlyReplicas,
|
|
|
|
+ decomissionedReplicas, expectedReplicas);
|
|
if(priorityQueues.get(priLevel).add(block)) {
|
|
if(priorityQueues.get(priLevel).add(block)) {
|
|
if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
|
|
if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
|
|
expectedReplicas == 1) {
|
|
expectedReplicas == 1) {
|
|
@@ -207,11 +220,11 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
/** remove a block from a under replication queue */
|
|
/** remove a block from a under replication queue */
|
|
synchronized boolean remove(BlockInfo block,
|
|
synchronized boolean remove(BlockInfo block,
|
|
int oldReplicas,
|
|
int oldReplicas,
|
|
|
|
+ int oldReadOnlyReplicas,
|
|
int decommissionedReplicas,
|
|
int decommissionedReplicas,
|
|
int oldExpectedReplicas) {
|
|
int oldExpectedReplicas) {
|
|
- int priLevel = getPriority(oldReplicas,
|
|
|
|
- decommissionedReplicas,
|
|
|
|
- oldExpectedReplicas);
|
|
|
|
|
|
+ final int priLevel = getPriority(oldReplicas, oldReadOnlyReplicas,
|
|
|
|
+ decommissionedReplicas, oldExpectedReplicas);
|
|
boolean removedBlock = remove(block, priLevel);
|
|
boolean removedBlock = remove(block, priLevel);
|
|
if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
|
|
if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
|
|
oldExpectedReplicas == 1 &&
|
|
oldExpectedReplicas == 1 &&
|
|
@@ -250,10 +263,10 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
// Try to remove the block from all queues if the block was
|
|
// Try to remove the block from all queues if the block was
|
|
// not found in the queue for the given priority level.
|
|
// not found in the queue for the given priority level.
|
|
for (int i = 0; i < LEVEL; i++) {
|
|
for (int i = 0; i < LEVEL; i++) {
|
|
- if (priorityQueues.get(i).remove(block)) {
|
|
|
|
|
|
+ if (i != priLevel && priorityQueues.get(i).remove(block)) {
|
|
NameNode.blockStateChangeLog.debug(
|
|
NameNode.blockStateChangeLog.debug(
|
|
"BLOCK* NameSystem.UnderReplicationBlock.remove: Removing block" +
|
|
"BLOCK* NameSystem.UnderReplicationBlock.remove: Removing block" +
|
|
- " {} from priority queue {}", block, priLevel);
|
|
|
|
|
|
+ " {} from priority queue {}", block, i);
|
|
return true;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -278,15 +291,15 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
* @param expectedReplicasDelta the change in the expected replica count from before
|
|
* @param expectedReplicasDelta the change in the expected replica count from before
|
|
*/
|
|
*/
|
|
synchronized void update(BlockInfo block, int curReplicas,
|
|
synchronized void update(BlockInfo block, int curReplicas,
|
|
- int decommissionedReplicas,
|
|
|
|
|
|
+ int readOnlyReplicas, int decommissionedReplicas,
|
|
int curExpectedReplicas,
|
|
int curExpectedReplicas,
|
|
int curReplicasDelta, int expectedReplicasDelta) {
|
|
int curReplicasDelta, int expectedReplicasDelta) {
|
|
int oldReplicas = curReplicas-curReplicasDelta;
|
|
int oldReplicas = curReplicas-curReplicasDelta;
|
|
int oldExpectedReplicas = curExpectedReplicas-expectedReplicasDelta;
|
|
int oldExpectedReplicas = curExpectedReplicas-expectedReplicasDelta;
|
|
- int curPri = getPriority(curReplicas, decommissionedReplicas,
|
|
|
|
- curExpectedReplicas);
|
|
|
|
- int oldPri = getPriority(oldReplicas, decommissionedReplicas,
|
|
|
|
- oldExpectedReplicas);
|
|
|
|
|
|
+ int curPri = getPriority(curReplicas, readOnlyReplicas,
|
|
|
|
+ decommissionedReplicas, curExpectedReplicas);
|
|
|
|
+ int oldPri = getPriority(oldReplicas, readOnlyReplicas,
|
|
|
|
+ decommissionedReplicas, oldExpectedReplicas);
|
|
if(NameNode.stateChangeLog.isDebugEnabled()) {
|
|
if(NameNode.stateChangeLog.isDebugEnabled()) {
|
|
NameNode.stateChangeLog.debug("UnderReplicationBlocks.update " +
|
|
NameNode.stateChangeLog.debug("UnderReplicationBlocks.update " +
|
|
block +
|
|
block +
|
|
@@ -336,143 +349,69 @@ class UnderReplicatedBlocks implements Iterable<BlockInfo> {
|
|
* @return Return a list of block lists to be replicated. The block list index
|
|
* @return Return a list of block lists to be replicated. The block list index
|
|
* represents its replication priority.
|
|
* represents its replication priority.
|
|
*/
|
|
*/
|
|
- public synchronized List<List<BlockInfo>> chooseUnderReplicatedBlocks(
|
|
|
|
|
|
+ synchronized List<List<BlockInfo>> chooseUnderReplicatedBlocks(
|
|
int blocksToProcess) {
|
|
int blocksToProcess) {
|
|
- // initialize data structure for the return value
|
|
|
|
- List<List<BlockInfo>> blocksToReplicate = new ArrayList<>(LEVEL);
|
|
|
|
- for (int i = 0; i < LEVEL; i++) {
|
|
|
|
- blocksToReplicate.add(new ArrayList<BlockInfo>());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (size() == 0) { // There are no blocks to collect.
|
|
|
|
- return blocksToReplicate;
|
|
|
|
- }
|
|
|
|
|
|
+ final List<List<BlockInfo>> blocksToReplicate = new ArrayList<>(LEVEL);
|
|
|
|
|
|
- int blockCount = 0;
|
|
|
|
- for (int priority = 0; priority < LEVEL; priority++) {
|
|
|
|
|
|
+ int count = 0;
|
|
|
|
+ int priority = 0;
|
|
|
|
+ for (; count < blocksToProcess && priority < LEVEL; priority++) {
|
|
|
|
+ if (priority == QUEUE_WITH_CORRUPT_BLOCKS) {
|
|
|
|
+ // do not choose corrupted blocks.
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+
|
|
// Go through all blocks that need replications with current priority.
|
|
// Go through all blocks that need replications with current priority.
|
|
- BlockIterator neededReplicationsIterator = iterator(priority);
|
|
|
|
// Set the iterator to the first unprocessed block at this priority level.
|
|
// Set the iterator to the first unprocessed block at this priority level.
|
|
- neededReplicationsIterator.setToBookmark();
|
|
|
|
-
|
|
|
|
- blocksToProcess = Math.min(blocksToProcess, size());
|
|
|
|
-
|
|
|
|
- if (blockCount == blocksToProcess) {
|
|
|
|
- break; // break if already expected blocks are obtained
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ final Iterator<BlockInfo> i = priorityQueues.get(priority).getBookmark();
|
|
|
|
+ final List<BlockInfo> blocks = new LinkedList<>();
|
|
|
|
+ blocksToReplicate.add(blocks);
|
|
// Loop through all remaining blocks in the list.
|
|
// Loop through all remaining blocks in the list.
|
|
- while (blockCount < blocksToProcess
|
|
|
|
- && neededReplicationsIterator.hasNext()) {
|
|
|
|
- BlockInfo block = neededReplicationsIterator.next();
|
|
|
|
- blocksToReplicate.get(priority).add(block);
|
|
|
|
- blockCount++;
|
|
|
|
|
|
+ for(; count < blocksToProcess && i.hasNext(); count++) {
|
|
|
|
+ blocks.add(i.next());
|
|
}
|
|
}
|
|
-
|
|
|
|
- if (!neededReplicationsIterator.hasNext()
|
|
|
|
- && neededReplicationsIterator.getPriority() == LEVEL - 1) {
|
|
|
|
- // Reset all priorities' bookmarks to the beginning because there were
|
|
|
|
- // no recently added blocks in any list.
|
|
|
|
- for (int i = 0; i < LEVEL; i++) {
|
|
|
|
- this.priorityQueues.get(i).resetBookmark();
|
|
|
|
- }
|
|
|
|
- break;
|
|
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (priority == LEVEL) {
|
|
|
|
+ // Reset all bookmarks because there were no recently added blocks.
|
|
|
|
+ for (LightWeightLinkedSet<BlockInfo> q : priorityQueues) {
|
|
|
|
+ q.resetBookmark();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
return blocksToReplicate;
|
|
return blocksToReplicate;
|
|
}
|
|
}
|
|
|
|
|
|
/** returns an iterator of all blocks in a given priority queue */
|
|
/** returns an iterator of all blocks in a given priority queue */
|
|
- synchronized BlockIterator iterator(int level) {
|
|
|
|
- return new BlockIterator(level);
|
|
|
|
|
|
+ synchronized Iterator<BlockInfo> iterator(int level) {
|
|
|
|
+ return priorityQueues.get(level).iterator();
|
|
}
|
|
}
|
|
|
|
|
|
/** return an iterator of all the under replication blocks */
|
|
/** return an iterator of all the under replication blocks */
|
|
@Override
|
|
@Override
|
|
- public synchronized BlockIterator iterator() {
|
|
|
|
- return new BlockIterator();
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * An iterator over blocks.
|
|
|
|
- */
|
|
|
|
- class BlockIterator implements Iterator<BlockInfo> {
|
|
|
|
- private int level;
|
|
|
|
- private boolean isIteratorForLevel = false;
|
|
|
|
- private final List<Iterator<BlockInfo>> iterators = new ArrayList<>();
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * Construct an iterator over all queues.
|
|
|
|
- */
|
|
|
|
- private BlockIterator() {
|
|
|
|
- level=0;
|
|
|
|
- for(int i=0; i<LEVEL; i++) {
|
|
|
|
- iterators.add(priorityQueues.get(i).iterator());
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * Constrict an iterator for a single queue level
|
|
|
|
- * @param l the priority level to iterate over
|
|
|
|
- */
|
|
|
|
- private BlockIterator(int l) {
|
|
|
|
- level = l;
|
|
|
|
- isIteratorForLevel = true;
|
|
|
|
- iterators.add(priorityQueues.get(level).iterator());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private void update() {
|
|
|
|
- if (isIteratorForLevel) {
|
|
|
|
- return;
|
|
|
|
- }
|
|
|
|
- while(level< LEVEL-1 && !iterators.get(level).hasNext()) {
|
|
|
|
- level++;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ public synchronized Iterator<BlockInfo> iterator() {
|
|
|
|
+ final Iterator<LightWeightLinkedSet<BlockInfo>> q = priorityQueues.iterator();
|
|
|
|
+ return new Iterator<BlockInfo>() {
|
|
|
|
+ private Iterator<BlockInfo> b = q.next().iterator();
|
|
|
|
|
|
- @Override
|
|
|
|
- public BlockInfo next() {
|
|
|
|
- if (isIteratorForLevel) {
|
|
|
|
- return iterators.get(0).next();
|
|
|
|
|
|
+ @Override
|
|
|
|
+ public BlockInfo next() {
|
|
|
|
+ hasNext();
|
|
|
|
+ return b.next();
|
|
}
|
|
}
|
|
- update();
|
|
|
|
- return iterators.get(level).next();
|
|
|
|
- }
|
|
|
|
|
|
|
|
- @Override
|
|
|
|
- public boolean hasNext() {
|
|
|
|
- if (isIteratorForLevel) {
|
|
|
|
- return iterators.get(0).hasNext();
|
|
|
|
- }
|
|
|
|
- update();
|
|
|
|
- return iterators.get(level).hasNext();
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public void remove() {
|
|
|
|
- if (isIteratorForLevel) {
|
|
|
|
- iterators.get(0).remove();
|
|
|
|
- } else {
|
|
|
|
- iterators.get(level).remove();
|
|
|
|
|
|
+ @Override
|
|
|
|
+ public boolean hasNext() {
|
|
|
|
+ for(; !b.hasNext() && q.hasNext(); ) {
|
|
|
|
+ b = q.next().iterator();
|
|
|
|
+ }
|
|
|
|
+ return b.hasNext();
|
|
}
|
|
}
|
|
- }
|
|
|
|
-
|
|
|
|
- int getPriority() {
|
|
|
|
- return level;
|
|
|
|
- }
|
|
|
|
|
|
|
|
- /**
|
|
|
|
- * Sets iterator(s) to bookmarked elements.
|
|
|
|
- */
|
|
|
|
- private synchronized void setToBookmark() {
|
|
|
|
- if (this.isIteratorForLevel) {
|
|
|
|
- this.iterators.set(0, priorityQueues.get(this.level)
|
|
|
|
- .getBookmark());
|
|
|
|
- } else {
|
|
|
|
- for (int i = 0; i < LEVEL; i++) {
|
|
|
|
- this.iterators.set(i, priorityQueues.get(i).getBookmark());
|
|
|
|
- }
|
|
|
|
|
|
+ @Override
|
|
|
|
+ public void remove() {
|
|
|
|
+ throw new UnsupportedOperationException();
|
|
}
|
|
}
|
|
- }
|
|
|
|
|
|
+ };
|
|
}
|
|
}
|
|
}
|
|
}
|