|
@@ -21,12 +21,10 @@ import com.google.common.base.Preconditions;
|
|
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
|
import org.apache.hadoop.hdds.scm.node.NodeManager;
|
|
import org.apache.hadoop.hdds.scm.node.NodePoolManager;
|
|
import org.apache.hadoop.hdds.scm.node.NodePoolManager;
|
|
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
|
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
|
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
|
|
|
|
import org.apache.hadoop.hdds.protocol.proto
|
|
import org.apache.hadoop.hdds.protocol.proto
|
|
.StorageContainerDatanodeProtocolProtos.ContainerInfo;
|
|
.StorageContainerDatanodeProtocolProtos.ContainerInfo;
|
|
import org.apache.hadoop.hdds.protocol.proto
|
|
import org.apache.hadoop.hdds.protocol.proto
|
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
|
.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
|
-import org.apache.hadoop.ozone.protocol.commands.SendContainerCommand;
|
|
|
|
import org.apache.hadoop.util.Time;
|
|
import org.apache.hadoop.util.Time;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.slf4j.LoggerFactory;
|
|
@@ -36,19 +34,10 @@ import java.util.Map;
|
|
import java.util.UUID;
|
|
import java.util.UUID;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.ExecutorService;
|
|
import java.util.concurrent.ExecutorService;
|
|
-import java.util.concurrent.TimeUnit;
|
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
import java.util.function.Predicate;
|
|
import java.util.function.Predicate;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
-import static com.google.common.util.concurrent.Uninterruptibles
|
|
|
|
- .sleepUninterruptibly;
|
|
|
|
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
|
|
|
|
- .HEALTHY;
|
|
|
|
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState
|
|
|
|
- .INVALID;
|
|
|
|
-import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* These are pools that are actively checking for replication status of the
|
|
* These are pools that are actively checking for replication status of the
|
|
* containers.
|
|
* containers.
|
|
@@ -177,56 +166,10 @@ public final class InProgressPool {
|
|
nodeProcessed = new AtomicInteger(0);
|
|
nodeProcessed = new AtomicInteger(0);
|
|
containerProcessedCount = new AtomicInteger(0);
|
|
containerProcessedCount = new AtomicInteger(0);
|
|
nodeCount = new AtomicInteger(0);
|
|
nodeCount = new AtomicInteger(0);
|
|
- /*
|
|
|
|
- Ask each datanode to send us commands.
|
|
|
|
- */
|
|
|
|
- SendContainerCommand cmd = SendContainerCommand.newBuilder().build();
|
|
|
|
- for (DatanodeDetails dd : datanodeDetailsList) {
|
|
|
|
- NodeState currentState = getNodestate(dd);
|
|
|
|
- if (currentState == HEALTHY || currentState == STALE) {
|
|
|
|
- nodeCount.incrementAndGet();
|
|
|
|
- // Queue commands to all datanodes in this pool to send us container
|
|
|
|
- // report. Since we ignore dead nodes, it is possible that we would have
|
|
|
|
- // over replicated the container if the node comes back.
|
|
|
|
- nodeManager.addDatanodeCommand(dd.getUuid(), cmd);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
this.status = ProgressStatus.InProgress;
|
|
this.status = ProgressStatus.InProgress;
|
|
this.getPool().setLastProcessedTime(Time.monotonicNow());
|
|
this.getPool().setLastProcessedTime(Time.monotonicNow());
|
|
}
|
|
}
|
|
|
|
|
|
- /**
|
|
|
|
- * Gets the node state.
|
|
|
|
- *
|
|
|
|
- * @param datanode - datanode information.
|
|
|
|
- * @return NodeState.
|
|
|
|
- */
|
|
|
|
- private NodeState getNodestate(DatanodeDetails datanode) {
|
|
|
|
- NodeState currentState = INVALID;
|
|
|
|
- int maxTry = 100;
|
|
|
|
- // We need to loop to make sure that we will retry if we get
|
|
|
|
- // node state unknown. This can lead to infinite loop if we send
|
|
|
|
- // in unknown node ID. So max try count is used to prevent it.
|
|
|
|
-
|
|
|
|
- int currentTry = 0;
|
|
|
|
- while (currentState == INVALID && currentTry < maxTry) {
|
|
|
|
- // Retry to make sure that we deal with the case of node state not
|
|
|
|
- // known.
|
|
|
|
- currentState = nodeManager.getNodeState(datanode);
|
|
|
|
- currentTry++;
|
|
|
|
- if (currentState == INVALID) {
|
|
|
|
- // Sleep to make sure that this is not a tight loop.
|
|
|
|
- sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if (currentState == INVALID) {
|
|
|
|
- LOG.error("Not able to determine the state of Node: {}, Exceeded max " +
|
|
|
|
- "try and node manager returns INVALID state. This indicates we " +
|
|
|
|
- "are dealing with a node that we don't know about.", datanode);
|
|
|
|
- }
|
|
|
|
- return currentState;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* Queues a container Report for handling. This is done in a worker thread
|
|
* Queues a container Report for handling. This is done in a worker thread
|
|
* since decoding a container report might be compute intensive . We don't
|
|
* since decoding a container report might be compute intensive . We don't
|