/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * These .proto interfaces are private and unstable.
 * Please see http://wiki.apache.org/hadoop/Compatibility
 * for what changes are allowed for an *unstable* .proto interface.
 */

option java_package = "org.apache.hadoop.hdds.protocol.proto";
option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
package hadoop.hdds;

import "hdds.proto";

/**
 * Request for version info of the software stack on the server.
 */
message SCMVersionRequestProto {}

/**
 * Generic response that is sent to a version request. This allows keys to be
 * added on the fly and the protocol to remain stable.
 */
message SCMVersionResponseProto {
  required uint32 softwareVersion = 1;
  repeated hadoop.hdds.KeyValue keys = 2;
}

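// Illustrative sketch, not part of the protocol definition: building a
// version response in Java with the builders that protoc generates for
// these messages. The HddsProtos.KeyValue setKey/setValue accessors are
// assumed from the KeyValue message in hdds.proto, and the "scmUuid" key
// is a hypothetical example of an on-the-fly key.
//
//   SCMVersionResponseProto response = SCMVersionResponseProto.newBuilder()
//       .setSoftwareVersion(1)
//       .addKeys(HddsProtos.KeyValue.newBuilder()
//           .setKey("scmUuid")          // hypothetical key name
//           .setValue(scmUuid))
//       .build();
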
message SCMRegisterRequestProto {
  required DatanodeDetailsProto datanodeDetails = 1;
  required NodeReportProto nodeReport = 2;
  required ContainerReportsProto containerReport = 3;
  required PipelineReportsProto pipelineReports = 4;
}

/**
 * Datanode ID returned by the SCM. This is similar to a datanode's
 * registration with the name node.
 */
message SCMRegisteredResponseProto {
  enum ErrorCode {
    success = 1;
    errorNodeNotPermitted = 2;
  }
  required ErrorCode errorCode = 1;
  required string datanodeUUID = 2;
  required string clusterID = 3;
  optional SCMNodeAddressList addressList = 4;
  optional string hostname = 5;
  optional string ipAddress = 6;
}

/**
 * This message is sent by the datanode to indicate that it is alive or that
 * it is registering with the node manager.
 */
message SCMHeartbeatRequestProto {
  required DatanodeDetailsProto datanodeDetails = 1;
  optional NodeReportProto nodeReport = 2;
  optional ContainerReportsProto containerReport = 3;
  repeated CommandStatusReportsProto commandStatusReports = 4;
  optional ContainerActionsProto containerActions = 5;
  optional PipelineActionsProto pipelineActions = 6;
  optional PipelineReportsProto pipelineReports = 7;
}

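// Illustrative sketch, not part of the protocol definition: only
// datanodeDetails is required, so a minimal heartbeat can be built in Java
// as below. The datanodeDetails and nodeReport values are assumed to have
// been built elsewhere.
//
//   SCMHeartbeatRequestProto request = SCMHeartbeatRequestProto.newBuilder()
//       .setDatanodeDetails(datanodeDetails)  // required
//       .setNodeReport(nodeReport)            // optional utilization report
//       .build();
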
/*
 * A group of commands for the datanode to execute.
 */
message SCMHeartbeatResponseProto {
  required string datanodeUUID = 1;
  repeated SCMCommandProto commands = 2;
}

message SCMNodeAddressList {
  repeated string addressList = 1;
}

/**
 * This message is sent along with the heartbeat to report datanode
 * storage utilization to SCM.
 */
message NodeReportProto {
  repeated StorageReportProto storageReport = 1;
}

message StorageReportProto {
  required string storageUuid = 1;
  required string storageLocation = 2;
  optional uint64 capacity = 3 [default = 0];
  optional uint64 scmUsed = 4 [default = 0];
  optional uint64 remaining = 5 [default = 0];
  optional StorageTypeProto storageType = 6 [default = DISK];
  optional bool failed = 7 [default = false];
}

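// Illustrative sketch, not part of the protocol definition: a report for a
// single storage volume. The path and byte counts are hypothetical, and
// java.util.UUID is assumed for generating the volume UUID.
//
//   StorageReportProto report = StorageReportProto.newBuilder()
//       .setStorageUuid(UUID.randomUUID().toString())
//       .setStorageLocation("/data/hdds")      // hypothetical mount point
//       .setCapacity(1_000_000_000L)
//       .setScmUsed(250_000_000L)
//       .setRemaining(750_000_000L)
//       .setStorageType(StorageTypeProto.DISK)
//       .build();
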
/**
 * Types of recognized storage media.
 */
enum StorageTypeProto {
  DISK = 1;
  SSD = 2;
  ARCHIVE = 3;
  RAM_DISK = 4;
  PROVIDED = 5;
}

message ContainerReportsProto {
  repeated ContainerInfo reports = 1;
}

message CommandStatusReportsProto {
  repeated CommandStatus cmdStatus = 1;
}

message CommandStatus {
  enum Status {
    PENDING = 1;
    EXECUTED = 2;
    FAILED = 3;
  }
  required int64 cmdId = 1;
  required Status status = 2 [default = PENDING];
  required SCMCommandProto.Type type = 3;
  optional string msg = 4;
  optional ContainerBlocksDeletionACKProto blockDeletionAck = 5;
}

message ContainerActionsProto {
  repeated ContainerAction containerActions = 1;
}

message ContainerAction {
  enum Action {
    CLOSE = 1;
  }
  enum Reason {
    CONTAINER_FULL = 1;
    CONTAINER_UNHEALTHY = 2;
  }
  required int64 containerID = 1;
  required Action action = 2;
  optional Reason reason = 3;
}

message PipelineReport {
  required PipelineID pipelineID = 1;
}

message PipelineReportsProto {
  repeated PipelineReport pipelineReport = 1;
}

message PipelineActionsProto {
  repeated PipelineAction pipelineActions = 1;
}

message ClosePipelineInfo {
  enum Reason {
    PIPELINE_FAILED = 1;
  }
  required PipelineID pipelineID = 1;
  optional Reason reason = 3;
  optional string detailedReason = 4;
}

message PipelineAction {
  enum Action {
    CLOSE = 1;
  }

  /**
   * Action will be used to identify the correct pipeline action.
   */
  required Action action = 1;
  optional ClosePipelineInfo closePipeline = 2;
}

/**
 * A container report contains the following information.
 */
message ContainerInfo {
  required int64 containerID = 1;
  optional int64 size = 2;
  optional int64 used = 3;
  optional int64 keyCount = 4;
  // TODO: move the io counts to a separate message
  optional int64 readCount = 5;
  optional int64 writeCount = 6;
  optional int64 readBytes = 7;
  optional int64 writeBytes = 8;
  optional string finalhash = 9;
  optional hadoop.hdds.LifeCycleState state = 10;
  optional int64 deleteTransactionId = 11;
  optional uint64 blockCommitSequenceId = 12;
}

/*
 * These are commands returned by SCM to the datanode to execute.
 */
message SCMCommandProto {
  enum Type {
    reregisterCommand = 1;
    deleteBlocksCommand = 2;
    closeContainerCommand = 3;
    deleteContainerCommand = 4;
    replicateContainerCommand = 5;
  }
  // TODO: once we start using protoc 3.x, refactor this message using "oneof"
  required Type commandType = 1;
  optional ReregisterCommandProto reregisterCommandProto = 2;
  optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
  optional CloseContainerCommandProto closeContainerCommandProto = 4;
  optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
  optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
}

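// Illustrative sketch of the "oneof" refactor the TODO above refers to;
// this is not part of the current protocol definition, and the field
// numbers shown are hypothetical:
//
//   message SCMCommandProto {
//     oneof command {
//       ReregisterCommandProto reregisterCommandProto = 1;
//       DeleteBlocksCommandProto deleteBlocksCommandProto = 2;
//       CloseContainerCommandProto closeContainerCommandProto = 3;
//       DeleteContainerCommandProto deleteContainerCommandProto = 4;
//       ReplicateContainerCommandProto replicateContainerCommandProto = 5;
//     }
//   }
//
// With oneof, setting one command clears the others and the generated code
// exposes which case is set, so the explicit commandType discriminator
// becomes unnecessary.
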
/**
 * SCM informs a datanode to register itself again.
 * On receiving this command, the datanode will transition to the REGISTER
 * state.
 */
message ReregisterCommandProto {}

// HB response from SCM, contains a list of block deletion transactions.
message DeleteBlocksCommandProto {
  repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
  required int64 cmdId = 3;
}

// The deleted blocks which are stored in deletedBlock.db of scm.
// We don't use BlockID because this only contains multiple localIDs
// of the same containerID.
message DeletedBlocksTransaction {
  required int64 txID = 1;
  required int64 containerID = 2;
  repeated int64 localID = 3;
  // the number of retries for sending the delete command to the datanode.
  required int32 count = 4;
}

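// Illustrative sketch, not part of the protocol definition: one transaction
// groups several local block IDs under a single container, e.g. deleting
// blocks 101, 102, and 103 of container 7 (all values hypothetical;
// java.util.Arrays is assumed for the list):
//
//   DeletedBlocksTransaction txn = DeletedBlocksTransaction.newBuilder()
//       .setTxID(42L)
//       .setContainerID(7L)
//       .addAllLocalID(Arrays.asList(101L, 102L, 103L))
//       .setCount(0)   // no retries yet
//       .build();
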
// ACK message the datanode sends to SCM, containing the results of
// block deletion transactions.
message ContainerBlocksDeletionACKProto {
  message DeleteBlockTransactionResult {
    required int64 txID = 1;
    required int64 containerID = 2;
    required bool success = 3;
  }
  repeated DeleteBlockTransactionResult results = 1;
  required string dnId = 2;
}

/**
 * This command asks the datanode to close a specific container.
 */
message CloseContainerCommandProto {
  required int64 containerID = 1;
  required hadoop.hdds.ReplicationType replicationType = 2;
  required int64 cmdId = 3;
  required PipelineID pipelineID = 4;
}

/**
 * This command asks the datanode to delete a specific container.
 */
message DeleteContainerCommandProto {
  required int64 containerID = 1;
  required int64 cmdId = 2;
}

/**
 * This command asks the datanode to replicate a container from specific
 * sources.
 */
message ReplicateContainerCommandProto {
  required int64 containerID = 1;
  repeated DatanodeDetailsProto sources = 2;
  required int64 cmdId = 3;
}

/**
 * Protocol used from a datanode to StorageContainerManager.
 *
 * Please see the request and response messages for details of the RPC calls.
 *
 * Here is a simple state diagram that shows how a datanode would boot up and
 * communicate with SCM.
 *
 *           -----------------------
 *          |         Start         |
 *           ----------+------------
 *                     |
 *                     v
 *           -----------------------
 *          |   Searching for SCM   |----------
 *           ----------+------------          |
 *                     |                      v
 *                     |           -----------------------
 *                     |          |  Register if needed   |
 *                     |           -----------+-----------
 *                     |                      |
 *                     v                      |
 *           -----------------------          |
 *     +--->|    Heartbeat state    |<--------+
 *     |     ----------+------------
 *     |               |
 *     +---------------+
 *
 * Here is how this protocol is used by the datanode. When a datanode boots
 * up, it moves into a state called SEARCHING_SCM. In this state the datanode
 * tries to establish communication with the SCM. The addresses of the SCMs
 * are retrieved from the configuration.
 *
 * In the SEARCHING_SCM state, the only RPC call made by the datanode is a
 * getVersion call to SCM. Once any of the SCMs reply, the datanode checks
 * whether it has a locally persisted datanode ID. If it has one, it means
 * this datanode is already registered with some SCM. If this file is not
 * found, the datanode assumes that it needs to register.
 *
 * If registration is needed, the datanode moves into the REGISTER state. It
 * sends a register call with the DatanodeDetailsProto data structure and
 * persists that info.
 *
 * The response to the register call contains the clusterID. The datanode
 * persists this information as well and moves into the heartbeat state.
 *
 * Once in the heartbeat state, the datanode sends heartbeats and container
 * reports to SCM and processes commands issued by SCM until it is shut down.
 */
service StorageContainerDatanodeProtocolService {
  /**
   * Gets the version information from the SCM.
   */
  rpc getVersion (SCMVersionRequestProto) returns (SCMVersionResponseProto);

  /**
   * Registers a datanode with SCM.
   */
  rpc register (SCMRegisterRequestProto) returns (SCMRegisteredResponseProto);

  /**
   * Sends a heartbeat from the datanode to SCM. Heartbeats under SCM look
   * more like a lifeline protocol than heartbeats under HDFS. In other
   * words, they are extremely lightweight and carry no data payload.
   */
  rpc sendHeartbeat (SCMHeartbeatRequestProto)
      returns (SCMHeartbeatResponseProto);
}
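
// Illustrative sketch, not part of the protocol definition: the state
// machine described above, written in Java against the BlockingInterface
// that protoc generates for this service when java_generic_services is on.
// How the stub is obtained (here via a hypothetical getScmStub(), in
// practice Hadoop RPC wiring) and the hasPersistedDatanodeId,
// buildRegisterRequest, buildHeartbeat, persist, and dispatch helpers are
// assumptions for the example; calls may throw ServiceException.
//
//   StorageContainerDatanodeProtocolService.BlockingInterface scm =
//       getScmStub();                         // assumed helper
//   scm.getVersion(null, SCMVersionRequestProto.getDefaultInstance());
//   if (!hasPersistedDatanodeId()) {          // REGISTER state
//     SCMRegisteredResponseProto reg =
//         scm.register(null, buildRegisterRequest());
//     persist(reg.getDatanodeUUID(), reg.getClusterID());
//   }
//   while (running) {                         // heartbeat state
//     SCMHeartbeatResponseProto response =
//         scm.sendHeartbeat(null, buildHeartbeat());
//     for (SCMCommandProto cmd : response.getCommandsList()) {
//       dispatch(cmd);                        // execute SCM commands
//     }
//   }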