// StorageContainerDatanodeProtocol.proto
  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements. See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership. The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License. You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  /**
   * These .proto interfaces are private and unstable.
   * Please see http://wiki.apache.org/hadoop/Compatibility
   * for what changes are allowed for an *unstable* .proto interface.
   */
  // This file relies on proto2-only constructs (`required` labels and
  // `[default = ...]` options), so the syntax is declared explicitly instead
  // of depending on the compiler's implicit default.
  syntax = "proto2";

  // Java code generation settings.
  option java_package = "org.apache.hadoop.hdds.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  option java_generic_services = true;
  option java_generate_equals_and_hash = true;

  package hadoop.hdds;

  import "hdds.proto";
  /**
   * Request for version info of the software stack on the server.
   * The message declares no fields.
   */
  message SCMVersionRequestProto {}
  /**
   * Generic response that is sent to a version request. This allows keys to
   * be added on the fly and the protocol to remain stable.
   */
  message SCMVersionResponseProto {
    required uint32 softwareVersion = 1;
    // Extensible key/value entries. KeyValue is not declared in this file
    // (presumably it comes from the imported hdds.proto -- confirm).
    repeated hadoop.hdds.KeyValue keys = 2;
  }
  /**
   * Request message of the register rpc. All four parts are required.
   */
  message SCMRegisterRequestProto {
    // DatanodeDetailsProto is not declared in this file.
    required DatanodeDetailsProto datanodeDetails = 1;
    required NodeReportProto nodeReport = 2;
    required ContainerReportsProto containerReport = 3;
    required PipelineReportsProto pipelineReports = 4;
  }
  /**
   * Datanode ID returned by the SCM. This is similar to the name node
   * registration of a datanode.
   */
  message SCMRegisteredResponseProto {
    // Outcome of the registration request.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    required ErrorCode errorCode = 1;
    required string datanodeUUID = 2;
    required string clusterID = 3;

    // Optional details.
    optional SCMNodeAddressList addressList = 4;
    optional string hostname = 5;
    optional string ipAddress = 6;
  }
  /**
   * This message is sent by a datanode to indicate that it is alive or that
   * it is registering with the node manager.
   */
  message SCMHeartbeatRequestProto {
    // The only mandatory part; every report or action below may be omitted.
    required DatanodeDetailsProto datanodeDetails = 1;

    // Reports.
    optional NodeReportProto nodeReport = 2;
    optional ContainerReportsProto containerReport = 3;
    repeated IncrementalContainerReportProto incrementalContainerReport = 4;
    repeated CommandStatusReportsProto commandStatusReports = 5;

    // Actions.
    optional ContainerActionsProto containerActions = 6;
    optional PipelineActionsProto pipelineActions = 7;

    optional PipelineReportsProto pipelineReports = 8;
  }
  /**
   * A group of commands for the datanode to execute.
   */
  message SCMHeartbeatResponseProto {
    required string datanodeUUID = 1;
    // Zero or more commands.
    repeated SCMCommandProto commands = 2;
  }
  /**
   * Wrapper around a list of address strings. The format of an address is not
   * specified in this file.
   */
  message SCMNodeAddressList {
    repeated string addressList = 1;
  }
  /**
   * This message is sent along with the heartbeat to report datanode
   * storage utilization to SCM.
   */
  message NodeReportProto {
    // Zero or more storage report entries.
    repeated StorageReportProto storageReport = 1;
  }
  /**
   * A single storage report entry, as carried in NodeReportProto.
   */
  message StorageReportProto {
    required string storageUuid = 1;
    required string storageLocation = 2;

    // Utilization figures; each defaults to 0 when not set. Units are not
    // stated in this file (presumably bytes -- confirm).
    optional uint64 capacity = 3 [default = 0];
    optional uint64 scmUsed = 4 [default = 0];
    optional uint64 remaining = 5 [default = 0];

    // Defaults to DISK when not set.
    optional StorageTypeProto storageType = 6 [default = DISK];
    // Defaults to false when not set.
    optional bool failed = 7 [default = false];
  }
  /**
   * Types of recognized storage media.
   */
  enum StorageTypeProto {
    DISK = 1;
    SSD = 2;
    ARCHIVE = 3;
    RAM_DISK = 4;
    PROVIDED = 5;
  }
  /**
   * A list of container replica reports.
   */
  message ContainerReportsProto {
    repeated ContainerReplicaProto reports = 1;
  }
  /**
   * A list of container replica reports, used for the repeated
   * incrementalContainerReport field of the heartbeat request.
   */
  message IncrementalContainerReportProto {
    repeated ContainerReplicaProto report = 1;
  }
  /**
   * Report entry for a single container replica. It is the element type of
   * both ContainerReportsProto and IncrementalContainerReportProto.
   */
  message ContainerReplicaProto {
    // State reported for the replica.
    enum State {
      OPEN = 1;
      CLOSING = 2;
      QUASI_CLOSED = 3;
      CLOSED = 4;
      UNHEALTHY = 5;
      INVALID = 6;
    }

    required int64 containerID = 1;
    required State state = 2;

    // Optional statistics. Units are not stated in this file.
    optional int64 size = 3;
    optional int64 used = 4;
    optional int64 keyCount = 5;
    optional int64 readCount = 6;
    optional int64 writeCount = 7;
    optional int64 readBytes = 8;
    optional int64 writeBytes = 9;

    optional string finalhash = 10;
    optional int64 deleteTransactionId = 11;
    optional uint64 blockCommitSequenceId = 12;
    optional string originNodeId = 13;
  }
  /**
   * A list of command status entries.
   */
  message CommandStatusReportsProto {
    repeated CommandStatus cmdStatus = 1;
  }
  /**
   * Status of a single command, identified by cmdId.
   */
  message CommandStatus {
    enum Status {
      PENDING = 1;
      EXECUTED = 2;
      FAILED = 3;
    }

    required int64 cmdId = 1;
    // NOTE(review): a default on a required field looks redundant, because a
    // required field has to be set explicitly; it is kept as declared.
    required Status status = 2 [default = PENDING];
    // Command type, using the enum nested in SCMCommandProto.
    required SCMCommandProto.Type type = 3;
    optional string msg = 4;
    optional ContainerBlocksDeletionACKProto blockDeletionAck = 5;
  }
  /**
   * A list of container actions.
   */
  message ContainerActionsProto {
    repeated ContainerAction containerActions = 1;
  }
  /**
   * An action for one container, optionally with the reason for it.
   */
  message ContainerAction {
    // CLOSE is the only action declared so far.
    enum Action {
      CLOSE = 1;
    }

    enum Reason {
      CONTAINER_FULL = 1;
      CONTAINER_UNHEALTHY = 2;
    }

    required int64 containerID = 1;
    required Action action = 2;
    optional Reason reason = 3;
  }
  /**
   * Report entry for a single pipeline; it carries only the pipeline ID.
   */
  message PipelineReport {
    // PipelineID is not declared in this file.
    required PipelineID pipelineID = 1;
  }
  /**
   * A list of pipeline reports.
   */
  message PipelineReportsProto {
    repeated PipelineReport pipelineReport = 1;
  }
  /**
   * A list of pipeline actions.
   */
  message PipelineActionsProto {
    repeated PipelineAction pipelineActions = 1;
  }
  /**
   * Details attached to a pipeline CLOSE action (see
   * PipelineAction.closePipeline).
   */
  message ClosePipelineInfo {
    enum Reason {
      PIPELINE_FAILED = 1;
    }

    required PipelineID pipelineID = 1;
    // Field number 2 is not declared in this message.
    // NOTE(review): if it belonged to a removed field, do not reuse it.
    optional Reason reason = 3;
    optional string detailedReason = 4;
  }
  /**
   * A single pipeline action.
   */
  message PipelineAction {
    // CLOSE is the only action declared so far.
    enum Action {
      CLOSE = 1;
    }

    /**
     * Action will be used to identify the correct pipeline action.
     */
    required Action action = 1;
    optional ClosePipelineInfo closePipeline = 2;
  }
  /**
   * These are the commands returned by SCM for the datanode to execute.
   */
  message SCMCommandProto {
    enum Type {
      reregisterCommand = 1;
      deleteBlocksCommand = 2;
      closeContainerCommand = 3;
      deleteContainerCommand = 4;
      replicateContainerCommand = 5;
    }

    // TODO: once we start using protoc 3.x, refactor this message using "oneof"
    required Type commandType = 1;

    // One optional payload field per command type. Presumably only the one
    // matching commandType is set -- the schema itself does not enforce it.
    optional ReregisterCommandProto reregisterCommandProto = 2;
    optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
    optional CloseContainerCommandProto closeContainerCommandProto = 4;
    optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
    optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
  }
  /**
   * SCM informs a datanode to register itself again.
   * On receiving this command, the datanode will transition to the REGISTER
   * state.
   */
  message ReregisterCommandProto {}
  // HB response from SCM, contains a list of block deletion transactions.
  message DeleteBlocksCommandProto {
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
    // Field number 2 is not declared in this message.
    // NOTE(review): if it belonged to a removed field, do not reuse it.
    required int64 cmdId = 3;
  }
  // The deleted blocks which are stored in deletedBlock.db of scm.
  // We don't use BlockID because this only contains multiple localIDs
  // of the same containerID.
  message DeletedBlocksTransaction {
    required int64 txID = 1;
    required int64 containerID = 2;
    repeated int64 localID = 3;
    // The retry count of sending the delete command to the datanode.
    required int32 count = 4;
  }
  // ACK message the datanode sends to SCM; contains the results of
  // block deletion transactions.
  message ContainerBlocksDeletionACKProto {
    // Result of one deletion transaction for one container.
    message DeleteBlockTransactionResult {
      required int64 txID = 1;
      required int64 containerID = 2;
      required bool success = 3;
    }

    repeated DeleteBlockTransactionResult results = 1;
    required string dnId = 2;
  }
  /**
   * This command asks the datanode to close a specific container.
   */
  message CloseContainerCommandProto {
    required int64 containerID = 1;
    required PipelineID pipelineID = 2;
    // cmdId will be removed
    required int64 cmdId = 3;
    // Force will be used when closing a container outside of ratis.
    optional bool force = 4 [default = false];
  }
  /**
   * This command asks the datanode to delete a specific container.
   */
  message DeleteContainerCommandProto {
    required int64 containerID = 1;
    required int64 cmdId = 2;
  }
  /**
   * This command asks the datanode to replicate a container from specific
   * sources.
   */
  message ReplicateContainerCommandProto {
    required int64 containerID = 1;
    // Datanodes the container can be replicated from.
    repeated DatanodeDetailsProto sources = 2;
    required int64 cmdId = 3;
  }
  /**
   * Protocol used from a datanode to StorageContainerManager.
   *
   * Please see the request and response messages for details of the RPC calls.
   *
   * Here is a simple state diagram that shows how a datanode would boot up and
   * communicate with SCM.
   *
   *            -----------------------
   *           |         Start         |
   *            ---------- ------------
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *            ----------v-------------
   *           |   Searching for SCM    ------------
   *            ---------- -------------            |
   *                      |                         |
   *                      |                         |
   *                      |               ----------v-------------
   *                      |              | Register if needed     |
   *                      |               ----------- ------------
   *                      |                          |
   *                      v                          |
   *           ----------- ----------------          |
   *  ---------   Heartbeat state            <--------
   * |         --------^-------------------
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   *  ------------------
   *
   * Here is how this protocol is used by the datanode. When a datanode boots
   * up it moves into a state called SEARCHING_SCM. In this state the datanode
   * is trying to establish communication with the SCM. The addresses of the
   * SCMs are retrieved from the configuration information.
   *
   * In the SEARCHING_SCM state, the only rpc call made by the datanode is a
   * getVersion call to SCM. Once any of the SCMs replies, the datanode checks
   * if it has a locally persisted datanode ID. If it has, this means that this
   * datanode is already registered with some SCM. If this file is not found,
   * the datanode assumes that it needs to do a registration.
   *
   * If registration is needed, the datanode moves into the REGISTER state. It
   * will send a register call with the DatanodeDetailsProto data structure and
   * persist that info.
   *
   * The response to the command contains the clusterID. This information is
   * also persisted by the datanode, which then moves into the heartbeat state.
   *
   * Once in the heartbeat state, the datanode sends heartbeats and container
   * reports to SCM and processes commands issued by SCM until it is shut down.
   */
  service StorageContainerDatanodeProtocolService {
    /**
     * Gets the version information from the SCM.
     */
    rpc getVersion (SCMVersionRequestProto) returns (SCMVersionResponseProto);

    /**
     * Registers a data node with SCM.
     */
    rpc register (SCMRegisterRequestProto) returns (SCMRegisteredResponseProto);

    /**
     * Send heartbeat from datanode to SCM. HB's under SCM looks more
     * like life line protocol than HB's under HDFS. In other words, it is
     * extremely light weight and contains no data payload.
     *
     * NOTE(review): SCMHeartbeatRequestProto does declare optional report and
     * action fields, so the "no data payload" wording may be outdated.
     */
    rpc sendHeartbeat (SCMHeartbeatRequestProto) returns (SCMHeartbeatResponseProto);
  }