StorageContainerDatanodeProtocol.proto 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /**
  19. * These .proto interfaces are private and unstable.
  20. * Please see http://wiki.apache.org/hadoop/Compatibility
  21. * for what changes are allowed for an *unstable* .proto interface.
  22. */
  // Pin the language level explicitly. This file depends on proto2-only
  // constructs (required fields, explicit field defaults); without a syntax
  // statement newer protoc releases emit a warning and fall back to proto2.
  syntax = "proto2";

  // Java code generation settings.
  option java_package = "org.apache.hadoop.hdds.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  option java_generic_services = true;
  option java_generate_equals_and_hash = true;

  package hadoop.hdds;

  import "hdds.proto";
  // Empty request asking the server for the version information of its
  // software stack.
  message SCMVersionRequestProto {
  }
  // Generic reply that is sent to a version request. Additional information
  // travels as key/value pairs, so keys can be added on the fly while the
  // protocol itself stays stable.
  message SCMVersionResponseProto {
    // Software version reported by the responder.
    required uint32 softwareVersion = 1;

    // Open-ended key/value pairs. KeyValue is not defined in this file;
    // presumably it comes from the imported hdds.proto.
    repeated hadoop.hdds.KeyValue keys = 2;
  }
  // Request message of the register RPC. It bundles the datanode details
  // with a node report, a container report and a pipeline report; all four
  // fields are required.
  message SCMRegisterRequestProto {
    required DatanodeDetailsProto datanodeDetails = 1;
    required NodeReportProto nodeReport = 2;
    required ContainerReportsProto containerReport = 3;
    required PipelineReportsProto pipelineReports = 4;
  }
  // Response message of the register RPC: the datanode ID returned by the
  // SCM. This is similar to the name node registration of a datanode.
  message SCMRegisteredResponseProto {
    // Result code of the registration attempt.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    // Mandatory part of the response.
    required ErrorCode errorCode = 1;
    required string datanodeUUID = 2;
    required string clusterID = 3;

    // Optional extras.
    optional SCMNodeAddressList addressList = 4;
    optional string hostname = 5;
    optional string ipAddress = 6;
  }
  // Sent by a data node to indicate that it is alive, or that it is
  // registering with the node manager.
  message SCMHeartbeatRequestProto {
    // Details of the sending datanode; the only required field.
    required DatanodeDetailsProto datanodeDetails = 1;

    // Reports and actions; each may be absent from a given heartbeat.
    optional NodeReportProto nodeReport = 2;
    optional ContainerReportsProto containerReport = 3;
    repeated CommandStatusReportsProto commandStatusReports = 4;
    optional ContainerActionsProto containerActions = 5;
    optional PipelineActionsProto pipelineActions = 6;
    optional PipelineReportsProto pipelineReports = 7;
  }
  // Heartbeat reply: a group of commands for the datanode to execute.
  message SCMHeartbeatResponseProto {
    required string datanodeUUID = 1;

    // Zero or more commands.
    repeated SCMCommandProto commands = 2;
  }
  // Wrapper around a list of address strings; used as the optional
  // addressList field of SCMRegisteredResponseProto.
  message SCMNodeAddressList {
    repeated string addressList = 1;
  }
  // Sent along with the heartbeat to report the datanode's storage
  // utilization to SCM.
  message NodeReportProto {
    // One entry per reported storage.
    repeated StorageReportProto storageReport = 1;
  }
  // A single entry of a NodeReportProto.
  message StorageReportProto {
    // Required identification of the storage.
    required string storageUuid = 1;
    required string storageLocation = 2;

    // Optional figures; each numeric field defaults to 0 when unset.
    optional uint64 capacity = 3 [default = 0];
    optional uint64 scmUsed = 4 [default = 0];
    optional uint64 remaining = 5 [default = 0];

    // Defaults to DISK when unset.
    optional StorageTypeProto storageType = 6 [default = DISK];

    // Defaults to false when unset.
    optional bool failed = 7 [default = false];
  }
  // The storage media types that are recognized.
  enum StorageTypeProto {
    DISK = 1;
    SSD = 2;
    ARCHIVE = 3;
    RAM_DISK = 4;
    PROVIDED = 5;
  }
  // A list of per-container reports (see ContainerInfo).
  message ContainerReportsProto {
    repeated ContainerInfo reports = 1;
  }
  // A list of command statuses (see CommandStatus).
  message CommandStatusReportsProto {
    repeated CommandStatus cmdStatus = 1;
  }
  // Status of one command, keyed by cmdId.
  message CommandStatus {
    // Possible states of a command.
    enum Status {
      PENDING = 1;
      EXECUTED = 2;
      FAILED = 3;
    }

    required int64 cmdId = 1;

    // Declared required, with PENDING as its default value.
    required Status status = 2 [default = PENDING];

    // The kind of command this status refers to.
    required SCMCommandProto.Type type = 3;

    optional string msg = 4;
    optional ContainerBlocksDeletionACKProto blockDeletionAck = 5;
  }
  // A list of container actions (see ContainerAction).
  message ContainerActionsProto {
    repeated ContainerAction containerActions = 1;
  }
  // One action for a single container, optionally with the reason for it.
  message ContainerAction {
    // Supported actions; CLOSE is the only one defined.
    enum Action {
      CLOSE = 1;
    }

    // Reasons that can accompany an action.
    enum Reason {
      CONTAINER_FULL = 1;
      CONTAINER_UNHEALTHY = 2;
    }

    required int64 containerID = 1;
    required Action action = 2;
    optional Reason reason = 3;
  }
  // Report for one pipeline; it carries only the pipeline ID.
  message PipelineReport {
    required PipelineID pipelineID = 1;
  }
  // A list of pipeline reports (see PipelineReport).
  message PipelineReportsProto {
    repeated PipelineReport pipelineReport = 1;
  }
  // A list of pipeline actions (see PipelineAction).
  message PipelineActionsProto {
    repeated PipelineAction pipelineActions = 1;
  }
  // Details attached to a pipeline CLOSE action (see PipelineAction).
  // Field number 2 is not used by this message.
  message ClosePipelineInfo {
    // Reasons for closing; PIPELINE_FAILED is the only one defined.
    enum Reason {
      PIPELINE_FAILED = 1;
    }

    required PipelineID pipelineID = 1;
    optional Reason reason = 3;

    // Free-form text elaborating on the reason.
    optional string detailedReason = 4;
  }
  // One pipeline action, discriminated by its action field.
  message PipelineAction {
    // Supported actions; CLOSE is the only one defined.
    enum Action {
      CLOSE = 1;
    }

    // Used to identify the correct pipeline action.
    required Action action = 1;

    optional ClosePipelineInfo closePipeline = 2;
  }
  // The information contained in a container report. Only containerID is
  // required; every other field is optional.
  message ContainerInfo {
    required int64 containerID = 1;

    optional int64 size = 2;
    optional int64 used = 3;
    optional int64 keyCount = 4;

    // TODO: move the io count to separate message
    optional int64 readCount = 5;
    optional int64 writeCount = 6;
    optional int64 readBytes = 7;
    optional int64 writeBytes = 8;

    optional string finalhash = 9;
    optional hadoop.hdds.LifeCycleState state = 10;
    optional int64 deleteTransactionId = 11;
    optional uint64 blockCommitSequenceId = 12;
  }
  // A command returned by SCM for the datanode to execute.
  message SCMCommandProto {
    // Discriminator naming the kind of command.
    enum Type {
      reregisterCommand = 1;
      deleteBlocksCommand = 2;
      closeContainerCommand = 3;
      deleteContainerCommand = 4;
      replicateContainerCommand = 5;
    }

    // TODO: once we start using protoc 3.x, refactor this message using "oneof"
    required Type commandType = 1;

    // One optional payload field per command type.
    optional ReregisterCommandProto reregisterCommandProto = 2;
    optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
    optional CloseContainerCommandProto closeContainerCommandProto = 4;
    optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
    optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
  }
  // SCM tells a datanode to register itself again. On receiving this
  // command the datanode transitions to the REGISTER state.
  message ReregisterCommandProto {
  }
  // Heartbeat response from SCM that contains a list of block deletion
  // transactions. Field number 2 is not used by this message.
  message DeleteBlocksCommandProto {
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
    required int64 cmdId = 3;
  }
  // The deleted blocks that are stored in deletedBlock.db of SCM.
  // BlockID is not used here because a transaction only contains multiple
  // localIDs of the same containerID.
  message DeletedBlocksTransaction {
    required int64 txID = 1;
    required int64 containerID = 2;

    // Local IDs, all belonging to containerID.
    repeated int64 localID = 3;

    // Retry count for sending the delete command to the datanode.
    required int32 count = 4;
  }
  // ACK message a datanode sends to SCM; it contains the results of block
  // deletion transactions.
  message ContainerBlocksDeletionACKProto {
    // Outcome of one block deletion transaction.
    message DeleteBlockTransactionResult {
      required int64 txID = 1;
      required int64 containerID = 2;
      required bool success = 3;
    }

    repeated DeleteBlockTransactionResult results = 1;
    required string dnId = 2;
  }
  // Command asking the datanode to close a specific container.
  message CloseContainerCommandProto {
    required int64 containerID = 1;
    required hadoop.hdds.ReplicationType replicationType = 2;
    required int64 cmdId = 3;
    required PipelineID pipelineID = 4;
  }
  // Command asking the datanode to delete a specific container.
  message DeleteContainerCommandProto {
    required int64 containerID = 1;
    required int64 cmdId = 2;
  }
  // Command asking the datanode to replicate a container from specific
  // sources.
  message ReplicateContainerCommandProto {
    required int64 containerID = 1;

    // Datanodes the container can be replicated from.
    repeated DatanodeDetailsProto sources = 2;

    required int64 cmdId = 3;
  }
  // Protocol used from a datanode to StorageContainerManager (SCM).
  //
  // Please see the request and response messages for details of the RPC
  // calls.
  //
  // Here is a simple state diagram that shows how a datanode boots up and
  // communicates with SCM.
  //
  //            +-------------------+
  //            |       Start       |
  //            +---------+---------+
  //                      |
  //                      v
  //            +-------------------+
  //            | Searching for SCM +-----------+
  //            +---------+---------+           |
  //                      |                     v
  //                      |           +--------------------+
  //                      |           | Register if needed |
  //                      |           +---------+----------+
  //                      v                     |
  //            +-------------------+           |
  //       +----+  Heartbeat state  |<----------+
  //       |    +---------^---------+
  //       |              |
  //       +--------------+
  //
  // Here is how this protocol is used by the datanode. When a datanode boots
  // up it moves into a state called SEARCHING_SCM. In this state the datanode
  // is trying to establish communication with the SCM. The addresses of the
  // SCMs are retrieved from the configuration information.
  //
  // In the SEARCHING_SCM state, the only RPC call made by the datanode is a
  // getVersion call to SCM. Once any of the SCMs replies, the datanode checks
  // whether it has a locally persisted datanode ID. If it has, this datanode
  // is already registered with some SCM. If this file is not found, the
  // datanode assumes that it needs to do a registration.
  //
  // If registration is needed, the datanode moves into the REGISTER state. It
  // sends a register call with the DatanodeDetailsProto data structure and
  // persists that info.
  //
  // The response to the command contains the clusterID. This information is
  // also persisted by the datanode, which then moves into the heartbeat
  // state.
  //
  // Once in the heartbeat state, the datanode sends heartbeats and container
  // reports to SCM and processes commands issued by SCM until it is shut
  // down.
  service StorageContainerDatanodeProtocolService {

    // Gets the version information from the SCM.
    rpc getVersion (SCMVersionRequestProto)
        returns (SCMVersionResponseProto);

    // Registers a data node with SCM.
    rpc register (SCMRegisterRequestProto)
        returns (SCMRegisteredResponseProto);

    // Sends a heartbeat from the datanode to SCM. Heartbeats under SCM look
    // more like a lifeline protocol than heartbeats under HDFS; in other
    // words, they are described as extremely lightweight and carrying no
    // data payload.
    // NOTE(review): SCMHeartbeatRequestProto also declares optional report
    // and action fields - confirm that the "no data payload" wording is
    // still accurate.
    rpc sendHeartbeat (SCMHeartbeatRequestProto)
        returns (SCMHeartbeatResponseProto);
  }