StorageContainerDatanodeProtocol.proto 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /**
  19. * These .proto interfaces are private and unstable.
  20. * Please see http://wiki.apache.org/hadoop/Compatibility
  21. * for what changes are allowed for an *unstable* .proto interface.
  22. */
  // Declare the syntax explicitly. Without this statement protoc silently
  // falls back to proto2 (newer releases print a warning). The definitions in
  // this file use proto2-only constructs: "required" fields and explicit
  // "default" values.
  syntax = "proto2";

  // Java code generation settings.
  option java_package = "org.apache.hadoop.hdds.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  option java_generic_services = true;
  option java_generate_equals_and_hash = true;

  package hadoop.hdds;

  import "hdds.proto";
  /**
   * Request for the version information of the software stack on the server.
   * The message carries no fields.
   */
  message SCMVersionRequestProto {
  }
  /**
   * Generic response that is sent for a version request. Additional
   * information travels as key/value pairs, which allows keys to be added on
   * the fly while the protocol itself remains stable.
   */
  message SCMVersionResponseProto {
    required uint32 softwareVersion = 1;

    // Extensible list of key/value pairs (KeyValue is not declared in this
    // file; presumably it comes from the imported hdds.proto).
    repeated hadoop.hdds.KeyValue keys = 2;
  }
  /**
   * Request message of the "register" RPC. All four parts are required: the
   * datanode details plus a node report, a container report and a pipeline
   * report.
   */
  message SCMRegisterRequestProto {
    required DatanodeDetailsProto datanodeDetails = 1;
    required NodeReportProto nodeReport = 2;
    required ContainerReportsProto containerReport = 3;
    required PipelineReportsProto pipelineReports = 4;
  }
  /**
   * Response message of the "register" RPC. It carries the datanode ID
   * returned by the SCM; this is similar to the name node registration of a
   * datanode.
   */
  message SCMRegisteredResponseProto {
    // Outcome of the registration attempt.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    required ErrorCode errorCode = 1;
    required string datanodeUUID = 2;
    required string clusterID = 3;

    // The remaining fields are optional and may be absent.
    optional SCMNodeAddressList addressList = 4;
    optional string hostname = 5;
    optional string ipAddress = 6;
  }
  /**
   * This message is sent by a datanode to indicate that it is alive or that
   * it is registering with the node manager.
   *
   * Only datanodeDetails is required. Every report and action below is
   * optional, and commandStatusReports may occur any number of times.
   */
  message SCMHeartbeatRequestProto {
    required DatanodeDetailsProto datanodeDetails = 1;

    // Reports.
    optional NodeReportProto nodeReport = 2;
    optional ContainerReportsProto containerReport = 3;
    repeated CommandStatusReportsProto commandStatusReports = 4;

    // Actions.
    optional ContainerActionsProto containerActions = 5;
    optional PipelineActionsProto pipelineActions = 6;

    optional PipelineReportsProto pipelineReports = 7;
  }
  /**
   * Response message of the "sendHeartbeat" RPC: a group of commands for the
   * datanode to execute.
   */
  message SCMHeartbeatResponseProto {
    required string datanodeUUID = 1;

    // Zero or more commands; see SCMCommandProto.
    repeated SCMCommandProto commands = 2;
  }
  /**
   * A list of addresses, each stored as a plain string. Used as the optional
   * addressList field of SCMRegisteredResponseProto.
   */
  message SCMNodeAddressList {
    repeated string addressList = 1;
  }
  /**
   * This message is sent along with the heartbeat to report datanode storage
   * utilization to SCM. It holds zero or more StorageReportProto entries.
   */
  message NodeReportProto {
    repeated StorageReportProto storageReport = 1;
  }
  /**
   * A single storage entry of a NodeReportProto. The UUID and the location
   * are required; all other fields are optional and fall back to the
   * defaults declared below when unset.
   */
  message StorageReportProto {
    required string storageUuid = 1;
    required string storageLocation = 2;

    // Numeric usage figures, all defaulting to 0.
    // NOTE(review): units are not stated here (presumably bytes) - confirm.
    optional uint64 capacity = 3 [default = 0];
    optional uint64 scmUsed = 4 [default = 0];
    optional uint64 remaining = 5 [default = 0];

    // Storage medium; DISK unless stated otherwise.
    optional StorageTypeProto storageType = 6 [default = DISK];

    // Defaults to false when not set.
    optional bool failed = 7 [default = false];
  }
  /**
   * Types of recognized storage media. Numbering starts at 1; DISK is the
   * declared default of StorageReportProto.storageType.
   */
  enum StorageTypeProto {
    DISK = 1;
    SSD = 2;
    ARCHIVE = 3;
    RAM_DISK = 4;
    PROVIDED = 5;
  }
  /**
   * A collection of container reports, one ContainerInfo per entry.
   */
  message ContainerReportsProto {
    repeated ContainerInfo reports = 1;
  }
  /**
   * A collection of command status entries, one CommandStatus per entry.
   */
  message CommandStatusReportsProto {
    repeated CommandStatus cmdStatus = 1;
  }
  /**
   * Status of one command, identified by cmdId. Entries of this type are
   * carried inside CommandStatusReportsProto.
   */
  message CommandStatus {
    // States a command can be reported in.
    enum Status {
      PENDING = 1;
      EXECUTED = 2;
      FAILED = 3;
    }

    required int64 cmdId = 1;

    // Declared as required and, in addition, given PENDING as its default.
    required Status status = 2 [default = PENDING];

    // The command kind, taken from the enum nested in SCMCommandProto.
    required SCMCommandProto.Type type = 3;

    optional string msg = 4;
    optional ContainerBlocksDeletionACKProto blockDeletionAck = 5;
  }
  /**
   * A collection of container actions, one ContainerAction per entry.
   */
  message ContainerActionsProto {
    repeated ContainerAction containerActions = 1;
  }
  /**
   * An action for one container, with an optional reason for it.
   */
  message ContainerAction {
    // CLOSE is the only action defined so far.
    enum Action {
      CLOSE = 1;
    }

    // CONTAINER_FULL is the only reason defined so far.
    enum Reason {
      CONTAINER_FULL = 1;
    }

    required int64 containerID = 1;
    required Action action = 2;
    optional Reason reason = 3;
  }
  /**
   * Report for a single pipeline; it holds only the pipeline's ID.
   */
  message PipelineReport {
    required PipelineID pipelineID = 1;
  }
  /**
   * A collection of pipeline reports, one PipelineReport per entry.
   */
  message PipelineReportsProto {
    repeated PipelineReport pipelineReport = 1;
  }
  /**
   * A collection of pipeline actions, one PipelineAction per entry.
   */
  message PipelineActionsProto {
    repeated PipelineAction pipelineActions = 1;
  }
  /**
   * Details of a close-pipeline action: which pipeline, and optionally why.
   * Field number 2 is not used by this definition.
   */
  message ClosePipelineInfo {
    // PIPELINE_FAILED is the only reason defined so far.
    enum Reason {
      PIPELINE_FAILED = 1;
    }

    required PipelineID pipelineID = 1;
    optional Reason reason = 3;

    // Optional free-text companion to the reason field.
    optional string detailedReason = 4;
  }
  /**
   * An action for a pipeline. The action field selects the kind of action;
   * the matching details are carried in an optional sub-message.
   */
  message PipelineAction {
    // CLOSE is the only action defined so far.
    enum Action {
      CLOSE = 1;
    }

    // Action will be used to identify the correct pipeline action.
    required Action action = 1;

    optional ClosePipelineInfo closePipeline = 2;
  }
  /**
   * The information contained in a container report. Only containerID is
   * required; every other field is optional.
   */
  message ContainerInfo {
    required int64 containerID = 1;

    optional int64 size = 2;
    optional int64 used = 3;
    optional int64 keyCount = 4;

    // TODO: move the io count to separate message
    optional int64 readCount = 5;
    optional int64 writeCount = 6;
    optional int64 readBytes = 7;
    optional int64 writeBytes = 8;

    optional string finalhash = 9;
    optional hadoop.hdds.LifeCycleState state = 10;
    optional int64 deleteTransactionId = 11;
    optional uint64 blockCommitSequenceId = 12;
  }
  /**
   * These are the commands returned by SCM for the datanode to execute.
   * commandType names the kind of command; each kind has its own optional
   * payload field below.
   */
  message SCMCommandProto {
    // The kinds of command SCM can issue.
    enum Type {
      reregisterCommand = 1;
      deleteBlocksCommand = 2;
      closeContainerCommand = 3;
      deleteContainerCommand = 4;
      replicateContainerCommand = 5;
    }

    // TODO: once we start using protoc 3.x, refactor this message using "oneof"
    required Type commandType = 1;

    // One optional payload per command kind.
    optional ReregisterCommandProto reregisterCommandProto = 2;
    optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
    optional CloseContainerCommandProto closeContainerCommandProto = 4;
    optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
    optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
  }
  /**
   * SCM informs a datanode to register itself again.
   * On receiving this command, the datanode will transition to the REGISTER
   * state. The message carries no fields.
   */
  message ReregisterCommandProto {
  }
  /**
   * Heartbeat response payload from SCM that contains a list of block
   * deletion transactions. Field number 2 is not used by this definition.
   */
  message DeleteBlocksCommandProto {
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
    required int64 cmdId = 3;
  }
  /**
   * The deleted blocks which are stored in deletedBlock.db of SCM.
   * BlockID is not used here because a transaction only contains multiple
   * localIDs of the same containerID.
   */
  message DeletedBlocksTransaction {
    required int64 txID = 1;
    required int64 containerID = 2;

    // The local IDs, all belonging to containerID above.
    repeated int64 localID = 3;

    // Retry count for sending the delete command to the datanode.
    required int32 count = 4;
  }
  /**
   * ACK message that a datanode sends to SCM; it contains the results of
   * block deletion transactions.
   */
  message ContainerBlocksDeletionACKProto {
    // Result for one transaction, identified by txID and containerID.
    message DeleteBlockTransactionResult {
      required int64 txID = 1;
      required int64 containerID = 2;
      required bool success = 3;
    }

    repeated DeleteBlockTransactionResult results = 1;
    required string dnId = 2;
  }
  /**
   * This command asks the datanode to close a specific container.
   * All four fields are required.
   */
  message CloseContainerCommandProto {
    required int64 containerID = 1;
    required hadoop.hdds.ReplicationType replicationType = 2;
    required int64 cmdId = 3;
    required PipelineID pipelineID = 4;
  }
  /**
   * This command asks the datanode to delete a specific container.
   * Both fields are required.
   */
  message DeleteContainerCommandProto {
    required int64 containerID = 1;
    required int64 cmdId = 2;
  }
  /**
   * This command asks the datanode to replicate a container from specific
   * sources.
   */
  message ReplicateContainerCommandProto {
    required int64 containerID = 1;

    // The datanodes to replicate from.
    repeated DatanodeDetailsProto sources = 2;

    required int64 cmdId = 3;
  }
  /**
   * Protocol used from a datanode to StorageContainerManager.
   *
   * Please see the request and response messages for details of the RPC calls.
   *
   * Here is a simple state diagram that shows how a datanode would boot up and
   * communicate with SCM.
   *
   *            -----------------------
   *           |         Start         |
   *            ---------- ------------
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *                      |
   *            ----------v-------------
   *           |   Searching for SCM     ------------
   *            ---------- -------------             |
   *                      |                          |
   *                      |                          |
   *                      |                ----------v-------------
   *                      |               | Register if needed     |
   *                      |                ----------- ------------
   *                      |                           |
   *                      v                           |
   *           ----------- ----------------           |
   *  ---------   Heartbeat state             <--------
   * |         --------^-------------------
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   * |                 |
   *  ------------------
   *
   * Here is how this protocol is used by the datanode. When a datanode boots
   * up it moves into a state called SEARCHING_SCM. In this state the datanode
   * is trying to establish communication with the SCM. The addresses of the
   * SCMs are retrieved from the configuration information.
   *
   * In the SEARCHING_SCM state, the only RPC call made by the datanode is a
   * getVersion call to SCM. Once any of the SCMs replies, the datanode checks
   * whether it has a locally persisted datanode ID. If it has, this means
   * that this datanode is already registered with some SCM. If this file is
   * not found, the datanode assumes that it needs to do a registration.
   *
   * If registration is needed, the datanode moves into the REGISTER state. It
   * will send a register call with the DatanodeDetailsProto data structure
   * and persist that info.
   *
   * The response to the command contains the clusterID. This information is
   * also persisted by the datanode, which then moves into the heartbeat state.
   *
   * Once in the heartbeat state, the datanode sends heartbeats and container
   * reports to SCM and processes commands issued by SCM until it is shut down.
   */
  service StorageContainerDatanodeProtocolService {

    // Gets the version information from the SCM.
    rpc getVersion(SCMVersionRequestProto)
        returns (SCMVersionResponseProto);

    // Registers a data node with SCM.
    rpc register(SCMRegisterRequestProto)
        returns (SCMRegisteredResponseProto);

    // Sends a heartbeat from the datanode to SCM. Per the original note,
    // heartbeats under SCM look more like a lifeline protocol than heartbeats
    // under HDFS. The request message may nevertheless carry optional reports
    // and actions (see SCMHeartbeatRequestProto), and the response carries
    // the commands for the datanode to execute.
    rpc sendHeartbeat(SCMHeartbeatRequestProto)
        returns (SCMHeartbeatResponseProto);
  }