StorageContainerDatanodeProtocol.proto

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * These .proto interfaces are private and unstable.
 * Please see http://wiki.apache.org/hadoop/Compatibility
 * for what changes are allowed for an *unstable* .proto interface.
 */

option java_package = "org.apache.hadoop.hdds.protocol.proto";
option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;

package hadoop.hdds;

import "hdds.proto";

/**
 * Request for version info of the software stack on the server.
 */
message SCMVersionRequestProto {}

/**
 * Generic response that is sent to a version request. This allows keys to be
 * added on the fly and the protocol to remain stable.
 */
message SCMVersionResponseProto {
  required uint32 softwareVersion = 1;
  repeated hadoop.hdds.KeyValue keys = 2;
}

message SCMRegisterRequestProto {
  required DatanodeDetailsProto datanodeDetails = 1;
  required NodeReportProto nodeReport = 2;
  required ContainerReportsProto containerReport = 3;
}

/**
 * Datanode ID returned by the SCM. This is similar to name node
 * registration of a datanode.
 */
message SCMRegisteredResponseProto {
  enum ErrorCode {
    success = 1;
    errorNodeNotPermitted = 2;
  }
  required ErrorCode errorCode = 1;
  required string datanodeUUID = 2;
  required string clusterID = 3;
  optional SCMNodeAddressList addressList = 4;
  optional string hostname = 5;
  optional string ipAddress = 6;
}

/**
 * This message is sent by the datanode to indicate that it is alive or that
 * it is registering with the node manager.
 */
message SCMHeartbeatRequestProto {
  required DatanodeDetailsProto datanodeDetails = 1;
  optional NodeReportProto nodeReport = 2;
  optional ContainerReportsProto containerReport = 3;
  optional CommandStatusReportsProto commandStatusReport = 4;
  optional ContainerActionsProto containerActions = 5;
}

/*
 * A group of commands for the datanode to execute.
 */
message SCMHeartbeatResponseProto {
  required string datanodeUUID = 1;
  repeated SCMCommandProto commands = 2;
}

message SCMNodeAddressList {
  repeated string addressList = 1;
}

/**
 * This message is sent along with the heartbeat to report datanode
 * storage utilization to SCM.
 */
message NodeReportProto {
  repeated StorageReportProto storageReport = 1;
}

message StorageReportProto {
  required string storageUuid = 1;
  required string storageLocation = 2;
  optional uint64 capacity = 3 [default = 0];
  optional uint64 scmUsed = 4 [default = 0];
  optional uint64 remaining = 5 [default = 0];
  optional StorageTypeProto storageType = 6 [default = DISK];
  optional bool failed = 7 [default = false];
}
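
// Illustrative example only: a StorageReportProto entry as it might appear
// inside a NodeReportProto, written in proto text format. All values below
// are made up.
//
//   storageReport {
//     storageUuid: "example-storage-uuid"
//     storageLocation: "/data/hdds/disk1"
//     capacity: 1000000000000
//     scmUsed: 250000000000
//     remaining: 750000000000
//     storageType: DISK
//     failed: false
//   }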

/**
 * Types of recognized storage media.
 */
enum StorageTypeProto {
  DISK = 1;
  SSD = 2;
  ARCHIVE = 3;
  RAM_DISK = 4;
  PROVIDED = 5;
}

message ContainerReportsProto {
  repeated ContainerInfo reports = 1;
}

message CommandStatusReportsProto {
  repeated CommandStatus cmdStatus = 1;
}

message CommandStatus {
  enum Status {
    PENDING = 1;
    EXECUTED = 2;
    FAILED = 3;
  }
  required int64 cmdId = 1;
  required Status status = 2 [default = PENDING];
  required SCMCommandProto.Type type = 3;
  optional string msg = 4;
}

message ContainerActionsProto {
  repeated ContainerAction containerActions = 1;
}

message ContainerAction {
  enum Action {
    CLOSE = 1;
  }
  enum Reason {
    CONTAINER_FULL = 1;
  }
  required int64 containerID = 1;
  required Action action = 2;
  optional Reason reason = 3;
}

/**
 * A container report contains the following information.
 */
message ContainerInfo {
  required int64 containerID = 1;
  optional int64 size = 2;
  optional int64 used = 3;
  optional int64 keyCount = 4;
  // TODO: move the io count to separate message
  optional int64 readCount = 5;
  optional int64 writeCount = 6;
  optional int64 readBytes = 7;
  optional int64 writeBytes = 8;
  optional string finalhash = 9;
  optional hadoop.hdds.LifeCycleState state = 10;
  optional int64 deleteTransactionId = 11;
}

/*
 * These are commands returned by SCM for the datanode to execute.
 */
message SCMCommandProto {
  enum Type {
    reregisterCommand = 1;
    deleteBlocksCommand = 2;
    closeContainerCommand = 3;
    deleteContainerCommand = 4;
    replicateContainerCommand = 5;
  }
  // TODO: once we start using protoc 3.x, refactor this message using "oneof"
  required Type commandType = 1;
  optional ReregisterCommandProto reregisterCommandProto = 2;
  optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
  optional CloseContainerCommandProto closeContainerCommandProto = 4;
  optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
  optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
}

/**
 * SCM informs a datanode to register itself again.
 * Upon receiving this command, the datanode will transition to the REGISTER
 * state.
 */
message ReregisterCommandProto {}

// HB response from SCM, contains a list of block deletion transactions.
message DeleteBlocksCommandProto {
  repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
  required int64 cmdId = 3;
}

// The deleted blocks which are stored in deletedBlock.db of SCM.
// We don't use BlockID because this only contains multiple localIDs
// of the same containerID.
message DeletedBlocksTransaction {
  required int64 txID = 1;
  required int64 containerID = 2;
  repeated int64 localID = 3;
  // the retry count of sending the deletion command to the datanode.
  required int32 count = 4;
}
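
// Illustrative example only: a single DeletedBlocksTransaction in proto text
// format, grouping three made-up local block IDs of container 7 that have
// not been retried yet.
//
//   txID: 42
//   containerID: 7
//   localID: 101
//   localID: 102
//   localID: 103
//   count: 0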

// ACK message sent by the datanode to SCM; contains the results of the
// block deletion transactions.
message ContainerBlocksDeletionACKProto {
  message DeleteBlockTransactionResult {
    required int64 txID = 1;
    required int64 containerID = 2;
    required bool success = 3;
  }
  repeated DeleteBlockTransactionResult results = 1;
  required string dnId = 2;
}

// SendACK response returned by SCM to the datanode, currently empty.
message ContainerBlocksDeletionACKResponseProto {
}

/**
 * This command asks the datanode to close a specific container.
 */
message CloseContainerCommandProto {
  required int64 containerID = 1;
  required hadoop.hdds.ReplicationType replicationType = 2;
  required int64 cmdId = 3;
}

/**
 * This command asks the datanode to delete a specific container.
 */
message DeleteContainerCommandProto {
  required int64 containerID = 1;
  required int64 cmdId = 2;
}

/**
 * This command asks the datanode to replicate a container from specific
 * sources.
 */
message ReplicateContainerCommandProto {
  required int64 containerID = 1;
  repeated DatanodeDetailsProto sources = 2;
  required int64 cmdId = 3;
}
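
// Illustrative example only: a heartbeat response carrying a single close
// command, written in proto text format. All values are made up, and RATIS
// is assumed to be one of the values of hadoop.hdds.ReplicationType defined
// in hdds.proto.
//
//   datanodeUUID: "example-datanode-uuid"
//   commands {
//     commandType: closeContainerCommand
//     closeContainerCommandProto {
//       containerID: 12
//       replicationType: RATIS
//       cmdId: 7
//     }
//   }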

/**
 * Protocol used from a datanode to StorageContainerManager.
 *
 * Please see the request and response messages for details of the RPC calls.
 *
 * Here is a simple state diagram that shows how a datanode would boot up and
 * communicate with SCM.
 *
 *                    -----------------------
 *                   |         Start         |
 *                    ---------- ------------
 *                              |
 *                              |
 *                              |
 *                    ----------v-------------
 *                   |   Searching for SCM     ------------
 *                    ---------- -------------             |
 *                              |                          |
 *                              |               ----------v-------------
 *                              |              |   Register if needed   |
 *                              |               ----------- ------------
 *                              |                          |
 *                              v                          |
 *                    ----------- ----------------         |
 *         ---------   Heartbeat state           <---------
 *         |        --------^-------------------
 *         |                |
 *         |                |
 *          ------------------
 *
 * Here is how this protocol is used by the datanode. When a datanode boots up
 * it moves into a state called SEARCHING_SCM. In this state the datanode is
 * trying to establish communication with the SCM. The addresses of the SCMs
 * are retrieved from the configuration.
 *
 * In the SEARCHING_SCM state, the only RPC call made by the datanode is a
 * getVersion call to SCM. Once any of the SCMs reply, the datanode checks
 * whether it has a locally persisted datanode ID. If it does, this datanode
 * has already registered with some SCM. If that file is not found, the
 * datanode assumes that it needs to register.
 *
 * If registration is needed, the datanode moves into the REGISTER state. It
 * sends a register call with the DatanodeDetailsProto data structure and
 * persists that info.
 *
 * The response to the register command contains the clusterID. The datanode
 * persists this information as well and moves into the heartbeat state.
 *
 * Once in the heartbeat state, the datanode sends heartbeats and container
 * reports to SCM and processes commands issued by SCM until it is shut down.
 *
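 * As a rough illustration only (not part of the protocol definition), the
 * datanode side of this flow could be sketched in pseudocode against the
 * RPCs declared below; names such as "persistedDatanodeId" are placeholders,
 * not real identifiers:
 *
 *   version = getVersion(SCMVersionRequestProto)
 *   if (persistedDatanodeId does not exist) {
 *     response = register(datanodeDetails, nodeReport, containerReport)
 *     persist response.clusterID and the datanode ID
 *   }
 *   while (not shut down) {
 *     response = sendHeartbeat(datanodeDetails [, reports])
 *     execute each command in response.commands
 *   }
 *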
 */
service StorageContainerDatanodeProtocolService {

  /**
   * Gets the version information from the SCM.
   */
  rpc getVersion (SCMVersionRequestProto) returns (SCMVersionResponseProto);

  /**
   * Registers a datanode with SCM.
   */
  rpc register (SCMRegisterRequestProto) returns (SCMRegisteredResponseProto);

  /**
   * Sends a heartbeat from the datanode to SCM. Heartbeats to SCM look more
   * like a lifeline protocol than heartbeats in HDFS. In other words, they
   * are extremely lightweight and contain no data payload.
   */
  rpc sendHeartbeat (SCMHeartbeatRequestProto)
      returns (SCMHeartbeatResponseProto);

  /**
   * Sends the block deletion ACK to SCM.
   */
  rpc sendContainerBlocksDeletionACK (ContainerBlocksDeletionACKProto)
      returns (ContainerBlocksDeletionACKResponseProto);
}