StorageContainerDatanodeProtocol.proto 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /**
  19. * These .proto interfaces are private and unstable.
  20. * Please see http://wiki.apache.org/hadoop/Compatibility
  21. * for what changes are allowed for an *unstable* .proto interface.
  22. */
  // Declare the syntax explicitly: this file relies on proto2-only features
  // (required fields, explicit field defaults). Without the declaration,
  // recent protoc versions emit a warning and fall back to proto2 implicitly.
  syntax = "proto2";

  // Java code generation settings.
  option java_package = "org.apache.hadoop.ozone.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  option java_generic_services = true;
  option java_generate_equals_and_hash = true;

  package hadoop.hdfs;

  import "hdfs.proto";
  import "HdfsServer.proto";
  import "DatanodeProtocol.proto";
  import "Ozone.proto";
  /**
   * Sent by a datanode to indicate that it is alive, or that it is
   * registering with the node manager.
   */
  message SCMHeartbeatRequestProto {
    // Identity of the sending datanode.
    required DatanodeIDProto datanodeID = 1;

    // Storage report for the node (see SCMNodeReport); may be omitted.
    optional SCMNodeReport nodeReport = 2;

    // Container report state of the node (see ReportState); may be omitted.
    optional ReportState containerReportState = 3;
  }
  // State of a container; recorded in ContainerPersistanceProto.state.
  enum DatanodeContainerState {
    closed = 0;
    open = 1;
  }
  /**
   * ReportState carries a message from a datanode to SCM saying that the
   * datanode has some information that SCM might be interested in.
   */
  message ReportState {
    // Which container reports, if any, this state refers to (judging by the
    // value names). The names are part of the generated API and are kept
    // exactly as declared, including the spelling of completeContinerReport.
    enum states {
      noContainerReports = 0;
      completeContinerReport = 1;
      deltaContainerReport = 2;
    }

    required states state = 1;

    // NOTE(review): a default on a required field never takes effect, since
    // the sender must always set the field -- confirm whether this field was
    // meant to be optional.
    required int64 count = 2 [default = 0];
  }
  /**
   * Used to persist the information about a container in the SCM database.
   * Persisting this information allows SCM to start up faster and to avoid
   * keeping all container info in memory all the time.
   */
  message ContainerPersistanceProto {
    required DatanodeContainerState state = 1;
    required hadoop.hdfs.ozone.Pipeline pipeline = 2;
    required ContainerInfo info = 3;
  }
  /**
   * Used for a quick lookup of which containers are affected if a node goes
   * down.
   */
  message NodeContianerMapping {
    // Container names held in this mapping.
    repeated string contianerName = 1;
  }
  /**
   * Information about a single container. Entries of this type make up a
   * container report (see ContainerReportsRequestProto.reports).
   */
  message ContainerInfo {
    required string containerName = 1;

    // NOTE(review): presumably a hash over the container contents -- confirm.
    required string finalhash = 2;

    // NOTE(review): units of size/used are not stated in this file
    // (bytes?) -- confirm.
    optional int64 size = 3;
    optional int64 used = 4;
    optional int64 keyCount = 5;

    // TODO: move the io count to separate message
    optional int64 readCount = 6;
    optional int64 writeCount = 7;
    optional int64 readBytes = 8;
    optional int64 writeBytes = 9;
  }
  // A block deletion transaction. These are the deleted blocks which are
  // stored in deletedBlock.db of SCM.
  message DeletedBlocksTransaction {
    required int64 txID = 1;
    required string containerName = 2;
    repeated string blockID = 3;

    // Retry count for sending the delete command to the datanode.
    required int32 count = 4;
  }
  /**
   * A set of container reports. The max count is generally set to 8192,
   * since that keeps the size of the reports under 1 MB.
   */
  message ContainerReportsRequestProto {
    // Whether this request is a full report or a delta report.
    enum reportType {
      fullReport = 0;
      deltaReport = 1;
    }

    // Datanode that is sending the reports.
    required DatanodeIDProto datanodeID = 1;

    // One entry per reported container.
    repeated ContainerInfo reports = 2;

    required reportType type = 3;
  }
  // Response to sendContainerReport; currently carries no fields.
  message ContainerReportsResponseProto {}
  /**
   * Sent along with the heartbeat to report the datanode's storage
   * utilization to SCM.
   */
  message SCMNodeReport {
    // One entry per reported storage (see SCMStorageReport).
    repeated SCMStorageReport storageReport = 1;
  }
  // Utilization figures for one storage, carried inside SCMNodeReport.
  // NOTE(review): capacity/scmUsed/remaining are presumably byte counts --
  // confirm against the code that fills them in.
  message SCMStorageReport {
    required string storageUuid = 1;
    optional uint64 capacity = 2 [default = 0];
    optional uint64 scmUsed = 3 [default = 0];
    optional uint64 remaining = 4 [default = 0];

    // StorageTypeProto is declared outside this file; defaults to DISK.
    optional StorageTypeProto storageType = 5 [default = DISK];
  }
  // Request message of the register RPC.
  message SCMRegisterRequestProto {
    // Identity of the datanode that is registering.
    required DatanodeIDProto datanodeID = 1;

    optional SCMNodeAddressList addressList = 2;
  }
  /**
   * Request for version info of the software stack on the server.
   * Currently carries no fields.
   */
  message SCMVersionRequestProto {}
  /**
   * Generic response that is sent to a version request. The key/value list
   * allows keys to be added on the fly while the protocol remains stable.
   */
  message SCMVersionResponseProto {
    required uint32 softwareVersion = 1;

    // Additional key/value pairs attached to the version response.
    repeated hadoop.hdfs.ozone.KeyValue keys = 2;
  }
  // A list of addresses, each carried as a string.
  message SCMNodeAddressList {
    repeated string addressList = 1;
  }
  /**
   * Datanode ID returned by the SCM. This is similar to the name node
   * registration of a datanode.
   *
   * Field number 1 is not used by this message.
   */
  message SCMRegisteredCmdResponseProto {
    // Outcome of the registration. Note that the values start at 1.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    required ErrorCode errorCode = 2;
    optional string datanodeUUID = 3;
    optional string clusterID = 4;
    optional SCMNodeAddressList addressList = 5;
  }
  /**
   * SCM informs a datanode to register itself again.
   * On receiving this command, the datanode will transition to the REGISTER
   * state.
   */
  message SCMReregisterCmdResponseProto {
  }
  /**
   * Maintains the identity of a container node -- its datanode ID along with
   * the cluster ID -- after the registration is done.
   */
  message ContainerNodeIDProto {
    required DatanodeIDProto datanodeID = 1;
    optional string clusterID = 2;
  }
  /**
   * Command that tells the datanode to send in the container report when
   * possible. Carries no fields.
   */
  message SendContainerReportProto {}
  /**
   * Types of commands supported by the SCM-to-datanode protocol.
   * Note that the values start at 2; 0 and 1 are not used.
   */
  enum Type {
    versionCommand = 2;
    registeredCommand = 3;
    sendContainerReport = 4;
    reregisterCommand = 5;
    deleteBlocksCommand = 6;
  }
  /*
   * A command returned by SCM for the datanode to execute.
   *
   * Field number 1 is not used by this message.
   * NOTE(review): presumably cmdType indicates which of the optional payload
   * fields below is populated -- confirm against the command handlers.
   */
  message SCMCommandResponseProto {
    // Type of the command.
    required Type cmdType = 2;

    optional SCMRegisteredCmdResponseProto registeredProto = 3;
    optional SCMVersionResponseProto versionProto = 4;
    optional SendContainerReportProto sendReport = 5;
    optional SCMReregisterCmdResponseProto reregisterProto = 6;
    optional SCMDeleteBlocksCmdResponseProto deleteBlocksProto = 7;
  }
  /*
   * Heartbeat response: a group of commands for the datanode to execute.
   */
  message SCMHeartbeatResponseProto {
    repeated SCMCommandResponseProto commands = 1;
  }
  // Delete-blocks command carried in the heartbeat response from SCM;
  // contains a list of block deletion transactions.
  message SCMDeleteBlocksCmdResponseProto {
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
  }
  // Response to sendContainerBlocksDeletionACK. That RPC is called by the
  // datanode on SCM, so this message travels from SCM back to the datanode.
  // Currently empty.
  message ContainerBlocksDeletionACKResponseProto {}
  // ACK message that a datanode sends to SCM; contains the results of
  // block deletion transactions.
  message ContainerBlocksDeletionACKProto {
    // Result of one block deletion transaction, identified by txID.
    message DeleteBlockTransactionResult {
      required int64 txID = 1;
      required bool success = 2;
    }

    repeated DeleteBlockTransactionResult results = 1;
  }
  /**
   * Protocol used from a datanode to StorageContainerManager.
   *
   * Please see the request and response messages for details of the RPC
   * calls.
   *
   * Here is a simple state diagram that shows how a datanode would boot up
   * and communicate with SCM.
   *
   *            -----------------------
   *           |         Start         |
   *            ---------- ------------
   *                      |
   *                      |
   *            ----------v-------------
   *           |   Searching for SCM    ------------
   *            ---------- -------------            |
   *                      |                         |
   *                      |               ----------v-------------
   *                      |              |   Register if needed   |
   *                      |               ----------- ------------
   *                      |                          |
   *                      v                          |
   *           ----------- ----------------          |
   *  --------     Heartbeat state          <---------
   * |         --------^-------------------
   * |                 |
   * |                 |
   *  ------------------
   *
   * Here is how this protocol is used by the datanode. When a datanode boots
   * up it moves into a state called SEARCHING_SCM. In this state the datanode
   * is trying to establish communication with the SCM. The addresses of the
   * SCMs are retrieved from the configuration information.
   *
   * In the SEARCHING_SCM state, the only rpc call made by the datanode is a
   * getVersion call to SCM. Once any of the SCMs reply, the datanode checks
   * if it has a locally persisted datanode ID. If it has, this means that
   * this datanode is already registered with some SCM. If this file is not
   * found, the datanode assumes that it needs to do a registration.
   *
   * If registration is needed, the datanode moves into the REGISTER state.
   * It will send a register call with the datanodeID data structure and
   * persist that info.
   *
   * The response to the command contains the clusterID. This information is
   * also persisted by the datanode, which then moves into the heartbeat
   * state.
   *
   * Once in the heartbeat state, the datanode sends heartbeats and container
   * reports to SCM and processes commands issued by SCM until it is shut
   * down.
   */
  service StorageContainerDatanodeProtocolService {
    /**
     * Gets the version information from the SCM.
     */
    rpc getVersion (SCMVersionRequestProto)
        returns (SCMVersionResponseProto);

    /**
     * Registers a data node with SCM.
     */
    rpc register (SCMRegisterRequestProto)
        returns (SCMRegisteredCmdResponseProto);

    /**
     * Sends a heartbeat from the datanode to SCM. Heartbeats under SCM look
     * more like a lifeline protocol than heartbeats under HDFS; in other
     * words, they are meant to be extremely lightweight. The request can
     * still carry an optional node report and container report state (see
     * SCMHeartbeatRequestProto).
     */
    rpc sendHeartbeat (SCMHeartbeatRequestProto)
        returns (SCMHeartbeatResponseProto);

    /**
     * Sends the container report to SCM. The response message is empty.
     */
    rpc sendContainerReport(ContainerReportsRequestProto)
        returns (ContainerReportsResponseProto);

    /**
     * Sends the block deletion ACK to SCM.
     */
    rpc sendContainerBlocksDeletionACK (ContainerBlocksDeletionACKProto)
        returns (ContainerBlocksDeletionACKResponseProto);
  }