  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements. See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership. The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License. You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  /**
   * These .proto interfaces are private and unstable.
   * Please see http://wiki.apache.org/hadoop/Compatibility
   * for what changes are allowed for an *unstable* .proto interface.
   */
  // This schema uses `required` fields, so it is proto2. Declaring the syntax
  // explicitly avoids protoc's "no syntax specified" warning and makes the
  // dialect unambiguous.
  syntax = "proto2";

  // Java code generation settings: package and outer class of the generated
  // sources, plus generic service stubs and equals()/hashCode() generation.
  option java_package = "org.apache.hadoop.hdds.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  option java_generic_services = true;
  option java_generate_equals_and_hash = true;

  // Proto package that every definition in this file belongs to.
  package hadoop.hdds;

  // Shared HDDS definitions. Types used below but not declared in this file
  // (e.g. DatanodeDetailsProto, Pipeline, LifeCycleState, KeyValue) are
  // expected to come from here.
  import "hdds.proto";
  // Sent by a datanode to indicate that it is alive or that it is
  // registering with the node manager.
  message SCMHeartbeatRequestProto {
    // Identity of the sending datanode (type is not declared in this file).
    required DatanodeDetailsProto datanodeDetails = 1;

    // Optional storage report; see SCMNodeReport.
    optional SCMNodeReport nodeReport = 2;

    // Optional hint about pending container reports; see ReportState.
    optional ReportState containerReportState = 3;
  }
  // Two-valued container state, used by ContainerPersistanceProto.state.
  enum DatanodeContainerState {
    closed = 0;
    open = 1;
  }
  // ReportState carries a message from the datanode to SCM saying that the
  // datanode has some information that SCM might be interested in.
  message ReportState {
    // Kind of container report the datanode has to offer.
    enum states {
      noContainerReports = 0;
      completeContinerReport = 1;
      deltaContainerReport = 2;
    }

    required states state = 1;

    // NOTE(review): what is being counted is not documented in this file;
    // presumably the number of containers covered by the report. Confirm.
    required int64 count = 2 [default = 0];
  }
  // Used to persist the information about a container in the SCM database.
  // This information allows SCM to start up faster and avoids keeping all
  // container info in memory all the time.
  message ContainerPersistanceProto {
    required DatanodeContainerState state = 1;

    // Pipeline type is not declared in this file.
    required hadoop.hdds.Pipeline pipeline = 2;

    required ContainerInfo info = 3;
  }
  // Used for a quick lookup of which containers are affected if a node
  // goes down.
  message NodeContianerMapping {
    // Names of the containers in the mapping.
    repeated string contianerName = 1;
  }
  // A single container report entry. Field numbering starts at 2; number 1
  // is not used by this message.
  message ContainerInfo {
    optional string finalhash = 2;
    optional int64 size = 3;
    optional int64 used = 4;
    optional int64 keyCount = 5;

    // TODO: move the I/O counters below into a separate message.
    optional int64 readCount = 6;
    optional int64 writeCount = 7;
    optional int64 readBytes = 8;
    optional int64 writeBytes = 9;

    // The only mandatory field: the container this entry describes.
    required int64 containerID = 10;

    // LifeCycleState type is not declared in this file.
    optional hadoop.hdds.LifeCycleState state = 11;
  }
  // The deleted blocks which are stored in deletedBlock.db of SCM.
  // BlockID is not used here because this message only contains multiple
  // localIDs of the same containerID.
  message DeletedBlocksTransaction {
    required int64 txID = 1;
    required int64 containerID = 2;

    // Local IDs of the blocks, all belonging to containerID.
    repeated int64 localID = 3;

    // The retry count of sending the deleting command to the datanode.
    required int32 count = 4;
  }
  // A set of container reports. The max count is generally set to 8192,
  // since that keeps the size of the reports under 1 MB.
  message ContainerReportsRequestProto {
    // Whether this request is a full report or a delta report.
    enum reportType {
      fullReport = 0;
      deltaReport = 1;
    }

    // Datanode the reports come from (type is not declared in this file).
    required DatanodeDetailsProto datanodeDetails = 1;

    // One entry per reported container.
    repeated ContainerInfo reports = 2;

    required reportType type = 3;
  }
  // Response to ContainerReportsRequestProto; declares no fields.
  message ContainerReportsResponseProto {}
  // Sent along with the heartbeat to report datanode storage utilization
  // by SCM.
  message SCMNodeReport {
    // One entry per reported storage; see SCMStorageReport.
    repeated SCMStorageReport storageReport = 1;
  }
  // Types of recognized storage media. Values start at 1; there is no
  // zero value in this enum.
  enum StorageTypeProto {
    DISK = 1;
    SSD = 2;
    ARCHIVE = 3;
    RAM_DISK = 4;
    PROVIDED = 5;
  }
  // Report for a single storage, carried in SCMNodeReport.storageReport.
  message SCMStorageReport {
    required string storageUuid = 1;
    required string storageLocation = 2;

    // The three counters below default to 0 when unset.
    // NOTE(review): units are not stated in this file; presumably bytes.
    optional uint64 capacity = 3 [default = 0];
    optional uint64 scmUsed = 4 [default = 0];
    optional uint64 remaining = 5 [default = 0];

    // Storage medium; DISK when unset.
    optional StorageTypeProto storageType = 6 [default = DISK];

    // False when unset.
    optional bool failed = 7 [default = false];
  }
  // Request message of the `register` RPC. All three parts are mandatory.
  message SCMRegisterRequestProto {
    // Datanode being registered (type is not declared in this file).
    required DatanodeDetailsProto datanodeDetails = 1;

    required SCMNodeReport nodeReport = 2;
    required ContainerReportsRequestProto containerReport = 3;
  }
  // Request for version info of the software stack on the server.
  // Declares no fields.
  message SCMVersionRequestProto {}
  // Generic response that is sent to a version request. The key/value list
  // allows keys to be added on the fly while the protocol remains stable.
  message SCMVersionResponseProto {
    required uint32 softwareVersion = 1;

    // Extensible key/value pairs (KeyValue is not declared in this file).
    repeated hadoop.hdds.KeyValue keys = 2;
  }
  // A list of addresses as strings; used by
  // SCMRegisteredCmdResponseProto.addressList.
  message SCMNodeAddressList {
    repeated string addressList = 1;
  }
  // Datanode ID returned by the SCM. This is similar to the name node
  // registration of a datanode. Field numbering starts at 2; number 1 is
  // not used by this message.
  message SCMRegisteredCmdResponseProto {
    // Outcome of the registration. Values start at 1.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    required ErrorCode errorCode = 2;
    required string datanodeUUID = 3;
    required string clusterID = 4;
    optional SCMNodeAddressList addressList = 5;
    optional string hostname = 6;
    optional string ipAddress = 7;
  }
  // SCM informs a datanode to register itself again. On receiving this
  // command, the datanode will transition to the REGISTER state.
  message SCMReregisterCmdResponseProto {
  }
  // This command tells the datanode to send in the container report when
  // possible. Declares no fields.
  message SendContainerReportProto {}
  // This command asks the datanode to close a specific container.
  message SCMCloseContainerCmdResponseProto {
    // The container to close.
    required int64 containerID = 1;
  }
  // Types of commands supported by the SCM-to-datanode protocol.
  // Values start at 2; 0 and 1 are not assigned in this enum.
  enum SCMCmdType {
    versionCommand = 2;
    registeredCommand = 3;
    sendContainerReport = 4;
    reregisterCommand = 5;
    deleteBlocksCommand = 6;
    closeContainerCommand = 7;
  }
  // A command returned by SCM for the datanode to execute. Field numbering
  // starts at 2; number 1 is not used by this message.
  //
  // NOTE(review): presumably only the optional payload matching cmdType is
  // populated; this file does not enforce that. Confirm against the sender.
  message SCMCommandResponseProto {
    // Type of the command.
    required SCMCmdType cmdType = 2;

    optional SCMRegisteredCmdResponseProto registeredProto = 3;
    optional SCMVersionResponseProto versionProto = 4;
    optional SendContainerReportProto sendReport = 5;
    optional SCMReregisterCmdResponseProto reregisterProto = 6;
    optional SCMDeleteBlocksCmdResponseProto deleteBlocksProto = 7;
    required string datanodeUUID = 8;
    optional SCMCloseContainerCmdResponseProto closeContainerProto = 9;
  }
  // A group of commands for the datanode to execute; this is the response
  // type of the sendHeartbeat RPC.
  message SCMHeartbeatResponseProto {
    repeated SCMCommandResponseProto commands = 1;
  }
  // Heartbeat response from SCM; contains a list of block deletion
  // transactions.
  message SCMDeleteBlocksCmdResponseProto {
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
  }
  // Response to the block deletion ACK RPC (sendContainerBlocksDeletionACK);
  // currently empty.
  message ContainerBlocksDeletionACKResponseProto {}
  // ACK message the datanode sends to SCM; contains the results of block
  // deletion transactions.
  message ContainerBlocksDeletionACKProto {
    // Outcome of one deletion transaction, identified by its txID.
    message DeleteBlockTransactionResult {
      required int64 txID = 1;
      required bool success = 2;
    }

    repeated DeleteBlockTransactionResult results = 1;
  }
  /**
   * Protocol used from a datanode to StorageContainerManager.
   *
   * Please see the request and response messages for details of the RPC calls.
   *
   * Here is a simple state diagram that shows how a datanode would boot up and
   * communicate with SCM.
   *
   *           -----------------------
   *          |         Start         |
   *           ---------- ------------
   *                     |
   *                     |
   *           ----------v-------------
   *          |   Searching for SCM    ------------
   *           ---------- -------------            |
   *                     |                         |
   *                     |               ----------v-------------
   *                     |              |   Register if needed   |
   *                     |               ----------- ------------
   *                     |                          |
   *                     v                          |
   *            ----------- ----------------        |
   *  ---------    Heartbeat state          <-------
   * |          --------^-------------------
   * |                  |
   * |                  |
   *  ------------------
   *
   * Here is how this protocol is used by the datanode. When a datanode boots up
   * it moves into a state called SEARCHING_SCM. In this state the datanode is
   * trying to establish communication with the SCM. The addresses of the SCMs
   * are retrieved from the configuration information.
   *
   * In the SEARCHING_SCM state, the only RPC call made by the datanode is a
   * getVersion call to SCM. Once any of the SCMs reply, the datanode checks if
   * it has a locally persisted datanode ID. If it has, this means that this
   * datanode is already registered with some SCM. If this file is not found,
   * the datanode assumes that it needs to do a registration.
   *
   * If registration is needed, the datanode moves into the REGISTER state. It
   * will send a register call with the DatanodeDetailsProto data structure and
   * persist that info.
   *
   * The response to the command contains the clusterID. This information is
   * also persisted by the datanode, which then moves into the heartbeat state.
   *
   * Once in the heartbeat state, the datanode sends heartbeats and container
   * reports to SCM and processes commands issued by SCM until it is shut down.
   */
  service StorageContainerDatanodeProtocolService {
    // Gets the version information from the SCM.
    rpc getVersion (SCMVersionRequestProto)
        returns (SCMVersionResponseProto);

    // Registers a datanode with SCM.
    rpc register (SCMRegisterRequestProto)
        returns (SCMRegisteredCmdResponseProto);

    // Sends a heartbeat from the datanode to SCM. Heartbeats under SCM look
    // more like a lifeline protocol than heartbeats under HDFS. In other
    // words, they are extremely lightweight and contain no data payload.
    rpc sendHeartbeat (SCMHeartbeatRequestProto)
        returns (SCMHeartbeatResponseProto);

    // Sends the container report to SCM. This will return a null command as
    // the response (ContainerReportsResponseProto declares no fields).
    rpc sendContainerReport(ContainerReportsRequestProto)
        returns (ContainerReportsResponseProto);

    // Sends the block deletion ACK to SCM.
    rpc sendContainerBlocksDeletionACK (ContainerBlocksDeletionACKProto)
        returns (ContainerBlocksDeletionACKResponseProto);
  }