StorageContainerDatanodeProtocol.proto 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  /**
   * These .proto interfaces are private and unstable.
   * Please see http://wiki.apache.org/hadoop/Compatibility
   * for what changes are allowed for an *unstable* .proto interface.
   */
  // Java code generation: generated classes are nested in the
  // StorageContainerDatanodeProtocolProtos outer class of the package below.
  option java_package = "org.apache.hadoop.hdds.protocol.proto";
  option java_outer_classname = "StorageContainerDatanodeProtocolProtos";
  // Generate generic Java service code for the service declared in this file.
  option java_generic_services = true;
  // Generate equals() and hashCode() for the Java message classes.
  option java_generate_equals_and_hash = true;

  // Protobuf package of every definition in this file.
  package hadoop.hdds;

  // Presumably the source of the types used but not declared here
  // (e.g. DatanodeDetailsProto, PipelineID, KeyValue) -- TODO confirm.
  import "hdds.proto";
  /**
   * Request for the version info of the software stack on the server.
   * It carries no fields; it is the argument of the getVersion RPC.
   */
  message SCMVersionRequestProto {}
  /**
   * Generic response that is sent to a version request. Additional
   * information travels as key/value pairs, which allows keys to be added on
   * the fly while the protocol remains stable.
   */
  message SCMVersionResponseProto {
    // Software version reported by the server.
    required uint32 softwareVersion = 1;
    // Extensible list of key/value pairs returned with the version.
    repeated hadoop.hdds.KeyValue keys = 2;
  }
  /**
   * Argument of the register RPC. Unlike the heartbeat request, every
   * report in this message is required.
   */
  message SCMRegisterRequestProto {
    // Details of the datanode that is registering.
    required DatanodeDetailsProto datanodeDetails = 1;
    // Storage reports of the datanode.
    required NodeReportProto nodeReport = 2;
    // Container reports of the datanode.
    required ContainerReportsProto containerReport = 3;
    // Pipeline reports of the datanode.
    required PipelineReportsProto pipelineReports = 4;
  }
  /**
   * Datanode ID returned by the SCM. This is similar to name node
   * registration of a datanode.
   */
  message SCMRegisteredResponseProto {
    // Result of the registration attempt.
    enum ErrorCode {
      success = 1;
      errorNodeNotPermitted = 2;
    }

    // Whether the registration succeeded or was refused.
    required ErrorCode errorCode = 1;
    // UUID identifying the datanode.
    required string datanodeUUID = 2;
    // ID of the cluster.
    required string clusterID = 3;
    // Optional list of addresses -- presumably SCM addresses, going by the
    // type name; TODO confirm.
    optional SCMNodeAddressList addressList = 4;
    // Optional host name and IP address -- NOTE(review): presumably those of
    // the datanode as seen by SCM; confirm against the server code.
    optional string hostname = 5;
    optional string ipAddress = 6;
  }
  /**
   * This message is sent by a data node to indicate that it is alive or that
   * it is registering with the node manager. Only datanodeDetails is
   * mandatory; the reports and actions may be attached as needed.
   */
  message SCMHeartbeatRequestProto {
    // Details of the datanode sending the heartbeat.
    required DatanodeDetailsProto datanodeDetails = 1;
    // Optional storage reports of the datanode.
    optional NodeReportProto nodeReport = 2;
    // Optional container reports.
    optional ContainerReportsProto containerReport = 3;
    // Zero or more command status reports.
    repeated CommandStatusReportsProto commandStatusReports = 4;
    // Optional container actions.
    optional ContainerActionsProto containerActions = 5;
    // Optional pipeline actions.
    optional PipelineActionsProto pipelineActions = 6;
    // Optional pipeline reports.
    optional PipelineReportsProto pipelineReports = 7;
  }
  /*
   * Response to a heartbeat: a group of commands for the datanode to
   * execute.
   */
  message SCMHeartbeatResponseProto {
    // UUID of the datanode.
    required string datanodeUUID = 1;
    // Commands to execute; may be empty.
    repeated SCMCommandProto commands = 2;
  }
  /**
   * A list of addresses, used as the optional addressList field of
   * SCMRegisteredResponseProto.
   */
  message SCMNodeAddressList {
    // Addresses as strings -- format (host:port?) is not visible here;
    // TODO confirm.
    repeated string addressList = 1;
  }
  /**
   * This message is sent along with the heartbeat to report datanode
   * storage utilization to SCM.
   */
  message NodeReportProto {
    // Storage reports included in this node report.
    repeated StorageReportProto storageReport = 1;
  }
  /**
   * Utilization report of a single storage, as listed in NodeReportProto.
   */
  message StorageReportProto {
    // UUID of the storage.
    required string storageUuid = 1;
    // Location of the storage.
    required string storageLocation = 2;
    // Size counters, each defaulting to 0 when absent.
    // NOTE(review): presumably expressed in bytes -- confirm.
    optional uint64 capacity = 3 [default = 0];
    optional uint64 scmUsed = 4 [default = 0];
    optional uint64 remaining = 5 [default = 0];
    // Storage medium; DISK when absent.
    optional StorageTypeProto storageType = 6 [default = DISK];
    // Failure flag of the storage; false when absent.
    optional bool failed = 7 [default = false];
  }
  /**
   * Types of recognized storage media. DISK is the default used by
   * StorageReportProto.storageType.
   */
  enum StorageTypeProto {
    DISK = 1;
    SSD = 2;
    ARCHIVE = 3;
    RAM_DISK = 4;
    PROVIDED = 5;
  }
  /**
   * A batch of container reports, carried by the register and heartbeat
   * requests.
   */
  message ContainerReportsProto {
    // One ContainerInfo entry per reported container.
    repeated ContainerInfo reports = 1;
  }
  /**
   * A batch of command statuses, carried by the heartbeat request.
   */
  message CommandStatusReportsProto {
    // Statuses of individual commands.
    repeated CommandStatus cmdStatus = 1;
  }
  /**
   * Status of a single command, identified by its command id and type.
   */
  message CommandStatus {
    // Execution state of the command.
    enum Status {
      PENDING = 1;
      EXECUTED = 2;
      FAILED = 3;
    }

    // Id of the command this status refers to.
    required int64 cmdId = 1;
    // NOTE(review): the field is required, so writers must always set it and
    // the PENDING default can only matter to code reading a partial message.
    required Status status = 2 [default = PENDING];
    // Type of the command.
    required SCMCommandProto.Type type = 3;
    // Optional free-form message.
    optional string msg = 4;
    // Optional block deletion acknowledgement -- presumably set only for
    // deleteBlocksCommand; TODO confirm.
    optional ContainerBlocksDeletionACKProto blockDeletionAck = 5;
  }
  /**
   * A batch of container actions, carried by the heartbeat request.
   */
  message ContainerActionsProto {
    // Individual container actions.
    repeated ContainerAction containerActions = 1;
  }
  /**
   * An action concerning one container, optionally with the reason for it.
   */
  message ContainerAction {
    // Supported actions; CLOSE is the only one defined.
    enum Action {
      CLOSE = 1;
    }

    // Supported reasons; CONTAINER_FULL is the only one defined.
    enum Reason {
      CONTAINER_FULL = 1;
    }

    // Container the action applies to.
    required int64 containerID = 1;
    // The action itself.
    required Action action = 2;
    // Optional reason for the action.
    optional Reason reason = 3;
  }
  /**
   * Report about a single pipeline; it carries only the pipeline id.
   */
  message PipelineReport {
    // Id of the reported pipeline.
    required PipelineID pipelineID = 1;
  }
  /**
   * A batch of pipeline reports, carried by the register and heartbeat
   * requests.
   */
  message PipelineReportsProto {
    // Individual pipeline reports.
    repeated PipelineReport pipelineReport = 1;
  }
  /**
   * A batch of pipeline actions, carried by the heartbeat request.
   */
  message PipelineActionsProto {
    // Individual pipeline actions.
    repeated PipelineAction pipelineActions = 1;
  }
  /**
   * Details of a close-pipeline action (see PipelineAction.closePipeline).
   * Field number 2 is not used by this message.
   */
  message ClosePipelineInfo {
    // Supported reasons; PIPELINE_FAILED is the only one defined.
    enum Reason {
      PIPELINE_FAILED = 1;
    }

    // Pipeline to close.
    required PipelineID pipelineID = 1;
    // Optional reason code.
    optional Reason reason = 3;
    // Optional free-form detail about the reason.
    optional string detailedReason = 4;
  }
  /**
   * An action concerning a pipeline.
   */
  message PipelineAction {
    // Supported actions; CLOSE is the only one defined.
    enum Action {
      CLOSE = 1;
    }

    /**
     * Action will be used to identify the correct pipeline action.
     */
    required Action action = 1;
    // Details of the close action -- presumably set when action is CLOSE;
    // the schema does not enforce it.
    optional ClosePipelineInfo closePipeline = 2;
  }
  /**
   * A container report contains the following information.
   * Only containerID is required; every other field is optional.
   */
  message ContainerInfo {
    // Id of the reported container.
    required int64 containerID = 1;
    // NOTE(review): units of size/used are not stated here -- presumably
    // bytes; confirm.
    optional int64 size = 2;
    optional int64 used = 3;
    // Number of keys in the container.
    optional int64 keyCount = 4;
    // TODO: move the io count to separate message
    optional int64 readCount = 5;
    optional int64 writeCount = 6;
    optional int64 readBytes = 7;
    optional int64 writeBytes = 8;
    // NOTE(review): what finalhash covers is not visible in this file.
    optional string finalhash = 9;
    // Life cycle state of the container.
    optional hadoop.hdds.LifeCycleState state = 10;
    // Delete transaction id recorded for the container.
    optional int64 deleteTransactionId = 11;
  }
  /*
   * These are commands returned by SCM for the datanode to execute.
   * commandType names the kind of command; each kind has a matching
   * optional payload field below.
   */
  message SCMCommandProto {
    // Kinds of commands SCM can issue.
    enum Type {
      reregisterCommand = 1;
      deleteBlocksCommand = 2;
      closeContainerCommand = 3;
      deleteContainerCommand = 4;
      replicateContainerCommand = 5;
    }

    // TODO: once we start using protoc 3.x, refactor this message using "oneof"
    required Type commandType = 1;
    // Payloads, one per command type. Presumably only the one matching
    // commandType is set; the schema does not enforce this.
    optional ReregisterCommandProto reregisterCommandProto = 2;
    optional DeleteBlocksCommandProto deleteBlocksCommandProto = 3;
    optional CloseContainerCommandProto closeContainerCommandProto = 4;
    optional DeleteContainerCommandProto deleteContainerCommandProto = 5;
    optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
  }
  /**
   * SCM informs a datanode to register itself again.
   * On receiving this command, the datanode will transition to the REGISTER
   * state. The command carries no fields.
   */
  message ReregisterCommandProto {}
  // Heartbeat response from SCM that contains a list of block deletion
  // transactions. Field number 2 is not used by this message.
  message DeleteBlocksCommandProto {
    // Block deletion transactions to carry out.
    repeated DeletedBlocksTransaction deletedBlocksTransactions = 1;
    // Id of this command.
    required int64 cmdId = 3;
  }
  // The deleted blocks which are stored in deletedBlock.db of scm.
  // We don't use BlockID because this only contains multiple localIDs
  // of the same containerID.
  message DeletedBlocksTransaction {
    // Id of the transaction.
    required int64 txID = 1;
    // Container that all listed blocks belong to.
    required int64 containerID = 2;
    // Local ids of the blocks within that container.
    repeated int64 localID = 3;
    // Retry count for sending the delete command to the datanode.
    required int32 count = 4;
  }
  // ACK message a datanode sends to SCM; it contains the results of
  // block deletion transactions.
  message ContainerBlocksDeletionACKProto {
    // Outcome of one block deletion transaction.
    message DeleteBlockTransactionResult {
      // Id of the transaction.
      required int64 txID = 1;
      // Container the transaction applied to.
      required int64 containerID = 2;
      // Whether the transaction succeeded.
      required bool success = 3;
    }

    // One result per acknowledged transaction.
    repeated DeleteBlockTransactionResult results = 1;
    // Id of the datanode -- presumably its UUID string; TODO confirm.
    required string dnId = 2;
  }
  /**
   * This command asks the datanode to close a specific container.
   */
  message CloseContainerCommandProto {
    // Container to close.
    required int64 containerID = 1;
    // Replication type of the container.
    required hadoop.hdds.ReplicationType replicationType = 2;
    // Id of this command.
    required int64 cmdId = 3;
    // Id of the pipeline -- presumably the one the container belongs to;
    // TODO confirm.
    required PipelineID pipelineID = 4;
  }
  /**
   * This command asks the datanode to delete a specific container.
   */
  message DeleteContainerCommandProto {
    // Container to delete.
    required int64 containerID = 1;
    // Id of this command.
    required int64 cmdId = 2;
  }
  /**
   * This command asks the datanode to replicate a container from specific
   * sources.
   */
  message ReplicateContainerCommandProto {
    // Container to replicate.
    required int64 containerID = 1;
    // Datanodes the container can be replicated from.
    repeated DatanodeDetailsProto sources = 2;
    // Id of this command.
    required int64 cmdId = 3;
  }
  /**
   * Protocol used from a datanode to StorageContainerManager.
   *
   * Please see the request and response messages for details of the RPC calls.
   *
   * Here is a simple state diagram that shows how a datanode would boot up and
   * communicate with SCM.
   *
   *           -----------------------
   *          |         Start         |
   *           ---------- ------------
   *                     |
   *                     |
   *                     |
   *                     |
   *                     |
   *                     |
   *                     |
   *           ----------v-------------
   *          |   Searching for SCM    ------------
   *           ---------- -------------            |
   *                     |                         |
   *                     |                         |
   *                     |               ----------v-------------
   *                     |              |  Register if needed    |
   *                     |               ----------- ------------
   *                     |                          |
   *                     v                          |
   *            ----------- ----------------        |
   *  ---------   Heartbeat state          <--------
   * |          --------^-------------------
   * |                  |
   * |                  |
   * |                  |
   * |                  |
   * |                  |
   * |                  |
   * |                  |
   *  ------------------
   *
   *
   *
   * Here is how this protocol is used by the datanode. When a datanode boots up
   * it moves into a state called SEARCHING_SCM. In this state the datanode is
   * trying to establish communication with the SCM. The addresses of the SCMs
   * are retrieved from the configuration information.
   *
   * In the SEARCHING_SCM state, the only rpc call made by the datanode is a
   * getVersion call to SCM. Once any of the SCMs reply, the datanode checks if
   * it has a locally persisted datanode ID. If it has, this means that this
   * datanode is already registered with some SCM. If this file is not found,
   * the datanode assumes that it needs to do a registration.
   *
   * If registration is needed, the datanode moves into the REGISTER state. It
   * will send a register call with the DatanodeDetailsProto data structure and
   * persist that info.
   *
   * The response to the command contains clusterID. This information is
   * also persisted by the datanode, which then moves into the heartbeat state.
   *
   * Once in the heartbeat state, the datanode sends heartbeats and container
   * reports to SCM and processes commands issued by SCM until it is shut down.
   *
   */
  service StorageContainerDatanodeProtocolService {

    /**
     * Gets the version information from the SCM.
     */
    rpc getVersion (SCMVersionRequestProto)
        returns (SCMVersionResponseProto);

    /**
     * Registers a data node with SCM.
     */
    rpc register (SCMRegisterRequestProto)
        returns (SCMRegisteredResponseProto);

    /**
     * Sends a heartbeat from the datanode to SCM. Only the datanode details
     * are mandatory in the request, so a bare heartbeat stays light weight;
     * node, container, pipeline and command status reports as well as
     * container and pipeline actions may be attached to it. The response
     * returns the commands SCM wants the datanode to execute.
     */
    rpc sendHeartbeat (SCMHeartbeatRequestProto)
        returns (SCMHeartbeatResponseProto);
  }