retry_policy.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef LIB_COMMON_RETRY_POLICY_H_
  19. #define LIB_COMMON_RETRY_POLICY_H_
  20. #include "common/util.h"
  21. #include <string>
  22. #include <stdint.h>
  23. namespace hdfs {
  /**
   * Value type describing the outcome of a retry decision: which action to
   * take, the delay attached to it, and a textual reason.
   */
  class RetryAction {
   public:
    // The possible decisions a retry policy can hand back.
    enum RetryDecision { FAIL, RETRY, FAILOVER_AND_RETRY };

    // Decision carried by this action.
    RetryDecision action;
    // Delay attached to the decision (milliseconds, going by the name).
    uint64_t delayMillis;
    // Free-form explanation; only the fail() factory fills it in.
    std::string reason;

    /**
     * Stores the three arguments verbatim in the corresponding members.
     */
    RetryAction(RetryDecision in_action, uint64_t in_delayMillis,
                const std::string &in_reason)
        : action(in_action), delayMillis(in_delayMillis), reason(in_reason) {}

    /**
     * Builds a FAIL action with a zero delay and the supplied reason.
     */
    static RetryAction fail(const std::string &reason) {
      const uint64_t no_delay = 0;
      return RetryAction(FAIL, no_delay, reason);
    }

    /**
     * Builds a RETRY action with the supplied delay and an empty reason.
     */
    static RetryAction retry(uint64_t delay) {
      const std::string no_reason;
      return RetryAction(RETRY, delay, no_reason);
    }

    /**
     * Builds a FAILOVER_AND_RETRY action with the supplied delay and an
     * empty reason.
     */
    static RetryAction failover(uint64_t delay) {
      const std::string no_reason;
      return RetryAction(FAILOVER_AND_RETRY, delay, no_reason);
    }

    /**
     * Returns the enumerator name of the stored decision, or
     * "UNDEFINED ACTION" when the value matches no enumerator.
     */
    std::string decision_str() const {
      if (action == FAIL) {
        return "FAIL";
      }
      if (action == RETRY) {
        return "RETRY";
      }
      if (action == FAILOVER_AND_RETRY) {
        return "FAILOVER_AND_RETRY";
      }
      return "UNDEFINED ACTION";
    }
  };
  51. class RetryPolicy {
  52. protected:
  53. uint64_t delay_;
  54. uint64_t max_retries_;
  55. RetryPolicy(uint64_t delay, uint64_t max_retries) :
  56. delay_(delay), max_retries_(max_retries) {}
  57. public:
  58. RetryPolicy() {};
  59. virtual ~RetryPolicy() {}
  60. /*
  61. * If there was an error in communications, responds with the configured
  62. * action to take.
  63. */
  64. virtual RetryAction ShouldRetry(const Status &s, uint64_t retries,
  65. uint64_t failovers,
  66. bool isIdempotentOrAtMostOnce) const = 0;
  67. virtual std::string str() const { return "Base RetryPolicy"; }
  68. };
  69. /*
  70. * Overview of how the failover retry policy works:
  71. *
  72. * 1) Acts the same as FixedDelayRetryPolicy in terms of connection retries against a single NN
  73. * with two differences:
  74. * a) If we have retried more than the maximum number of retries we will failover to the
  75. * other node and reset the retry counter rather than error out. It will begin the same
  76. * routine on the other node.
  77. * b) If an attempted connection times out and max_failover_conn_retries_ is less than the
  78. * normal number of retries it will failover sooner. The connection timeout retry limit
  79. * defaults to zero; the idea being that if a node is unresponsive it's better to just
  80. * try the secondary rather than incur the timeout cost multiple times.
  81. *
  82. * 2) Keeps track of the failover count in the same way that the retry count is tracked. If failover
  83. * is triggered more than a set number (dfs.client.failover.max.attempts) of times then the operation
  84. * will error out in the same way that a non-HA operation would error if it ran out of retries.
  85. *
  86. * 3) Failover between namenodes isn't instantaneous so the RPC retry delay is reused to add a small
  87. * delay between failover attempts. This helps prevent the client from quickly using up all of
  88. * its failover attempts while thrashing between namenodes that are both temporarily marked standby.
  89. * Note: The Java client implements exponential backoff here, using a base other than the RPC delay,
  90. * and this policy is intended to do the same in the future. It currently performs no exponential backoff;
  91. * the name can be changed to ExponentialDelayWithFailover once backoff is implemented.
  92. */
  93. class FixedDelayWithFailover : public RetryPolicy {
  94. public:
  95. FixedDelayWithFailover(uint64_t delay, uint64_t max_retries,
  96. uint64_t max_failover_retries,
  97. uint64_t max_failover_conn_retries)
  98. : RetryPolicy(delay, max_retries), max_failover_retries_(max_failover_retries),
  99. max_failover_conn_retries_(max_failover_conn_retries) {}
  100. RetryAction ShouldRetry(const Status &s, uint64_t retries,
  101. uint64_t failovers,
  102. bool isIdempotentOrAtMostOnce) const override;
  103. std::string str() const override { return "FixedDelayWithFailover"; }
  104. private:
  105. // Attempts to fail over
  106. uint64_t max_failover_retries_;
  107. // Attempts to fail over if connection times out rather than
  108. // tring to connect and wait for the timeout delay failover_retries_
  109. // times.
  110. uint64_t max_failover_conn_retries_;
  111. };
  112. /*
  113. * Returns a fixed delay up to a certain number of retries
  114. */
  115. class FixedDelayRetryPolicy : public RetryPolicy {
  116. public:
  117. FixedDelayRetryPolicy(uint64_t delay, uint64_t max_retries)
  118. : RetryPolicy(delay, max_retries) {}
  119. RetryAction ShouldRetry(const Status &s, uint64_t retries,
  120. uint64_t failovers,
  121. bool isIdempotentOrAtMostOnce) const override;
  122. std::string str() const override { return "FixedDelayRetryPolicy"; }
  123. };
  124. /*
  125. * Never retries
  126. */
  127. class NoRetryPolicy : public RetryPolicy {
  128. public:
  129. NoRetryPolicy() {};
  130. RetryAction ShouldRetry(const Status &s, uint64_t retries,
  131. uint64_t failovers,
  132. bool isIdempotentOrAtMostOnce) const override;
  133. std::string str() const override { return "NoRetryPolicy"; }
  134. };
  135. }
  136. #endif