|
@@ -533,16 +533,16 @@ final class FSDirEncryptionZoneOp {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Proactively warm up the edek cache. We'll get all the edek key names,
|
|
|
- * then launch up a separate thread to warm them up.
|
|
|
+ * Best-effort attempt to proactively warm up the edek cache. We'll get all the edek key names,
|
|
|
+ * then launch up a separate thread to warm them up. Retries happen if any of keys fail to warm up.
|
|
|
*/
|
|
|
static void warmUpEdekCache(final ExecutorService executor,
|
|
|
- final FSDirectory fsd, final int delay, final int interval) {
|
|
|
+ final FSDirectory fsd, final int delay, final int interval, final int maxRetries) {
|
|
|
fsd.readLock();
|
|
|
try {
|
|
|
String[] edeks = fsd.ezManager.getKeyNames();
|
|
|
executor.execute(
|
|
|
- new EDEKCacheLoader(edeks, fsd.getProvider(), delay, interval));
|
|
|
+ new EDEKCacheLoader(edeks, fsd.getProvider(), delay, interval, maxRetries));
|
|
|
} finally {
|
|
|
fsd.readUnlock();
|
|
|
}
|
|
@@ -557,19 +557,22 @@ final class FSDirEncryptionZoneOp {
|
|
|
private final KeyProviderCryptoExtension kp;
|
|
|
private int initialDelay;
|
|
|
private int retryInterval;
|
|
|
+ private int maxRetries;
|
|
|
|
|
|
EDEKCacheLoader(final String[] names, final KeyProviderCryptoExtension kp,
|
|
|
- final int delay, final int interval) {
|
|
|
+ final int delay, final int interval, final int maxRetries) {
|
|
|
this.keyNames = names;
|
|
|
this.kp = kp;
|
|
|
this.initialDelay = delay;
|
|
|
this.retryInterval = interval;
|
|
|
+ this.maxRetries = maxRetries;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public void run() {
|
|
|
NameNode.LOG.info("Warming up {} EDEKs... (initialDelay={}, "
|
|
|
- + "retryInterval={})", keyNames.length, initialDelay, retryInterval);
|
|
|
+ + "retryInterval={}, maxRetries={})", keyNames.length, initialDelay, retryInterval,
|
|
|
+ maxRetries);
|
|
|
try {
|
|
|
Thread.sleep(initialDelay);
|
|
|
} catch (InterruptedException ie) {
|
|
@@ -577,42 +580,39 @@ final class FSDirEncryptionZoneOp {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- final int logCoolDown = 10000; // periodically print error log (if any)
|
|
|
- int sinceLastLog = logCoolDown; // always print the first failure
|
|
|
boolean success = false;
|
|
|
+ int retryCount = 0;
|
|
|
IOException lastSeenIOE = null;
|
|
|
long warmUpEDEKStartTime = monotonicNow();
|
|
|
- while (true) {
|
|
|
+
|
|
|
+ while (!success && retryCount < maxRetries) {
|
|
|
try {
|
|
|
kp.warmUpEncryptedKeys(keyNames);
|
|
|
- NameNode.LOG
|
|
|
- .info("Successfully warmed up {} EDEKs.", keyNames.length);
|
|
|
+ NameNode.LOG.info("Successfully warmed up {} EDEKs.", keyNames.length);
|
|
|
success = true;
|
|
|
- break;
|
|
|
} catch (IOException ioe) {
|
|
|
lastSeenIOE = ioe;
|
|
|
- if (sinceLastLog >= logCoolDown) {
|
|
|
- NameNode.LOG.info("Failed to warm up EDEKs.", ioe);
|
|
|
- sinceLastLog = 0;
|
|
|
- } else {
|
|
|
- NameNode.LOG.debug("Failed to warm up EDEKs.", ioe);
|
|
|
- }
|
|
|
+ NameNode.LOG.info("Failed to warm up EDEKs.", ioe);
|
|
|
} catch (Exception e) {
|
|
|
NameNode.LOG.error("Cannot warm up EDEKs.", e);
|
|
|
throw e;
|
|
|
}
|
|
|
- try {
|
|
|
- Thread.sleep(retryInterval);
|
|
|
- } catch (InterruptedException ie) {
|
|
|
- NameNode.LOG.info("EDEKCacheLoader interrupted during retry.");
|
|
|
- break;
|
|
|
+
|
|
|
+ if (!success) {
|
|
|
+ try {
|
|
|
+ Thread.sleep(retryInterval);
|
|
|
+ } catch (InterruptedException ie) {
|
|
|
+ NameNode.LOG.info("EDEKCacheLoader interrupted during retry.");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ retryCount++;
|
|
|
}
|
|
|
- sinceLastLog += retryInterval;
|
|
|
}
|
|
|
+
|
|
|
long warmUpEDEKTime = monotonicNow() - warmUpEDEKStartTime;
|
|
|
NameNode.getNameNodeMetrics().addWarmUpEDEKTime(warmUpEDEKTime);
|
|
|
if (!success) {
|
|
|
- NameNode.LOG.warn("Unable to warm up EDEKs.");
|
|
|
+ NameNode.LOG.warn("Max retry {} reached, unable to warm up EDEKs.", maxRetries);
|
|
|
if (lastSeenIOE != null) {
|
|
|
NameNode.LOG.warn("Last seen exception:", lastSeenIOE);
|
|
|
}
|