فهرست منبع

YARN-6054. TimelineServer fails to start when some LevelDb state files are missing. Contributed by Ravi Prakash.

(cherry picked from commit 4c431a694059e40e78365b02a1497a6c7e479a70)
(cherry picked from commit a7cdd39234c3a236cd9b85853636d42dc8c850a4)
(cherry picked from commit 49d459a3ed82169c02fca98dd7b54e3f09768881)
Naganarasimha 8 سال پیش
والد
کامیت
68380262ff

+ 31 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/LeveldbTimelineStore.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.timeline;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.commons.collections.map.LRUMap;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -33,6 +34,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.yarn.api.records.timeline.*;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents.EventsOfOneEntity;
 import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelinePutError;
@@ -123,6 +125,11 @@ public class LeveldbTimelineStore extends AbstractService
   @VisibleForTesting
   static final String FILENAME = "leveldb-timeline-store.ldb";
 
+  @VisibleForTesting
+  // Suffix appended to the database path, followed by Time.monotonicNow(), to
+  // name the backup copy made before LevelDb repair is attempted in serviceInit.
+  static final String BACKUP_EXT = ".backup-";
+
   private static final byte[] START_TIME_LOOKUP_PREFIX = "k".getBytes(Charset.forName("UTF-8"));
   private static final byte[] ENTITY_ENTRY_PREFIX = "e".getBytes(Charset.forName("UTF-8"));
   private static final byte[] INDEXED_ENTRY_PREFIX = "i".getBytes(Charset.forName("UTF-8"));
@@ -175,6 +182,13 @@ public class LeveldbTimelineStore extends AbstractService
     super(LeveldbTimelineStore.class.getName());
   }
 
+  private JniDBFactory factory;
+
+  @VisibleForTesting
+  void setFactory(JniDBFactory fact) { // Test hook: supplies the factory used to open/repair the db.
+    this.factory = fact; // serviceInit only creates its own JniDBFactory when this is still null.
+  }
+
   @Override
   @SuppressWarnings("unchecked")
   protected void serviceInit(Configuration conf) throws Exception {
@@ -209,7 +223,10 @@ public class LeveldbTimelineStore extends AbstractService
     options.cacheSize(conf.getLong(
         YarnConfiguration.TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE,
         YarnConfiguration.DEFAULT_TIMELINE_SERVICE_LEVELDB_READ_CACHE_SIZE));
-    JniDBFactory factory = new JniDBFactory();
+    if(factory == null) {
+      factory = new JniDBFactory();
+    }
+
     Path dbPath = new Path(
         conf.get(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH), FILENAME);
     FileSystem localFS = null;
@@ -226,7 +243,19 @@ public class LeveldbTimelineStore extends AbstractService
       IOUtils.cleanup(LOG, localFS);
     }
     LOG.info("Using leveldb path " + dbPath);
-    db = factory.open(new File(dbPath.toString()), options);
+    try {
+      db = factory.open(new File(dbPath.toString()), options);
+    } catch (IOException ioe) {
+      File dbFile = new File(dbPath.toString());
+      File backupPath = new File(
+          dbPath.toString() + BACKUP_EXT + Time.monotonicNow());
+      LOG.warn("Incurred exception while loading LevelDb database. Backing " +
+          "up at "+ backupPath, ioe);
+      FileUtils.copyDirectory(dbFile, backupPath);
+      LOG.warn("Going to try repair");
+      factory.repair(dbFile, options);
+      db = factory.open(dbFile, options);
+    }
     checkVersion();
     startTimeWriteCache =
         Collections.synchronizedMap(new LRUMap(getStartTimeWriteCacheSize(

+ 39 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/TestLeveldbTimelineStore.java

@@ -22,12 +22,14 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
 import java.io.File;
+import java.io.FileFilter;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.filefilter.WildcardFileFilter;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -44,11 +46,14 @@ import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelineP
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.records.Version;
 import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
+import org.fusesource.leveldbjni.JniDBFactory;
 import org.iq80.leveldb.DBException;
+import org.iq80.leveldb.Options;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -464,4 +469,38 @@ public class TestLeveldbTimelineStore extends TimelineStoreTestUtils {
         .iterator().next().size());
   }
 
+  @Test
+  /**
+   * Checks that when the first open fails during init of LeveldbTimelineStore,
+   * LevelDb repair is invoked exactly once and a backup whose name contains
+   * BACKUP_EXT is left under the configured leveldb path.
+   */
+  public void testLevelDbRepair() throws IOException {
+    LeveldbTimelineStore store = new LeveldbTimelineStore();
+
+    JniDBFactory factory = Mockito.mock(JniDBFactory.class);
+    Mockito.when(
+        factory.open(Mockito.any(File.class), Mockito.any(Options.class)))
+        .thenThrow(new IOException()).thenCallRealMethod(); // 1st open throws; later opens run the real method.
+    store.setFactory(factory); // Must be set before init so the store uses the mock.
+
+    // Create the LevelDb in a different location
+    File path = new File("target", this.getClass().getSimpleName() +
+        "-tmpDir1").getAbsoluteFile();
+    Configuration conf = new Configuration(this.config);
+    conf.set(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH,
+        path.getAbsolutePath());
+
+    try {
+      store.init(conf);
+      Mockito.verify(factory, Mockito.times(1)) // repair is on the mock, so only the call itself is checked.
+          .repair(Mockito.any(File.class), Mockito.any(Options.class));
+      FileFilter fileFilter = new WildcardFileFilter(
+          "*" + LeveldbTimelineStore.BACKUP_EXT +"*");
+      Assert.assertTrue(path.listFiles(fileFilter).length > 0); // At least one backup entry was created.
+    } finally {
+      store.close();
+      fsContext.delete(new Path(path.getAbsolutePath()), true); // Recursively remove the temporary db directory.
+    }
+  }
 }