|
@@ -18,16 +18,16 @@
|
|
|
|
|
|
package org.apache.hadoop.yarn.server.timelineservice.storage;
|
|
|
|
|
|
-import java.io.BufferedWriter;
|
|
|
import java.io.File;
|
|
|
-import java.io.FileOutputStream;
|
|
|
import java.io.IOException;
|
|
|
-import java.io.OutputStreamWriter;
|
|
|
-import java.io.PrintWriter;
|
|
|
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
+import org.apache.hadoop.fs.FSDataOutputStream;
|
|
|
+import org.apache.hadoop.fs.FileSystem;
|
|
|
+import org.apache.hadoop.fs.Path;
|
|
|
+import org.apache.hadoop.io.IOUtils;
|
|
|
import org.apache.hadoop.security.UserGroupInformation;
|
|
|
import org.apache.hadoop.service.AbstractService;
|
|
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineDomain;
|
|
@@ -35,14 +35,17 @@ import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntities;
|
|
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
|
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineWriteResponse;
|
|
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineWriteResponse.TimelineWriteError;
|
|
|
+import org.apache.hadoop.yarn.client.api.impl.FileSystemTimelineWriter;
|
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
|
import org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext;
|
|
|
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
|
|
|
|
|
import com.google.common.annotations.VisibleForTesting;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
|
|
|
/**
|
|
|
- * This implements a local file based backend for storing application timeline
|
|
|
+ * This implements a FileSystem based backend for storing application timeline
|
|
|
* information. This implementation may not provide a complete implementation of
|
|
|
* all the necessary features. This implementation is provided solely for basic
|
|
|
* testing purposes, and should not be used in a non-test situation.
|
|
@@ -52,20 +55,36 @@ import com.google.common.annotations.VisibleForTesting;
|
|
|
public class FileSystemTimelineWriterImpl extends AbstractService
|
|
|
implements TimelineWriter {
|
|
|
|
|
|
- private String outputRoot;
|
|
|
-
|
|
|
/** Config param for timeline service storage tmp root for FILE YARN-3264. */
|
|
|
- public static final String TIMELINE_SERVICE_STORAGE_DIR_ROOT
|
|
|
- = YarnConfiguration.TIMELINE_SERVICE_PREFIX + "fs-writer.root-dir";
|
|
|
+ public static final String TIMELINE_SERVICE_STORAGE_DIR_ROOT =
|
|
|
+ YarnConfiguration.TIMELINE_SERVICE_PREFIX + "fs-writer.root-dir";
|
|
|
+
|
|
|
+ public static final String TIMELINE_FS_WRITER_NUM_RETRIES =
|
|
|
+ YarnConfiguration.TIMELINE_SERVICE_PREFIX + "fs-writer.num-retries";
|
|
|
+ public static final int DEFAULT_TIMELINE_FS_WRITER_NUM_RETRIES = 0;
|
|
|
+
|
|
|
+ public static final String TIMELINE_FS_WRITER_RETRY_INTERVAL_MS =
|
|
|
+ YarnConfiguration.TIMELINE_SERVICE_PREFIX +
|
|
|
+ "fs-writer.retry-interval-ms";
|
|
|
+ public static final long DEFAULT_TIMELINE_FS_WRITER_RETRY_INTERVAL_MS = 1000L;
|
|
|
|
|
|
public static final String ENTITIES_DIR = "entities";
|
|
|
|
|
|
/** Default extension for output files. */
|
|
|
public static final String TIMELINE_SERVICE_STORAGE_EXTENSION = ".thist";
|
|
|
|
|
|
+ private FileSystem fs;
|
|
|
+ private Path rootPath;
|
|
|
+ private int fsNumRetries;
|
|
|
+ private long fsRetryInterval;
|
|
|
+ private Path entitiesPath;
|
|
|
+
|
|
|
/** default value for storage location on local disk. */
|
|
|
private static final String STORAGE_DIR_ROOT = "timeline_service_data";
|
|
|
|
|
|
+ private static final Logger LOG =
|
|
|
+ LoggerFactory.getLogger(FileSystemTimelineWriter.class);
|
|
|
+
|
|
|
FileSystemTimelineWriterImpl() {
|
|
|
super((FileSystemTimelineWriterImpl.class.getName()));
|
|
|
}
|
|
@@ -83,8 +102,8 @@ public class FileSystemTimelineWriterImpl extends AbstractService
|
|
|
String appId = context.getAppId();
|
|
|
|
|
|
for (TimelineEntity entity : entities.getEntities()) {
|
|
|
- write(clusterId, userId, flowName, flowVersion, flowRunId, appId, entity,
|
|
|
- response);
|
|
|
+ writeInternal(clusterId, userId, flowName, flowVersion,
|
|
|
+ flowRunId, appId, entity, response);
|
|
|
}
|
|
|
return response;
|
|
|
}
|
|
@@ -97,59 +116,78 @@ public class FileSystemTimelineWriterImpl extends AbstractService
|
|
|
return null;
|
|
|
}
|
|
|
|
|
|
- private synchronized void write(String clusterId, String userId,
|
|
|
- String flowName, String flowVersion, long flowRun, String appId,
|
|
|
- TimelineEntity entity, TimelineWriteResponse response)
|
|
|
- throws IOException {
|
|
|
- PrintWriter out = null;
|
|
|
+ private synchronized void writeInternal(String clusterId, String userId,
|
|
|
+ String flowName, String flowVersion,
|
|
|
+ long flowRun, String appId,
|
|
|
+ TimelineEntity entity,
|
|
|
+ TimelineWriteResponse response)
|
|
|
+ throws IOException {
|
|
|
+ Path clusterIdPath = new Path(entitiesPath, clusterId);
|
|
|
+ Path userIdPath = new Path(clusterIdPath, userId);
|
|
|
+ Path flowNamePath = new Path(userIdPath, escape(flowName));
|
|
|
+ Path flowVersionPath = new Path(flowNamePath, escape(flowVersion));
|
|
|
+ Path flowRunPath = new Path(flowVersionPath, String.valueOf(flowRun));
|
|
|
+ Path appIdPath = new Path(flowRunPath, appId);
|
|
|
+ Path entityTypePath = new Path(appIdPath, entity.getType());
|
|
|
try {
|
|
|
- String dir = mkdirs(outputRoot, ENTITIES_DIR, clusterId, userId,
|
|
|
- escape(flowName), escape(flowVersion), String.valueOf(flowRun), appId,
|
|
|
- entity.getType());
|
|
|
- String fileName = dir + entity.getId() +
|
|
|
- TIMELINE_SERVICE_STORAGE_EXTENSION;
|
|
|
- out =
|
|
|
- new PrintWriter(new BufferedWriter(new OutputStreamWriter(
|
|
|
- new FileOutputStream(fileName, true), "UTF-8")));
|
|
|
- out.println(TimelineUtils.dumpTimelineRecordtoJSON(entity));
|
|
|
- out.write("\n");
|
|
|
- } catch (IOException ioe) {
|
|
|
- TimelineWriteError error = new TimelineWriteError();
|
|
|
- error.setEntityId(entity.getId());
|
|
|
- error.setEntityType(entity.getType());
|
|
|
+ mkdirs(rootPath, entitiesPath, clusterIdPath, userIdPath,
|
|
|
+ flowNamePath, flowVersionPath, flowRunPath, appIdPath,
|
|
|
+ entityTypePath);
|
|
|
+ Path filePath =
|
|
|
+ new Path(entityTypePath,
|
|
|
+ entity.getId() + TIMELINE_SERVICE_STORAGE_EXTENSION);
|
|
|
+ createFileWithRetries(filePath);
|
|
|
+
|
|
|
+ byte[] record = new StringBuilder()
|
|
|
+ .append(TimelineUtils.dumpTimelineRecordtoJSON(entity))
|
|
|
+ .append("\n").toString().getBytes("UTF-8");
|
|
|
+ writeFileWithRetries(filePath, record);
|
|
|
+ } catch (Exception ioe) {
|
|
|
+ LOG.warn("Interrupted operation:" + ioe.getMessage());
|
|
|
+ TimelineWriteError error = createTimelineWriteError(entity);
|
|
|
/*
|
|
|
* TODO: set an appropriate error code after PoC could possibly be:
|
|
|
* error.setErrorCode(TimelineWriteError.IO_EXCEPTION);
|
|
|
*/
|
|
|
response.addError(error);
|
|
|
- } finally {
|
|
|
- if (out != null) {
|
|
|
- out.close();
|
|
|
- }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ private TimelineWriteError createTimelineWriteError(TimelineEntity entity) {
|
|
|
+ TimelineWriteError error = new TimelineWriteError();
|
|
|
+ error.setEntityId(entity.getId());
|
|
|
+ error.setEntityType(entity.getType());
|
|
|
+ return error;
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public TimelineWriteResponse aggregate(TimelineEntity data,
|
|
|
TimelineAggregationTrack track) throws IOException {
|
|
|
return null;
|
|
|
-
|
|
|
}
|
|
|
|
|
|
@VisibleForTesting
|
|
|
String getOutputRoot() {
|
|
|
- return outputRoot;
|
|
|
+ return rootPath.toString();
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public void serviceInit(Configuration conf) throws Exception {
|
|
|
- outputRoot = conf.get(TIMELINE_SERVICE_STORAGE_DIR_ROOT,
|
|
|
+ String outputRoot = conf.get(TIMELINE_SERVICE_STORAGE_DIR_ROOT,
|
|
|
conf.get("hadoop.tmp.dir") + File.separator + STORAGE_DIR_ROOT);
|
|
|
+ rootPath = new Path(outputRoot);
|
|
|
+ entitiesPath = new Path(rootPath, ENTITIES_DIR);
|
|
|
+ fsNumRetries = conf.getInt(TIMELINE_FS_WRITER_NUM_RETRIES,
|
|
|
+ DEFAULT_TIMELINE_FS_WRITER_NUM_RETRIES);
|
|
|
+ fsRetryInterval = conf.getLong(TIMELINE_FS_WRITER_RETRY_INTERVAL_MS,
|
|
|
+ DEFAULT_TIMELINE_FS_WRITER_RETRY_INTERVAL_MS);
|
|
|
+ fs = rootPath.getFileSystem(getConfig());
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public void serviceStart() throws Exception {
|
|
|
- mkdirs(outputRoot, ENTITIES_DIR);
|
|
|
+ mkdirsWithRetries(rootPath);
|
|
|
+ mkdirsWithRetries(entitiesPath);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -157,18 +195,103 @@ public class FileSystemTimelineWriterImpl extends AbstractService
|
|
|
// no op
|
|
|
}
|
|
|
|
|
|
- private static String mkdirs(String... dirStrs) throws IOException {
|
|
|
- StringBuilder path = new StringBuilder();
|
|
|
- for (String dirStr : dirStrs) {
|
|
|
- path.append(dirStr).append(File.separatorChar);
|
|
|
- File dir = new File(path.toString());
|
|
|
- if (!dir.exists()) {
|
|
|
- if (!dir.mkdirs()) {
|
|
|
- throw new IOException("Could not create directories for " + dir);
|
|
|
+ private void mkdirs(Path... paths) throws IOException, InterruptedException {
|
|
|
+ for (Path path: paths) {
|
|
|
+ if (!existsWithRetries(path)) {
|
|
|
+ mkdirsWithRetries(path);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Code from FSRMStateStore.
|
|
|
+ private void mkdirsWithRetries(final Path dirPath)
|
|
|
+ throws IOException, InterruptedException {
|
|
|
+ new FSAction<Void>() {
|
|
|
+ @Override
|
|
|
+ public Void run() throws IOException {
|
|
|
+ fs.mkdirs(dirPath);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }.runWithRetries();
|
|
|
+ }
|
|
|
+
|
|
|
+ private void writeFileWithRetries(final Path outputPath, final byte[] data)
|
|
|
+ throws Exception {
|
|
|
+ new FSAction<Void>() {
|
|
|
+ @Override
|
|
|
+ public Void run() throws IOException {
|
|
|
+ writeFile(outputPath, data);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }.runWithRetries();
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean createFileWithRetries(final Path newFile)
|
|
|
+ throws IOException, InterruptedException {
|
|
|
+ return new FSAction<Boolean>() {
|
|
|
+ @Override
|
|
|
+ public Boolean run() throws IOException {
|
|
|
+ return createFile(newFile);
|
|
|
+ }
|
|
|
+ }.runWithRetries();
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean existsWithRetries(final Path path)
|
|
|
+ throws IOException, InterruptedException {
|
|
|
+ return new FSAction<Boolean>() {
|
|
|
+ @Override
|
|
|
+ public Boolean run() throws IOException {
|
|
|
+ return fs.exists(path);
|
|
|
+ }
|
|
|
+ }.runWithRetries();
|
|
|
+ }
|
|
|
+
|
|
|
+ private abstract class FSAction<T> {
|
|
|
+ abstract T run() throws IOException;
|
|
|
+
|
|
|
+ T runWithRetries() throws IOException, InterruptedException {
|
|
|
+ int retry = 0;
|
|
|
+ while (true) {
|
|
|
+ try {
|
|
|
+ return run();
|
|
|
+ } catch (IOException e) {
|
|
|
+ LOG.info("Exception while executing a FS operation.", e);
|
|
|
+ if (++retry > fsNumRetries) {
|
|
|
+ LOG.info("Maxed out FS retries. Giving up!");
|
|
|
+ throw e;
|
|
|
+ }
|
|
|
+ LOG.info("Will retry operation on FS. Retry no. " + retry +
|
|
|
+ " after sleeping for " + fsRetryInterval + " seconds");
|
|
|
+ Thread.sleep(fsRetryInterval);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- return path.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean createFile(Path newFile) throws IOException {
|
|
|
+ return fs.createNewFile(newFile);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * In order to make this writeInternal atomic as a part of writeInternal
|
|
|
+ * we will first writeInternal data to .tmp file and then rename it.
|
|
|
+ * Here we are assuming that rename is atomic for underlying file system.
|
|
|
+ */
|
|
|
+ protected void writeFile(Path outputPath, byte[] data) throws IOException {
|
|
|
+ Path tempPath =
|
|
|
+ new Path(outputPath.getParent(), outputPath.getName() + ".tmp");
|
|
|
+ FSDataOutputStream fsOut = null;
|
|
|
+ // This file will be overwritten when app/attempt finishes for saving the
|
|
|
+ // final status.
|
|
|
+ try {
|
|
|
+ fsOut = fs.create(tempPath, true);
|
|
|
+ fsOut.write(data);
|
|
|
+ fsOut.close();
|
|
|
+ fsOut = null;
|
|
|
+ fs.rename(tempPath, outputPath);
|
|
|
+ } finally {
|
|
|
+ IOUtils.cleanupWithLogger(LOG, fsOut);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// specifically escape the separator character
|