|
@@ -17,6 +17,7 @@
|
|
|
*/
|
|
|
package org.apache.hadoop.hdfs.qjournal.server;
|
|
|
|
|
|
+import com.google.protobuf.ByteString;
|
|
|
import java.io.Closeable;
|
|
|
import java.io.File;
|
|
|
import java.io.FileInputStream;
|
|
@@ -24,7 +25,9 @@ import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.OutputStreamWriter;
|
|
|
import java.net.URL;
|
|
|
+import java.nio.ByteBuffer;
|
|
|
import java.security.PrivilegedExceptionAction;
|
|
|
+import java.util.ArrayList;
|
|
|
import java.util.Iterator;
|
|
|
import java.util.List;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
@@ -34,10 +37,12 @@ import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
import org.apache.hadoop.fs.FileUtil;
|
|
|
+import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.JournalNotFormattedException;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
|
|
|
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournaledEditsResponseProto;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData;
|
|
|
import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
|
|
@@ -83,6 +88,7 @@ public class Journal implements Closeable {
|
|
|
// Current writing state
|
|
|
private EditLogOutputStream curSegment;
|
|
|
private long curSegmentTxId = HdfsServerConstants.INVALID_TXID;
|
|
|
+ private int curSegmentLayoutVersion = 0;
|
|
|
private long nextTxId = HdfsServerConstants.INVALID_TXID;
|
|
|
private long highestWrittenTxId = 0;
|
|
|
|
|
@@ -131,6 +137,8 @@ public class Journal implements Closeable {
|
|
|
|
|
|
private final FileJournalManager fjm;
|
|
|
|
|
|
+ private final JournaledEditsCache cache;
|
|
|
+
|
|
|
private final JournalMetrics metrics;
|
|
|
|
|
|
private long lastJournalTimestamp = 0;
|
|
@@ -149,6 +157,13 @@ public class Journal implements Closeable {
|
|
|
refreshCachedData();
|
|
|
|
|
|
this.fjm = storage.getJournalManager();
|
|
|
+
|
|
|
+ if (conf.getBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY,
|
|
|
+ DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_DEFAULT)) {
|
|
|
+ this.cache = new JournaledEditsCache(conf);
|
|
|
+ } else {
|
|
|
+ this.cache = null;
|
|
|
+ }
|
|
|
|
|
|
this.metrics = JournalMetrics.create(this);
|
|
|
|
|
@@ -347,6 +362,7 @@ public class Journal implements Closeable {
|
|
|
curSegment.abort();
|
|
|
curSegment = null;
|
|
|
curSegmentTxId = HdfsServerConstants.INVALID_TXID;
|
|
|
+ curSegmentLayoutVersion = 0;
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -385,6 +401,9 @@ public class Journal implements Closeable {
|
|
|
LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId +
|
|
|
" ; journal id: " + journalId);
|
|
|
}
|
|
|
+ if (cache != null) {
|
|
|
+ cache.storeEdits(records, firstTxnId, lastTxnId, curSegmentLayoutVersion);
|
|
|
+ }
|
|
|
|
|
|
// If the edit has already been marked as committed, we know
|
|
|
// it has been fsynced on a quorum of other nodes, and we are
|
|
@@ -572,6 +591,7 @@ public class Journal implements Closeable {
|
|
|
|
|
|
curSegment = fjm.startLogSegment(txid, layoutVersion);
|
|
|
curSegmentTxId = txid;
|
|
|
+ curSegmentLayoutVersion = layoutVersion;
|
|
|
nextTxId = txid;
|
|
|
}
|
|
|
|
|
@@ -591,6 +611,7 @@ public class Journal implements Closeable {
|
|
|
curSegment.close();
|
|
|
curSegment = null;
|
|
|
curSegmentTxId = HdfsServerConstants.INVALID_TXID;
|
|
|
+ curSegmentLayoutVersion = 0;
|
|
|
}
|
|
|
|
|
|
checkSync(nextTxId == endTxId + 1,
|
|
@@ -691,6 +712,44 @@ public class Journal implements Closeable {
|
|
|
return new RemoteEditLogManifest(logs);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * @see QJournalProtocol#getJournaledEdits(String, String, long, int)
|
|
|
+ */
|
|
|
+ public GetJournaledEditsResponseProto getJournaledEdits(long sinceTxId,
|
|
|
+ int maxTxns) throws IOException {
|
|
|
+ if (cache == null) {
|
|
|
+ throw new IOException("The journal edits cache is not enabled, which " +
|
|
|
+ "is a requirement to fetch journaled edits via RPC. Please enable " +
|
|
|
+ "it via " + DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY);
|
|
|
+ }
|
|
|
+ if (sinceTxId > getHighestWrittenTxId()) {
|
|
|
+ // Requested edits that don't exist yet; short-circuit the cache here
|
|
|
+ metrics.rpcEmptyResponses.incr();
|
|
|
+ return GetJournaledEditsResponseProto.newBuilder().setTxnCount(0).build();
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ List<ByteBuffer> buffers = new ArrayList<>();
|
|
|
+ int txnCount = cache.retrieveEdits(sinceTxId, maxTxns, buffers);
|
|
|
+ int totalSize = 0;
|
|
|
+ for (ByteBuffer buf : buffers) {
|
|
|
+ totalSize += buf.remaining();
|
|
|
+ }
|
|
|
+ metrics.txnsServedViaRpc.incr(txnCount);
|
|
|
+ metrics.bytesServedViaRpc.incr(totalSize);
|
|
|
+ ByteString.Output output = ByteString.newOutput(totalSize);
|
|
|
+ for (ByteBuffer buf : buffers) {
|
|
|
+ output.write(buf.array(), buf.position(), buf.remaining());
|
|
|
+ }
|
|
|
+ return GetJournaledEditsResponseProto.newBuilder()
|
|
|
+ .setTxnCount(txnCount)
|
|
|
+ .setEditLog(output.toByteString())
|
|
|
+ .build();
|
|
|
+ } catch (JournaledEditsCache.CacheMissException cme) {
|
|
|
+ metrics.rpcRequestCacheMissAmount.add(cme.getCacheMissAmount());
|
|
|
+ throw cme;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* @return the current state of the given segment, or null if the
|
|
|
* segment does not exist.
|