@@ -0,0 +1,369 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheTracker;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestCachingStrategy {
+  private static final Log LOG = LogFactory.getLog(TestCachingStrategy.class);
+  private static int MAX_TEST_FILE_LEN = 1024 * 1024;
+  private static int WRITE_PACKET_SIZE = DFSConfigKeys.DFS_CLIENT_WRITE_PACKET_SIZE_DEFAULT;
+
+  private final static TestRecordingCacheTracker tracker =
+      new TestRecordingCacheTracker();
+
+  @BeforeClass
+  public static void setupTest() {
+    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
+
+    // Track calls to posix_fadvise.
+    NativeIO.POSIX.cacheTracker = tracker;
+
+    // Normally, we wait for a few megabytes of data to be read or written
+    // before dropping the cache. This is to avoid an excessive number of
+    // JNI calls to the posix_fadvise function. However, for the purpose
+    // of this test, we want to use small files and see all fadvise calls
+    // happen.
+    BlockSender.CACHE_DROP_INTERVAL_BYTES = 4096;
+    BlockReceiver.CACHE_DROP_LAG_BYTES = 4096;
+  }
+
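+  /**
+   * Records which byte offsets of a single block file have been dropped
+   * from the page cache via posix_fadvise(POSIX_FADV_DONTNEED).
+   */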
+  private static class Stats {
+    private final String fileName;
+    private final boolean dropped[] = new boolean[MAX_TEST_FILE_LEN];
+
+    Stats(String fileName) {
+      this.fileName = fileName;
+    }
+
+    synchronized void fadvise(int offset, int len, int flags) {
+      LOG.debug("got fadvise(offset=" + offset + ", len=" + len +
+          ",flags=" + flags + ")");
+      if (flags == NativeIO.POSIX.POSIX_FADV_DONTNEED) {
+        for (int i = 0; i < (int)len; i++) {
+          dropped[(int)(offset + i)] = true;
+        }
+      }
+    }
+
+    synchronized void assertNotDroppedInRange(int start, int end) {
+      for (int i = start; i < end; i++) {
+        if (dropped[i]) {
+          throw new RuntimeException("in file " + fileName + ", we " +
+              "dropped the cache at offset " + i);
+        }
+      }
+    }
+
+    synchronized void assertDroppedInRange(int start, int end) {
+      for (int i = start; i < end; i++) {
+        if (!dropped[i]) {
+          throw new RuntimeException("in file " + fileName + ", we " +
+              "did not drop the cache at offset " + i);
+        }
+      }
+    }
+
+    synchronized void clear() {
+      Arrays.fill(dropped, false);
+    }
+  }
+
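+  /**
+   * CacheTracker implementation that records every fadvise call issued by
+   * the DataNode, keyed by block file name, so that tests can assert on
+   * which byte ranges were dropped.
+   */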
+  private static class TestRecordingCacheTracker implements CacheTracker {
+    private final Map<String, Stats> map = new TreeMap<String, Stats>();
+
+    @Override
+    synchronized public void fadvise(String name,
+        long offset, long len, int flags) {
+      if ((len < 0) || (len > Integer.MAX_VALUE)) {
+        throw new RuntimeException("invalid length of " + len +
+            " passed to posixFadviseIfPossible");
+      }
+      if ((offset < 0) || (offset > Integer.MAX_VALUE)) {
+        throw new RuntimeException("invalid offset of " + offset +
+            " passed to posixFadviseIfPossible");
+      }
+      Stats stats = map.get(name);
+      if (stats == null) {
+        stats = new Stats(name);
+        map.put(name, stats);
+      }
+      stats.fadvise((int)offset, (int)len, flags);
+    }
+
+    synchronized void clear() {
+      map.clear();
+    }
+
+    synchronized Stats getStats(String fileName) {
+      return map.get(fileName);
+    }
+
+    synchronized public String toString() {
+      StringBuilder bld = new StringBuilder();
+ bld.append("TestRecordingCacheManipulator{");
+      String prefix = "";
+      for (String fileName : map.keySet()) {
+        bld.append(prefix);
+        prefix = ", ";
+        bld.append(fileName);
+      }
+      bld.append("}");
+      return bld.toString();
+    }
+  }
+
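+  /**
+   * Writes a file of the given length filled with zero bytes, optionally
+   * setting the drop-behind flag on the output stream (null keeps the
+   * stream's default caching strategy).
+   */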
+  static void createHdfsFile(FileSystem fs, Path p, long length,
+      Boolean dropBehind) throws Exception {
+    FSDataOutputStream fos = null;
+    try {
+      // create file with replication factor of 1
+      fos = fs.create(p, (short)1);
+      if (dropBehind != null) {
+        fos.setDropBehind(dropBehind);
+      }
+      byte buf[] = new byte[8196];
+      while (length > 0) {
+        int amt = (length > buf.length) ? (int)buf.length : (int)length;
+        fos.write(buf, 0, amt);
+        length -= amt;
+      }
+    } catch (IOException e) {
+      LOG.error("ioexception", e);
+    } finally {
+      if (fos != null) {
+        fos.close();
+      }
+    }
+  }
+
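+  /**
+   * Reads up to length bytes from the file, optionally setting the
+   * drop-behind flag on the input stream, and returns the number of bytes
+   * read once EOF is reached.
+   */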
+  static long readHdfsFile(FileSystem fs, Path p, long length,
+      Boolean dropBehind) throws Exception {
+    FSDataInputStream fis = null;
+    long totalRead = 0;
+    try {
+      fis = fs.open(p);
+      if (dropBehind != null) {
+        fis.setDropBehind(dropBehind);
+      }
+      byte buf[] = new byte[8196];
+      while (length > 0) {
+        int amt = (length > buf.length) ? (int)buf.length : (int)length;
+        int ret = fis.read(buf, 0, amt);
+        if (ret == -1) {
+          return totalRead;
+        }
+        totalRead += ret;
+        length -= ret;
+      }
+    } catch (IOException e) {
+      LOG.error("ioexception", e);
+    } finally {
+      if (fis != null) {
+        fis.close();
+      }
+    }
+    throw new RuntimeException("unreachable");
+  }
+
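+  /**
+   * Write and then read a file with drop-behind explicitly enabled on the
+   * client streams; the DataNode should drop the cached data behind both
+   * the write and the read.
+   */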
+  @Test(timeout=120000)
+  public void testFadviseAfterWriteThenRead() throws Exception {
+    // start a cluster
+    LOG.info("testFadviseAfterWriteThenRead");
+    tracker.clear();
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    String TEST_PATH = "/test";
+    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+          .build();
+      cluster.waitActive();
+      FileSystem fs = cluster.getFileSystem();
+
+      // create new file
+      createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, true);
+      // verify that we dropped everything from the cache during file creation.
+      ExtendedBlock block = cluster.getNameNode().getRpcServer().getBlockLocations(
+          TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
+      String fadvisedFileName = MiniDFSCluster.getBlockFile(0, block).getName();
+      Stats stats = tracker.getStats(fadvisedFileName);
+      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+      stats.clear();
+
+      // read file
+      readHdfsFile(fs, new Path(TEST_PATH), Long.MAX_VALUE, true);
+      // verify that we dropped everything from the cache.
+      Assert.assertNotNull(stats);
+      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Test the scenario where the DataNode defaults to not dropping the cache,
+   * but the client defaults are set to drop it behind both reads and writes.
+   */
+  @Test(timeout=120000)
+  public void testClientDefaults() throws Exception {
+    // start a cluster
+    LOG.info("testClientDefaults");
+    tracker.clear();
+    Configuration conf = new HdfsConfiguration();
+    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, false);
+    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, false);
+    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS, true);
+    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_WRITES, true);
+    MiniDFSCluster cluster = null;
+    String TEST_PATH = "/test";
+    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+          .build();
+      cluster.waitActive();
+      FileSystem fs = cluster.getFileSystem();
+
+      // create new file
+      createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null);
+      // verify that we dropped everything from the cache during file creation.
+      ExtendedBlock block = cluster.getNameNode().getRpcServer().getBlockLocations(
+          TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
+      String fadvisedFileName = MiniDFSCluster.getBlockFile(0, block).getName();
+      Stats stats = tracker.getStats(fadvisedFileName);
+      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+      stats.clear();
+
+      // read file
+      readHdfsFile(fs, new Path(TEST_PATH), Long.MAX_VALUE, null);
+      // verify that we dropped everything from the cache.
+      Assert.assertNotNull(stats);
+      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
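+  /**
+   * With drop-behind enabled on the DataNode, a read smaller than the
+   * cache-drop interval should not trigger any fadvise calls.
+   */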
+  @Test(timeout=120000)
+  public void testFadviseSkippedForSmallReads() throws Exception {
+    // start a cluster
+    LOG.info("testFadviseSkippedForSmallReads");
+    tracker.clear();
+    Configuration conf = new HdfsConfiguration();
+    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_READS_KEY, true);
+    conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DROP_CACHE_BEHIND_WRITES_KEY, true);
+    MiniDFSCluster cluster = null;
+    String TEST_PATH = "/test";
+    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
+    FSDataInputStream fis = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+          .build();
+      cluster.waitActive();
+      FileSystem fs = cluster.getFileSystem();
+
+      // create new file
+      createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, null);
+      // Since the DataNode was configured with drop-behind, and we didn't
+      // specify any policy, we should have done drop-behind.
+      ExtendedBlock block = cluster.getNameNode().getRpcServer().getBlockLocations(
+          TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
+      String fadvisedFileName = MiniDFSCluster.getBlockFile(0, block).getName();
+      Stats stats = tracker.getStats(fadvisedFileName);
+      stats.assertDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+      stats.clear();
+      stats.assertNotDroppedInRange(0, TEST_PATH_LEN);
+
+      // read file
+      fis = fs.open(new Path(TEST_PATH));
+      byte buf[] = new byte[17];
+      fis.readFully(4096, buf, 0, buf.length);
+
+      // we should not have dropped anything because of the small read.
+      stats = tracker.getStats(fadvisedFileName);
+      stats.assertNotDroppedInRange(0, TEST_PATH_LEN - WRITE_PACKET_SIZE);
+    } finally {
+      IOUtils.cleanup(null, fis);
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
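+  /**
+   * Write and then read a file with drop-behind explicitly disabled on the
+   * client streams; no fadvise calls should be recorded for the block file.
+   */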
+  @Test(timeout=120000)
+  public void testNoFadviseAfterWriteThenRead() throws Exception {
+    // start a cluster
+    LOG.info("testNoFadviseAfterWriteThenRead");
+    tracker.clear();
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    String TEST_PATH = "/test";
+    int TEST_PATH_LEN = MAX_TEST_FILE_LEN;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+          .build();
+      cluster.waitActive();
+      FileSystem fs = cluster.getFileSystem();
+
+      // create new file
+      createHdfsFile(fs, new Path(TEST_PATH), TEST_PATH_LEN, false);
+      // verify that we did not drop anything from the cache during file creation.
+      ExtendedBlock block = cluster.getNameNode().getRpcServer().getBlockLocations(
+          TEST_PATH, 0, Long.MAX_VALUE).get(0).getBlock();
+      String fadvisedFileName = MiniDFSCluster.getBlockFile(0, block).getName();
+      Stats stats = tracker.getStats(fadvisedFileName);
+      Assert.assertNull(stats);
+
+      // read file
+      readHdfsFile(fs, new Path(TEST_PATH), Long.MAX_VALUE, false);
+      // verify that we did not drop anything from the cache during the read.
+      Assert.assertNull(stats);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+}