@@ -23,6 +23,7 @@ import static org.junit.Assert.*;
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -37,6 +38,8 @@ import java.security.PrivilegedExceptionAction;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.logging.impl.Log4JLogger;
@@ -44,14 +47,17 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSInputStream;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
+import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
@@ -60,6 +66,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.log4j.Level;
@@ -68,6 +75,8 @@ import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
 import org.junit.Test;
 
+import com.google.common.collect.Sets;
+
 /**
  * A JUnit test for doing fsck
  */
@@ -84,6 +93,9 @@ public class TestFsck {
       "cmd=fsck\\ssrc=\\/\\sdst=null\\s" +
       "perm=null");
 
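+  // Matches the "Corrupt blocks:" line of fsck's report and captures the count.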
+  static final Pattern numCorruptBlocksPattern = Pattern.compile(
+      ".*Corrupt blocks:\t\t([0123456789]*).*");
+
   static String runFsck(Configuration conf, int expectedErrCode,
                         boolean checkErrorCode,String... path)
                         throws Exception {
@@ -95,6 +107,7 @@ public class TestFsck {
       assertEquals(expectedErrCode, errCode);
     }
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
+    FSImage.LOG.error("OUTPUT = " + bStream.toString());
     return bStream.toString();
   }
 
@@ -246,6 +259,192 @@ public class TestFsck {
     }
   }
 
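+  /**
+   * Test that fsck -move salvages the readable blocks of corrupted files
+   * into /lost+found, and that a subsequent fsck -delete leaves the
+   * filesystem healthy.
+   */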
+  @Test
+  public void testFsckMove() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    final int DFS_BLOCK_SIZE = 1024;
+    final int NUM_DATANODES = 4;
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE);
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
+    conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
+    DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3,
+        (5 * DFS_BLOCK_SIZE) + (DFS_BLOCK_SIZE - 1), 5 * DFS_BLOCK_SIZE);
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).
+          numDataNodes(NUM_DATANODES).build();
+      String topDir = "/srcdat";
+      fs = cluster.getFileSystem();
+      cluster.waitActive();
+      util.createFiles(fs, topDir);
+      util.waitReplication(fs, topDir, (short)3);
+      String outStr = runFsck(conf, 0, true, "/");
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+      DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost",
+          cluster.getNameNodePort()), conf);
+      String fileNames[] = util.getFileNames(topDir);
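+      // Corrupt a different subset of each file's blocks, from a single
+      // block up to runs of several consecutive blocks.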
+      CorruptedTestFile ctFiles[] = new CorruptedTestFile[] {
+        new CorruptedTestFile(fileNames[0], Sets.newHashSet(0),
+          dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE),
+        new CorruptedTestFile(fileNames[1], Sets.newHashSet(2, 3),
+          dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE),
+        new CorruptedTestFile(fileNames[2], Sets.newHashSet(4),
+          dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE),
+        new CorruptedTestFile(fileNames[3], Sets.newHashSet(0, 1, 2, 3),
+          dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE),
+        new CorruptedTestFile(fileNames[4], Sets.newHashSet(1, 2, 3, 4),
+          dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE)
+      };
+      int totalMissingBlocks = 0;
+      for (CorruptedTestFile ctFile : ctFiles) {
+        totalMissingBlocks += ctFile.getTotalMissingBlocks();
+      }
+      for (CorruptedTestFile ctFile : ctFiles) {
+        ctFile.removeBlocks();
+      }
+      // Wait for fsck to discover all the missing blocks
+      while (true) {
+        outStr = runFsck(conf, 1, false, "/");
+        String numCorrupt = null;
+        for (String line : outStr.split("\n")) {
+          Matcher m = numCorruptBlocksPattern.matcher(line);
+          if (m.matches()) {
+            numCorrupt = m.group(1);
+            break;
+          }
+        }
+        if (numCorrupt == null) {
+          throw new IOException("failed to find number of corrupt " +
+              "blocks in fsck output.");
+        }
+        if (numCorrupt.equals(Integer.toString(totalMissingBlocks))) {
+          assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
+          break;
+        }
+        try {
+          Thread.sleep(100);
+        } catch (InterruptedException ignore) {
+        }
+      }
+
+      // Copy the non-corrupt blocks of the corrupted files to lost+found.
+      outStr = runFsck(conf, 1, false, "/", "-move");
+      assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
+
+      // Make sure that we properly copied the block files from the DataNodes
+      // to lost+found
+      for (CorruptedTestFile ctFile : ctFiles) {
+        ctFile.checkSalvagedRemains();
+      }
+
+      // Fix the filesystem by deleting the corrupted files
+      outStr = runFsck(conf, 1, true, "/", "-delete");
+      assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
+
+      // Check to make sure we have a healthy filesystem
+      outStr = runFsck(conf, 0, true, "/");
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+      util.cleanup(fs, topDir);
+    } finally {
+      if (fs != null) {try{fs.close();} catch(Exception e){}}
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+
+  static private class CorruptedTestFile {
+    final private String name;
+    final private Set<Integer> blocksToCorrupt;
+    final private DFSClient dfsClient;
+    final private int numDataNodes;
+    final private int blockSize;
+    final private byte[] initialContents;
+
+    public CorruptedTestFile(String name, Set<Integer> blocksToCorrupt,
+        DFSClient dfsClient, int numDataNodes, int blockSize)
+        throws IOException {
+      this.name = name;
+      this.blocksToCorrupt = blocksToCorrupt;
+      this.dfsClient = dfsClient;
+      this.numDataNodes = numDataNodes;
+      this.blockSize = blockSize;
+      this.initialContents = cacheInitialContents();
+    }
+
+    public int getTotalMissingBlocks() {
+      return blocksToCorrupt.size();
+    }
+
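+    // Read and cache the file's entire contents before any blocks are
+    // deleted, so checkSalvagedRemains() can verify the salvaged copy
+    // byte-for-byte.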
+    private byte[] cacheInitialContents() throws IOException {
+      HdfsFileStatus status = dfsClient.getFileInfo(name);
+      byte[] content = new byte[(int)status.getLen()];
+      DFSInputStream in = null;
+      try {
+        in = dfsClient.open(name);
+        IOUtils.readFully(in, content, 0, content.length);
+      } finally {
+        IOUtils.cleanup(null, in);
+      }
+      return content;
+    }
+
+    public void removeBlocks() throws AccessControlException,
+        FileNotFoundException, UnresolvedLinkException, IOException {
+      for (int corruptIdx : blocksToCorrupt) {
+        // Corrupt a block by deleting it
+        ExtendedBlock block = dfsClient.getNamenode().getBlockLocations(
+            name, blockSize * corruptIdx, Long.MAX_VALUE).get(0).getBlock();
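+        // Delete the block's replica on every datanode that has one, so
+        // the block becomes truly missing from the cluster.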
+        for (int i = 0; i < numDataNodes; i++) {
+          File blockFile = MiniDFSCluster.getBlockFile(i, block);
+          if (blockFile != null && blockFile.exists()) {
+            assertTrue(blockFile.delete());
+          }
+        }
+      }
+    }
+
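+    /**
+     * Check that the non-corrupt blocks of this file were salvaged into
+     * /lost+found. fsck -move writes each contiguous run of readable
+     * blocks as a separate numbered "chain" file under the file's path
+     * inside /lost+found.
+     */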
+    public void checkSalvagedRemains() throws IOException {
+      int chainIdx = 0;
+      HdfsFileStatus status = dfsClient.getFileInfo(name);
+      long length = status.getLen();
+      int numBlocks = (int)((length + blockSize - 1) / blockSize);
+      DFSInputStream in = null;
+      byte[] blockBuffer = new byte[blockSize];
+
+      try {
+        for (int blockIdx = 0; blockIdx < numBlocks; blockIdx++) {
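+          // A missing block ends the current chain; the next readable
+          // block will begin a new chain file.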
+          if (blocksToCorrupt.contains(blockIdx)) {
+            if (in != null) {
+              in.close();
+              in = null;
+            }
+            continue;
+          }
+          if (in == null) {
+            in = dfsClient.open("/lost+found" + name + "/" + chainIdx);
+            chainIdx++;
+          }
+          int len = blockBuffer.length;
+          if (blockIdx == (numBlocks - 1)) {
+            // The last block might not be full-length
+            len = (int)(in.getFileLength() % blockSize);
+            if (len == 0) len = blockBuffer.length;
+          }
+          IOUtils.readFully(in, blockBuffer, 0, len);
+          int startIdx = blockIdx * blockSize;
+          for (int i = 0; i < len; i++) {
+            if (initialContents[startIdx + i] != blockBuffer[i]) {
+              throw new IOException("salvaged file " + name + " differed " +
+                  "from what we expected on block " + blockIdx);
+            }
+          }
+        }
+      } finally {
+        IOUtils.cleanup(null, in);
+      }
+    }
+  }
+
   @Test
   public void testFsckMoveAndDelete() throws Exception {
     final int MAX_MOVE_TRIES = 5;