|
@@ -56,8 +56,17 @@ import org.apache.hadoop.security.UserGroupInformation;
|
|
* root path. The following abnormal conditions are detected and handled:</p>
|
|
* root path. The following abnormal conditions are detected and handled:</p>
|
|
* <ul>
|
|
* <ul>
|
|
* <li>files with blocks that are completely missing from all datanodes.<br/>
|
|
* <li>files with blocks that are completely missing from all datanodes.<br/>
|
|
- * <li>files with under-replicated or over-replicated blocks</li>
|
|
|
|
- * </ul>
|
|
|
|
|
|
+ * In this case the tool can perform one of the following actions:
|
|
|
|
+ * <ul>
|
|
|
|
+ * <li>none ({@link #FIXING_NONE})</li>
|
|
|
|
+ * <li>move corrupted files to /lost+found directory on DFS
|
|
|
|
+ * ({@link #FIXING_MOVE}). Remaining data blocks are saved as
|
|
|
|
+ * block chains, representing the longest consecutive series of valid blocks.</li>
|
|
|
|
+ * <li>delete corrupted files ({@link #FIXING_DELETE})</li>
|
|
|
|
+ * </ul>
|
|
|
|
+ * </li>
|
|
|
|
+ * <li>detect files with under-replicated or over-replicated blocks</li>
|
|
|
|
+ * </ul>
|
|
* Additionally, the tool collects a detailed overall DFS statistics, and
|
|
* Additionally, the tool collects a detailed overall DFS statistics, and
|
|
* optionally can print detailed statistics on block locations and replication
|
|
* optionally can print detailed statistics on block locations and replication
|
|
* factors of each file.
|
|
* factors of each file.
|
|
@@ -71,6 +80,13 @@ public class NamenodeFsck {
|
|
public static final String NONEXISTENT_STATUS = "does not exist";
|
|
public static final String NONEXISTENT_STATUS = "does not exist";
|
|
public static final String FAILURE_STATUS = "FAILED";
|
|
public static final String FAILURE_STATUS = "FAILED";
|
|
|
|
|
|
|
|
+ /** Don't attempt any fixing. */
|
|
|
|
+ public static final int FIXING_NONE = 0;
|
|
|
|
+ /** Move corrupted files to /lost+found. */
|
|
|
|
+ public static final int FIXING_MOVE = 1;
|
|
|
|
+ /** Delete corrupted files. */
|
|
|
|
+ public static final int FIXING_DELETE = 2;
|
|
|
|
+
|
|
private final NameNode namenode;
|
|
private final NameNode namenode;
|
|
private final NetworkTopology networktopology;
|
|
private final NetworkTopology networktopology;
|
|
private final int totalDatanodes;
|
|
private final int totalDatanodes;
|
|
@@ -85,21 +101,7 @@ public class NamenodeFsck {
|
|
private boolean showBlocks = false;
|
|
private boolean showBlocks = false;
|
|
private boolean showLocations = false;
|
|
private boolean showLocations = false;
|
|
private boolean showRacks = false;
|
|
private boolean showRacks = false;
|
|
-
|
|
|
|
- /**
|
|
|
|
- * True if the user specified the -move option.
|
|
|
|
- *
|
|
|
|
- * Whe this option is in effect, we will copy salvaged blocks into the lost
|
|
|
|
- * and found. */
|
|
|
|
- private boolean doMove = false;
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * True if the user specified the -delete option.
|
|
|
|
- *
|
|
|
|
- * Whe this option is in effect, we will delete corrupted files.
|
|
|
|
- */
|
|
|
|
- private boolean doDelete = false;
|
|
|
|
-
|
|
|
|
|
|
+ private int fixing = FIXING_NONE;
|
|
private String path = "/";
|
|
private String path = "/";
|
|
|
|
|
|
private final Configuration conf;
|
|
private final Configuration conf;
|
|
@@ -131,8 +133,8 @@ public class NamenodeFsck {
|
|
for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
|
|
for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
|
|
String key = it.next();
|
|
String key = it.next();
|
|
if (key.equals("path")) { this.path = pmap.get("path")[0]; }
|
|
if (key.equals("path")) { this.path = pmap.get("path")[0]; }
|
|
- else if (key.equals("move")) { this.doMove = true; }
|
|
|
|
- else if (key.equals("delete")) { this.doDelete = true; }
|
|
|
|
|
|
+ else if (key.equals("move")) { this.fixing = FIXING_MOVE; }
|
|
|
|
+ else if (key.equals("delete")) { this.fixing = FIXING_DELETE; }
|
|
else if (key.equals("files")) { this.showFiles = true; }
|
|
else if (key.equals("files")) { this.showFiles = true; }
|
|
else if (key.equals("blocks")) { this.showBlocks = true; }
|
|
else if (key.equals("blocks")) { this.showBlocks = true; }
|
|
else if (key.equals("locations")) { this.showLocations = true; }
|
|
else if (key.equals("locations")) { this.showLocations = true; }
|
|
@@ -216,11 +218,8 @@ public class NamenodeFsck {
|
|
long fileLen = file.getLen();
|
|
long fileLen = file.getLen();
|
|
// Get block locations without updating the file access time
|
|
// Get block locations without updating the file access time
|
|
// and without block access tokens
|
|
// and without block access tokens
|
|
- LocatedBlocks blocks = null;
|
|
|
|
- if (fileLen >=0) {
|
|
|
|
- blocks = namenode.getNamesystem().getBlockLocations(path, 0,
|
|
|
|
|
|
+ LocatedBlocks blocks = namenode.getNamesystem().getBlockLocations(path, 0,
|
|
fileLen, false, false);
|
|
fileLen, false, false);
|
|
- }
|
|
|
|
if (blocks == null) { // the file is deleted
|
|
if (blocks == null) { // the file is deleted
|
|
return;
|
|
return;
|
|
}
|
|
}
|
|
@@ -329,20 +328,16 @@ public class NamenodeFsck {
|
|
+ " blocks of total size " + missize + " B.");
|
|
+ " blocks of total size " + missize + " B.");
|
|
}
|
|
}
|
|
res.corruptFiles++;
|
|
res.corruptFiles++;
|
|
- try {
|
|
|
|
- if (doMove) {
|
|
|
|
- if (!isOpen) {
|
|
|
|
- copyBlocksToLostFound(parent, file, blocks);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if (doDelete) {
|
|
|
|
- if (!isOpen) {
|
|
|
|
- LOG.warn("\n - deleting corrupted file " + path);
|
|
|
|
- namenode.delete(path, true);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- } catch (IOException e) {
|
|
|
|
- LOG.error("error processing " + path + ": " + e.toString());
|
|
|
|
|
|
+ switch(fixing) {
|
|
|
|
+ case FIXING_NONE:
|
|
|
|
+ break;
|
|
|
|
+ case FIXING_MOVE:
|
|
|
|
+ if (!isOpen)
|
|
|
|
+ lostFoundMove(parent, file, blocks);
|
|
|
|
+ break;
|
|
|
|
+ case FIXING_DELETE:
|
|
|
|
+ if (!isOpen)
|
|
|
|
+ namenode.delete(path, true);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (showFiles) {
|
|
if (showFiles) {
|
|
@@ -357,8 +352,8 @@ public class NamenodeFsck {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private void copyBlocksToLostFound(String parent, HdfsFileStatus file,
|
|
|
|
- LocatedBlocks blocks) throws IOException {
|
|
|
|
|
|
+ private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks)
|
|
|
|
+ throws IOException {
|
|
final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
|
|
final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
|
|
try {
|
|
try {
|
|
if (!lfInited) {
|
|
if (!lfInited) {
|
|
@@ -391,10 +386,12 @@ public class NamenodeFsck {
|
|
}
|
|
}
|
|
if (fos == null) {
|
|
if (fos == null) {
|
|
fos = dfs.create(target + "/" + chain, true);
|
|
fos = dfs.create(target + "/" + chain, true);
|
|
- if (fos != null)
|
|
|
|
- chain++;
|
|
|
|
|
|
+ if (fos != null) chain++;
|
|
else {
|
|
else {
|
|
- throw new IOException(errmsg + ": could not store chain " + chain);
|
|
|
|
|
|
+ LOG.warn(errmsg + ": could not store chain " + chain);
|
|
|
|
+ // perhaps we should bail out here...
|
|
|
|
+ // return;
|
|
|
|
+ continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -411,7 +408,8 @@ public class NamenodeFsck {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (fos != null) fos.close();
|
|
if (fos != null) fos.close();
|
|
- LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found");
|
|
|
|
|
|
+ LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found");
|
|
|
|
+ dfs.delete(fullName, true);
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
e.printStackTrace();
|
|
LOG.warn(errmsg + ": " + e.getMessage());
|
|
LOG.warn(errmsg + ": " + e.getMessage());
|