Browse Source

MAPREDUCE-4246. Failure in deleting user directories in secure Hadoop. Contributed by Benoy Antony.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.22@1346270 13f79535-47bb-0310-9956-ffa450edef68
Konstantin Shvachko 13 years ago
parent
commit
54eeeb4e0a

+ 3 - 0
mapreduce/CHANGES.txt

@@ -64,6 +64,9 @@ Release 0.22.1 - Unreleased
 
     MAPREDUCE-4314. Synchronization in JvmManager. (Benoy Antony via shv)
 
+    MAPREDUCE-4246. Failure in deleting user directories in secure Hadoop.
+    (Benoy Antony via shv)
+
 Release 0.22.0 - 2011-11-29
 
   INCOMPATIBLE CHANGES

+ 2 - 2
mapreduce/src/java/org/apache/hadoop/mapred/CleanupQueue.java

@@ -29,7 +29,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.UserGroupInformation;
 
-class CleanupQueue {
+public class CleanupQueue {
 
   public static final Log LOG =
     LogFactory.getLog(CleanupQueue.class);
@@ -53,7 +53,7 @@ class CleanupQueue {
   /**
    * Contains info related to the path of the file/dir to be deleted
    */
-  static class PathDeletionContext {
+  public static class PathDeletionContext {
     final Path fullPath;// full path of file or dir
     final Configuration conf;
 

+ 6 - 6
mapreduce/src/java/org/apache/hadoop/mapred/LinuxTaskController.java

@@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalDirAllocator;
@@ -31,10 +33,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.StringUtils;
-import static org.apache.hadoop.mapred.TaskController.Signal;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 
 /**
  * A {@link TaskController} that runs the task JVMs as the user 
@@ -262,8 +260,10 @@ class LinuxTaskController extends TaskController {
                    user,
                    Integer.toString(Commands.DELETE_AS_USER.getValue()),
                    subDir));
-    for (String baseDir : baseDirs) {
-      command.add(baseDir);
+    if (baseDirs != null){
+      for (String baseDir : baseDirs) {
+        command.add(baseDir);
+      }
     }
     String[] commandArray = command.toArray(new String[0]);
     ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);

+ 2 - 2
mapreduce/src/java/org/apache/hadoop/mapred/TaskController.java

@@ -173,13 +173,13 @@ public abstract class TaskController implements Configurable {
   public abstract void deleteLogAsUser(String user, 
                                        String subDir) throws IOException;
   
-  static class DeletionContext extends CleanupQueue.PathDeletionContext {
+  public static class DeletionContext extends CleanupQueue.PathDeletionContext {
     private TaskController controller;
     private boolean isLog;
     private String user;
     private String subDir;
     private String[] baseDirs;
-    DeletionContext(TaskController controller, boolean isLog, String user, 
+    public DeletionContext(TaskController controller, boolean isLog, String user, 
                     String subDir, String[] baseDirs) {
       super(null, null);
       this.controller = controller;

+ 12 - 13
mapreduce/src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -17,10 +17,12 @@
  */
  package org.apache.hadoop.mapred;
 
+import static org.apache.hadoop.mapred.QueueManager.toFullPropertyName;
+
 import java.io.DataOutputStream;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
@@ -57,8 +59,8 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.DF;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.DF;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -67,14 +69,13 @@ import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.http.HttpServer;
-import org.apache.hadoop.io.SecureIOUtils;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SecureIOUtils;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.mapred.CleanupQueue.PathDeletionContext;
-import org.apache.hadoop.mapred.TaskController.DeletionContext;
 import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus;
 import org.apache.hadoop.mapred.pipes.Submitter;
 import org.apache.hadoop.mapreduce.MRConfig;
@@ -89,16 +90,16 @@ import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
 import org.apache.hadoop.mapreduce.server.tasktracker.Localizer;
 import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
 import org.apache.hadoop.mapreduce.task.reduce.ShuffleHeader;
-import org.apache.hadoop.metrics.MetricsContext;
-import org.apache.hadoop.metrics.MetricsException;
-import org.apache.hadoop.metrics.MetricsRecord;
-import org.apache.hadoop.metrics.MetricsUtil;
-import org.apache.hadoop.metrics.Updater;
 import org.apache.hadoop.mapreduce.util.ConfigUtil;
 import org.apache.hadoop.mapreduce.util.MRAsyncDiskService;
 import org.apache.hadoop.mapreduce.util.MemoryCalculatorPlugin;
 import org.apache.hadoop.mapreduce.util.ProcfsBasedProcessTree;
 import org.apache.hadoop.mapreduce.util.ResourceCalculatorPlugin;
+import org.apache.hadoop.metrics.MetricsContext;
+import org.apache.hadoop.metrics.MetricsException;
+import org.apache.hadoop.metrics.MetricsRecord;
+import org.apache.hadoop.metrics.MetricsUtil;
+import org.apache.hadoop.metrics.Updater;
 import org.apache.hadoop.net.DNS;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.Credentials;
@@ -107,13 +108,11 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authorize.PolicyProvider;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
-import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.DiskChecker;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.hadoop.util.RunJar;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.VersionInfo;
-import static org.apache.hadoop.mapred.QueueManager.toFullPropertyName;
 
 /*******************************************************
  * TaskTracker is a process that starts and tracks MR Tasks

+ 49 - 5
mapreduce/src/java/org/apache/hadoop/mapreduce/util/MRAsyncDiskService.java

@@ -26,15 +26,18 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.CleanupQueue;
+import org.apache.hadoop.mapred.CleanupQueue.PathDeletionContext;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.TaskController;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.AsyncDiskService;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
 
 /**
  * This class is a container of multiple thread pools, each for a volume,
@@ -56,10 +59,14 @@ public class MRAsyncDiskService {
   
   public static final Log LOG = LogFactory.getLog(MRAsyncDiskService.class);
   
+  boolean shouldBeSecure = UserGroupInformation.isSecurityEnabled();
+  
   AsyncDiskService asyncDiskService;
   
   TaskController taskController;
   
+  private CleanupQueue cleanupQueue;
+  
   public static final String TOBEDELETED = "toBeDeleted";
   
   /**
@@ -93,6 +100,8 @@ public class MRAsyncDiskService {
     
     this.taskController = taskController;
     
+    cleanupQueue = CleanupQueue.getInstance();
+    
     // Create one ThreadPool per volume
     for (int v = 0 ; v < volumes.length; v++) {
       // Create the root for file deletion
@@ -117,9 +126,13 @@ public class MRAsyncDiskService {
           throw new IOException("Cannot delete " + absoluteFilename
               + " because it's outside of " + volumes[v]);
         }
+        if (shouldBeSecure) {
+          deletePathsInSecureCluster(absoluteFilename, files[f]);     
+        }else {
         DeleteTask task = new DeleteTask(volumes[v], absoluteFilename,
             relative, files[f].getOwner());
         execute(volumes[v], task);
+        }
       }
     }
   }
@@ -300,12 +313,43 @@ public class MRAsyncDiskService {
       return false;
     }
     FileStatus status = localFileSystem.getFileStatus(target);
-    DeleteTask task = new DeleteTask(volume, pathName, newPathName, 
-                                     status.getOwner());
-    execute(volume, task);
+    
+    if (shouldBeSecure) {
+      deletePathsInSecureCluster(newPathName, status);     
+    }else {
+      DeleteTask task = new DeleteTask(volume, pathName, newPathName, 
+          status.getOwner());
+      execute(volume, task);
+    }
     return true;
   }
 
+  private void deletePathsInSecureCluster(String newPathName,
+      FileStatus status) throws FileNotFoundException, IOException {
+    // In a secure tasktracker, the subdirectories belong
+    // to different user
+    PathDeletionContext item = null;
+    
+    //iterate and queue subdirectories for cleanup
+    for(FileStatus subDirStatus: localFileSystem.listStatus(status.getPath())) {
+      String owner = subDirStatus.getOwner();
+      String path = subDirStatus.getPath().getName();
+      if (path.equals(owner)) {
+        //add it to the cleanup queue
+
+        item = new TaskController.DeletionContext(
+            taskController, false, owner, newPathName + Path.SEPARATOR_CHAR + path,
+            null);
+        cleanupQueue.addToQueue(item);
+      }
+    } 
+    //queue the parent directory  for cleanup
+    item = new TaskController.DeletionContext(
+        taskController, false, status.getOwner(), newPathName,
+        null);
+    cleanupQueue.addToQueue(item);
+  }
+
   /**
    * Move the path name on each volume to a temporary location and then 
    * delete them.