|
@@ -86,297 +86,6 @@ class JobSubmitter {
|
|
this.submitClient = submitClient;
|
|
this.submitClient = submitClient;
|
|
this.jtFs = submitFs;
|
|
this.jtFs = submitFs;
|
|
}
|
|
}
|
|
- /*
|
|
|
|
- * see if two file systems are the same or not.
|
|
|
|
- */
|
|
|
|
- private boolean compareFs(FileSystem srcFs, FileSystem destFs) {
|
|
|
|
- URI srcUri = srcFs.getUri();
|
|
|
|
- URI dstUri = destFs.getUri();
|
|
|
|
- if (srcUri.getScheme() == null) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- if (!srcUri.getScheme().equals(dstUri.getScheme())) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- String srcHost = srcUri.getHost();
|
|
|
|
- String dstHost = dstUri.getHost();
|
|
|
|
- if ((srcHost != null) && (dstHost != null)) {
|
|
|
|
- try {
|
|
|
|
- srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
|
|
|
|
- dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
|
|
|
|
- } catch(UnknownHostException ue) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- if (!srcHost.equals(dstHost)) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- } else if (srcHost == null && dstHost != null) {
|
|
|
|
- return false;
|
|
|
|
- } else if (srcHost != null && dstHost == null) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- //check for ports
|
|
|
|
- if (srcUri.getPort() != dstUri.getPort()) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- return true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // copies a file to the jobtracker filesystem and returns the path where it
|
|
|
|
- // was copied to
|
|
|
|
- private Path copyRemoteFiles(Path parentDir,
|
|
|
|
- Path originalPath, Configuration conf, short replication)
|
|
|
|
- throws IOException {
|
|
|
|
- //check if we do not need to copy the files
|
|
|
|
- // is jt using the same file system.
|
|
|
|
- // just checking for uri strings... doing no dns lookups
|
|
|
|
- // to see if the filesystems are the same. This is not optimal.
|
|
|
|
- // but avoids name resolution.
|
|
|
|
-
|
|
|
|
- FileSystem remoteFs = null;
|
|
|
|
- remoteFs = originalPath.getFileSystem(conf);
|
|
|
|
- if (compareFs(remoteFs, jtFs)) {
|
|
|
|
- return originalPath;
|
|
|
|
- }
|
|
|
|
- // this might have name collisions. copy will throw an exception
|
|
|
|
- //parse the original path to create new path
|
|
|
|
- Path newPath = new Path(parentDir, originalPath.getName());
|
|
|
|
- FileUtil.copy(remoteFs, originalPath, jtFs, newPath, false, conf);
|
|
|
|
- jtFs.setReplication(newPath, replication);
|
|
|
|
- return newPath;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // configures -files, -libjars and -archives.
|
|
|
|
- private void copyAndConfigureFiles(Job job, Path submitJobDir,
|
|
|
|
- short replication) throws IOException {
|
|
|
|
- Configuration conf = job.getConfiguration();
|
|
|
|
- if (!(conf.getBoolean(Job.USED_GENERIC_PARSER, false))) {
|
|
|
|
- LOG.warn("Hadoop command-line option parsing not performed. " +
|
|
|
|
- "Implement the Tool interface and execute your application " +
|
|
|
|
- "with ToolRunner to remedy this.");
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // get all the command line arguments passed in by the user conf
|
|
|
|
- String files = conf.get("tmpfiles");
|
|
|
|
- String libjars = conf.get("tmpjars");
|
|
|
|
- String archives = conf.get("tmparchives");
|
|
|
|
- String jobJar = job.getJar();
|
|
|
|
-
|
|
|
|
- //
|
|
|
|
- // Figure out what fs the JobTracker is using. Copy the
|
|
|
|
- // job to it, under a temporary name. This allows DFS to work,
|
|
|
|
- // and under the local fs also provides UNIX-like object loading
|
|
|
|
- // semantics. (that is, if the job file is deleted right after
|
|
|
|
- // submission, we can still run the submission to completion)
|
|
|
|
- //
|
|
|
|
-
|
|
|
|
- // Create a number of filenames in the JobTracker's fs namespace
|
|
|
|
- LOG.debug("default FileSystem: " + jtFs.getUri());
|
|
|
|
- if (jtFs.exists(submitJobDir)) {
|
|
|
|
- throw new IOException("Not submitting job. Job directory " + submitJobDir
|
|
|
|
- +" already exists!! This is unexpected.Please check what's there in" +
|
|
|
|
- " that directory");
|
|
|
|
- }
|
|
|
|
- submitJobDir = jtFs.makeQualified(submitJobDir);
|
|
|
|
- submitJobDir = new Path(submitJobDir.toUri().getPath());
|
|
|
|
- FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
|
|
|
|
- FileSystem.mkdirs(jtFs, submitJobDir, mapredSysPerms);
|
|
|
|
- Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir);
|
|
|
|
- Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir);
|
|
|
|
- Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir);
|
|
|
|
- // add all the command line files/ jars and archive
|
|
|
|
- // first copy them to jobtrackers filesystem
|
|
|
|
-
|
|
|
|
- if (files != null) {
|
|
|
|
- FileSystem.mkdirs(jtFs, filesDir, mapredSysPerms);
|
|
|
|
- String[] fileArr = files.split(",");
|
|
|
|
- for (String tmpFile: fileArr) {
|
|
|
|
- URI tmpURI = null;
|
|
|
|
- try {
|
|
|
|
- tmpURI = new URI(tmpFile);
|
|
|
|
- } catch (URISyntaxException e) {
|
|
|
|
- throw new IllegalArgumentException(e);
|
|
|
|
- }
|
|
|
|
- Path tmp = new Path(tmpURI);
|
|
|
|
- Path newPath = copyRemoteFiles(filesDir, tmp, conf, replication);
|
|
|
|
- try {
|
|
|
|
- URI pathURI = getPathURI(newPath, tmpURI.getFragment());
|
|
|
|
- DistributedCache.addCacheFile(pathURI, conf);
|
|
|
|
- } catch(URISyntaxException ue) {
|
|
|
|
- //should not throw a uri exception
|
|
|
|
- throw new IOException("Failed to create uri for " + tmpFile, ue);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (libjars != null) {
|
|
|
|
- FileSystem.mkdirs(jtFs, libjarsDir, mapredSysPerms);
|
|
|
|
- String[] libjarsArr = libjars.split(",");
|
|
|
|
- for (String tmpjars: libjarsArr) {
|
|
|
|
- Path tmp = new Path(tmpjars);
|
|
|
|
- Path newPath = copyRemoteFiles(libjarsDir, tmp, conf, replication);
|
|
|
|
- DistributedCache.addFileToClassPath(
|
|
|
|
- new Path(newPath.toUri().getPath()), conf);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (archives != null) {
|
|
|
|
- FileSystem.mkdirs(jtFs, archivesDir, mapredSysPerms);
|
|
|
|
- String[] archivesArr = archives.split(",");
|
|
|
|
- for (String tmpArchives: archivesArr) {
|
|
|
|
- URI tmpURI;
|
|
|
|
- try {
|
|
|
|
- tmpURI = new URI(tmpArchives);
|
|
|
|
- } catch (URISyntaxException e) {
|
|
|
|
- throw new IllegalArgumentException(e);
|
|
|
|
- }
|
|
|
|
- Path tmp = new Path(tmpURI);
|
|
|
|
- Path newPath = copyRemoteFiles(archivesDir, tmp, conf,
|
|
|
|
- replication);
|
|
|
|
- try {
|
|
|
|
- URI pathURI = getPathURI(newPath, tmpURI.getFragment());
|
|
|
|
- DistributedCache.addCacheArchive(pathURI, conf);
|
|
|
|
- } catch(URISyntaxException ue) {
|
|
|
|
- //should not throw an uri excpetion
|
|
|
|
- throw new IOException("Failed to create uri for " + tmpArchives, ue);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (jobJar != null) { // copy jar to JobTracker's fs
|
|
|
|
- // use jar name if job is not named.
|
|
|
|
- if ("".equals(job.getJobName())){
|
|
|
|
- job.setJobName(new Path(jobJar).getName());
|
|
|
|
- }
|
|
|
|
- Path jobJarPath = new Path(jobJar);
|
|
|
|
- URI jobJarURI = jobJarPath.toUri();
|
|
|
|
- // If the job jar is already in a global fs,
|
|
|
|
- // we don't need to copy it from local fs
|
|
|
|
- if ( jobJarURI.getScheme() == null
|
|
|
|
- || jobJarURI.getScheme().equals("file")) {
|
|
|
|
- copyJar(jobJarPath, JobSubmissionFiles.getJobJar(submitJobDir),
|
|
|
|
- replication);
|
|
|
|
- job.setJar(JobSubmissionFiles.getJobJar(submitJobDir).toString());
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- LOG.warn("No job jar file set. User classes may not be found. "+
|
|
|
|
- "See Job or Job#setJar(String).");
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- addLog4jToDistributedCache(job, submitJobDir);
|
|
|
|
-
|
|
|
|
- // set the timestamps of the archives and files
|
|
|
|
- // set the public/private visibility of the archives and files
|
|
|
|
- ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(conf);
|
|
|
|
- // get DelegationToken for cached file
|
|
|
|
- ClientDistributedCacheManager.getDelegationTokens(conf, job
|
|
|
|
- .getCredentials());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // copy user specified log4j.property file in local
|
|
|
|
- // to HDFS with putting on distributed cache and adding its parent directory
|
|
|
|
- // to classpath.
|
|
|
|
- @SuppressWarnings("deprecation")
|
|
|
|
- private void copyLog4jPropertyFile(Job job, Path submitJobDir,
|
|
|
|
- short replication) throws IOException {
|
|
|
|
- Configuration conf = job.getConfiguration();
|
|
|
|
-
|
|
|
|
- String file = validateFilePath(
|
|
|
|
- conf.get(MRJobConfig.MAPREDUCE_JOB_LOG4J_PROPERTIES_FILE), conf);
|
|
|
|
- LOG.debug("default FileSystem: " + jtFs.getUri());
|
|
|
|
- FsPermission mapredSysPerms =
|
|
|
|
- new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
|
|
|
|
- if (!jtFs.exists(submitJobDir)) {
|
|
|
|
- throw new IOException("Cannot find job submission directory! "
|
|
|
|
- + "It should just be created, so something wrong here.");
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- Path fileDir = JobSubmissionFiles.getJobLog4jFile(submitJobDir);
|
|
|
|
-
|
|
|
|
- // first copy local log4j.properties file to HDFS under submitJobDir
|
|
|
|
- if (file != null) {
|
|
|
|
- FileSystem.mkdirs(jtFs, fileDir, mapredSysPerms);
|
|
|
|
- URI tmpURI = null;
|
|
|
|
- try {
|
|
|
|
- tmpURI = new URI(file);
|
|
|
|
- } catch (URISyntaxException e) {
|
|
|
|
- throw new IllegalArgumentException(e);
|
|
|
|
- }
|
|
|
|
- Path tmp = new Path(tmpURI);
|
|
|
|
- Path newPath = copyRemoteFiles(fileDir, tmp, conf, replication);
|
|
|
|
- DistributedCache.addFileToClassPath(new Path(newPath.toUri().getPath()), conf);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * takes input as a path string for file and verifies if it exist.
|
|
|
|
- * It defaults for file:/// if the files specified do not have a scheme.
|
|
|
|
- * it returns the paths uri converted defaulting to file:///.
|
|
|
|
- * So an input of /home/user/file1 would return file:///home/user/file1
|
|
|
|
- * @param file
|
|
|
|
- * @param conf
|
|
|
|
- * @return
|
|
|
|
- */
|
|
|
|
- private String validateFilePath(String file, Configuration conf)
|
|
|
|
- throws IOException {
|
|
|
|
- if (file == null) {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
- if (file.isEmpty()) {
|
|
|
|
- throw new IllegalArgumentException("File name can't be empty string");
|
|
|
|
- }
|
|
|
|
- String finalPath;
|
|
|
|
- URI pathURI;
|
|
|
|
- try {
|
|
|
|
- pathURI = new URI(file);
|
|
|
|
- } catch (URISyntaxException e) {
|
|
|
|
- throw new IllegalArgumentException(e);
|
|
|
|
- }
|
|
|
|
- Path path = new Path(pathURI);
|
|
|
|
- FileSystem localFs = FileSystem.getLocal(conf);
|
|
|
|
- if (pathURI.getScheme() == null) {
|
|
|
|
- //default to the local file system
|
|
|
|
- //check if the file exists or not first
|
|
|
|
- if (!localFs.exists(path)) {
|
|
|
|
- throw new FileNotFoundException("File " + file + " does not exist.");
|
|
|
|
- }
|
|
|
|
- finalPath = path.makeQualified(localFs.getUri(),
|
|
|
|
- localFs.getWorkingDirectory()).toString();
|
|
|
|
- }
|
|
|
|
- else {
|
|
|
|
- // check if the file exists in this file system
|
|
|
|
- // we need to recreate this filesystem object to copy
|
|
|
|
- // these files to the file system ResourceManager is running
|
|
|
|
- // on.
|
|
|
|
- FileSystem fs = path.getFileSystem(conf);
|
|
|
|
- if (!fs.exists(path)) {
|
|
|
|
- throw new FileNotFoundException("File " + file + " does not exist.");
|
|
|
|
- }
|
|
|
|
- finalPath = path.makeQualified(fs.getUri(),
|
|
|
|
- fs.getWorkingDirectory()).toString();
|
|
|
|
- }
|
|
|
|
- return finalPath;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private URI getPathURI(Path destPath, String fragment)
|
|
|
|
- throws URISyntaxException {
|
|
|
|
- URI pathURI = destPath.toUri();
|
|
|
|
- if (pathURI.getFragment() == null) {
|
|
|
|
- if (fragment == null) {
|
|
|
|
- pathURI = new URI(pathURI.toString() + "#" + destPath.getName());
|
|
|
|
- } else {
|
|
|
|
- pathURI = new URI(pathURI.toString() + "#" + fragment);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return pathURI;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private void copyJar(Path originalJarPath, Path submitJarFile,
|
|
|
|
- short replication) throws IOException {
|
|
|
|
- jtFs.copyFromLocalFile(originalJarPath, submitJarFile);
|
|
|
|
- jtFs.setReplication(submitJarFile, replication);
|
|
|
|
- jtFs.setPermission(submitJarFile, new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
|
|
|
|
- }
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
* configure the jobconf of the user with the command line options of
|
|
* configure the jobconf of the user with the command line options of
|
|
@@ -386,9 +95,8 @@ class JobSubmitter {
|
|
*/
|
|
*/
|
|
private void copyAndConfigureFiles(Job job, Path jobSubmitDir)
|
|
private void copyAndConfigureFiles(Job job, Path jobSubmitDir)
|
|
throws IOException {
|
|
throws IOException {
|
|
- Configuration conf = job.getConfiguration();
|
|
|
|
- short replication = (short)conf.getInt(Job.SUBMIT_REPLICATION, 10);
|
|
|
|
- copyAndConfigureFiles(job, jobSubmitDir, replication);
|
|
|
|
|
|
+ JobResourceUploader rUploader = new JobResourceUploader(jtFs);
|
|
|
|
+ rUploader.uploadFiles(job, jobSubmitDir);
|
|
|
|
|
|
// Get the working directory. If not set, sets it to filesystem working dir
|
|
// Get the working directory. If not set, sets it to filesystem working dir
|
|
// This code has been added so that working directory reset before running
|
|
// This code has been added so that working directory reset before running
|
|
@@ -396,8 +104,8 @@ class JobSubmitter {
|
|
// might use the public API JobConf#setWorkingDirectory to reset the working
|
|
// might use the public API JobConf#setWorkingDirectory to reset the working
|
|
// directory.
|
|
// directory.
|
|
job.getWorkingDirectory();
|
|
job.getWorkingDirectory();
|
|
-
|
|
|
|
}
|
|
}
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* Internal method for submitting jobs to the system.
|
|
* Internal method for submitting jobs to the system.
|
|
*
|
|
*
|
|
@@ -484,10 +192,7 @@ class JobSubmitter {
|
|
}
|
|
}
|
|
|
|
|
|
copyAndConfigureFiles(job, submitJobDir);
|
|
copyAndConfigureFiles(job, submitJobDir);
|
|
-
|
|
|
|
-
|
|
|
|
|
|
|
|
-
|
|
|
|
Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
|
|
Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
|
|
|
|
|
|
// Create the splits for the job
|
|
// Create the splits for the job
|
|
@@ -766,15 +471,4 @@ class JobSubmitter {
|
|
DistributedCache.addCacheArchive(uri, conf);
|
|
DistributedCache.addCacheArchive(uri, conf);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
-
|
|
|
|
- private void addLog4jToDistributedCache(Job job,
|
|
|
|
- Path jobSubmitDir) throws IOException {
|
|
|
|
- Configuration conf = job.getConfiguration();
|
|
|
|
- String log4jPropertyFile =
|
|
|
|
- conf.get(MRJobConfig.MAPREDUCE_JOB_LOG4J_PROPERTIES_FILE, "");
|
|
|
|
- if (!log4jPropertyFile.isEmpty()) {
|
|
|
|
- short replication = (short)conf.getInt(Job.SUBMIT_REPLICATION, 10);
|
|
|
|
- copyLog4jPropertyFile(job, jobSubmitDir, replication);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
}
|
|
}
|