AMBARI-8859. Webhcat Hive2Server and some others start takes too long due to a lot of hdfs operations, Part 1 (aonishuk)

Andrew Onishuk, 10 years ago
commit ad9e720270

+ 86 - 0
contrib/fast-hdfs-resource/pom.xml

@@ -0,0 +1,86 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>org.apache.ambari</groupId>
+  <artifactId>fast-hdfs-resource</artifactId>
+  <version>0.0.1-SNAPSHOT</version>
+  <packaging>jar</packaging>
+
+  <name>fast-hdfs-resource</name>
+  <url>http://maven.apache.org</url>
+  <repositories>
+    <repository>
+      <id>hdp.internal</id>
+      <url>http://repo1.maven.org/maven2</url>
+    </repository>
+  </repositories>
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-tools</artifactId>
+      <version>1.2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>1.2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.gson</groupId>
+      <artifactId>gson</artifactId>
+      <version>2.2.2</version>
+    </dependency>
+  </dependencies>
+
+
+  <!-- Create executable jar with the application entry point -->
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>2.3</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer
+                  implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <mainClass>org.apache.ambari.fast_hdfs_resource.Runner
+                  </mainClass>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>

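Build note (a minimal sketch, not part of the diff, assuming stock Maven defaults): the shade plugin configured above replaces the default jar and writes Runner into the manifest as Main-Class, so the artifact under target/ can be launched directly with "hadoop jar".

  cd contrib/fast-hdfs-resource
  mvn -q clean package
  ls target/fast-hdfs-resource-0.0.1-SNAPSHOT.jar    # shaded, executable jar
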
+ 57 - 0
contrib/fast-hdfs-resource/resources/example.json

@@ -0,0 +1,57 @@
+[
+{
+	"target":"/tmp/some999",
+	"type":"directory",
+	"action":"delete"
+},
+{
+	"target":"/tmp/some999/more/dirs/for/recursive/tests",
+	"type":"directory",
+	"action":"create"
+},
+{
+	"target":"/tmp/some999/more/dirs/for/recursive/tests/file_empty.txt",
+	"type":"file",
+	"action":"create"
+},
+{
+	"target":"/tmp/some999",
+	"type":"directory",
+	"action":"create",
+	"owner":"oozie"
+},
+{
+	"target":"/tmp/some999",
+	"type":"directory",
+	"action":"create",
+	"group":"hive"
+},
+{
+	"target":"/tmp/some999",
+	"type":"directory",
+	"action":"create",
+	"mode":"777"
+},
+{
+	"target":"/tmp/some999/more/dirs",
+	"type":"directory",
+	"action":"create",
+	"owner":"yarn",
+	"group":"mapred",
+	"recursiveChown":true,
+	"mode":"757",
+	"recursiveChmod":true
+},
+{
+	"source":"/tmp/my.txt",
+	"target":"/tmp/some999/my_file.txt",
+	"type":"file",
+	"action":"create"
+},
+{
+	"source":"/tmp/a",
+	"target":"/tmp/some999/a_dir",
+	"type":"directory",
+	"action":"create"
+}
+]

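Note: each JSON entry above maps onto the fields of Resource.java further down (target, type, action, owner, group, mode, recursiveChown, recursiveChmod). For comparison only, the recursive entry for /tmp/some999/more/dirs corresponds roughly to the following standalone commands, each of which would otherwise fork its own hadoop fs client:

  hadoop fs -mkdir -p /tmp/some999/more/dirs
  hadoop fs -chown -R yarn:mapred /tmp/some999/more/dirs
  hadoop fs -chmod -R 757 /tmp/some999/more/dirs
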
+ 46 - 0
contrib/fast-hdfs-resource/resources/test_perfomance.sh

@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Delete 2 dirs
+sudo -u hdfs hadoop fs -rm -r /tmp/some999
+sudo -u hdfs hadoop fs -rm -r /tmp/some888
+# Create
+sudo -u hdfs hadoop fs -mkdir -p /tmp/some999/more/dirs/for/recursive/tests
+# Create + permissions + owner
+sudo -u hdfs hadoop fs -mkdir -p /tmp/some888/more/dirs/for/recursive/tests
+sudo -u hdfs hadoop fs -chown hadoop:hadoop /tmp/some888/more/dirs/for/recursive/tests
+sudo -u hdfs hadoop fs -chmod 777 /tmp/some888/more/dirs/for/recursive/tests
+# Create empty nested dirs; set permissions/owner on the deepest dir
+sudo -u hdfs hadoop fs -mkdir -p /tmp/some888/and_more/and_dirs/_andfor/recursive/tests
+sudo -u hdfs hadoop fs -chmod 777 /tmp/some888/and_more/and_dirs/_andfor/recursive/tests
+sudo -u hdfs hadoop fs -chown hadoop:hadoop /tmp/some888/and_more/and_dirs/_andfor/recursive/tests
+# Create an empty file; set permissions/owner on it
+sudo -u hdfs hadoop fs -touchz /tmp/some888/file.txt
+sudo -u hdfs hadoop fs -chown hadoop:hadoop /tmp/some888/file.txt
+sudo -u hdfs hadoop fs -chmod 777 /tmp/some888/file.txt
+# Create an empty file in nested dirs; set permissions/owner on it
+sudo -u hdfs hadoop fs -touchz /tmp/some888/and_more/and_dirs/file2.txt
+sudo -u hdfs hadoop fs -chown hadoop:hadoop /tmp/some888/and_more/and_dirs/file2.txt
+sudo -u hdfs hadoop fs -chmod 777 /tmp/some888/and_more/and_dirs/file2.txt
+# Recursive permissions and ownership
+sudo -u hdfs hadoop fs -chmod -R 700 /tmp/some888
+sudo -u hdfs hadoop fs -chown -R hive:hive /tmp/some999
+
+

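Note: the script above is the baseline this change targets, with one hadoop fs process per operation. The new jar drives the same kind of workload through a single JVM. A rough timing comparison might look like the sketch below; the namenode URI and the hdfs user are placeholders, and the arguments follow Runner.java further down:

  cd contrib/fast-hdfs-resource
  time sh resources/test_perfomance.sh
  time sudo -u hdfs hadoop jar target/fast-hdfs-resource-0.0.1-SNAPSHOT.jar \
      resources/example.json hdfs://namenode.example.com:8020
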
+ 253 - 0
contrib/fast-hdfs-resource/src/main/java/org/apache/ambari/fast_hdfs_resource/Resource.java

@@ -0,0 +1,253 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ambari.fast_hdfs_resource;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.lang.System;
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
+/**
+ * Used to: 1) copy files/directories from the local FS to HDFS; 2) create empty
+ * files/directories in HDFS
+ */
+public class Resource {
+  private String source;
+  private String target;
+  private String type;
+  private String action;
+  private String owner;
+  private String group;
+  private String mode;
+  private boolean recursiveChown;
+  private boolean recursiveChmod;
+
+  public String getSource() {
+    return source;
+  }
+
+  public void setSource(String source) {
+    this.source = source;
+  }
+
+  public String getTarget() {
+    return target;
+  }
+
+  public void setTarget(String target) {
+    this.target = target;
+  }
+
+  public String getType() {
+    return type;
+  }
+
+  public void setType(String type) {
+    this.type = type;
+  }
+
+  public String getAction() {
+    return action;
+  }
+
+  public void setAction(String action) {
+    this.action = action;
+  }
+
+  public String getOwner() {
+    return owner;
+  }
+
+  public void setOwner(String owner) {
+    this.owner = owner;
+  }
+
+  public String getGroup() {
+    return group;
+  }
+
+  public void setGroup(String group) {
+    this.group = group;
+  }
+
+  public String getMode() {
+    return mode;
+  }
+
+  public void setMode(String mode) {
+    this.mode = mode;
+  }
+
+  public boolean isRecursiveChown() {
+    return recursiveChown;
+  }
+
+  public void setRecursiveChown(boolean recursiveChown) {
+    this.recursiveChown = recursiveChown;
+  }
+
+  public boolean isRecursiveChmod() {
+    return recursiveChmod;
+  }
+
+  public void setRecursiveChmod(boolean recursiveChmod) {
+    this.recursiveChmod = recursiveChmod;
+  }
+
+  @Override
+  public String toString() {
+    return "Resource [source=" + source + ", target=" + target + ", type="
+        + type + ", action=" + action + ", owner=" + owner + ", group=" + group
+        + ", mode=" + mode + ", recursiveChown=" + recursiveChown
+        + ", recursiveChmod=" + recursiveChmod + "]";
+  }
+
+  /*
+   * Check if parameters are correctly set
+   */
+  public static void checkResourceParameters(Resource resource,
+      DistributedFileSystem dfs) throws IllegalArgumentException, IOException {
+
+    ArrayList<String> actionsAvailable = new ArrayList<String>();
+    actionsAvailable.add("create");
+    actionsAvailable.add("delete");
+    ArrayList<String> typesAvailable = new ArrayList<String>();
+    typesAvailable.add("file");
+    typesAvailable.add("directory");
+
+    if (resource.getTarget() == null)
+      throw new IllegalArgumentException(
+          "Path to resource in HadoopFs must be filled.");
+
+    if (resource.getAction() == null
+        || !actionsAvailable.contains(resource.getAction()))
+      throw new IllegalArgumentException("Action is not supported.");
+
+    if (resource.getType() == null
+        || !typesAvailable.contains(resource.getType()))
+      throw new IllegalArgumentException("Type is not supported.");
+
+    // Check consistency for ("type":"file" == file in hadoop)
+    if (dfs.isFile(new Path(resource.getTarget()))
+        && !"file".equals(resource.getType()))
+      throw new IllegalArgumentException(
+          "Cannot create a file " + resource.getTarget() +
+              " because directory is present on the given path.");
+
+    // Check consistency for ("type":"directory" == directory in hadoop)
+    if (dfs.isDirectory(new Path(resource.getTarget()))
+        && !"directory".equals(resource.getType()))
+      throw new IllegalArgumentException(
+          "Cannot create a directory " + resource.getTarget() +
+              " because file is present on the given path.");
+
+  }
+
+  /*
+   * Create/copy resource - {type}
+   */
+  public static void createResource(Resource resource,
+      DistributedFileSystem dfs, Path pathHadoop) throws IOException {
+
+    boolean isCreate = resource.getSource() == null;
+
+    if (isCreate && resource.getType().equals("directory")) {
+      dfs.mkdirs(pathHadoop); // empty dir(s)
+    } else if (isCreate && resource.getType().equals("file")) {
+      dfs.createNewFile(pathHadoop); // empty file
+    } else {
+      dfs.copyFromLocalFile(new Path(resource.getSource()), pathHadoop); // copy from local FS
+    }
+  }
+
+  /*
+   * Set permissions on resource - {mode}
+   */
+  public static void setMode(Resource resource,
+      DistributedFileSystem dfs, Path pathHadoop) throws IOException {
+
+    if (resource.getMode() != null) {
+      FsPermission permission = new FsPermission(resource.getMode());
+      dfs.setPermission(pathHadoop, permission);
+
+      // Recursive
+      if (resource.isRecursiveChmod()) {
+        // Get the list of sub-directories and files
+        HashSet<String> resultSet = new HashSet<String>();
+        resource.fillDirectoryList(dfs, resource.getTarget(), resultSet);
+
+        for (String path : resultSet) {
+          dfs.setPermission(new Path(path), permission);
+        }
+
+      }
+    }
+  }
+
+  /*
+   * Set owner on resource - {owner}
+   */
+  public static void setOwner(Resource resource, DistributedFileSystem dfs,
+      Path pathHadoop) throws IOException {
+
+    if (!(resource.getOwner() == null && resource.getGroup() == null)) {
+      dfs.setOwner(pathHadoop, resource.getOwner(), resource.getGroup());
+
+      // Recursive
+      if (resource.isRecursiveChown()) {
+        // Get the list of sub-directories and files
+        HashSet<String> resultSet = new HashSet<String>();
+        resource.fillDirectoryList(dfs, resource.getTarget(), resultSet);
+
+        for (String path : resultSet) {
+          dfs.setOwner(new Path(path), resource.getOwner(), resource.getGroup());
+        }
+
+      }
+    }
+
+  }
+
+  /*
+   * List all files and sub-directories recursively
+   */
+  public void fillDirectoryList(DistributedFileSystem dfs, String path,
+      HashSet<String> resultSet) throws IOException {
+
+    FileStatus[] fileStatus = dfs.listStatus(new Path(path));
+    if (fileStatus != null) {
+      // Go through all resources in directory
+      for (FileStatus fs : fileStatus) {
+        String pathToResource = path + "/" + fs.getPath().getName();
+
+        resultSet.add(pathToResource);
+
+        if (fs.isDir()) {
+          // recursive
+          fillDirectoryList(dfs, pathToResource, resultSet);
+        }
+      }
+    }
+  }
+}

+ 93 - 0
contrib/fast-hdfs-resource/src/main/java/org/apache/ambari/fast_hdfs_resource/Runner.java

@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ambari.fast_hdfs_resource;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
+import com.google.gson.Gson;
+
+public class Runner {
+  public static void main(String[] args)
+      throws IOException, URISyntaxException {
+    // 1 - Check arguments
+    if (args.length != 2) {
+      System.err.println("Incorrect number of arguments. Please provide:\n"
+          + "1) Path to file with json\n"
+          + "2) Path to Hadoop FS (fs.default.name form core-site.xml)\n"
+          + "Exiting...");
+      System.exit(1);
+    }
+
+    // 2 - Check if json-file exists
+    final String jsonFilePath = args[0];
+    final String fsName = args[1];
+    File file = new File(jsonFilePath);
+
+    if (!file.isFile()) {
+      System.err
+          .println("File " + jsonFilePath + " doesn't exist.\nExiting...");
+      System.exit(1);
+    }
+
+    Gson gson = new Gson();
+    Resource[] resources = null;
+    DistributedFileSystem dfs = null;
+
+    try {
+      dfs = new DistributedFileSystem();
+
+      // 3 - Load data from JSON
+      resources = (Resource[]) gson.fromJson(new FileReader(jsonFilePath),
+          Resource[].class);
+
+      // 4 - Connect to HDFS
+      dfs.initialize(new URI(fsName), new Configuration());
+
+      for (Resource resource : resources) {
+        System.out.println("Creating: " + resource);
+
+        Resource.checkResourceParameters(resource, dfs);
+
+        Path pathHadoop = new Path(resource.getTarget());
+        if (resource.getAction().equals("create")) {
+          // 5 - Create
+          Resource.createResource(resource, dfs, pathHadoop);
+          Resource.setMode(resource, dfs, pathHadoop);
+          Resource.setOwner(resource, dfs, pathHadoop);
+        } else if (resource.getAction().equals("delete")) {
+          // 6 - Delete
+          dfs.delete(pathHadoop, true);
+        }
+      }
+
+    } finally {
+      if (dfs != null) dfs.close();
+    }
+
+    System.out.println("All resources created.");
+  }
+
+}
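
Note: for orientation, a run over example.json prints one line per manifest entry (via Resource.toString(), with absent fields left at their defaults) followed by a summary. Illustrative output, abbreviated:

  Processing: Resource [source=null, target=/tmp/some999, type=directory, action=delete, owner=null, group=null, mode=null, recursiveChown=false, recursiveChmod=false]
  Processing: Resource [source=null, target=/tmp/some999/more/dirs/for/recursive/tests, type=directory, action=create, owner=null, group=null, mode=null, recursiveChown=false, recursiveChmod=false]
  ...
  All resources processed.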