
svn merge -c 1365839 from trunk for HDFS-3696. Set chunked streaming mode in WebHdfsFileSystem write operations to get around a Java library bug causing OutOfMemoryError.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1365842 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 12 years ago
parent
commit d86cffd7ef
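
For context on the fix: by default, java.net.HttpURLConnection buffers the entire request body in memory so it can set a Content-Length header up front, so a large WebHDFS PUT could exhaust the heap before a single byte reached the wire. setChunkedStreamingMode(int) switches the request to HTTP/1.1 chunked transfer encoding, which streams the body in fixed-size chunks with constant memory use. A minimal self-contained sketch of the behavior the patch relies on (the endpoint URL is hypothetical; any HTTP server accepting PUT would do):

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class ChunkedPutSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical WebHDFS-style endpoint; substitute any server accepting PUT.
    URL url = new URL("http://datanode.example.com:50075/webhdfs/v1/tmp/f?op=CREATE");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("PUT");
    conn.setDoOutput(true);

    // Without this call, the connection buffers the WHOLE body in memory to
    // compute Content-Length, which is what blew up on large files. With it,
    // the body goes out as Transfer-Encoding: chunked in 32 KB pieces, so
    // memory use stays constant regardless of file size.
    conn.setChunkedStreamingMode(32 << 10); // 32 KB chunks, as in the patch

    final byte[] buf = new byte[1 << 20]; // 1 MB per write
    final OutputStream out = conn.getOutputStream();
    try {
      for (int i = 0; i < 200; i++) { // ~200 MB total, like the new test below
        out.write(buf);
      }
    } finally {
      out.close();
    }
    System.out.println("HTTP " + conn.getResponseCode());
  }
}

The 32 KB chunk size is a middle ground: each chunk costs a small framing header plus a socket write, so tiny chunks add per-chunk overhead, while very large chunks reintroduce the buffering the call is meant to avoid.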

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -1283,6 +1283,9 @@ Release 0.23.3 - UNRELEASED
     HDFS-3646. LeaseRenewer can hold reference to inactive DFSClient
     instances forever. (Kihwal Lee via daryn)
 
+    HDFS-3696. Set chunked streaming mode in WebHdfsFileSystem write operations
+    to get around a Java library bug causing OutOfMemoryError.  (szetszwo)
+
 Release 0.23.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 4 - 14
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -423,6 +423,7 @@ public class WebHdfsFileSystem extends FileSystem
     //Step 2) Submit another Http request with the URL from the Location header with data.
     conn = (HttpURLConnection)new URL(redirect).openConnection();
     conn.setRequestMethod(op.getType().toString());
+    conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
     return conn;
   }
 
@@ -835,8 +836,7 @@ public class WebHdfsFileSystem extends FileSystem
     }
 
     private static WebHdfsFileSystem getWebHdfs(
-        final Token<?> token, final Configuration conf
-        ) throws IOException, InterruptedException, URISyntaxException {
+        final Token<?> token, final Configuration conf) throws IOException {
      
       final InetSocketAddress nnAddr = SecurityUtil.getTokenServiceAddr(token);
       final URI uri = DFSUtil.createUri(WebHdfsFileSystem.SCHEME, nnAddr);
@@ -850,12 +850,7 @@ public class WebHdfsFileSystem extends FileSystem
       // update the kerberos credentials, if they are coming from a keytab
       ugi.reloginFromKeytab();
 
-      try {
-        WebHdfsFileSystem webhdfs = getWebHdfs(token, conf);
-        return webhdfs.renewDelegationToken(token);
-      } catch (URISyntaxException e) {
-        throw new IOException(e);
-      }
+      return getWebHdfs(token, conf).renewDelegationToken(token);
     }
   
     @Override
@@ -865,12 +860,7 @@ public class WebHdfsFileSystem extends FileSystem
       // update the kerberos credentials, if they are coming from a keytab
       ugi.checkTGTAndReloginFromKeytab();
 
-      try {
-        final WebHdfsFileSystem webhdfs = getWebHdfs(token, conf);
-        webhdfs.cancelDelegationToken(token);
-      } catch (URISyntaxException e) {
-        throw new IOException(e);
-      }
+      getWebHdfs(token, conf).cancelDelegationToken(token);
     }
   }
   

+ 199 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java

@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.web;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Test WebHDFS */
+public class TestWebHDFS {
+  static final Log LOG = LogFactory.getLog(TestWebHDFS.class);
+  
+  static final Random RANDOM = new Random();
+  
+  static final long systemStartTime = System.nanoTime();
+
+  /** A timer for measuring performance. */
+  static class Ticker {
+    final String name;
+    final long startTime = System.nanoTime();
+    private long previousTick = startTime;
+
+    Ticker(final String name, String format, Object... args) {
+      this.name = name;
+      LOG.info(String.format("\n\n%s START: %s\n",
+          name, String.format(format, args)));
+    }
+
+    void tick(final long nBytes, String format, Object... args) {
+      final long now = System.nanoTime();
+      if (now - previousTick > 10000000000L) {
+        previousTick = now;
+        final double minutes = (now - systemStartTime)/60000000000.0;
+        LOG.info(String.format("\n\n%s (%.2f min) %s %s\n", name, minutes,
+            String.format(format, args), toMpsString(nBytes, now)));
+      }
+    }
+    
+    void end(final long nBytes) {
+      final long now = System.nanoTime();
+      final double seconds = (now - startTime)/1000000000.0;
+      LOG.info(String.format("\n\n%s END: duration=%.2fs %s\n",
+          name, seconds, toMpsString(nBytes, now)));
+    }
+    
+    String toMpsString(final long nBytes, final long now) {
+      final double mb = nBytes/(double)(1<<20);
+      final double mps = mb*1000000000.0/(now - startTime);
+      return String.format("[nBytes=%.2fMB, speed=%.2fMB/s]", mb, mps);
+    }
+  }
+
+  @Test
+  public void testLargeFile() throws Exception {
+    largeFileTest(200L << 20); //200MB file length
+  }
+
+  /** Test read and write large files. */
+  static void largeFileTest(final long fileLength) throws Exception {
+    final Configuration conf = WebHdfsTestUtil.createConf();
+
+    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(3)
+        .build();
+    try {
+      cluster.waitActive();
+
+      final FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf);
+      final Path dir = new Path("/test/largeFile");
+      Assert.assertTrue(fs.mkdirs(dir));
+
+      final byte[] data = new byte[1 << 20];
+      RANDOM.nextBytes(data);
+
+      final byte[] expected = new byte[2 * data.length];
+      System.arraycopy(data, 0, expected, 0, data.length);
+      System.arraycopy(data, 0, expected, data.length, data.length);
+
+      final Path p = new Path(dir, "file");
+      final Ticker t = new Ticker("WRITE", "fileLength=" + fileLength);
+      final FSDataOutputStream out = fs.create(p);
+      try {
+        long remaining = fileLength;
+        for(; remaining > 0;) {
+          t.tick(fileLength - remaining, "remaining=%d", remaining);
+          
+          final int n = (int)Math.min(remaining, data.length);
+          out.write(data, 0, n);
+          remaining -= n;
+        }
+      } finally {
+        out.close();
+      }
+      t.end(fileLength);
+  
+      Assert.assertEquals(fileLength, fs.getFileStatus(p).getLen());
+
+      final long smallOffset = RANDOM.nextInt(1 << 20) + (1 << 20);
+      final long largeOffset = fileLength - smallOffset;
+      final byte[] buf = new byte[data.length];
+
+      verifySeek(fs, p, largeOffset, fileLength, buf, expected);
+      verifySeek(fs, p, smallOffset, fileLength, buf, expected);
+  
+      verifyPread(fs, p, largeOffset, fileLength, buf, expected);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+
+  static void checkData(long offset, long remaining, int n,
+      byte[] actual, byte[] expected) {
+    if (RANDOM.nextInt(100) == 0) {
+      int j = (int)(offset % actual.length);
+      for(int i = 0; i < n; i++) {
+        if (expected[j] != actual[i]) {
+          Assert.fail("expected[" + j + "]=" + expected[j]
+              + " != actual[" + i + "]=" + actual[i]
+              + ", offset=" + offset + ", remaining=" + remaining + ", n=" + n);
+        }
+        j++;
+      }
+    }
+  }
+
+  /** test seek */
+  static void verifySeek(FileSystem fs, Path p, long offset, long length,
+      byte[] buf, byte[] expected) throws IOException { 
+    long remaining = length - offset;
+    long checked = 0;
+    LOG.info("XXX SEEK: offset=" + offset + ", remaining=" + remaining);
+
+    final Ticker t = new Ticker("SEEK", "offset=%d, remaining=%d",
+        offset, remaining);
+    final FSDataInputStream in = fs.open(p, 64 << 10);
+    in.seek(offset);
+    for(; remaining > 0; ) {
+      t.tick(checked, "offset=%d, remaining=%d", offset, remaining);
+      final int n = (int)Math.min(remaining, buf.length);
+      in.readFully(buf, 0, n);
+      checkData(offset, remaining, n, buf, expected);
+
+      offset += n;
+      remaining -= n;
+      checked += n;
+    }
+    in.close();
+    t.end(checked);
+  }
+
+  static void verifyPread(FileSystem fs, Path p, long offset, long length,
+      byte[] buf, byte[] expected) throws IOException {
+    long remaining = length - offset;
+    long checked = 0;
+    LOG.info("XXX PREAD: offset=" + offset + ", remaining=" + remaining);
+
+    final Ticker t = new Ticker("PREAD", "offset=%d, remaining=%d",
+        offset, remaining);
+    final FSDataInputStream in = fs.open(p, 64 << 10);
+    for(; remaining > 0; ) {
+      t.tick(checked, "offset=%d, remaining=%d", offset, remaining);
+      final int n = (int)Math.min(remaining, buf.length);
+      in.readFully(offset, buf, 0, n);
+      checkData(offset, remaining, n, buf, expected);
+
+      offset += n;
+      remaining -= n;
+      checked += n;
+    }
+    in.close();
+    t.end(checked);
+  }
+}
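
As a usage note: the new test drives the chunked write path through nothing more than the ordinary FileSystem API over the webhdfs:// scheme. A minimal sketch against an already-running cluster (host and port are hypothetical; 50070 was the default NameNode HTTP port in this era):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsWriteSketch {
  public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    // Hypothetical NameNode HTTP address.
    final FileSystem fs = FileSystem.get(
        URI.create("webhdfs://namenode.example.com:50070"), conf);

    final Path p = new Path("/tmp/largeFile");
    final byte[] buf = new byte[1 << 20]; // 1 MB per write
    final FSDataOutputStream out = fs.create(p);
    try {
      for (int i = 0; i < 200; i++) { // ~200 MB, as in testLargeFile
        out.write(buf);               // streamed as 32 KB HTTP chunks after this patch
      }
    } finally {
      out.close();
    }
    System.out.println("wrote " + fs.getFileStatus(p).getLen() + " bytes");
  }
}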