فهرست منبع

HADOOP-887: HDFS command on WASB Windows: copyFromLocal fails for Unicode filename

Ivan Mitic 11 سال پیش
والد
کامیت
865ed68aa9

+ 33 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/azurenative/AzureNativeFileSystemStore.java

@@ -26,8 +26,11 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
 import java.security.InvalidKeyException;
 import java.util.ArrayList;
 import java.util.Calendar;
@@ -1376,18 +1379,34 @@ class AzureNativeFileSystemStore implements NativeFileSystemStore {
   }
 
   private static void storeLinkAttribute(CloudBlobWrapper blob,
-      String linkTarget) {
+      String linkTarget) throws UnsupportedEncodingException {
+    // We have to URL encode the link attribute as the link URI could
+    // have URI special characters which unless encoded will result
+    // in 403 errors from the server. This is due to metadata properties
+    // being sent in the HTTP header of the request which is in turn used
+    // on the server side to authorize the request.
+    String encodedLinkTarget = null;
+    if (linkTarget != null) {
+      encodedLinkTarget = URLEncoder.encode(linkTarget, "UTF-8");
+    }
     storeMetadataAttribute(blob,
-        LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY, linkTarget);
+        LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY,
+        encodedLinkTarget);
     // Remove the old metadata key if present
     removeMetadataAttribute(blob,
         OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY);
   }
 
-  private static String getLinkAttributeValue(CloudBlobWrapper blob) {
-    return getMetadataAttribute(blob,
+  private static String getLinkAttributeValue(CloudBlobWrapper blob)
+      throws UnsupportedEncodingException {
+    String encodedLinkTarget = getMetadataAttribute(blob,
         LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY,
         OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY);
+    String linkTarget = null;
+    if (encodedLinkTarget != null) {
+      linkTarget = URLDecoder.decode(encodedLinkTarget, "UTF-8");
+    }
+    return linkTarget;
   }
 
   private static boolean retrieveFolderAttribute(CloudBlobWrapper blob) {
@@ -2385,10 +2404,19 @@ class AzureNativeFileSystemStore implements NativeFileSystemStore {
       //
       CloudBlobWrapper dstBlob = getBlobReference(dstKey);
 
+      // TODO: Remove at the time when we move to Azure Java SDK 1.2+.
+      // This is the workaround provided by Azure Java SDK team to
+      // mitigate the issue with un-encoded x-ms-copy-source HTTP
+      // request header. Azure sdk version before 1.2+ does not encode this
+      // header what causes all URIs that have special (category "other")
+      // characters in the URI not to work with startCopyFromBlob when
+      // specified as source (requests fail with HTTP 403).
+      URI srcUri = new URI(srcBlob.getUri().toASCIIString());
+
       // Rename the source blob to the destination blob by copying it to
       // the destination blob then deleting it.
       //
-      dstBlob.startCopyFromBlob(srcBlob, getInstrumentedContext());
+      dstBlob.startCopyFromBlob(srcUri, getInstrumentedContext());
       waitForCopyToComplete(dstBlob, getInstrumentedContext());
 
       safeDelete(srcBlob, lease);

+ 3 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/azurenative/StorageInterface.java

@@ -312,8 +312,8 @@ abstract class StorageInterface {
      * Copies an existing blob's contents, properties, and metadata to this instance of the <code>CloudBlob</code>
      * class, using the specified operation context.
      *
-     * @param sourceBlob
-     *            A <code>CloudBlob</code> object that represents the source blob to copy.
+     * @param source
+     *            A <code>java.net.URI</code> The URI of a source blob.
      * @param opContext
      *            An {@link OperationContext} object that represents the context for the current operation. This object
      *            is used to track requests to the storage service, and to provide additional runtime information about
@@ -324,7 +324,7 @@ abstract class StorageInterface {
      * @throws URISyntaxException
      *
      */
-    public abstract void startCopyFromBlob(CloudBlobWrapper sourceBlob,
+    public abstract void startCopyFromBlob(URI source,
         OperationContext opContext)
         throws StorageException, URISyntaxException;
 

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/azurenative/StorageInterfaceImpl.java

@@ -344,10 +344,10 @@ class StorageInterfaceImpl extends StorageInterface {
     }
 
     @Override
-    public void startCopyFromBlob(CloudBlobWrapper sourceBlob,
+    public void startCopyFromBlob(URI source,
         OperationContext opContext)
             throws StorageException, URISyntaxException {
-      blob.startCopyFromBlob(((CloudBlobWrapperImpl)sourceBlob).blob,
+      blob.startCopyFromBlob(source,
           null, null, null, opContext);
     }
 

+ 55 - 24
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/azurenative/MockStorageInterface.java

@@ -4,12 +4,11 @@ import java.io.*;
 import java.net.*;
 import java.util.*;
 
+import javax.ws.rs.core.UriBuilder;
+import javax.ws.rs.core.UriBuilderException;
+
 import org.apache.commons.httpclient.URIException;
 import org.apache.commons.httpclient.util.URIUtil;
-import org.apache.commons.io.output.ByteArrayOutputStream;
-
-
-
 
 import sun.reflect.generics.reflectiveObjects.NotImplementedException;
 
@@ -72,6 +71,32 @@ public class MockStorageInterface extends StorageInterface {
     return null;
   }
 
+  /**
+   * Utility function used to convert a given URI to a decoded string
+   * representation sent to the backing store. URIs coming as input
+   * to this class will be encoded by the URI class, and we want
+   * the underlying storage to store keys in their original UTF-8 form.
+   */
+  private static String convertUriToDecodedString(URI uri) {
+    try {
+      String result = URIUtil.decode(uri.toString());
+      return result;
+    } catch (URIException e) {
+      throw new AssertionError("Failed to decode URI: " + uri.toString());
+    }
+  }
+
+  private static URI convertKeyToEncodedUri(String key) {
+    try {
+      String encodedKey = URIUtil.encodePath(key);
+      URI uri = new URI(encodedKey);
+      return uri;
+    } catch (URISyntaxException e) {
+      throw new AssertionError("Failed to encode key: " + key);
+    } catch (URIException e) {
+      throw new AssertionError("Failed to encode key: " + key);
+    }
+  }
 
   @Override
   public CloudBlobContainerWrapper getContainerReference(String name)
@@ -232,14 +257,18 @@ public class MockStorageInterface extends StorageInterface {
         BlobRequestOptions options, OperationContext opContext)
         throws URISyntaxException, StorageException {
       ArrayList<ListBlobItem> ret = new ArrayList<ListBlobItem>();
-      String fullPrefix = prefix == null ?
-          uri.toString() :
-          new URI(
-              uri.getScheme(),
-              uri.getAuthority(),
-              uri.getPath() + prefix,
-              uri.getQuery(),
-              uri.getFragment()).toString();
+      URI searchUri = null;
+      if (prefix == null) {
+        searchUri = uri;
+      } else {
+        try {
+          searchUri = UriBuilder.fromUri(uri).path(prefix).build();
+        } catch (UriBuilderException e) {
+          throw new AssertionError("Failed to encode path: " + prefix);
+        }
+      }
+
+      String fullPrefix = convertUriToDecodedString(searchUri);
       boolean includeMetadata = listingDetails.contains(BlobListingDetails.METADATA);
       HashSet<String> addedDirectories = new HashSet<String>();
       for (InMemoryBlockBlobStore.ListBlobEntry current : backingStore.listBlobs(
@@ -248,12 +277,12 @@ public class MockStorageInterface extends StorageInterface {
         if (useFlatBlobListing || indexOfSlash < 0) {
           if (current.isPageBlob()) {
             ret.add(new MockCloudPageBlobWrapper(
-                new URI(current.getKey()),
+                convertKeyToEncodedUri(current.getKey()),
                 current.getMetadata(),
                 current.getContentLength()));
           } else {
           ret.add(new MockCloudBlockBlobWrapper(
-              new URI(current.getKey()),
+              convertKeyToEncodedUri(current.getKey()),
               current.getMetadata(),
               current.getContentLength()));
           }
@@ -292,14 +321,14 @@ public class MockStorageInterface extends StorageInterface {
     }
 
     protected void refreshProperties(boolean getMetadata) {
-      if (backingStore.exists(uri.toString())) {
-        byte[] content = backingStore.getContent(uri.toString());
+      if (backingStore.exists(convertUriToDecodedString(uri))) {
+        byte[] content = backingStore.getContent(convertUriToDecodedString(uri));
         properties = new BlobProperties();
         properties.setLength(content.length);
         properties.setLastModified(
             Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTime());
         if (getMetadata) {
-          metadata = backingStore.getMetadata(uri.toString());
+          metadata = backingStore.getMetadata(convertUriToDecodedString(uri));
         }
       }
     }
@@ -332,9 +361,9 @@ public class MockStorageInterface extends StorageInterface {
     }
 
     @Override
-    public void startCopyFromBlob(CloudBlobWrapper sourceBlob,
+    public void startCopyFromBlob(URI source,
         OperationContext opContext) throws StorageException, URISyntaxException {
-      backingStore.copy(sourceBlob.getUri().toString(), uri.toString());
+      backingStore.copy(convertUriToDecodedString(source), convertUriToDecodedString(uri));
       //TODO: set the backingStore.properties.CopyState and
       //      update azureNativeFileSystemStore.waitForCopyToComplete
     }
@@ -347,12 +376,12 @@ public class MockStorageInterface extends StorageInterface {
     @Override
     public void delete(OperationContext opContext, SelfRenewingLease lease)
         throws StorageException {
-      backingStore.delete(uri.toString());
+      backingStore.delete(convertUriToDecodedString(uri));
     }
 
     @Override
     public boolean exists(OperationContext opContext) throws StorageException {
-      return backingStore.exists(uri.toString());
+      return backingStore.exists(convertUriToDecodedString(uri));
     }
 
     @Override
@@ -369,13 +398,14 @@ public class MockStorageInterface extends StorageInterface {
     @Override
     public InputStream openInputStream(BlobRequestOptions options,
         OperationContext opContext) throws StorageException {
-      return new ByteArrayInputStream(backingStore.getContent(uri.toString()));
+      return new ByteArrayInputStream(
+          backingStore.getContent(convertUriToDecodedString(uri)));
     }
 
     @Override
     public void uploadMetadata(OperationContext opContext)
         throws StorageException {
-      backingStore.setMetadata(uri.toString(), metadata);
+      backingStore.setMetadata(convertUriToDecodedString(uri), metadata);
     }
 
     @Override
@@ -396,7 +426,8 @@ public class MockStorageInterface extends StorageInterface {
     @Override
     public OutputStream openOutputStream(BlobRequestOptions options,
         OperationContext opContext) throws StorageException {
-      return backingStore.uploadBlockBlob(uri.toString(), metadata);
+      return backingStore.uploadBlockBlob(convertUriToDecodedString(uri),
+          metadata);
     }
 
     @Override

+ 37 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/azurenative/NativeAzureFileSystemBaseTest.java

@@ -380,6 +380,43 @@ public abstract class NativeAzureFileSystemBaseTest {
     assertTrue(fs.delete(new Path(directoryName), true));
   }
 
+  @Test
+  public void testChineseCharacters() throws Exception {
+    // Create a file and a folder with Chinese (non-ASCI) characters
+    String chinese = "" + '\u963f' + '\u4db5';
+    String fileName = "filename" + chinese;
+    String directoryName = chinese;
+    fs.create(new Path(directoryName, fileName)).close();
+    FileStatus[] listing = fs.listStatus(new Path(directoryName));
+    assertEquals(1, listing.length);
+    assertEquals(fileName, listing[0].getPath().getName());
+    FileStatus status = fs.getFileStatus(new Path(directoryName, fileName));
+    assertEquals(fileName, status.getPath().getName());
+    InputStream stream = fs.open(new Path(directoryName, fileName));
+    assertNotNull(stream);
+    stream.close();
+    assertTrue(fs.delete(new Path(directoryName, fileName), true));
+    assertTrue(fs.delete(new Path(directoryName), true));
+  }
+
+  @Test
+  public void testChineseCharactersFolderRename() throws Exception {
+    // Create a file and a folder with Chinese (non-ASCI) characters
+    String chinese = "" + '\u963f' + '\u4db5';
+    String fileName = "filename" + chinese;
+    String srcDirectoryName = chinese;
+    String targetDirectoryName = "target" + chinese;
+    fs.create(new Path(srcDirectoryName, fileName)).close();
+    fs.rename(new Path(srcDirectoryName), new Path(targetDirectoryName));
+    FileStatus[] listing = fs.listStatus(new Path(targetDirectoryName));
+    assertEquals(1, listing.length);
+    assertEquals(fileName, listing[0].getPath().getName());
+    FileStatus status = fs.getFileStatus(new Path(targetDirectoryName, fileName));
+    assertEquals(fileName, status.getPath().getName());
+    assertTrue(fs.delete(new Path(targetDirectoryName, fileName), true));
+    assertTrue(fs.delete(new Path(targetDirectoryName), true));
+  }
+
   @Test
   public void testReadingDirectoryAsFile() throws Exception {
     Path dir = new Path("/x");

+ 2 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/azurenative/TestNativeAzureFileSystemConcurrency.java

@@ -3,6 +3,7 @@ package org.apache.hadoop.fs.azurenative;
 import static org.junit.Assert.*;
 
 import java.io.*;
+import java.net.URLDecoder;
 import java.util.*;
 import java.util.concurrent.*;
 
@@ -40,6 +41,7 @@ public class TestNativeAzureFileSystemConcurrency {
             AzureBlobStorageTestAccount.toMockUri(filePath));
     assertNotNull(metadata);
     String linkValue = metadata.get(AzureNativeFileSystemStore.LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY);
+    linkValue = URLDecoder.decode(linkValue, "UTF-8");
     assertNotNull(linkValue);
     assertTrue(backingStore.exists(
         AzureBlobStorageTestAccount.toMockUri(linkValue)));