فهرست منبع

HADOOP-19494: [ABFS][FnsOverBlob] Fix Case Sensitivity Issue for hdi_isfolder metadata (#7496)

Contributed by Manish Bhatt
Reviewed by Anmol, Manika, Anuj

Signed off by: Anuj Modi<anujmodi@apache.org>
Manish Bhatt 1 ماه پیش
والد
کامیت
431f29e910

+ 1 - 1
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/BlobListXmlParser.java

@@ -207,7 +207,7 @@ public class BlobListXmlParser extends DefaultHandler {
     if (parentNode.equals(AbfsHttpConstants.XML_TAG_METADATA)) {
       currentBlobEntry.addMetadata(currentNode, value);
       // For Marker blobs hdi_isFolder will be present as metadata
-      if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equals(currentNode)) {
+      if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equalsIgnoreCase(currentNode)) {
         currentBlobEntry.setIsDirectory(Boolean.valueOf(value));
       }
     }

+ 16 - 0
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java

@@ -312,6 +312,22 @@ public class AbfsAHCHttpOperation extends AbfsHttpOperation {
     return headers;
   }
 
+  /**{@inheritDoc}*/
+  @Override
+  public String getResponseHeaderIgnoreCase(final String headerName) {
+    Map<String, List<String>> responseHeaders = getResponseHeaders();
+    if (responseHeaders == null || responseHeaders.isEmpty()) {
+      return null;
+    }
+    // Search for the header value case-insensitively
+    return responseHeaders.entrySet().stream()
+        .filter(entry -> entry.getKey() != null
+            && entry.getKey().equalsIgnoreCase(headerName))
+        .flatMap(entry -> entry.getValue().stream())
+        .findFirst()
+        .orElse(null); // Return null if no match is found
+  }
+
   /**{@inheritDoc}*/
   @Override
   protected InputStream getContentInputStream()

+ 3 - 2
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java

@@ -1137,7 +1137,8 @@ public class AbfsBlobClient extends AbfsClient {
         this.createPathRestOp(path, false, false, false, null,
             contextEncryptionAdapter, tracingContext);
         // Make sure hdi_isFolder is added to the list of properties to be set.
-        boolean hdiIsFolderExists = properties.containsKey(XML_TAG_HDI_ISFOLDER);
+        boolean hdiIsFolderExists = properties.keySet()
+            .stream().anyMatch(XML_TAG_HDI_ISFOLDER::equalsIgnoreCase);
         if (!hdiIsFolderExists) {
           properties.put(XML_TAG_HDI_ISFOLDER, TRUE);
         }
@@ -1548,7 +1549,7 @@ public class AbfsBlobClient extends AbfsClient {
    */
   @Override
   public boolean checkIsDir(AbfsHttpOperation result) {
-    String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER);
+    String resourceType = result.getResponseHeaderIgnoreCase(X_MS_META_HDI_ISFOLDER);
     return resourceType != null && resourceType.equals(TRUE);
   }
 

+ 2 - 1
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java

@@ -1626,7 +1626,8 @@ public abstract class AbfsClient implements Closeable {
    * @param requestHeaders  The list of HTTP headers for the request.
    * @return An AbfsRestOperation instance.
    */
-  AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
+  @VisibleForTesting
+  public AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
       final String httpMethod,
       final URL url,
       final List<AbfsHttpHeader> requestHeaders) {

+ 32 - 2
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java

@@ -235,6 +235,14 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
 
   public abstract Map<String, List<String>> getResponseHeaders();
 
+  /**
+   * Get response header value for the given headerKey ignoring case.
+   *
+   * @param httpHeader header key.
+   * @return header value.
+   */
+  public abstract String getResponseHeaderIgnoreCase(String httpHeader);
+
   // Returns a trace message for the request
   @Override
   public String toString() {
@@ -743,7 +751,7 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
 
     @Override
     public String getResponseHeader(final String httpHeader) {
-      return "";
+      return EMPTY_STRING;
     }
 
     @Override
@@ -751,6 +759,12 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String headerName) {
+      return EMPTY_STRING;
+    }
+
     @Override
     public void sendPayload(final byte[] buffer,
         final int offset,
@@ -787,6 +801,16 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String httpHeader) {
+      // Directories on FNS-Blob are identified by a special metadata header.
+      if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) {
+        return TRUE;
+      }
+      return EMPTY_STRING;
+    }
+
     @Override
     public void processResponse(final byte[] buffer,
         final int offset,
@@ -935,7 +959,7 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
 
     @Override
     public String getResponseHeader(final String httpHeader) {
-      return "";
+      return EMPTY_STRING;
     }
 
     @Override
@@ -943,6 +967,12 @@ public abstract class AbfsHttpOperation implements AbfsPerfLoggable {
       return new HashMap<>();
     }
 
+    /**{@inheritDoc}*/
+    @Override
+    public String getResponseHeaderIgnoreCase(final String headerName) {
+      return EMPTY_STRING;
+    }
+
     @Override
     public void sendPayload(final byte[] buffer,
         final int offset,

+ 16 - 0
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java

@@ -104,6 +104,22 @@ public class AbfsJdkHttpOperation extends AbfsHttpOperation {
     return connection.getHeaderFields();
   }
 
+  /**{@inheritDoc}*/
+  @Override
+  public String getResponseHeaderIgnoreCase(final String headerName) {
+    Map<String, List<String>> responseHeaders = getResponseHeaders();
+    if (responseHeaders == null || responseHeaders.isEmpty()) {
+      return null;
+    }
+    // Search for the header value case-insensitively
+    return responseHeaders.entrySet().stream()
+        .filter(entry -> entry.getKey() != null
+            && entry.getKey().equalsIgnoreCase(headerName))
+        .flatMap(entry -> entry.getValue().stream())
+        .findFirst()
+        .orElse(null); // Return null if no match is found
+  }
+
   /**{@inheritDoc}*/
   public void sendPayload(byte[] buffer, int offset, int length)
       throws IOException {

+ 60 - 0
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java

@@ -29,9 +29,12 @@ import org.mockito.Mockito;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
 import org.apache.hadoop.fs.permission.FsPermission;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY;
+import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockAbfsRestOperation;
+import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockIngressClientHandler;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists;
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.mockito.ArgumentMatchers.any;
@@ -284,4 +287,61 @@ public class ITestAzureBlobFileSystemFileStatus extends
     Assertions.assertThat(ex).isNotNull();
     Assertions.assertThat(ex.getMessage()).contains(key);
   }
+
+  /**
+   * Test directory status with different HDI folder configuration,
+   * verifying the correct header and directory state.
+   */
+  private void testIsDirectory(boolean expected, String... configName) throws Exception {
+    try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
+      assumeBlobServiceType();
+      AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
+      // Mock the operation to modify the headers
+      mockAbfsRestOperation(abfsBlobClient, configName);
+
+      // Create the path and invoke mkdirs method
+      Path path = new Path("/testPath");
+      fs.mkdirs(path);
+
+      // Assert that the response header has the updated value
+      FileStatus fileStatus = fs.getFileStatus(path);
+
+      AbfsHttpOperation op = abfsBlobClient.getPathStatus(
+          path.toUri().getPath(),
+          true, getTestTracingContext(fs, true),
+          null).getResult();
+
+      Assertions.assertThat(abfsBlobClient.checkIsDir(op))
+          .describedAs("Directory should be marked as " + expected)
+          .isEqualTo(expected);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus.isDirectory())
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(expected);
+
+      fs.delete(path, true);
+    }
+  }
+
+  /**
+   * Test to verify the directory status with different HDI folder configurations.
+   * Verifying the correct header and directory state.
+   */
+  @Test
+  public void testIsDirectoryWithDifferentCases() throws Exception {
+    testIsDirectory(true,  "HDI_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_isfolder");
+
+    testIsDirectory(true, "hdi_isfolder");
+
+    testIsDirectory(false, "Hdi_isfolder1");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
+  }
 }

+ 114 - 0
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.fs.azurebfs;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.SocketTimeoutException;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
@@ -40,7 +41,13 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
+import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
+import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
+import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType;
 import org.apache.hadoop.fs.azurebfs.services.ListResponseData;
 import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
 import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil;
@@ -51,8 +58,11 @@ import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
 
 import static java.net.HttpURLConnection.HTTP_OK;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX;
 import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX;
 import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs;
@@ -62,6 +72,8 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.rename;
 
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.ArgumentMatchers.nullable;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.when;
@@ -524,4 +536,106 @@ public class ITestAzureBlobFileSystemListStatus extends
     Assertions.assertThat(fileStatus.getLen())
         .describedAs("Content Length Not as Expected").isEqualTo(0);
   }
+
+  /**
+   * Helper method to mock the AbfsRestOperation and modify the request headers.
+   *
+   * @param abfsBlobClient the mocked AbfsBlobClient
+   * @param newHeader the header to add in place of the old one
+   */
+  public static void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, String... newHeader) {
+    Mockito.doAnswer(invocation -> {
+      List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3);
+
+      // Remove the actual HDI config header and add the new one
+      requestHeaders.removeIf(header ->
+          HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName()));
+      for (String header : newHeader) {
+        requestHeaders.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + header, TRUE));
+      }
+
+      // Call the real method
+      return invocation.callRealMethod();
+    }).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob),
+        eq(HTTP_METHOD_PUT), any(URL.class), anyList());
+  }
+
+  /**
+   * Helper method to mock the AbfsBlobClient and set up the client handler.
+   *
+   * @param fs the AzureBlobFileSystem instance
+   * @return the mocked AbfsBlobClient
+   */
+  public static AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem fs) {
+    AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
+    AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler());
+    AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy(
+        clientHandler.getClient());
+    fs.getAbfsStore().setClient(abfsBlobClient);
+    fs.getAbfsStore().setClientHandler(clientHandler);
+    Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient();
+    return abfsBlobClient;
+  }
+
+  /**
+   * Test directory status with different HDI folder configuration,
+   * verifying the correct header and directory state.
+   */
+  private void testIsDirectory(boolean expected, String... configName) throws Exception {
+    try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
+      assumeBlobServiceType();
+      AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
+      // Mock the operation to modify the headers
+      mockAbfsRestOperation(abfsBlobClient, configName);
+
+      // Create the path and invoke mkdirs method
+      Path path = new Path("/testPath");
+      fs.mkdirs(path);
+
+      // Assert that the response header has the updated value
+      FileStatus[] fileStatus = fs.listStatus(path.getParent());
+
+      AbfsHttpOperation op = abfsBlobClient.getPathStatus(
+          path.toUri().getPath(),
+          true, getTestTracingContext(fs, true),
+          null).getResult();
+
+      Assertions.assertThat(abfsBlobClient.checkIsDir(op))
+          .describedAs("Directory should be marked as " + expected)
+          .isEqualTo(expected);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus.length)
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(1);
+
+      // Verify the header and directory state
+      Assertions.assertThat(fileStatus[0].isDirectory())
+          .describedAs("Expected directory state: " + expected)
+          .isEqualTo(expected);
+
+      fs.delete(path, true);
+    }
+  }
+
+  /**
+   * Test to verify the directory status with different HDI folder configurations.
+   * Verifying the correct header and directory state.
+   */
+  @Test
+  public void testIsDirectoryWithDifferentCases() throws Exception {
+    testIsDirectory(true,  "HDI_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_ISFOLDER");
+
+    testIsDirectory(true, "Hdi_isfolder");
+
+    testIsDirectory(true, "hdi_isfolder");
+
+    testIsDirectory(false, "Hdi_isfolder1");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");
+
+    testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
+  }
 }