|
@@ -51,6 +51,7 @@ import org.xml.sax.SAXException;
|
|
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.hadoop.fs.FileStatus;
|
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
import org.apache.hadoop.classification.VisibleForTesting;
|
|
|
import org.apache.hadoop.fs.Path;
|
|
@@ -77,6 +78,7 @@ import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
|
|
|
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
|
|
|
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
|
|
|
import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter;
|
|
|
+import org.apache.hadoop.fs.azurebfs.utils.ListUtils;
|
|
|
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
|
|
|
|
|
|
import static java.net.HttpURLConnection.HTTP_CONFLICT;
|
|
@@ -348,13 +350,9 @@ public class AbfsBlobClient extends AbfsClient {
|
|
|
*/
|
|
|
@Override
|
|
|
public ListResponseData listPath(final String relativePath, final boolean recursive,
|
|
|
- final int listMaxResults, final String continuation, TracingContext tracingContext, URI uri) throws IOException {
|
|
|
- return listPath(relativePath, recursive, listMaxResults, continuation, tracingContext, uri, true);
|
|
|
- }
|
|
|
-
|
|
|
- public ListResponseData listPath(final String relativePath, final boolean recursive,
|
|
|
- final int listMaxResults, final String continuation, TracingContext tracingContext, URI uri, boolean is404CheckRequired)
|
|
|
+ final int listMaxResults, final String continuation, TracingContext tracingContext, URI uri)
|
|
|
throws AzureBlobFileSystemException {
|
|
|
+
|
|
|
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
|
|
|
|
|
|
AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
|
|
@@ -400,37 +398,46 @@ public class AbfsBlobClient extends AbfsClient {
|
|
|
listResponseData.setOp(retryListOp);
|
|
|
}
|
|
|
}
|
|
|
+ return listResponseData;
|
|
|
+ }
|
|
|
|
|
|
- if (isEmptyListResults(listResponseData) && is404CheckRequired) {
|
|
|
+ /**
|
|
|
+ * Post-processing of the list operation on Blob endpoint.
|
|
|
+ * There are two client handing to be done on list output.
|
|
|
+ * 1. Empty List returned on server could potentially mean path is a file.
|
|
|
+ * 2. There can be duplicates returned from the server for explicit non-empty directory.
|
|
|
+ * @param relativePath relative path to be listed.
|
|
|
+ * @param fileStatuses list of file statuses returned from the server.
|
|
|
+ * @param tracingContext tracing context to trace server calls.
|
|
|
+ * @param uri URI to be used for path conversion.
|
|
|
+ * @return rectified list of file statuses.
|
|
|
+ * @throws AzureBlobFileSystemException if any failure occurs.
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public List<FileStatus> postListProcessing(String relativePath, List<FileStatus> fileStatuses,
|
|
|
+ TracingContext tracingContext, URI uri) throws AzureBlobFileSystemException {
|
|
|
+ List<FileStatus> rectifiedFileStatuses = new ArrayList<>();
|
|
|
+ if (fileStatuses.isEmpty() && !ROOT_PATH.equals(relativePath)) {
|
|
|
// If the list operation returns no paths, we need to check if the path is a file.
|
|
|
// If it is a file, we need to return the file in the list.
|
|
|
+ // If it is a directory or root path, we need to return an empty list.
|
|
|
// If it is a non-existing path, we need to throw a FileNotFoundException.
|
|
|
- if (relativePath.equals(ROOT_PATH)) {
|
|
|
- // Root Always exists as directory. It can be an empty listing.
|
|
|
- return listResponseData;
|
|
|
- }
|
|
|
AbfsRestOperation pathStatus = this.getPathStatus(relativePath, tracingContext, null, false);
|
|
|
BlobListResultSchema listResultSchema = getListResultSchemaFromPathStatus(relativePath, pathStatus);
|
|
|
- LOG.debug("ListBlob attempted on a file path. Returning file status.");
|
|
|
- List<VersionedFileStatus> fileStatusList = new ArrayList<>();
|
|
|
+ LOG.debug("ListStatus attempted on a file path {}. Returning file status.", relativePath);
|
|
|
for (BlobListResultEntrySchema entry : listResultSchema.paths()) {
|
|
|
- fileStatusList.add(getVersionedFileStatusFromEntry(entry, uri));
|
|
|
+ rectifiedFileStatuses.add(getVersionedFileStatusFromEntry(entry, uri));
|
|
|
}
|
|
|
- AbfsRestOperation listOp = getAbfsRestOperation(
|
|
|
- AbfsRestOperationType.ListBlobs,
|
|
|
- HTTP_METHOD_GET,
|
|
|
- url,
|
|
|
- requestHeaders);
|
|
|
- listOp.hardSetGetListStatusResult(HTTP_OK, listResultSchema);
|
|
|
- listResponseData.setFileStatusList(fileStatusList);
|
|
|
- listResponseData.setContinuationToken(null);
|
|
|
- listResponseData.setRenamePendingJsonPaths(null);
|
|
|
- listResponseData.setOp(listOp);
|
|
|
- return listResponseData;
|
|
|
+ } else {
|
|
|
+ // Remove duplicates from the non-empty list output only.
|
|
|
+ rectifiedFileStatuses.addAll(ListUtils.getUniqueListResult(fileStatuses));
|
|
|
+ LOG.debug(
|
|
|
+ "ListBlob API returned a total of {} elements including duplicates."
|
|
|
+ + "Number of unique Elements are {}", fileStatuses.size(),
|
|
|
+ rectifiedFileStatuses.size());
|
|
|
}
|
|
|
- return listResponseData;
|
|
|
+ return rectifiedFileStatuses;
|
|
|
}
|
|
|
-
|
|
|
/**
|
|
|
* Filter the paths for which no rename redo operation is performed.
|
|
|
* Update BlobListResultSchema path with filtered entries.
|
|
@@ -2013,6 +2020,8 @@ public class AbfsBlobClient extends AbfsClient {
|
|
|
|
|
|
/**
|
|
|
* Checks if the listing of the specified path is non-empty.
|
|
|
+ * Since listing is incomplete as long as continuation token is returned by server,
|
|
|
+ * we need to iterate until either we get one entry or continuation token becomes null.
|
|
|
*
|
|
|
* @param path The path to be listed.
|
|
|
* @param tracingContext The tracing context for tracking the operation.
|
|
@@ -2024,26 +2033,15 @@ public class AbfsBlobClient extends AbfsClient {
|
|
|
TracingContext tracingContext) throws AzureBlobFileSystemException {
|
|
|
// This method is only called internally to determine state of a path
|
|
|
// and hence don't need identity transformation to happen.
|
|
|
- ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, null, false);
|
|
|
- return !isEmptyListResults(listResponseData);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * Check if the list call returned empty results without any continuation token.
|
|
|
- * @param listResponseData The response of listing API from the server.
|
|
|
- * @return True if empty results without continuation token.
|
|
|
- */
|
|
|
- private boolean isEmptyListResults(ListResponseData listResponseData) {
|
|
|
- AbfsHttpOperation result = listResponseData.getOp().getResult();
|
|
|
- boolean isEmptyList = result != null && result.getStatusCode() == HTTP_OK && // List Call was successful
|
|
|
- result.getListResultSchema() != null && // Parsing of list response was successful
|
|
|
- listResponseData.getFileStatusList().isEmpty() && listResponseData.getRenamePendingJsonPaths().isEmpty() &&// No paths were returned
|
|
|
- StringUtils.isEmpty(listResponseData.getContinuationToken()); // No continuation token was returned
|
|
|
- if (isEmptyList) {
|
|
|
- LOG.debug("List call returned empty results without any continuation token.");
|
|
|
- return true;
|
|
|
- }
|
|
|
- return false;
|
|
|
+ String continuationToken = null;
|
|
|
+ List<FileStatus> fileStatusList = new ArrayList<>();
|
|
|
+ // We need to loop on continuation token until we get an entry or continuation token becomes null.
|
|
|
+ do {
|
|
|
+ ListResponseData listResponseData = listPath(path, false, 1, null, tracingContext, null);
|
|
|
+ fileStatusList.addAll(listResponseData.getFileStatusList());
|
|
|
+ continuationToken = listResponseData.getContinuationToken();
|
|
|
+ } while (StringUtils.isNotEmpty(continuationToken) && fileStatusList.isEmpty());
|
|
|
+ return !fileStatusList.isEmpty();
|
|
|
}
|
|
|
|
|
|
/**
|