Browse Source

HDFS-10837. Standardize serializiation of WebHDFS DirectoryListing.

Andrew Wang 8 years ago
parent
commit
db6d243cf8

+ 32 - 10
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java

@@ -143,23 +143,36 @@ class JsonUtilClient {
         storagePolicy, null);
   }
 
+  static HdfsFileStatus[] toHdfsFileStatusArray(final Map<?, ?> json) {
+    Preconditions.checkNotNull(json);
+    final Map<?, ?> rootmap =
+        (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
+    final List<?> array = JsonUtilClient.getList(rootmap,
+        FileStatus.class.getSimpleName());
+
+    // convert FileStatus
+    Preconditions.checkNotNull(array);
+    final HdfsFileStatus[] statuses = new HdfsFileStatus[array.size()];
+    int i = 0;
+    for (Object object : array) {
+      final Map<?, ?> m = (Map<?, ?>) object;
+      statuses[i++] = JsonUtilClient.toFileStatus(m, false);
+    }
+    return statuses;
+  }
+
   static DirectoryListing toDirectoryListing(final Map<?, ?> json) {
     if (json == null) {
       return null;
     }
-    final List<?> list = JsonUtilClient.getList(json,
-        "partialListing");
+    final Map<?, ?> listing = getMap(json, "DirectoryListing");
+    final Map<?, ?> partialListing = getMap(listing, "partialListing");
+    HdfsFileStatus[] fileStatuses = toHdfsFileStatusArray(partialListing);
 
-    HdfsFileStatus[] partialListing = new HdfsFileStatus[list.size()];
-    int i = 0;
-    for (Object o : list) {
-      final Map<?, ?> m = (Map<?, ?>) o;
-      partialListing[i++] = toFileStatus(m, false);
-    }
-    int remainingEntries = getInt(json, "remainingEntries", -1);
+    int remainingEntries = getInt(listing, "remainingEntries", -1);
     Preconditions.checkState(remainingEntries != -1,
         "remainingEntries was not set");
-    return new DirectoryListing(partialListing, remainingEntries);
+    return new DirectoryListing(fileStatuses, remainingEntries);
   }
 
   /** Convert a Json map to an ExtendedBlock object. */
@@ -210,6 +223,15 @@ class JsonUtilClient {
     }
   }
 
+  static Map<?, ?> getMap(Map<?, ?> m, String key) {
+    Object map = m.get(key);
+    if (map instanceof Map<?, ?>) {
+      return (Map<?, ?>) map;
+    } else {
+      return null;
+    }
+  }
+
   /** Convert a Json map to an DatanodeInfo object. */
   static DatanodeInfo toDatanodeInfo(final Map<?, ?> m)
       throws IOException {

+ 6 - 13
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -1491,20 +1491,13 @@ public class WebHdfsFileSystem extends FileSystem
     return new FsPathResponseRunner<FileStatus[]>(op, f) {
       @Override
       FileStatus[] decodeResponse(Map<?,?> json) {
-        final Map<?, ?> rootmap =
-            (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
-        final List<?> array = JsonUtilClient.getList(rootmap,
-            FileStatus.class.getSimpleName());
-
-        //convert FileStatus
-        assert array != null;
-        final FileStatus[] statuses = new FileStatus[array.size()];
-        int i = 0;
-        for (Object object : array) {
-          final Map<?, ?> m = (Map<?, ?>) object;
-          statuses[i++] = makeQualified(JsonUtilClient.toFileStatus(m, false),
-              f);
+        HdfsFileStatus[] hdfsStatuses =
+            JsonUtilClient.toHdfsFileStatusArray(json);
+        final FileStatus[] statuses = new FileStatus[hdfsStatuses.length];
+        for (int i = 0; i < hdfsStatuses.length; i++) {
+          statuses[i] = makeQualified(hdfsStatuses[i], f);
         }
+
         return statuses;
       }
     }.run();

+ 22 - 12
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java

@@ -232,32 +232,42 @@ public class JsonUtil {
     return m;
   }
 
+  private static Map<String, Object> toJson(final DirectoryListing listing)
+      throws IOException {
+    final Map<String, Object> m = new TreeMap<>();
+    // Serialize FileStatus[] to a FileStatuses map
+    m.put("partialListing", toJsonMap(listing.getPartialListing()));
+    // Simple int
+    m.put("remainingEntries", listing.getRemainingEntries());
+
+    return m;
+  }
+
   public static String toJsonString(final DirectoryListing listing) throws
       IOException {
 
     if (listing == null) {
       return null;
     }
-
-    final Map<String, Object> m = new TreeMap<>();
-    m.put("partialListing", toJsonArray(listing.getPartialListing()));
-    m.put("remainingEntries", listing.getRemainingEntries());
-    return MAPPER.writeValueAsString(m);
+    return toJsonString(DirectoryListing.class, toJson(listing));
   }
 
-  private static Object[] toJsonArray(HdfsFileStatus[] statuses) throws
+  private static Map<String, Object> toJsonMap(HdfsFileStatus[] statuses) throws
       IOException {
     if (statuses == null) {
       return null;
     }
-    if (statuses.length == 0) {
-      return EMPTY_OBJECT_ARRAY;
-    }
-    final Object[] a = new Object[statuses.length];
+
+    final Map<String, Object> fileStatuses = new TreeMap<>();
+    final Map<String, Object> fileStatus = new TreeMap<>();
+    fileStatuses.put("FileStatuses", fileStatus);
+    final Object[] array = new Object[statuses.length];
+    fileStatus.put("FileStatus", array);
     for (int i = 0; i < statuses.length; i++) {
-      a[i] = toJsonMap(statuses[i]);
+      array[i] = toJsonMap(statuses[i]);
     }
-    return a;
+
+    return fileStatuses;
   }
 
   /** Convert a LocatedBlock[] to a Json array. */

+ 122 - 61
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md

@@ -597,15 +597,15 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
 
         curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=<CHILD>"
 
-    The client receives a response with a batch of [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
+    The client receives a response with a [`DirectoryListing` JSON object](#DirectoryListing_JSON_Schema), which contains a [`FileStatuses` JSON object](#FileStatuses_JSON_Schema), as well as iteration information:
 
         HTTP/1.1 200 OK
         Cache-Control: no-cache
-        Expires: Tue, 30 Aug 2016 16:42:16 GMT
-        Date: Tue, 30 Aug 2016 16:42:16 GMT
+        Expires: Thu, 08 Sep 2016 03:40:38 GMT
+        Date: Thu, 08 Sep 2016 03:40:38 GMT
         Pragma: no-cache
-        Expires: Tue, 30 Aug 2016 16:42:16 GMT
-        Date: Tue, 30 Aug 2016 16:42:16 GMT
+        Expires: Thu, 08 Sep 2016 03:40:38 GMT
+        Date: Thu, 08 Sep 2016 03:40:38 GMT
         Pragma: no-cache
         Content-Type: application/json
         X-FRAME-OPTIONS: SAMEORIGIN
@@ -613,56 +613,61 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).listStatu
         Server: Jetty(6.1.26)
 
         {
-            "partialListing": [
-                {
-                    "accessTime": 0,
-                    "blockSize": 0,
-                    "childrenNum": 0,
-                    "fileId": 16389,
-                    "group": "supergroup",
-                    "length": 0,
-                    "modificationTime": 1472575493064,
-                    "owner": "andrew",
-                    "pathSuffix": "anotherdir",
-                    "permission": "755",
-                    "replication": 0,
-                    "storagePolicy": 0,
-                    "type": "DIRECTORY"
+            "DirectoryListing": {
+                "partialListing": {
+                    "FileStatuses": {
+                        "FileStatus": [
+                            {
+                                "accessTime": 0,
+                                "blockSize": 0,
+                                "childrenNum": 0,
+                                "fileId": 16387,
+                                "group": "supergroup",
+                                "length": 0,
+                                "modificationTime": 1473305882563,
+                                "owner": "andrew",
+                                "pathSuffix": "bardir",
+                                "permission": "755",
+                                "replication": 0,
+                                "storagePolicy": 0,
+                                "type": "DIRECTORY"
+                            },
+                            {
+                                "accessTime": 1473305896945,
+                                "blockSize": 1024,
+                                "childrenNum": 0,
+                                "fileId": 16388,
+                                "group": "supergroup",
+                                "length": 0,
+                                "modificationTime": 1473305896965,
+                                "owner": "andrew",
+                                "pathSuffix": "bazfile",
+                                "permission": "644",
+                                "replication": 3,
+                                "storagePolicy": 0,
+                                "type": "FILE"
+                            }
+                        ]
+                    }
                 },
-                {
-                    "accessTime": 0,
-                    "blockSize": 0,
-                    "childrenNum": 0,
-                    "fileId": 16386,
-                    "group": "supergroup",
-                    "length": 0,
-                    "modificationTime": 1472575274776,
-                    "owner": "andrew",
-                    "pathSuffix": "somedir",
-                    "permission": "755",
-                    "replication": 0,
-                    "storagePolicy": 0,
-                    "type": "DIRECTORY"
-                }
-            ],
-            "remainingEntries": 1
+                "remainingEntries": 2
+            }
         }
 
-
 If `remainingEntries` is non-zero, there are additional entries in the directory.
 To query the next batch, set the `startAfter` parameter to the `pathSuffix` of the last item returned in the current batch. For example:
 
-        curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=somedir"
+        curl -i  "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS_BATCH&startAfter=bazfile"
 
 Which will return the next batch of directory entries:
 
         HTTP/1.1 200 OK
         Cache-Control: no-cache
-        Expires: Tue, 30 Aug 2016 16:46:23 GMT
-        Date: Tue, 30 Aug 2016 16:46:23 GMT
+        Expires: Thu, 08 Sep 2016 03:43:20 GMT
+        Date: Thu, 08 Sep 2016 03:43:20 GMT
         Pragma: no-cache
-        Expires: Tue, 30 Aug 2016 16:46:23 GMT
-        Date: Tue, 30 Aug 2016 16:46:23 GMT
+        Expires: Thu, 08 Sep 2016 03:43:20 GMT
+        Date: Thu, 08 Sep 2016 03:43:20 GMT
         Pragma: no-cache
         Content-Type: application/json
         X-FRAME-OPTIONS: SAMEORIGIN
@@ -670,24 +675,45 @@ Which will return the next batch of directory entries:
         Server: Jetty(6.1.26)
 
         {
-            "partialListing": [
-                {
-                    "accessTime": 1472575333568,
-                    "blockSize": 1024,
-                    "childrenNum": 0,
-                    "fileId": 16388,
-                    "group": "supergroup",
-                    "length": 224,
-                    "modificationTime": 1472575334222,
-                    "owner": "andrew",
-                    "pathSuffix": "somefile",
-                    "permission": "644",
-                    "replication": 3,
-                    "storagePolicy": 0,
-                    "type": "FILE"
-                }
-            ],
-            "remainingEntries": 0
+            "DirectoryListing": {
+                "partialListing": {
+                    "FileStatuses": {
+                        "FileStatus": [
+                            {
+                                "accessTime": 0,
+                                "blockSize": 0,
+                                "childrenNum": 0,
+                                "fileId": 16386,
+                                "group": "supergroup",
+                                "length": 0,
+                                "modificationTime": 1473305878951,
+                                "owner": "andrew",
+                                "pathSuffix": "foodir",
+                                "permission": "755",
+                                "replication": 0,
+                                "storagePolicy": 0,
+                                "type": "DIRECTORY"
+                            },
+                            {
+                                "accessTime": 1473305902864,
+                                "blockSize": 1024,
+                                "childrenNum": 0,
+                                "fileId": 16389,
+                                "group": "supergroup",
+                                "length": 0,
+                                "modificationTime": 1473305902878,
+                                "owner": "andrew",
+                                "pathSuffix": "quxfile",
+                                "permission": "644",
+                                "replication": 3,
+                                "storagePolicy": 0,
+                                "type": "FILE"
+                            }
+                        ]
+                    }
+                },
+                "remainingEntries": 0
+            }
         }
 
 Batch size is controlled by the `dfs.ls.limit` option on the NameNode.
@@ -1672,6 +1698,41 @@ A `FileStatuses` JSON object represents an array of `FileStatus` JSON objects.
 
 See also: [`FileStatus` Properties](#FileStatus_Properties), [`LISTSTATUS`](#List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
 
+### DirectoryListing JSON Schema
+
+A `DirectoryListing` JSON object represents a batch of directory entries while iteratively listing a directory. It contains a `FileStatuses` JSON object as well as iteration information.
+
+```json
+{
+  "name"      : "DirectoryListing",
+  "properties":
+  {
+    "DirectoryListing":
+    {
+      "type"      : "object",
+      "properties":
+      {
+        "partialListing":
+        {
+          "description": "A partial directory listing",
+          "type"       : "object", // A FileStatuses object
+          "required"   : true
+        },
+        "remainingEntries":
+        {
+          "description": "Number of remaining entries",
+          "type"       : "integer",
+          "required"   : true
+        }
+      }
+    }
+  }
+
+}
+```
+
+See also: [`FileStatuses` JSON Schema](#FileStatuses_JSON_Schema), [`LISTSTATUS_BATCH`](#Iteratively_List_a_Directory), [FileStatus](../../api/org/apache/hadoop/fs/FileStatus.html)
+
 ### Long JSON Schema
 
 ```json