Browse Source

HADOOP-17913. Filter deps with release labels (#3437)

Gautham B A 3 years ago
parent
commit
16ca362564
2 changed files with 89 additions and 12 deletions
  1. 43 3
      dev-support/docker/README.md
  2. 46 9
      dev-support/docker/pkg-resolver/resolve.py

+ 43 - 3
dev-support/docker/README.md

@@ -26,9 +26,11 @@ the other. Different platforms have different toolchains. Some packages tend to
 across platforms and most commonly, a package that's readily available in one platform's toolchain
 across platforms and most commonly, a package that's readily available in one platform's toolchain
 isn't available on another. We thus, resort to building and installing the package from source,
 isn't available on another. We thus, resort to building and installing the package from source,
 causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all
 causing duplication of code since this needs to be done for all the Dockerfiles pertaining to all
-the platforms. We need a system to track a dependency - for a package - for a platform. Thus,
-there's a lot of diversity that needs to be handled for managing package dependencies and
-`pkg-resolver` caters to that.
+the platforms. We need a system to track a dependency - for a package - for a platform -
+and, optionally, for a release.
+Thus, there's a lot of diversity that needs to be handled for
+managing package dependencies, and
+`pkg-resolver` caters to that.
 
 
 ## Supported platforms
 ## Supported platforms
 
 
@@ -53,6 +55,21 @@ there's a lot of diversity that needs to be handled for managing package depende
       "package_2",
       "package_2",
       "package_3"
       "package_3"
     ]
     ]
+  },
+  "dependency_3": {
+    "platform_1": {
+      "release_1": "package_1_1_1",
+      "release_2": [
+        "package_1_2_1",
+        "package_1_2_2"
+      ]
+    },
+    "platform_2": [
+      "package_2_1",
+      {
+        "release_1": "package_2_1_1"
+      }
+    ]
   }
   }
 }
 }
 ```
 ```
@@ -65,6 +82,29 @@ how to interpret the above JSON -
 2. For `dependency_2`, `package_1` and `package_2` need to be installed for `platform_2`.
 2. For `dependency_2`, `package_1` and `package_2` need to be installed for `platform_2`.
 3. For `dependency_2`, `package_1`, `package_2` and `package_3` need to be installed for
 3. For `dependency_2`, `package_1`, `package_2` and `package_3` need to be installed for
    `platform_1`.
    `platform_1`.
+4. For `dependency_3`, `package_1_1_1` gets installed only if `release_1` has been specified
+   for `platform_1`.
+5. For `dependency_3`, the packages `package_1_2_1` and `package_1_2_2` get installed only
+   if `release_2` has been specified for `platform_1`.
+6. For `dependency_3`, for `platform_2`, `package_2_1` is always installed, but `package_2_1_1` gets
+   installed only if `release_1` has been specified.
+
+### Tool help
+
+```shell
+$ pkg-resolver/resolve.py -h
+usage: resolve.py [-h] [-r RELEASE] platform
+
+Platform package dependency resolver for building Apache Hadoop
+
+positional arguments:
+  platform              The name of the platform to resolve the dependencies for
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -r RELEASE, --release RELEASE
+                        The release label to filter the packages for the given platform
+```
 
 
 ## Standalone packages
 ## Standalone packages
 
 

+ 46 - 9
dev-support/docker/pkg-resolver/resolve.py

@@ -20,26 +20,55 @@
 Platform package dependency resolver for building Apache Hadoop.
 Platform package dependency resolver for building Apache Hadoop.
 """
 """
 
 
+import argparse
 import json
 import json
 import sys
 import sys
 from check_platform import is_supported_platform
 from check_platform import is_supported_platform
 
 
 
 
-def get_packages(platform):
+def get_packages(platform, release=None):
     """
     """
     Resolve and get the list of packages to install for the given platform.
     Resolve and get the list of packages to install for the given platform.
 
 
     :param platform: The platform for which the packages need to be resolved.
     :param platform: The platform for which the packages need to be resolved.
+    :param release: An optional parameter that filters the packages of the given platform for the
+    specified release.
     :return: A list of resolved packages to install.
     :return: A list of resolved packages to install.
     """
     """
     with open('pkg-resolver/packages.json', encoding='utf-8', mode='r') as pkg_file:
     with open('pkg-resolver/packages.json', encoding='utf-8', mode='r') as pkg_file:
         pkgs = json.loads(pkg_file.read())
         pkgs = json.loads(pkg_file.read())
     packages = []
     packages = []
-    for platforms in filter(lambda x: x.get(platform) is not None, pkgs.values()):
-        if isinstance(platforms.get(platform), list):
-            packages.extend(platforms.get(platform))
+
+    def process_package(package, in_release=False):
+        """
+        Processes the given package object that belongs to a platform and adds it to the packages
+        list variable in the parent scope.
+        In essence, this method recursively traverses the JSON structure defined in packages.json
+        and performs the core filtering.
+
+        :param package: The package object to process.
+        :param in_release: A boolean that indicates whether the current traversal belongs to a package
+        that needs to be filtered for the given release label.
+        """
+        if isinstance(package, list):
+            for entry in package:
+                process_package(entry, in_release)
+        elif isinstance(package, dict):
+            if release is None:
+                return
+            for entry in package.get(release, []):
+                process_package(entry, in_release=True)
+        elif isinstance(package, str):
+            # Filter out the package that doesn't belong to this release,
+            # if a release label has been specified.
+            if release is not None and not in_release:
+                return
+            packages.append(package)
         else:
         else:
-            packages.append(platforms.get(platform))
+            raise Exception('Unknown package of type: {}'.format(type(package)))
+
+    for platforms in filter(lambda x: x.get(platform) is not None, pkgs.values()):
+        process_package(platforms.get(platform))
     return packages
     return packages
 
 
 
 
@@ -49,13 +78,21 @@ if __name__ == '__main__':
               file=sys.stderr)
               file=sys.stderr)
         sys.exit(1)
         sys.exit(1)
 
 
-    platform_arg = sys.argv[1]
-    if not is_supported_platform(platform_arg):
+    arg_parser = argparse.ArgumentParser(
+        description='Platform package dependency resolver for building Apache Hadoop')
+    arg_parser.add_argument('-r', '--release', nargs=1, type=str,
+                            help='The release label to filter the packages for the given platform')
+    arg_parser.add_argument('platform', nargs=1, type=str,
+                            help='The name of the platform to resolve the dependencies for')
+    args = arg_parser.parse_args()
+
+    if not is_supported_platform(args.platform[0]):
         print(
         print(
             'ERROR: The given platform {} is not supported. '
             'ERROR: The given platform {} is not supported. '
             'Please refer to platforms.json for a list of supported platforms'.format(
             'Please refer to platforms.json for a list of supported platforms'.format(
-                platform_arg), file=sys.stderr)
+                args.platform), file=sys.stderr)
         sys.exit(1)
         sys.exit(1)
 
 
-    packages_to_install = get_packages(platform_arg)
+    packages_to_install = get_packages(args.platform[0],
+                                       args.release[0] if args.release is not None else None)
     print(' '.join(packages_to_install))
     print(' '.join(packages_to_install))