Browse Source

Merge pull request #2400 from swapanshridhar/branch-feature-AMBARI-14714-stable

swapanshridhar 7 years ago
parent
commit
a9d01a3303
100 changed files with 2689 additions and 1799 deletions
  1. 3 0
      .gitignore
  2. 0 19
      .reviewboardrc
  3. 70 0
      KEYS
  4. 14 0
      MANIFEST.in
  5. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/app.js
  6. 6 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/ambariViews/ViewsEditCtrl.js
  7. 6 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/ambariViews/ViewsListCtrl.js
  8. 2 2
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/clusters/ClusterInformationCtrl.js
  9. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/remoteClusters/RemoteClustersEditCtrl.js
  10. 22 8
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/stackVersions/StackVersionsCreateCtrl.js
  11. 18 4
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/stackVersions/StackVersionsEditCtrl.js
  12. 11 8
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupCreateCtrl.js
  13. 0 46
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupEditCtrl.js
  14. 4 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupsListCtrl.js
  15. 0 45
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/UserEditCtrl.js
  16. 4 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/UsersListCtrl.js
  17. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/directives/comboSearch.js
  18. 53 29
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/directives/editableList.js
  19. 10 8
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/i18n.config.js
  20. 4 2
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/ConfirmationModal.js
  21. 4 2
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/Filters.js
  22. 1 0
      ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/Stack.js
  23. 21 0
      ambari-admin/src/main/resources/ui/admin-web/app/styles/bootstrap-overrides.css
  24. 9 40
      ambari-admin/src/main/resources/ui/admin-web/app/styles/main.css
  25. 74 0
      ambari-admin/src/main/resources/ui/admin-web/app/styles/stack-versions.css
  26. 9 3
      ambari-admin/src/main/resources/ui/admin-web/app/styles/user-management.css
  27. 4 0
      ambari-admin/src/main/resources/ui/admin-web/app/styles/views.css
  28. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/views/ambariViews/edit.html
  29. 0 8
      ambari-admin/src/main/resources/ui/admin-web/app/views/ambariViews/modals/create.html
  30. 1 2
      ambari-admin/src/main/resources/ui/admin-web/app/views/clusters/clusterInformation.html
  31. 10 1
      ambari-admin/src/main/resources/ui/admin-web/app/views/directives/editableList.html
  32. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/views/modals/ConfirmationModal.html
  33. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/views/sideNav.html
  34. 1 1
      ambari-admin/src/main/resources/ui/admin-web/app/views/stackVersions/list.html
  35. 24 8
      ambari-admin/src/main/resources/ui/admin-web/app/views/stackVersions/stackVersionPage.html
  36. 5 18
      ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/groupEdit.html
  37. 10 2
      ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/modals/userCreate.html
  38. 3 9
      ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/userEdit.html
  39. 3 1
      ambari-agent/conf/unix/ambari-agent.ini
  40. 168 103
      ambari-agent/conf/unix/install-helper.sh
  41. 1 1
      ambari-agent/conf/windows/ambari-agent.ini
  42. 44 1
      ambari-agent/pom.xml
  43. 10 6
      ambari-agent/src/main/package/deb/control/postrm
  44. 25 36
      ambari-agent/src/main/package/deb/control/preinst
  45. 1 0
      ambari-agent/src/main/package/deb/control/prerm
  46. 24 0
      ambari-agent/src/main/package/rpm/postremove.sh
  47. 2 2
      ambari-agent/src/main/package/rpm/posttrans_agent.sh
  48. 27 28
      ambari-agent/src/main/package/rpm/preinstall.sh
  49. 0 1
      ambari-agent/src/main/package/rpm/preremove.sh
  50. 155 196
      ambari-agent/src/main/python/ambari_agent/ActionQueue.py
  51. 3 1
      ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
  52. 18 4
      ambari-agent/src/main/python/ambari_agent/AlertStatusReporter.py
  53. 2 1
      ambari-agent/src/main/python/ambari_agent/AmbariAgent.py
  54. 68 9
      ambari-agent/src/main/python/ambari_agent/AmbariConfig.py
  55. 4 7
      ambari-agent/src/main/python/ambari_agent/BackgroundCommandExecutionHandle.py
  56. 18 0
      ambari-agent/src/main/python/ambari_agent/ClusterMetadataCache.py
  57. 27 4
      ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py
  58. 168 0
      ambari-agent/src/main/python/ambari_agent/CommandHooksOrchestrator.py
  59. 82 33
      ambari-agent/src/main/python/ambari_agent/CommandStatusDict.py
  60. 104 42
      ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
  61. 112 0
      ambari-agent/src/main/python/ambari_agent/ComponentVersionReporter.py
  62. 9 3
      ambari-agent/src/main/python/ambari_agent/ConfigurationBuilder.py
  63. 6 2
      ambari-agent/src/main/python/ambari_agent/Constants.py
  64. 254 168
      ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
  65. 4 3
      ambari-agent/src/main/python/ambari_agent/ExitHelper.py
  66. 36 44
      ambari-agent/src/main/python/ambari_agent/FileCache.py
  67. 14 0
      ambari-agent/src/main/python/ambari_agent/Grep.py
  68. 5 5
      ambari-agent/src/main/python/ambari_agent/Hardware.py
  69. 36 21
      ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py
  70. 7 2
      ambari-agent/src/main/python/ambari_agent/HostCleanup.py
  71. 3 1
      ambari-agent/src/main/python/ambari_agent/HostInfo.py
  72. 10 6
      ambari-agent/src/main/python/ambari_agent/HostStatusReporter.py
  73. 44 6
      ambari-agent/src/main/python/ambari_agent/InitializerModule.py
  74. 5 8
      ambari-agent/src/main/python/ambari_agent/LiveStatus.py
  75. 3 3
      ambari-agent/src/main/python/ambari_agent/NetUtil.py
  76. 4 1
      ambari-agent/src/main/python/ambari_agent/PingPortListener.py
  77. 92 128
      ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
  78. 6 6
      ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py
  79. 182 270
      ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
  80. 6 2
      ambari-agent/src/main/python/ambari_agent/Utils.py
  81. 31 20
      ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
  82. 66 0
      ambari-agent/src/main/python/ambari_agent/listeners/AgentActionsListener.py
  83. 4 3
      ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py
  84. 3 2
      ambari-agent/src/main/python/ambari_agent/listeners/CommandsEventListener.py
  85. 11 4
      ambari-agent/src/main/python/ambari_agent/listeners/ConfigurationEventListener.py
  86. 6 5
      ambari-agent/src/main/python/ambari_agent/listeners/HostLevelParamsEventListener.py
  87. 19 3
      ambari-agent/src/main/python/ambari_agent/listeners/MetadataEventListener.py
  88. 21 5
      ambari-agent/src/main/python/ambari_agent/listeners/ServerResponsesListener.py
  89. 3 2
      ambari-agent/src/main/python/ambari_agent/listeners/TopologyEventListener.py
  90. 40 2
      ambari-agent/src/main/python/ambari_agent/listeners/__init__.py
  91. 11 29
      ambari-agent/src/main/python/ambari_agent/main.py
  92. 17 0
      ambari-agent/src/main/python/ambari_agent/models/__init__.py
  93. 47 0
      ambari-agent/src/main/python/ambari_agent/models/commands.py
  94. 22 0
      ambari-agent/src/main/python/ambari_agent/models/hooks.py
  95. 20 3
      ambari-agent/src/main/python/ambari_agent/security.py
  96. 47 211
      ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
  97. 2 2
      ambari-agent/src/test/python/ambari_agent/TestAlerts.py
  98. 29 0
      ambari-agent/src/test/python/ambari_agent/TestAmbariConfig.py
  99. 0 78
      ambari-agent/src/test/python/ambari_agent/TestCheckWebUI.py
  100. 89 0
      ambari-agent/src/test/python/ambari_agent/TestCommandHooksOrchestrator.py

+ 3 - 0
.gitignore

@@ -30,3 +30,6 @@ createDDL.jdbc
 /contrib/views/storm/src/main/resources/ui/node_modules/
 /contrib/views/storm/src/main/resources/ui/public/
 /contrib/views/storm/src/main/resources/ui/npm-debug.log
+/dist
+/build
+ambari_python.egg-info

+ 0 - 19
.reviewboardrc

@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific
-
-REVIEWBOARD_URL = 'https://reviews.apache.org'
-REPOSITORY = 'ambari'
-
-# Default value for the request's Reviewers > Groups field
-TARGET_GROUPS = 'Ambari'

+ 70 - 0
KEYS

@@ -592,3 +592,73 @@ y98PGhV5I/D/lnMEmt0jiGBolpFHvFn7wdTXgebU0yng5w8XNFlNMwrj3NuTl+oF
 a2PUifxYEOeykKn4
 =id3i
 -----END PGP PUBLIC KEY BLOCK-----
+pub   rsa2048 2018-07-09 [SC] [expires: 2020-07-08]
+      561D640083511C434148B45C6AB46D7B4E0D7845
+uid           [ultimate] Ishan Bhatt <ishanbha@apache.org>
+sub   rsa2048 2018-07-09 [E] [expires: 2020-07-08]
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBFtDtRoBCADHmvavRKXQt5eCS3qIojd00QEta5v0ZFze5iPK71FHxkBrAa29
+r9bZWh0dp80FdlOmpzOeyMblMGEi1LHI3mzC8/JoV5bvgdRtG7aZ6aWf3lkwdeCw
+XOuEe/HggSGDknlyZgPwRTu87nzzJLzfk5ks5tYbqv841/FtHOhMELXyqtKM5VFV
+mys53hIqtn+TeO5eJGIbpUw1mccafJRJgWv9Wf4tShKf4kGLAtU/r9/LxLoeQDYM
+mfVg4WCHHqJfNqXVvaQP+JnA/nBFsOA/0FVOJDr8Wk6knxryjs6Ag3oZQSbsSp7/
+pCTJFb2yHMxmjkMdn7UsLZykVPD+IsjKj7ypABEBAAG0IUlzaGFuIEJoYXR0IDxp
+c2hhbmJoYUBhcGFjaGUub3JnPokBVAQTAQgAPhYhBFYdZACDURxDQUi0XGq0bXtO
+DXhFBQJbQ7UaAhsDBQkDwmcABQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAAAoJEGq0
+bXtODXhFQsYH/ROTnlj2eJCFbva33ehJBxPdakvdQaPK9MyqpAtgzD0h8tKjWiGB
+acB3CuRMqoeaunyUG7bNHIhPooAUh2UopyRxZjG12LeXw3YQCYs/U9Xy2ASYKzcE
+stGWy3hy61lZImyR/E5lnWxZ9oqQxxRQbVbvFhxom/GJO9y+cPaSApm3YiT9+SYT
+OCLU827SW+g2LOuAPKwt2FioKvin6MN1ayYcFx/1A/UTUjk7iZAl6hE7WQKAOK5o
+ACfe2AhQLX46alLwUZhNdkKkWEX8hbgMLAnnQWvaLKK739rOduLiRqOktY/xtRj6
+TLA3xW3u1fmVW70i+Kwujrn9tqiXHnDNs8S5AQ0EW0O1GgEIANH/P5rqXLIdoi+L
+ks3uawYQ7YhxkRx3U9eWvL+4zExOhcifkB5iTQSo35t0CMWvdug0T24HgJpQSYDQ
+z8wXJe3A64zsGKIXCsN5TuA4oBz8WwIYegHmj0V0HKUc/wbohtXcus/MGCZz08AU
+svaT26Z6u1/wtWlNRdi5uiy96Fj+AEwYICD3SoNcoSGy6EU+Ip/QiXUPcS/Odh59
+izzLyslKwxGKZaY7yp2mU4OySu2AooGzc6LlOora0JKHHHjHH4y57JI80KM5AG3U
+QBy2Hnpj84lNL3oxPzeYG03isMScKU/n0gFsRILUkWNMiPX6IvaF/q0tf1K9mi9f
+whtGH/EAEQEAAYkBPAQYAQgAJhYhBFYdZACDURxDQUi0XGq0bXtODXhFBQJbQ7Ua
+AhsMBQkDwmcAAAoJEGq0bXtODXhFyA0IALMFr9HxUryX9lT88MXAI/NwPnqjjekk
+vFg6JrPDJQLOW9SYiOJlM2Zkgu7aMIeQOHWKTw5QJnLQCnFHH8WXsTOiDgr9Ta4A
+pQ7IJ3gvmfBS3kX7VyQD248RvF1H/UxDypwXQnWLpciR22+aHzzr7H44zr9Vee7e
+ZERa8bo5TVmVYiUSCYXP78Afc7uecNe+66U+tpYiqr6nzGqk1pvS2TBfixnnh9Nl
+FWbAngFSPgG+I7FfGXDMK3MvU8zOK5X456WEfpWVNJPcQuLWp1qw6fJwnRU17BYe
+L6iR19lcaL2CweiBdUrORuWJsnb3WOLPD6RE8WIMWSOUniPgD2XvzRA=
+=4Crm
+-----END PGP PUBLIC KEY BLOCK-----
+pub   rsa2048 2018-08-21 [SC] [expires: 2020-08-20]
+      1364238D9A46F50AA189C976BD8BECB63DEC72A9
+uid           [ultimate] Oliver Szabo <oleewere@apache.org>
+sub   rsa2048 2018-08-21 [E] [expires: 2020-08-20]
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBFt8ILcBCACiv3jQaXWw7KuFuOioBPGxOia/25S+NqhyGGBjZMBac0UY135t
+tdxzZ7x5kD6AsceOlAqqMGSy9TLAM5AWyir3PWRgKBrEUhnN+2zjUudwer1Vdm7N
+3nkrbAdrfjVkNPLLVud4EaZXNbxPSF4XZByueNdZLaEs2xRWTxh7aBvCDjOnOmQe
+k1sPx6rCn/EkmIvq33A/2h86YFBy15yfNfb0EdBdof+HlbPtT6W5Ttx38d4a7uEk
+kFtZSWDMPs9JJx9UwIjG9l4syEtE1uNYFCNg7YLrPbPMQBJahtfgDuBczG+xx3bC
+i3N5UHGNiYuDKcgFoBoYUIag1zRPo1vk5uftABEBAAG0Ik9saXZlciBTemFibyA8
+b2xlZXdlcmVAYXBhY2hlLm9yZz6JAVQEEwEIAD4WIQQTZCONmkb1CqGJyXa9i+y2
+PexyqQUCW3wgtwIbAwUJA8JnAAULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRC9
+i+y2PexyqcPtB/9SNRLL8+edm36hBRAt3/9ZpIaT/vO6Q1vvYht5sZmN0B8fkDi0
+S6WSg4WdeHaVynuZAcvntPkfx8BWJGYbWhj+aQnA6l1vSEtbq2Kj4f9RK+wPFLGX
+D4+7G3PkWycbGWUd/kQU5hBQMmiQo5kIidfMc/zLsGN/CEB+8Yikw66p7Px5GtvU
+RyisPrIenRJW4AcZr6cUuHmN5qwFS8NKYzSRf8b+KuZsy/Py8OJrwH8I5bK1eVDw
+N5l3sN01wrw817qTwIGSX1iLGcs3VIYHPkcXN1l2V8X59e16wCrOHG8Kv6C81if8
+eu8aZby4J0EEqhuu9wHPKSn0wP+NGaMkzYv4uQENBFt8ILcBCAC4dErCRiKtFPAM
+H5eInuJWH8wZv8Td3me6gDdi7DNatjp8tmI2qaHiQ9MUDOyNIhPkS1c6Yu2CYVx/
+5oZAoXm4ovNoPYjM6/B9c4ECm2dFu+Mm7jW5hw0MtEHk4olNN3TePyU+T7HVsIe+
+vROu2eC+mNXIwXddSWeE1IEKlfM7OXL4AEUtY+kUYTnX2yaQNrZtgJ0cS6rskBT4
+2AOdn0L0aW9isZkbmbYSlU/9HtDDcZ60CeNsOvNpUm4pSvliVJPKftX/1139XlI6
+nSMShh7un4ORLGID0HTsesh3mYKoKGMKbOOoMCkUGwrgYxn1H7/G29d6vFSPdMIm
+YrfAGE3pABEBAAGJATwEGAEIACYWIQQTZCONmkb1CqGJyXa9i+y2PexyqQUCW3wg
+twIbDAUJA8JnAAAKCRC9i+y2PexyqSrZB/9CSxZ7kQhwq0Ryt4pposz5lCLuevGw
+ZlbbxOM5AvOT64OpYUZDwnQYdxJWEEX+MR94PYpiBUCiCHd/Fz1XmuIoAZ509tdt
+zNhNfq44uF6Cabu2/grPMS68AsJZmVKmoEeT5nI6cgrSns8rc+TpVf7LM+lV+PHW
+u4VmDh/cnPSggSAHu8YNDckYVDwykDzLOC9VdPoRaEE2V16OaqTu08E6YfTAszpl
+79yiRnAwZrLsxK58TXrNc8pDI00CNpPkXYpG0zczvB83bFAm/X62KqLlw6Eij2uo
+J5nBsZ9Xh9tVWA7NDwfhIO9ex0FF418Pr52JiUyvUnw/8kaKi84p8yyG
+=CsuN
+-----END PGP PUBLIC KEY BLOCK-----

+ 14 - 0
MANIFEST.in

@@ -0,0 +1,14 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+include ambari-common/src/main/python/ambari_commons/resources/*
+include ambari-common/src/main/python/ambari_commons/libs/**/*.so
+include ambari-common/src/main/python/pluggable_stack_definition/configs/*

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/app.js

@@ -28,7 +28,7 @@ angular.module('ambariAdminConsole', [
 .constant('Settings', {
   siteRoot: '{proxy_root}/'.replace(/\{.+\}/g, ''),
 	baseUrl: '{proxy_root}/api/v1'.replace(/\{.+\}/g, ''),
-  testMode: (window.location.port == 8000),
+  testMode: false,
   mockDataPrefix: 'assets/data/',
   isLDAPConfigurationSupported: false,
   isLoginActivitiesSupported: false,

+ 6 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/ambariViews/ViewsEditCtrl.js

@@ -441,7 +441,12 @@ angular.module('ambariAdminConsole')
         $t('common.deleteConfirmation', {
           instanceType: $t('views.viewInstance'),
           instanceName: instance.ViewInstanceInfo.label
-        })
+        }),
+        null,
+        null,
+        {
+          primaryClass: 'btn-danger'
+        }
       ).then(function() {
         View.deleteInstance(instance.ViewInstanceInfo.view_name, instance.ViewInstanceInfo.version, instance.ViewInstanceInfo.instance_name)
           .then(function() {

+ 6 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/ambariViews/ViewsListCtrl.js

@@ -107,7 +107,12 @@ function($scope, View, $modal, Alert, ConfirmationModal, $translate, Settings, P
       $t('common.deleteConfirmation', {
         instanceType: $t('views.viewInstance'),
         instanceName: instance.label
-      })
+      }),
+      null,
+      null,
+      {
+        primaryClass: 'btn-danger'
+      }
     ).then(function () {
       View.deleteInstance(instance.view_name, instance.version, instance.instance_name)
         .then(function () {

+ 2 - 2
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/clusters/ClusterInformationCtrl.js

@@ -55,13 +55,13 @@ function($scope, $http, $location, Cluster, $routeParams, $translate, $rootScope
 
   $scope.downloadBlueprint = function () {
     if (window.navigator.msSaveOrOpenBlob) {
-      var blob = new Blob([decodeURIComponent(encodeURI($scope.blueprint))], {
+      var blob = new Blob([decodeURIComponent(encodeURIComponent($scope.blueprint))], {
         type: "text/csv;charset=utf-8;"
       });
       navigator.msSaveBlob(blob, 'blueprint.json');
     } else {
       var a = document.createElement('a');
-      a.href = 'data:attachment/csv;charset=utf-8,' + encodeURI($scope.blueprint);
+      a.href = 'data:attachment/json;charset=utf-8,' + encodeURIComponent($scope.blueprint);
       a.target = '_blank';
       a.download = 'blueprint.json';
       document.body.appendChild(a);

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/remoteClusters/RemoteClustersEditCtrl.js

@@ -80,7 +80,7 @@ angular.module('ambariAdminConsole')
 
             RemoteCluster.edit(payload, config).then(function(data) {
                 Alert.success($t('views.alerts.credentialsUpdated'));
-                $scope.form.passwordChangeForm.$setPristine();
+                $scope.form.passwordChangeForm = {};
               })
               .catch(function(data) {
                 console.log(data);

+ 22 - 8
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/stackVersions/StackVersionsCreateCtrl.js

@@ -215,6 +215,7 @@ angular.module('ambariAdminConsole')
             stackOs.selected = false;
             stackOs.repositories.forEach(function(repo) {
               repo.Repositories.initial_base_url = repo.Repositories.default_base_url;
+              repo.Repositories.initial_repo_id = repo.Repositories.repo_id;
             });
             $scope.osList.push(stackOs);
           }
@@ -274,7 +275,7 @@ angular.module('ambariAdminConsole')
       $scope.osList.forEach(function(os) {
         if (os.repositories && os.selected) {
           os.repositories.forEach(function(repo) {
-            if (repo.invalidBaseUrl) {
+            if (repo.invalidBaseUrl && $scope.showRepo(repo)) {
               validBaseUrlsExist = false;
             }
           })
@@ -292,7 +293,7 @@ angular.module('ambariAdminConsole')
         enabled = true
       }
     });
-    return !(enabled && $scope.validBaseUrlsExist());
+    return !($scope.useRedhatSatellite || (enabled && $scope.validBaseUrlsExist()));
   };
 
   $scope.defaulfOSRepos = {};
@@ -346,7 +347,7 @@ angular.module('ambariAdminConsole')
                 $t('versions.register.error.body'),
                 null,
                 null,
-                true
+                {hideCancelButton: true}
               )
             });
           }
@@ -399,15 +400,28 @@ angular.module('ambariAdminConsole')
         }
       });
     }
+  };
+
+  $scope.useRedHatCheckbox = function() {
     if ($scope.useRedhatSatellite) {
       ConfirmationModal.show(
-          $t('common.important'),
-          {
-            "url": 'views/modals/BodyForUseRedhatSatellite.html'
-          }
+        $t('versions.useRedhatSatellite.title'),
+        {
+          "url": 'views/modals/BodyForUseRedhatSatellite.html'
+        }
       ).catch(function () {
         $scope.useRedhatSatellite = !$scope.useRedhatSatellite;
       });
+    } else {
+      if ($scope.osList) {
+        $scope.osList.forEach(function(os) {
+          if (os.repositories) {
+            os.repositories.forEach(function(repo) {
+              repo.isEditing = false;
+            })
+          }
+        });
+      }
     }
   };
 
@@ -419,7 +433,7 @@ angular.module('ambariAdminConsole')
       },
       $t('common.controls.ok'),
       $t('common.controls.cancel'),
-      true
+      {hideCancelButton: true}
     )
   };
 

+ 18 - 4
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/stackVersions/StackVersionsEditCtrl.js

@@ -66,6 +66,7 @@ angular.module('ambariAdminConsole')
         $scope.defaulfOSRepos[os.OperatingSystems.os_type] = {};
         os.repositories.forEach(function(repo) {
           $scope.defaulfOSRepos[os.OperatingSystems.os_type][repo.Repositories.repo_id] = repo.Repositories.base_url;
+          repo.Repositories.initial_repo_id = repo.Repositories.repo_id;
         });
       });
       $scope.repoVersionFullName = response.repoVersionFullName;
@@ -331,15 +332,28 @@ angular.module('ambariAdminConsole')
         }
       });
     }
+  };
+
+  $scope.useRedHatCheckbox = function() {
     if ($scope.useRedhatSatellite) {
       ConfirmationModal.show(
-          $t('common.important'),
-          {
-            "url": 'views/modals/BodyForUseRedhatSatellite.html'
-          }
+        $t('versions.useRedhatSatellite.title'),
+        {
+          "url": 'views/modals/BodyForUseRedhatSatellite.html'
+        }
       ).catch(function () {
         $scope.useRedhatSatellite = !$scope.useRedhatSatellite;
       });
+    } else {
+      if ($scope.osList) {
+        $scope.osList.forEach(function(os) {
+          if (os.repositories) {
+            os.repositories.forEach(function(repo) {
+              repo.isEditing = false;
+            })
+          }
+        });
+      }
     }
   };
 

+ 11 - 8
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupCreateCtrl.js

@@ -19,8 +19,8 @@
 
 angular.module('ambariAdminConsole')
 .controller('GroupCreateCtrl',
-['$scope', '$rootScope', 'Group', '$location', 'Alert', 'UnsavedDialog', '$translate', '$modalInstance', 'Cluster', 'RoleDetailsModal',
-function($scope, $rootScope, Group, $location, Alert, UnsavedDialog, $translate, $modalInstance, Cluster, RoleDetailsModal) {
+['$scope', '$rootScope', 'Group', '$location', 'Alert', 'UnsavedDialog', '$translate', '$modalInstance', 'Cluster', 'RoleDetailsModal', '$q',
+function($scope, $rootScope, Group, $location, Alert, UnsavedDialog, $translate, $modalInstance, Cluster, RoleDetailsModal, $q) {
   var $t = $translate.instant;
 
   $scope.form = {};
@@ -66,7 +66,7 @@ function($scope, $rootScope, Group, $location, Alert, UnsavedDialog, $translate,
     }).map(function(item) {
       return item.trim();
     });
-    group.saveMembers().catch(function(data) {
+    return group.saveMembers().catch(function(data) {
       Alert.error($t('groups.alerts.cannotUpdateGroupMembers'), "<div class='break-word'>" + data.message + "</div>");
     });
   }
@@ -82,10 +82,13 @@ function($scope, $rootScope, Group, $location, Alert, UnsavedDialog, $translate,
     if ($scope.form.groupCreateForm.$valid) {
       var group = new Group($scope.formData.groupName);
       group.save().then(function () {
-        saveMembers(group, $scope.formData.members);
-        saveRole();
-        $modalInstance.dismiss('created');
-        Alert.success($t('groups.alerts.groupCreated', {groupName: $scope.formData.groupName}));
+        $q.all([
+          saveMembers(group, $scope.formData.members),
+          saveRole()
+        ]).then(function (value) {
+          $modalInstance.dismiss('created');
+          Alert.success($t('groups.alerts.groupCreated', {groupName: $scope.formData.groupName}));
+        });
       })
       .catch(function (data) {
         Alert.error($t('groups.alerts.groupCreationError'), data.data.message);
@@ -97,7 +100,7 @@ function($scope, $rootScope, Group, $location, Alert, UnsavedDialog, $translate,
     if (!$scope.formData.role || $scope.formData.role === 'NONE') {
       return;
     }
-    Cluster.createPrivileges(
+    return Cluster.createPrivileges(
       {
         clusterId: $rootScope.cluster.Clusters.cluster_name
       },

+ 0 - 46
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupEditCtrl.js

@@ -71,52 +71,6 @@ function($scope, $rootScope, Group, $routeParams, Cluster, View, Alert, Confirma
     $scope.isMembersEditing = false;
   };
 
-  $scope.deleteGroup = function(group) {
-    ConfirmationModal.show(
-      $t('common.delete', {
-        term: $t('common.group')
-      }),
-      $t('common.deleteConfirmation', {
-        instanceType: $t('common.group').toLowerCase(),
-        instanceName: '"' + group.group_name + '"'
-      })
-    ).then(function() {
-      Cluster.getPrivilegesForResource({
-        nameFilter : group.group_name,
-        typeFilter : {value: 'GROUP'}
-      }).then(function(data) {
-        var clusterPrivilegesIds = [];
-        var viewsPrivileges = [];
-        if (data.items && data.items.length) {
-          angular.forEach(data.items[0].privileges, function(privilege) {
-            if (privilege.PrivilegeInfo.principal_type === 'GROUP') {
-              if (privilege.PrivilegeInfo.type === 'VIEW') {
-                viewsPrivileges.push({
-                  id: privilege.PrivilegeInfo.privilege_id,
-                  view_name: privilege.PrivilegeInfo.view_name,
-                  version: privilege.PrivilegeInfo.version,
-                  instance_name: privilege.PrivilegeInfo.instance_name
-                });
-              } else {
-                clusterPrivilegesIds.push(privilege.PrivilegeInfo.privilege_id);
-              }
-            }
-          });
-        }
-        group.destroy().then(function() {
-          $location.url('/userManagement?tab=groups');
-          if (clusterPrivilegesIds.length) {
-            Cluster.deleteMultiplePrivileges($rootScope.cluster.Clusters.cluster_name, clusterPrivilegesIds);
-          }
-          angular.forEach(viewsPrivileges, function(privilege) {
-            View.deletePrivilege(privilege);
-          });
-        });
-      });
-    });
-  };
-
-
   $scope.removeViewPrivilege = function(name, privilege) {
     var privilegeObject = {
         id: privilege.privilege_id,

+ 4 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/GroupsListCtrl.js

@@ -107,7 +107,10 @@ function($scope, Group, $modal, ConfirmationModal, $rootScope, $translate, Setti
       $t('common.deleteConfirmation', {
         instanceType: $t('common.group').toLowerCase(),
         instanceName: '"' + group.group_name + '"'
-      })
+      }),
+      null,
+      null,
+      {primaryClass: 'btn-danger'}
     ).then(function() {
       Cluster.getPrivilegesForResource({
         nameFilter : group.group_name,

+ 0 - 45
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/UserEditCtrl.js

@@ -218,51 +218,6 @@ function($scope, $rootScope, $routeParams, Cluster, User, View, $modal, $locatio
     });
   };
 
-  $scope.deleteUser = function () {
-    ConfirmationModal.show(
-      $t('common.delete', {
-        term: $t('common.user')
-      }),
-      $t('common.deleteConfirmation', {
-        instanceType: $t('common.user').toLowerCase(),
-        instanceName: '"' + $scope.user.user_name + '"'
-      })
-    ).then(function () {
-      Cluster.getPrivilegesForResource({
-        nameFilter: $scope.user.user_name,
-        typeFilter: {value: 'USER'}
-      }).then(function (data) {
-        var clusterPrivilegesIds = [];
-        var viewsPrivileges = [];
-        if (data.items && data.items.length) {
-          angular.forEach(data.items[0].privileges, function (privilege) {
-            if (privilege.PrivilegeInfo.principal_type === 'USER') {
-              if (privilege.PrivilegeInfo.type === 'VIEW') {
-                viewsPrivileges.push({
-                  id: privilege.PrivilegeInfo.privilege_id,
-                  view_name: privilege.PrivilegeInfo.view_name,
-                  version: privilege.PrivilegeInfo.version,
-                  instance_name: privilege.PrivilegeInfo.instance_name
-                });
-              } else {
-                clusterPrivilegesIds.push(privilege.PrivilegeInfo.privilege_id);
-              }
-            }
-          });
-        }
-        User.delete($scope.user.user_name).then(function () {
-          $location.url('/userManagement?tab=users');
-          if (clusterPrivilegesIds.length) {
-            Cluster.deleteMultiplePrivileges($rootScope.cluster.Clusters.cluster_name, clusterPrivilegesIds);
-          }
-          angular.forEach(viewsPrivileges, function (privilege) {
-            View.deletePrivilege(privilege);
-          });
-        });
-      });
-    });
-  };
-
   function deleteUserRoles(clusterName, user, ignoreAlert) {
     return Cluster.deleteMultiplePrivileges(
       clusterName,

+ 4 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/controllers/userManagement/UsersListCtrl.js

@@ -148,7 +148,10 @@ function($scope, User, $modal, $rootScope, UserConstants, $translate, Cluster, V
       $t('common.deleteConfirmation', {
         instanceType: $t('common.user').toLowerCase(),
         instanceName: '"' + user.user_name + '"'
-      })
+      }),
+      null,
+      null,
+      {primaryClass: 'btn-danger'}
     ).then(function () {
       Cluster.getPrivilegesForResource({
         nameFilter: user.user_name,

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/scripts/directives/comboSearch.js

@@ -82,7 +82,7 @@ angular.module('ambariAdminConsole')
         $scope.appliedFilters = $scope.appliedFilters.filter(function(item) {
           return filter.id !== item.id;
         });
-        $scope.observeSearchFilterInput(event);
+        $scope.observeSearchFilterInput();
         mainInputElement.focus();
         $scope.updateFilters($scope.appliedFilters);
       };

+ 53 - 29
ambari-admin/src/main/resources/ui/admin-web/app/scripts/directives/editableList.js

@@ -98,8 +98,9 @@ angular.module('ambariAdminConsole')
             break;
           case 13: // Enter
             $scope.$apply(function() {
-              $scope.addItem();
-              $scope.focusOnInput();
+              if ($scope.addItem()) {
+                $scope.focusOnInput();
+              }
             });
             return false;
             break;
@@ -129,6 +130,10 @@ angular.module('ambariAdminConsole')
       $scope.input = '';
       $scope.typeahead = [];
       $scope.selectedTypeahed = 0;
+      $scope.resources = [];
+      $scope.invalidInput = false;
+
+      preloadResources();
 
       // Watch source of items
       $scope.$watch(function() {
@@ -141,37 +146,33 @@ angular.module('ambariAdminConsole')
       $scope.$watch(function() {
         return $scope.input;
       }, function(newValue) {
+        $scope.invalidInput = false;
         if(newValue){
           var newValue = newValue.split(',').filter(function(i){ 
             i = i.replace('&nbsp;', ''); // Sanitize from spaces
             return !!i.trim();
           }).map(function(i) { return i.trim(); });
           if( newValue.length > 1){
+            var validInput = true;
             // If someone paste coma separated string, then just add all items to list
             angular.forEach(newValue, function(item) {
-              $scope.addItem(item);
+              if (validInput) {
+                validInput = $scope.addItem(item);
+              }
             });
-            $scope.clearInput();
-            $scope.focusOnInput();
-            
+            if (validInput) {
+              $scope.clearInput();
+              $scope.focusOnInput();
+            }
           } else {
-            // Load typeahed items based on current input
-            $resource.listByName(encodeURIComponent(newValue)).then(function(data) {
-              var items = [];
-              angular.forEach(data.data.items, function(item) {
-                var name;
-                if($scope.resourceType === 'User'){
-                  name = item.Users.user_name;
-                } else if($scope.resourceType === 'Group'){
-                  name = item.Groups.group_name;
-                }
-                if($scope.items.indexOf(name) < 0){ // Only if item not in list
-                  items.push(name);
-                }
-              });
-              $scope.typeahead = items.slice(0, 5);
-              $scope.selectedTypeahed = 0;
+            var items = [];
+            angular.forEach($scope.resources, function (name) {
+              if (name.indexOf(newValue) !== -1 && $scope.items.indexOf(name) === -1) {
+                items.push(name);
+              }
             });
+            $scope.typeahead = items.slice(0, 5);
+            $scope.selectedTypeahed = 0;
           }
         } else {
           $scope.typeahead = [];
@@ -180,6 +181,20 @@ angular.module('ambariAdminConsole')
         }
       });
 
+      function preloadResources() {
+        $resource.listByName('').then(function(data) {
+          if (data && data.data.items) {
+            $scope.resources = data.data.items.map(function(item) {
+              if ($scope.resourceType === 'User') {
+                return item.Users.user_name;
+              } else if ($scope.resourceType === 'Group') {
+                return item.Groups.group_name;
+              }
+            });
+          }
+        });
+      }
+
       $scope.enableEditMode = function(event) {
         if( $scope.editable && !$scope.editMode){
           //only one editable-list could be in edit mode at once
@@ -197,12 +212,15 @@ angular.module('ambariAdminConsole')
         event.stopPropagation();
       };
       $scope.save = function(event) {
+        var validInput = true;
         if( $scope.input ){
-          $scope.addItem($scope.input);
+          validInput = $scope.addItem($scope.input);
+        }
+        if (validInput) {
+          $scope.itemsSource = $scope.items;
+          $scope.editMode = false;
+          $scope.input = '';
         }
-        $scope.itemsSource = $scope.items;
-        $scope.editMode = false;
-        $scope.input = '';
         if(event){
           event.stopPropagation();
         }
@@ -224,10 +242,16 @@ angular.module('ambariAdminConsole')
       $scope.addItem = function(item) {
         item = item ? item : $scope.typeahead.length ? $scope.typeahead[$scope.selectedTypeahed] : $scope.input;
         
-        if(item && $scope.items.indexOf(item) < 0){
-          $scope.items.push(item);
-          $scope.input = '';
+        if (item && $scope.items.indexOf(item) === -1){
+          if ($scope.resources.indexOf(item) !== -1) {
+            $scope.items.push(item);
+            $scope.input = '';
+          } else {
+            $scope.invalidInput = true;
+            return false;
+          }
         }
+        return true;
       };
 
       $scope.removeFromItems = function(item) {

+ 10 - 8
ambari-admin/src/main/resources/ui/admin-web/app/scripts/i18n.config.js

@@ -274,11 +274,11 @@ angular.module('ambariAdminConsole')
     'clusters.assignRoles': 'Assign roles to these {{term}}',
 
     'clusters.alerts.cannotLoadClusterData': 'Cannot load cluster data',
-    'clusters.devBlueprint': 'Dev Blueprint',
+    'clusters.devBlueprint': 'Cluster Blueprint',
 
     'groups.createLocal': 'Add Groups',
     'groups.name': 'Group name',
-    'groups.role': 'Add roles to this group',
+    'groups.role': 'Group Access',
     'groups.addUsers': 'Add users to this group',
     'groups.members': 'Members',
     'groups.membersPlural': '{{n}} member{{n == 1 ? "" : "s"}}',
@@ -304,16 +304,18 @@ angular.module('ambariAdminConsole')
     'users.inactive': 'Inactive',
     'users.status': 'Status',
     'users.password': 'Password',
-    'users.role': 'Add roles for this user',
+    'users.role': 'User Access',
     'users.confirmPassword': 'Confirm Password',
     'users.passwordConfirmation': 'Password сonfirmation',
     'users.isAmbariAdmin': 'Is this user an Ambari Admin?',
-    'users.isActive': 'Deactivate this user?',
+    'users.isActive': 'User Status',
     'users.userIsAdmin': 'This user is an Ambari Admin and has all privileges.',
     'users.showAll': 'Show all users',
     'users.showAdmin': 'Show only admin users',
     'users.groupMembership': 'Group Membership',
     'users.userNameTip': 'Maximum length is 80 characters. \\, &, |, <, >, ` are not allowed.',
+    'users.adminTip': 'An Ambari Admin can create new clusters and other Ambari Admin Users.',
+    'users.deactivateTip': 'Active Users can log in to Ambari. Inactive Users cannot.',
 
     'users.changeStatusConfirmation.title': 'Change Status',
     'users.changeStatusConfirmation.message': 'Are you sure you want to change status for user "{{userName}}" to {{status}}?',
@@ -403,9 +405,9 @@ angular.module('ambariAdminConsole')
     'versions.repository.add': 'Add Repository',
 
     'versions.useRedhatSatellite.title': 'Use RedHat Satellite/Spacewalk',
-    'versions.useRedhatSatellite.warning': 'By selecting to <b>"Use RedHat Satellite/Spacewalk"</b> for the software repositories, ' +
-      'you are responsible for configuring the repository channel in Satellite/Spacewalk and confirming the repositories for the selected <b>stack version</b> are available on the hosts in the cluster. ' +
-      'Refer to the Ambari documentation for more information.',
+    'versions.useRedhatSatellite.warning': "In order for Ambari to install packages from the right repositories, " +
+    "it is recommended that you edit the names of the repo's for each operating system so they match the channel " +
+    "names in your RedHat Satellite/Spacewalk instance.",
     'versions.useRedhatSatellite.disabledMsg': 'Use of RedHat Satellite/Spacewalk is not available when using Public Repositories',
 
     'versions.changeBaseURLConfirmation.title': 'Confirm Base URL Change',
@@ -413,7 +415,7 @@ angular.module('ambariAdminConsole')
 
     'versions.alerts.baseURLs': 'Provide Base URLs for the Operating Systems you are configuring.',
     'versions.alerts.validationFailed': 'Some of the repositories failed validation. Make changes to the base url or skip validation if you are sure that urls are correct',
-    'versions.alerts.skipValidationWarning': '<b>Warning:</b> This is for advanced users only. Use this option if you want to skip validation for Repository Base URLs.',
+    'versions.alerts.skipValidationWarning': 'Warning: This is for advanced users only. Use this option if you want to skip validation for Repository Base URLs.',
     'versions.alerts.useRedhatSatelliteWarning': 'Disable distributed repositories and use RedHat Satellite/Spacewalk channels instead',
     'versions.alerts.filterListError': 'Fetch stack version filter list error',
     'versions.alerts.versionCreated': 'Created version <a href="#/stackVersions/{{stackName}}/{{versionName}}/edit">{{stackName}}-{{versionName}}</a>',

+ 4 - 2
ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/ConfirmationModal.js

@@ -23,8 +23,9 @@ angular.module('ambariAdminConsole')
   var $t = $translate.instant;
 
 	return {
-		show: function(header, body, confirmText, cancelText, hideCancelButton) {
+		show: function(header, body, confirmText, cancelText, options) {
 			var deferred = $q.defer();
+      options = options || {};
 
 			var modalInstance = $modal.open({
 				templateUrl: 'views/modals/ConfirmationModal.html',
@@ -35,7 +36,8 @@ angular.module('ambariAdminConsole')
           $scope.innerScope = body.scope;
           $scope.confirmText = confirmText || $t('common.controls.ok');
           $scope.cancelText = cancelText || $t('common.controls.cancel');
-					$scope.showCancelButton = !hideCancelButton;
+          $scope.primaryClass = options.primaryClass || 'btn-primary',
+					$scope.showCancelButton = !options.hideCancelButton;
 
 					$scope.ok = function() {
 						$modalInstance.close();

+ 4 - 2
ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/Filters.js

@@ -62,12 +62,14 @@ angular.module('ambariAdminConsole')
         if (customValueFilter) {
           return filter.values.every(function(value) {
             var itemValue = customValueFilter.customValueConverter(item);
-            return String(Array.isArray(itemValue) ? itemValue.join() : itemValue).indexOf(value) === -1;
+            var preparedValue = Array.isArray(itemValue) ? itemValue.join().toLowerCase() : itemValue.toLowerCase();
+            return String(preparedValue).indexOf(value.toLowerCase()) === -1;
           });
         }
         return filter.values.every(function(value) {
           var itemValue = item[filter.key];
-          return String(Array.isArray(itemValue) ? itemValue.join() : itemValue).indexOf(value) === -1;
+          var preparedValue = Array.isArray(itemValue) ? itemValue.join().toLowerCase() : itemValue.toLowerCase();
+          return String(preparedValue).indexOf(value.toLowerCase()) === -1;
 
         });
       }));

+ 1 - 0
ambari-admin/src/main/resources/ui/admin-web/app/scripts/services/Stack.js

@@ -148,6 +148,7 @@ angular.module('ambariAdminConsole')
             versionObj.osList.forEach(function (os) {
               os.repositories.forEach(function(repo) {
                 repo.Repositories.initial_base_url = repo.Repositories.base_url;
+                repo.Repositories.initial_repo_id = repo.Repositories.repo_id;
               });
             });
             versions.push(versionObj);

+ 21 - 0
ambari-admin/src/main/resources/ui/admin-web/app/styles/bootstrap-overrides.css

@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+.modal-body .form-group label {
+  font-weight: normal;
+}

+ 9 - 40
ambari-admin/src/main/resources/ui/admin-web/app/styles/main.css

@@ -139,6 +139,12 @@
   position: relative;
   cursor: pointer;
 }
+
+.has-error.add-item-input {
+  color: #666;
+  border: 1px solid #EF6162 !important;
+}
+
 .add-item-input span:focus{
   cursor: default;
 }
@@ -723,14 +729,6 @@ accordion .panel-group .panel{
   word-break: break-all;
 }
 
-#stack-versions .table .col-small {
-  width: 15%
-}
-
-#stack-versions .table .col-medium {
-  width: 30%
-}
-
 .table-bar {
   padding-top: 4px;
   border: 1px solid #E4E4E4;
@@ -806,21 +804,6 @@ accordion .panel-group .panel{
   cursor: default;
 }
 
-.repo-table-title #name-label-adjust {
-  width: 20.7%;
-  padding-left:0px;
-  right:5px;
-}
-
-.repo-table-title #repo-base-url-label {
-  padding-left:0px;
-  right:3px;
-}
-
-.verison-label-row .label {
-  font-size: 100%;
-}
-
 .panel-body .sub-group {
     margin-left: 10px;
 }
@@ -1148,21 +1131,6 @@ thead.view-permission-header > tr > th {
   margin: 0px 5px;
 }
 
-#stack-versions .no-version-alert {
-  text-align: center;
-}
-#stack-versions table {
-  table-layout: fixed;
-}
-#stack-versions table .text-search-container {
-  font-weight: normal;
-  position: relative;
-}
-#stack-versions table .text-search-container .close {
-  position: absolute;
-  right: 15px;
-  top: 40px;
-}
 .pull-up {
   margin-top: -2px;
 }
@@ -1330,12 +1298,13 @@ body {
   color: inherit;
   font-size: 16px;
   cursor: pointer;
-  padding: 0 5px;
+  padding: 0 3px;
 }
 
 td.entity-actions,
 th.entity-actions {
   width: 10%;
+  min-width: 80px;
 }
 
 .entity-actions a:hover,
@@ -1377,7 +1346,7 @@ th.entity-actions {
   transform: rotate(45deg);
   top: 10px;
   left: 2px;
-  box-shadow: -2px -2px 10px -3px rgba(0, 0, 0, 0.5);
+  border: 1px solid #ccc;
 }
 
 a.disabled i:before,

+ 74 - 0
ambari-admin/src/main/resources/ui/admin-web/app/styles/stack-versions.css

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#stack-versions .table .col-small {
+  width: 15%
+}
+
+#stack-versions .table .col-medium {
+  width: 30%
+}
+
+#stack-versions .no-version-alert {
+  text-align: center;
+}
+#stack-versions table {
+  table-layout: fixed;
+}
+#stack-versions table .text-search-container {
+  font-weight: normal;
+  position: relative;
+}
+#stack-versions table .text-search-container .close {
+  position: absolute;
+  right: 15px;
+  top: 40px;
+}
+
+#stack-versions table td > * {
+  line-height: 24px;
+}
+
+.repo-table-title #name-label-adjust {
+  width: 20.7%;
+  padding-left:0px;
+  right:5px;
+}
+
+.repo-table-title #repo-base-url-label {
+  padding-left:0px;
+  right:3px;
+}
+
+.verison-label-row .label {
+  font-size: 100%;
+}
+
+.verison-label-row .btn {
+  padding: 10px;
+}
+
+.verison-label-row.button-padding {
+  padding: 4px;
+}
+
+.repo-name-label input {
+  width: calc(100% - 30px);
+  margin-right: 2px;
+  display: inline-block;
+}

+ 9 - 3
ambari-admin/src/main/resources/ui/admin-web/app/styles/user-management.css

@@ -33,9 +33,15 @@
   margin-top: -35px;
 }
 
-#group-edit .roles-label,
-#user-edit .roles-label {
-  line-height: 30px;
+#user-edit label,
+#group-edit label {
+  max-width: 150px;
+  line-height: 34px;
+}
+
+#user-edit .one-row-value,
+#group-edit .one-row-value {
+  line-height: 20px;
 }
 
 #create-user-form .roles-label i,

+ 4 - 0
ambari-admin/src/main/resources/ui/admin-web/app/styles/views.css

@@ -103,3 +103,7 @@ input[type="checkbox"] + label {
   -webkit-animation-delay: -1.0s;
   animation-delay: -1.0s;
 }
+
+#views-table td {
+  word-break: break-all;
+}

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/views/ambariViews/edit.html

@@ -282,7 +282,7 @@
       <div class="col-sm-12">
         <span translate="views.clusterPermissions.infoMessage" translate-values="{cluster: cluster.name}"></span>
       </div>
-      <div class="col-sm-offset-2 col-sm-10">
+      <div class="col-sm-10">
         <div class="checkbox col-sm-12" ng-repeat="key in permissionRoles">
           <div ng-init="i18nKey = 'views.clusterPermissions.' + key.split('.').join('').toLowerCase()">
             <input ng-attr-id="{{i18nKey}}" type="checkbox" class="form-control" ng-model="permissionsEdit['VIEW.USER']['ROLE'][key]">

+ 0 - 8
ambari-admin/src/main/resources/ui/admin-web/app/views/ambariViews/modals/create.html

@@ -34,7 +34,6 @@
     <div class="form-group col-sm-6" ng-class="{ 'has-error': form.instanceCreateForm.view.$error.required && form.instanceCreateForm.submitted }">
       <label for="view">
         {{'views.createInstance.selectView' | translate}}<span>*</span>&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <select
         ng-disabled="instanceClone"
@@ -54,7 +53,6 @@
     <div class="form-group col-sm-6" ng-class="{ 'has-error': form.instanceCreateForm.version.$error.required && form.instanceCreateForm.submitted }">
       <label for="version">
         {{'views.createInstance.selectVersion' | translate}}<span>*</span>&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <select
         ng-disabled="instanceClone"
@@ -77,7 +75,6 @@
          ng-class="{ 'has-error': (form.instanceCreateForm.instanceName.$error.required || form.instanceCreateForm.instanceName.$error.pattern || isInstanceExists) && form.instanceCreateForm.submitted }">
       <label for="instanceName">
         {{'views.instanceName' | translate}}<span>*</span>&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <input type="text" class="form-control"
              ng-model="formData.instanceName"
@@ -101,7 +98,6 @@
     <div class="form-group" ng-class="{ 'has-error': form.instanceCreateForm.displayName.$error.required && form.instanceCreateForm.submitted }">
       <label for="displayName">
         {{'views.displayName' | translate}}<span>*</span>&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <input type="text" class="form-control" required
              name="displayName"
@@ -115,7 +111,6 @@
     <div class="form-group" ng-class="{ 'has-error': form.instanceCreateForm.description.$error.required && form.instanceCreateForm.submitted }">
       <label for="description">
         {{'views.description' | translate}}<span>*</span>&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <input type="text" class="form-control" required
              name="description"
@@ -133,7 +128,6 @@
              id="visible">
       <label for="visible">
         {{'views.visible' | translate}}
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
     </div>
   </div>
@@ -171,7 +165,6 @@
     <div class="form-group">
       <label for="clusterType">
         {{'views.createInstance.clusterType' | translate}}?&nbsp;
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
       </label>
       <div>
         <div class="btn-group" role="group" id="clusterType">
@@ -198,7 +191,6 @@
       <div class="form-group col-sm-6" ng-class="{ 'has-error': form.instanceCreateForm.clusterName.$error.required && form.instanceCreateForm.submitted }">
         <label for="clusterName">
           {{'views.clusterName' | translate}}<span>*</span>&nbsp;
-          <i class="fa fa-question-circle" aria-hidden="true"></i>
         </label>
         <select
           ng-required="clusterType !== 'NONE'"

+ 1 - 2
ambari-admin/src/main/resources/ui/admin-web/app/views/clusters/clusterInformation.html

@@ -51,8 +51,7 @@
                ng-model="edit.clusterName"
                required
                autofocus
-               ng-pattern="/^\w*$/"
-               ng-maxlength="80"
+               ng-maxlength="100"
                tooltip="{{'common.renameClusterTip' | translate}}"
                tooltip-trigger="focus"
                tooltip-placement="bottom"

+ 10 - 1
ambari-admin/src/main/resources/ui/admin-web/app/views/directives/editableList.html

@@ -19,7 +19,16 @@
 <div class="editable-list-container well" ng-class="{'edit-mode' : editMode, 'disabled' : !editable}" ng-click="enableEditMode($event)">
   <div class="items-box">
     <ul class="items-list">
-      <li class="item" ng-repeat="item in items | orderBy:identity"><span><a href>{{item}}</a><button ng-click="removeFromItems(item)" type="button" class="close"><span aria-hidden="true">&times;</span><span class="sr-only">{{'common.controls.close' | translate}}</span></button></span></li><li class="item add-item-input" ng-show="editMode">
+      <li class="item" ng-repeat="item in items | orderBy:identity">
+        <span>
+          <a href>{{item}}</a>
+          <button ng-click="removeFromItems(item)" type="button" class="close">
+            <span aria-hidden="true">&times;</span>
+            <span class="sr-only">{{'common.controls.close' | translate}}</span>
+          </button>
+        </span>
+      </li>
+      <li class="item add-item-input" ng-class="{'has-error': invalidInput}" ng-show="editMode">
         <span contenteditable></span>
         <div class="typeahead-box" ng-show="typeahead.length != 0">
           <ul>

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/views/modals/ConfirmationModal.html

@@ -24,5 +24,5 @@
 </div>
 <div class="modal-footer">
     <button class="btn btn-default" ng-if="showCancelButton" ng-click="cancel()">{{cancelText}}</button>
-    <button class="btn btn-primary" ng-click="ok()">{{confirmText}}</button>
+    <button class="btn" ng-class="primaryClass" ng-click="ok()">{{confirmText}}</button>
 </div>

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/views/sideNav.html

@@ -34,7 +34,7 @@
     </ul>
     <ul class="nav side-nav-menu nav-pills nav-stacked">
       <li class="mainmenu-li active" ng-show="cluster.Clusters.provisioning_state === 'INSTALLED'">
-        <a title="{{'common.dashboard' | translate}}" rel="tooltip" data-placement="right" href="{{fromSiteRoot('/#/dashboard')}}" class="gotodashboard">
+        <a title="{{'common.dashboard' | translate}}" rel="tooltip" data-placement="right" href="{{fromSiteRoot('/#/main/dashboard')}}" class="gotodashboard">
           <i class="navigation-icon fa fa-tachometer" aria-hidden="true"></i>
           <span class="navigation-menu-item">{{'common.dashboard' | translate}}</span>
         </a>

+ 1 - 1
ambari-admin/src/main/resources/ui/admin-web/app/views/stackVersions/list.html

@@ -81,7 +81,7 @@
           {{'common.none' | translate}}
         </span>
       </td>
-      <td class="verison-label-row">
+      <td class="verison-label-row" ng-class="{'button-padding': !repo.cluster}">
         <div ng-show="repo.status == 'CURRENT'">
           <span class="label {{'status-' + repo.status}}">{{'versions.current' | translate}}:&nbsp;{{repo.currentHosts}}/{{repo.totalHosts}}</span>
         </div>

+ 24 - 8
ambari-admin/src/main/resources/ui/admin-web/app/views/stackVersions/stackVersionPage.html

@@ -177,14 +177,26 @@
                 </div>
                 <div class="col-sm-9">
                   <div class="form-group repo-name-url {{repository.Repositories.repo_name}}"
-                       ng-class="{'has-error': repository.hasError }" ng-repeat="repository in os.repositories">
-                    <div ng-if="showRepo(repository)">
-                      <span class="repo-name-label control-label col-sm-3">{{repository.Repositories.repo_id}}</span>
+                       ng-class="{'has-error': repository.hasError }"
+                       ng-repeat="repository in os.repositories"
+                       ng-if="showRepo(repository)">
+                      <span class="repo-name-label control-label col-sm-3">
+                        <span ng-if="!repository.isEditing">{{repository.Repositories.repo_id}}</span>
+                        <i class="fa fa-pencil cursor-pointer"
+                           ng-click="repository.isEditing = true"
+                           ng-show="useRedhatSatellite && !repository.isEditing"></i>
+                        <input type="text" class="form-control"
+                               ng-show="useRedhatSatellite && repository.isEditing"
+                               ng-model="repository.Repositories.repo_id">
+                        <i class="fa fa-undo orange-icon cursor-pointer"
+                          ng-show="repository.isEditing && (repository.Repositories.repo_id !== repository.Repositories.initial_repo_id)"
+                          ng-click="repository.Repositories.repo_id = repository.Repositories.initial_repo_id"></i>
+                      </span>
                       <div class="col-sm-7 repo-url">
                         <input type="text" class="form-control"
                                placeholder="{{(repository.Repositories.repo_name.indexOf('UTILS') < 0 )?('versions.repository.placeholder' | translate) : ''}}"
                                ng-model="repository.Repositories.base_url"
-                               ng-change="onRepoUrlChange(repository)" ng-disabled="useRedhatSatellite && os.OperatingSystems.os_type.indexOf('redhat') === -1">
+                               ng-change="onRepoUrlChange(repository)" ng-disabled="useRedhatSatellite">
                       </div>
                       <i class="fa fa-undo orange-icon cursor-pointer"
                          ng-if="selectedOption.index == 1 && repository.Repositories.base_url != repository.Repositories.initial_base_url
@@ -193,7 +205,6 @@
                          ng-click="undoChange(repository)"
                          tooltip-html-unsafe="{{'common.undo' | translate}}"
                          aria-hidden="true"></i>
-                    </div>
                   </div>
                 </div>
                 <div class="col-sm-1 remove-icon" ng-click="removeOS()" ng-class="{'disabled' : useRedhatSatellite}">
@@ -211,19 +222,24 @@
                 <label for="skip-validation">
                   <span ng-class="{'disabled' : useRedhatSatellite}">{{'versions.skipValidation' | translate}}</span>
                   <i class="fa fa-question-circle"
-                     tooltip-html-unsafe="{{'versions.alerts.skipValidationWarning' | translate}}" aria-hidden="true"></i>
+                     tooltip="{{'versions.alerts.skipValidationWarning' | translate}}"
+                     tooltip-placement="right"
+                     tooltip-trigger="mouseenter"
+                     aria-hidden="true"></i>
                 </label>
               </div>
             </div>
             <div class="col-sm-9">
               <div class="checkbox">
-                <input type="checkbox" id="use-redhat" ng-model="useRedhatSatellite" ng-change="clearErrors()"
+                <input type="checkbox" id="use-redhat" ng-model="useRedhatSatellite" ng-change="useRedHatCheckbox()"
                        ng-disabled="isPublicRepoSelected()">
                 <label for="use-redhat">
                   <span ng-class="{'disabled' : isPublicRepoSelected()}"
                         tooltip="{{(isPublicRepoSelected())? ('versions.useRedhatSatellite.disabledMsg' | translate) : ''}}">{{'versions.useRedhatSatellite.title' | translate}}</span>
                   <i class="fa fa-question-circle"
-                     tooltip-html-unsafe="{{'versions.alerts.useRedhatSatelliteWarning' | translate}}"
+                     tooltip="{{'versions.alerts.useRedhatSatelliteWarning' | translate}}"
+                     tooltip-placement="right"
+                     tooltip-trigger="mouseenter"
                      aria-hidden="true"></i>
                 </label>
               </div>

+ 5 - 18
ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/groupEdit.html

@@ -22,30 +22,17 @@
       <li><a href="#/userManagement?tab=groups">{{'common.groups' | translate}}</a></li>
       <li class="active">{{group.group_name}}</li>
     </ol>
-    <div class="pull-right">
-      <div ng-switch="group.group_type != 'LOCAL'">
-        <button
-          ng-switch-when="true"
-          class="btn disabled deletegroup-btn"
-          tooltip="{{'common.cannotDelete' | translate:{term: constants.group} }}">
-          {{'common.delete' | translate:{term: constants.group} }}
-        </button>
-        <button ng-switch-when="false" class="btn btn-danger deletegroup-btn" ng-click="deleteGroup(group)">
-          {{'common.delete' | translate:{term: constants.group} }}
-        </button>
-      </div>
-    </div>
   </div>
-
+  <hr>
   <form class="form-horizontal" role="form" novalidate name="form" >
     <div class="form-group">
-      <label class="col-sm-2 control-label">{{'common.type' | translate}}</label>
+      <label class="col-sm-2 one-row-value">{{'common.type' | translate}}</label>
       <div class="col-sm-10">
-        <label class="control-label">{{group.groupTypeName | translate}}</label>
+        <label class="one-row-value">{{group.groupTypeName | translate}}</label>
       </div>
     </div>
     <div class="form-group">
-      <label class="col-sm-2 control-label">{{group.groupTypeName | translate}} {{'groups.members' | translate}}</label>
+      <label class="col-sm-2">{{group.groupTypeName | translate}} {{'groups.members' | translate}}</label>
       <div class="col-sm-10">
         <editable-list items-source="group.editingUsers" resource-type="User" editable="group.group_type == 'LOCAL'"></editable-list>
       </div>
@@ -66,7 +53,7 @@
       </div>
     </div>
     <div class="form-group">
-      <label class="col-sm-2 control-label">{{'common.privileges' | translate}}</label>
+      <label class="col-sm-2">{{'common.privileges' | translate}}</label>
       <div class="col-sm-10">
         <table class="table" ng-hide="hidePrivileges">
           <thead>

+ 10 - 2
ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/modals/userCreate.html

@@ -124,7 +124,11 @@
     <div class="form-group">
       <label>
         {{'users.isAmbariAdmin' | translate}}<span>&nbsp;*</span>
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
+        <i class="fa fa-question-circle"
+           aria-hidden="true"
+           tooltip="{{'users.adminTip' | translate}}"
+           tooltip-trigger="click"
+           tooltip-placement="top"></i>
       </label>
       <div>
         <toggle-switch model="formData.isAdmin" class="switch-success" data-off-color="danger"></toggle-switch>
@@ -136,7 +140,11 @@
     <div class="form-group">
       <label>
         {{'users.isActive' | translate}}<span>&nbsp;*</span>
-        <i class="fa fa-question-circle" aria-hidden="true"></i>
+        <i class="fa fa-question-circle"
+           aria-hidden="true"
+           tooltip="{{'users.deactivateTip' | translate}}"
+           tooltip-trigger="click"
+           tooltip-placement="top"></i>
       </label>
       <div>
         <toggle-switch model="formData.isActive" class="switch-success" data-off-color="danger"></toggle-switch>

+ 3 - 9
ambari-admin/src/main/resources/ui/admin-web/app/views/userManagement/userEdit.html

@@ -22,19 +22,13 @@
       <li><a href="#/userManagement?tab=users">{{'common.users' | translate}}</a></li>
       <li class="active">{{user.user_name}}</li>
     </ol>
-    <div class="pull-right">
-      <div ng-switch="isCurrentUser || user.user_type != 'LOCAL'">
-        <button class="btn deleteuser-btn disabled btn-default" ng-switch-when="true" tooltip="{{'common.cannotDelete' | translate:{term: constants.user} }}">{{'common.delete' | translate:{term: constants.user} }}</button>
-        <button class="btn deleteuser-btn btn-danger" ng-switch-when="false" ng-click="deleteUser()">{{'common.delete' | translate:{term: constants.user} }}</button>
-      </div>
-    </div>
   </div>
   <hr>
   <form class="form-horizontal" role="form" >
     <div class="form-group">
-      <label class="col-sm-2 ">{{'common.type' | translate}}</label>
+      <label class="col-sm-2 one-row-value">{{'common.type' | translate}}</label>
       <div class="col-sm-10">
-        <label>{{user.userTypeName}}</label>
+        <label class="one-row-value">{{user.userTypeName}}</label>
       </div>
     </div>
     <div class="form-group">
@@ -64,7 +58,7 @@
       </div>
     </div>
     <div class="form-group">
-      <label class="col-sm-2 ">{{getUserMembership(user.user_type)}}</label>
+      <label class="col-sm-2 one-row-value">{{getUserMembership(user.user_type)}}</label>
       <div class="col-sm-10">
         <editable-list items-source="editingGroupsList" resource-type="Group" editable="user.user_type == 'LOCAL'"></editable-list>
       </div>

+ 3 - 1
ambari-agent/conf/unix/ambari-agent.ini

@@ -41,6 +41,8 @@ command_reports_interval=5
 status_commands_run_interval=20
 alert_grace_period=5
 status_command_timeout=5
+; (send_alert_changes_only) - enabling this is necessary to deploy big perf clusters
+send_alert_changes_only=0
 alert_kinit_timeout=14400000
 system_resource_overrides=/etc/resource_overrides
 ; memory_threshold_soft_mb=400
@@ -70,7 +72,7 @@ dirs=/etc/hadoop,/etc/hadoop/conf,/etc/hbase,/etc/hcatalog,/etc/hive,/etc/oozie,
   /var/run/hadoop,/var/run/zookeeper,/var/run/hbase,/var/run/templeton,/var/run/oozie,
   /var/log/hadoop,/var/log/zookeeper,/var/log/hbase,/var/run/templeton,/var/log/hive
 ; 0 - unlimited
-log_lines_count=300
+log_max_symbols_size=900000
 idle_interval_min=1
 idle_interval_max=10
 

+ 168 - 103
ambari-agent/conf/unix/install-helper.sh

@@ -18,18 +18,28 @@
 #                      AGENT INSTALL HELPER                      #
 ##################################################################
 
-INSTALL_HELPER_SERVER="/var/lib/ambari-server/install-helper.sh"
-COMMON_DIR="/usr/lib/ambari-agent/lib/ambari_commons"
-RESOURCE_MANAGEMENT_DIR="/usr/lib/ambari-agent/lib/resource_management"
-JINJA_DIR="/usr/lib/ambari-agent/lib/ambari_jinja2"
-SIMPLEJSON_DIR="/usr/lib/ambari-agent/lib/ambari_simplejson"
-OLD_OLD_COMMON_DIR="/usr/lib/ambari-agent/lib/common_functions"
-AMBARI_AGENT="/usr/lib/ambari-agent/lib/ambari_agent"
+# WARNING. Please keep the script POSIX compliant and don't use bash extensions
+
+AMBARI_UNIT="ambari-agent"
+ACTION=$1
+AMBARI_AGENT_ROOT_DIR="/usr/lib/${AMBARI_UNIT}"
+AMBARI_SERVER_ROOT_DIR="/usr/lib/ambari-server"
+COMMON_DIR="${AMBARI_AGENT_ROOT_DIR}/lib/ambari_commons"
+RESOURCE_MANAGEMENT_DIR="${AMBARI_AGENT_ROOT_DIR}/lib/resource_management"
+JINJA_DIR="${AMBARI_AGENT_ROOT_DIR}/lib/ambari_jinja2"
+SIMPLEJSON_DIR="${AMBARI_AGENT_ROOT_DIR}/lib/ambari_simplejson"
+OLD_OLD_COMMON_DIR="${AMBARI_AGENT_ROOT_DIR}/lib/common_functions"
+AMBARI_AGENT="${AMBARI_AGENT_ROOT_DIR}/lib/ambari_agent"
 PYTHON_WRAPER_TARGET="/usr/bin/ambari-python-wrap"
-AMBARI_AGENT_VAR="/var/lib/ambari-agent"
-AMBARI_AGENT_BINARY="/etc/init.d/ambari-agent"
-AMBARI_AGENT_BINARY_SYMLINK="/usr/sbin/ambari-agent"
+AMBARI_AGENT_VAR="/var/lib/${AMBARI_UNIT}"
+AMBARI_AGENT_BINARY="/etc/init.d/${AMBARI_UNIT}"
+AMBARI_AGENT_BINARY_SYMLINK="/usr/sbin/${AMBARI_UNIT}"
+AMBARI_ENV_RPMSAVE="/var/lib/${AMBARI_UNIT}/ambari-env.sh.rpmsave"
+AMBARI_HELPER="/var/lib/ambari-agent/install-helper.sh.orig"
 
+LOG_FILE=/dev/null
+
+CLEANUP_MODULES="resource_management;ambari_commons;ambari_agent;ambari_ws4py;ambari_stomp;ambari_jinja2;ambari_simplejson"
 
 OLD_COMMON_DIR="/usr/lib/python2.6/site-packages/ambari_commons"
 OLD_RESOURCE_MANAGEMENT_DIR="/usr/lib/python2.6/site-packages/resource_management"
@@ -37,14 +47,89 @@ OLD_JINJA_DIR="/usr/lib/python2.6/site-packages/ambari_jinja2"
 OLD_SIMPLEJSON_DIR="/usr/lib/python2.6/site-packages/ambari_simplejson"
 OLD_AMBARI_AGENT_DIR="/usr/lib/python2.6/site-packages/ambari_agent"
 
+
+resolve_log_file(){
+ local log_dir=/var/log/${AMBARI_UNIT}
+ local log_file="${log_dir}/${AMBARI_UNIT}-pkgmgr.log"
+
+ if [ ! -d "${log_dir}" ]; then
+   mkdir "${log_dir}" 1>/dev/null 2>&1
+ fi
+
+ if [ -d "${log_dir}" ]; then
+   touch ${log_file} 1>/dev/null 2>&1
+   if [ -f "${log_file}" ]; then
+    LOG_FILE="${log_file}"
+   fi
+ fi
+
+ echo "--> Install-helper custom action log started at $(date '+%d/%m/%y %H:%M') for '${ACTION}'" 1>>${LOG_FILE} 2>&1
+}
+
 clean_pyc_files(){
   # cleaning old *.pyc files
-  find ${RESOURCE_MANAGEMENT_DIR:?} -name *.pyc -exec rm {} \;
-  find ${COMMON_DIR:?} -name *.pyc -exec rm {} \;
-  find ${AMBARI_AGENT:?} -name *.pyc -exec rm {} \;
-  find ${AMBARI_AGENT_VAR:?} -name *.pyc -exec rm {} \;
+  local lib_dir="${AMBARI_AGENT_ROOT_DIR}/lib"
+
+  echo ${CLEANUP_MODULES} | tr ';' '\n' | while read item; do
+    local item="${lib_dir}/${item}"
+    echo "Cleaning pyc files from ${item}..."
+    if [ -d "${item}" ]; then
+      find ${item:?} -name *.pyc -exec rm {} \; 1>>${LOG_FILE} 2>&1
+    else
+      echo "Skipping ${item} pyc cleaning, as package not existing"
+    fi
+  done
 }
 
+remove_ambari_unit_dir(){
+  # removing empty dirs, which left after cleaning pyc files
+
+  find "${AMBARI_AGENT_ROOT_DIR}" -type d | tac | while read item; do
+    echo "Removing empty dir ${item}..."
+    rmdir --ignore-fail-on-non-empty ${item} 1>/dev/null 2>&1
+  done
+
+  rm -rf ${AMBARI_HELPER}
+  find "${AMBARI_AGENT_VAR}" -type d | tac | while read item; do
+    echo "Removing empty dir ${item}..."
+    rmdir --ignore-fail-on-non-empty ${item} 1>/dev/null 2>&1
+  done
+}
+
+remove_autostart(){
+  which chkconfig
+  if [ "$?" -eq 0 ] ; then
+    chkconfig --list | grep ambari-server && chkconfig --del ambari-agent
+  fi
+  which update-rc.d
+  if [ "$?" -eq 0 ] ; then
+    update-rc.d -f ambari-agent remove
+  fi
+}
+
+install_autostart(){
+  which chkconfig 1>>${LOG_FILE} 2>&1
+  if [ "$?" -eq 0 ] ; then
+    chkconfig --add ambari-agent
+  fi
+  which update-rc.d 1>>${LOG_FILE} 2>&1
+  if [ "$?" -eq 0 ] ; then
+    update-rc.d ambari-agent defaults
+  fi
+}
+
+locate_python(){
+  local python_binaries="/usr/bin/python;/usr/bin/python2;/usr/bin/python2.7"
+
+  echo ${python_binaries}| tr ';' '\n' | while read python_binary; do
+    ${python_binary} -c "import sys ; ver = sys.version_info ; sys.exit(not (ver >= (2,7) and ver<(3,0)))" 1>>${LOG_FILE} 2>/dev/null
+
+    if [ $? -eq 0 ]; then
+      echo "${python_binary}"
+      break
+    fi
+  done
+}
 
 do_install(){
   if [ -d "/etc/ambari-agent/conf.save" ]; then
@@ -53,135 +138,115 @@ do_install(){
   fi
 
  # these symlinks (or directories) were created in ambari releases prior to ambari-2.6.2. Do clean up.
-  rm -rf "$OLD_COMMON_DIR" "$OLD_RESOURCE_MANAGEMENT_DIR" "$OLD_JINJA_DIR" "$OLD_SIMPLEJSON_DIR" "$OLD_OLD_COMMON_DIR" "$OLD_AMBARI_AGENT_DIR"
+  rm -rf "${OLD_COMMON_DIR}" "${OLD_RESOURCE_MANAGEMENT_DIR}" "${OLD_JINJA_DIR}" "${OLD_SIMPLEJSON_DIR}" "${OLD_OLD_COMMON_DIR}" "${OLD_AMBARI_AGENT_DIR}"
 
   # setting up /usr/sbin/ambari-agent symlink
-  rm -f "$AMBARI_AGENT_BINARY_SYMLINK"
-  ln -s "$AMBARI_AGENT_BINARY" "$AMBARI_AGENT_BINARY_SYMLINK"
+  rm -f "${AMBARI_AGENT_BINARY_SYMLINK}"
+  ln -s "${AMBARI_AGENT_BINARY}" "${AMBARI_AGENT_BINARY_SYMLINK}"
 
   # on nano Ubuntu, when umask=027 those folders are created without 'x' bit for 'others'.
   # which causes failures when hadoop users try to access tmp_dir
-  chmod a+x $AMBARI_AGENT_VAR
-
-  chmod 1777 $AMBARI_AGENT_VAR/tmp
-  chmod 700 $AMBARI_AGENT_VAR/keys
-  chmod 700 $AMBARI_AGENT_VAR/data
+  chmod a+x ${AMBARI_AGENT_VAR}
 
-  #TODO we need this when upgrading from pre 2.4 versions to 2.4, remove this when upgrade from pre 2.4 versions will be
-  #TODO unsupported
-  clean_pyc_files
+  chmod 1777 ${AMBARI_AGENT_VAR}/tmp
+  chmod 700 ${AMBARI_AGENT_VAR}/keys
+  chmod 700 ${AMBARI_AGENT_VAR}/data
 
-  which chkconfig > /dev/null 2>&1
-  if [ "$?" -eq 0 ] ; then
-    chkconfig --add ambari-agent
-  fi
-  which update-rc.d > /dev/null 2>&1
-  if [ "$?" -eq 0 ] ; then
-    update-rc.d ambari-agent defaults
-  fi
+  install_autostart 1>>${LOG_FILE} 2>&1
 
   # remove old python wrapper
-  rm -f "$PYTHON_WRAPER_TARGET"
-
-  AMBARI_PYTHON=""
-  python_binaries=( "/usr/bin/python" "/usr/bin/python2" "/usr/bin/python2.7" "/usr/bin/python2.6" )
-  for python_binary in "${python_binaries[@]}"
-  do
-    $python_binary -c "import sys ; ver = sys.version_info ; sys.exit(not (ver >= (2,6) and ver<(3,0)))" 1>/dev/null 2>/dev/null
-
-    if [ $? -eq 0 ] ; then
-      AMBARI_PYTHON="$python_binary"
-      break;
-    fi
-  done
+  rm -f "${PYTHON_WRAPER_TARGET}"
 
-  BAK=/etc/ambari-agent/conf/ambari-agent.ini.old
-  ORIG=/etc/ambari-agent/conf/ambari-agent.ini
-  UPGRADE_AGENT_CONFIGS_SCRIPT=/var/lib/ambari-agent/upgrade_agent_configs.py
+  local ambari_python=$(locate_python)
+  local bak=/etc/ambari-agent/conf/ambari-agent.ini.old
+  local orig=/etc/ambari-agent/conf/ambari-agent.ini
+  local upgrade_agent_configs_script=/var/lib/ambari-agent/upgrade_agent_configs.py
 
-  if [ -z "$AMBARI_PYTHON" ] ; then
-    >&2 echo "Cannot detect python for Ambari to use. Please manually set $PYTHON_WRAPER_TARGET link to point to correct python binary"
-    >&2 echo "Cannot upgrade agent configs because python for Ambari is not configured. The old config file is saved as $BAK . Execution of $UPGRADE_AGENT_CONFIGS_SCRIPT was skipped."
+  if [ -z "${ambari_python}" ] ; then
+    >&2 echo "Cannot detect python for Ambari to use. Please manually set ${PYTHON_WRAPER_TARGET} link to point to correct python binary"
+    >&2 echo "Cannot upgrade agent configs because python for Ambari is not configured. The old config file is saved as ${bak} . Execution of ${upgrade_agent_configs_script} was skipped."
   else
-    ln -s "$AMBARI_PYTHON" "$PYTHON_WRAPER_TARGET"
+    ln -s "${ambari_python}" "${PYTHON_WRAPER_TARGET}"
 
-    if [ -f $BAK ]; then
-      if [ -f "$UPGRADE_AGENT_CONFIGS_SCRIPT" ]; then
-        $UPGRADE_AGENT_CONFIGS_SCRIPT
+    if [ -f ${bak} ]; then
+      if [ -f "${upgrade_agent_configs_script}" ]; then
+        ${upgrade_agent_configs_script}
       fi
-      mv $BAK ${BAK}_$(date '+%d_%m_%y_%H_%M').save
+      mv ${bak} ${bak}_$(date '+%d_%m_%y_%H_%M').save
     fi
   fi
 
-  if [ -f "$AMBARI_ENV_RPMSAVE" ] ; then
-    PYTHON_PATH_LINE='export PYTHONPATH=/usr/lib/ambari-agent/lib:/usr/lib/mpack-instance-manager:$PYTHONPATH'
-    grep "^$PYTHON_PATH_LINE\$" "$AMBARI_ENV_RPMSAVE" > /dev/null
+  if [ -f "${AMBARI_ENV_RPMSAVE}" ] ; then
+    PYTHON_PATH_LINE="export PYTHONPATH=${AMBARI_AGENT_ROOT_DIR}/lib:/usr/lib/mpack-instance-manager:\$\{PYTHONPATH\}"
+    grep "^${PYTHON_PATH_LINE}\$" "${AMBARI_ENV_RPMSAVE}" >>${LOG_FILE}
     if [ $? -ne 0 ] ; then
-      echo -e "\n$PYTHON_PATH_LINE" >> $AMBARI_ENV_RPMSAVE
+      echo -e "\n${PYTHON_PATH_LINE}" 1>>${AMBARI_ENV_RPMSAVE}
     fi
   fi
 }
 
-do_remove(){
-  /usr/sbin/ambari-agent stop > /dev/null 2>&1
+copy_helper(){
+  cp -f /var/lib/ambari-agent/install-helper.sh ${AMBARI_HELPER} 1>/dev/null 2>&1
+}
 
-  clean_pyc_files
+do_remove(){
+  /usr/sbin/ambari-agent stop 1>>${LOG_FILE} 2>&1
 
-  rm -f "$AMBARI_AGENT_BINARY_SYMLINK"
+  rm -f "${AMBARI_AGENT_BINARY_SYMLINK}" 1>>${LOG_FILE} 2>&1
 
   if [ -d "/etc/ambari-agent/conf.save" ]; then
     mv /etc/ambari-agent/conf.save /etc/ambari-agent/conf_$(date '+%d_%m_%y_%H_%M').save
   fi
-  mv /etc/ambari-agent/conf /etc/ambari-agent/conf.save
-
-  if [ -f "$PYTHON_WRAPER_TARGET" ]; then
-    rm -f "$PYTHON_WRAPER_TARGET"
-  fi
+  # first step / label: config_backup
+  cp -rf /etc/ambari-agent/conf /etc/ambari-agent/conf.save
 
-  if [ -d "$COMMON_DIR" ]; then
-    rm -rf $COMMON_DIR
-  fi
+  remove_autostart 1>>${LOG_FILE} 2>&1
+  copy_helper 1>>${LOG_FILE} 2>&1
+}
 
-  if [ -d "$RESOURCE_MANAGEMENT_DIR" ]; then
-    rm -rf $RESOURCE_MANAGEMENT_DIR
-  fi
+do_cleanup(){
+  # do_cleanup is a function which is called after the do_remove stage; it is supposed to be a safe
+  # place to remove obsolete files generated by application activity
 
-  if [ -d "$JINJA_DIR" ]; then
-    rm -rf $JINJA_DIR
-  fi
+  clean_pyc_files 1>>${LOG_FILE} 2>&1
 
-  if [ -d "$SIMPLEJSON_DIR" ]; then
-    rm -rf $SIMPLEJSON_DIR
-  fi
+  # second step / label: config_backup
+  rm -rf /etc/ambari-agent/conf
 
-  # if server package exists, restore their settings
-  if [ -f "$INSTALL_HELPER_SERVER" ]; then  #  call server shared files installer
-    $INSTALL_HELPER_SERVER install
+  if [ ! -d "${AMBARI_SERVER_ROOT_DIR}" ]; then
+    echo "Removing ${PYTHON_WRAPER_TARGET} ..." 1>>${LOG_FILE} 2>&1
+    rm -f ${PYTHON_WRAPER_TARGET} 1>>${LOG_FILE} 2>&1
   fi
 
-  which chkconfig > /dev/null 2>&1
-  if [ "$?" -eq 0 ] ; then
-    chkconfig --list | grep ambari-server && chkconfig --del ambari-agent
-  fi
-  which update-rc.d > /dev/null 2>&1
-  if [ "$?" -eq 0 ] ; then
-    update-rc.d -f ambari-agent remove
-  fi
+  remove_ambari_unit_dir 1>>${LOG_FILE} 2>&1
 }
 
 do_upgrade(){
   do_install
 }
 
+do_backup(){
+  # ToDo: find a way to move backup logic here from preinstall.sh and preinst scripts
+  # ToDo: general problem is that still no files are installed on step, when backup is supposed to be done
+  echo ""
+}
 
-case "$1" in
-install)
-  do_install
-  ;;
-remove)
-  do_remove
-  ;;
-upgrade)
-  do_upgrade
-;;
+resolve_log_file
+
+case "${ACTION}" in
+  install)
+    do_install
+    ;;
+  remove)
+    do_remove
+    ;;
+  upgrade)
+    do_upgrade
+    ;;
+  cleanup)
+    do_cleanup
+    ;;
+  *)
+    echo "Wrong command given"
+    ;;
 esac

+ 1 - 1
ambari-agent/conf/windows/ambari-agent.ini

@@ -52,6 +52,6 @@ dirs=/etc/hadoop,/etc/hadoop/conf,/etc/hbase,/etc/hcatalog,/etc/hive,/etc/oozie,
   /var/log/nagios
 rpms=hadoop,hadoop-lzo,hbase,oozie,sqoop,pig,zookeeper,hive,libconfuse,ambari-log4j
 ; 0 - unlimited
-log_lines_count=300
+log_max_symbols_size=900000
 idle_interval_min=1
 idle_interval_max=10

+ 44 - 1
ambari-agent/pom.xml

@@ -58,7 +58,6 @@
     <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
-      <version>3.4.9</version>
     </dependency>
     <dependency>
       <groupId>commons-cli</groupId>
@@ -72,6 +71,12 @@
       <groupId>commons-configuration</groupId>
       <artifactId>commons-configuration</artifactId>
       <version>1.6</version>
+      <exclusions>
+        <exclusion>
+            <groupId>commons-beanutils</groupId>
+            <artifactId>commons-beanutils-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>commons-lang</groupId>
@@ -89,18 +94,52 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
       <version>2.7.3</version>
+      <exclusions>
+        <exclusion>
+            <groupId>com.jcraft</groupId>
+            <artifactId>jsch</artifactId>
+        </exclusion>
+        <exclusion>
+            <groupId>org.mortbay.jetty</groupId>
+            <artifactId>jetty-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+            <groupId>commons-httpclient</groupId>
+            <artifactId>commons-httpclient</artifactId>
+        </exclusion>
+        <exclusion>
+            <groupId>org.apache.zookeeper</groupId>
+            <artifactId>zookeeper</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.curator</groupId>
       <artifactId>curator-test</artifactId>
       <version>2.9.0</version>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+            <groupId>org.apache.zookeeper</groupId>
+            <artifactId>zookeeper</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.curator</groupId>
       <artifactId>curator-framework</artifactId>
       <version>2.7.1</version>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+            <groupId>org.apache.zookeeper</groupId>
+            <artifactId>zookeeper</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.ambari</groupId>
@@ -257,6 +296,10 @@
             <scriptFile>src/main/package/rpm/posttrans_agent.sh</scriptFile>
             <fileEncoding>utf-8</fileEncoding>
           </posttransScriptlet>
+          <postremoveScriptlet>
+            <scriptFile>src/main/package/rpm/postremove.sh</scriptFile>
+            <fileEncoding>utf-8</fileEncoding>
+          </postremoveScriptlet>
 
           <needarch>x86_64</needarch>
           <autoRequires>false</autoRequires>

+ 10 - 6
ambari-agent/src/main/package/deb/control/postrm

@@ -14,11 +14,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-if [ "$1" == "upgrade" ]; # Action is upgrade
-then
-  if [ -d "/etc/ambari-agent/conf.save" ]
-  then
-      cp -f /etc/ambari-agent/conf.save/* /etc/ambari-agent/conf
-      mv /etc/ambari-agent/conf.save /etc/ambari-agent/conf_$(date '+%d_%m_%y_%H_%M').save
+INSTALL_HELPER="/var/lib/ambari-agent/install-helper.sh"
+
+if [ "$1" == "upgrade" ]; then
+  if [ -f "${INSTALL_HELPER}" ]; then
+    ${INSTALL_HELPER} upgrade
+  fi
+else
+  if  [ -f "/var/lib/ambari-agent/install-helper.sh.orig" ]; then
+    /var/lib/ambari-agent/install-helper.sh.orig cleanup
+    rm -f /var/lib/ambari-agent/install-helper.sh.orig 1>/dev/null 2>&1
   fi
 fi

+ 25 - 36
ambari-agent/src/main/package/deb/control/preinst

@@ -14,42 +14,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-STACKS_FOLDER="/var/lib/ambari-agent/cache/stacks"
-STACKS_FOLDER_OLD=/var/lib/ambari-agent/cache/stacks_$(date '+%d_%m_%y_%H_%M').old
 
-COMMON_SERVICES_FOLDER="/var/lib/ambari-agent/cache/common-services"
-COMMON_SERVICES_FOLDER_OLD=/var/lib/ambari-agent/cache/common-services_$(date '+%d_%m_%y_%H_%M').old
-
-AMBARI_ENV="/var/lib/ambari-agent/ambari-env.sh"
-AMBARI_ENV_OLD="$AMBARI_ENV.rpmsave"
-
-if [ -d "/etc/ambari-agent/conf.save" ]
-then
-    mv /etc/ambari-agent/conf.save /etc/ambari-agent/conf_$(date '+%d_%m_%y_%H_%M').save
-fi
-
-BAK=/etc/ambari-agent/conf/ambari-agent.ini.old
-ORIG=/etc/ambari-agent/conf/ambari-agent.ini
-
-BAK_SUDOERS=/etc/sudoers.d/ambari-agent.bak
-ORIG_SUDOERS=/etc/sudoers.d/ambari-agent
-
-[ -f $ORIG ] && mv -f $ORIG $BAK
-[ -f $ORIG_SUDOERS ] && echo "Moving $ORIG_SUDOERS to $BAK_SUDOERS. Please restore the file if you were using it for ambari-agent non-root functionality" && mv -f $ORIG_SUDOERS $BAK_SUDOERS
-
-if [ -d "$STACKS_FOLDER" ]
-then
-    mv -f "$STACKS_FOLDER" "$STACKS_FOLDER_OLD"
-fi
-
-if [ -d "$COMMON_SERVICES_FOLDER" ]
-then
-    mv -f "$COMMON_SERVICES_FOLDER" "$COMMON_SERVICES_FOLDER_OLD"
-fi
-
-if [ -f "$AMBARI_ENV" ]
-then
-    mv -f "$AMBARI_ENV" "$AMBARI_ENV_OLD"
-fi
+do_backups(){
+  local etc_dir="/etc/ambari-agent"
+  local var_dir="/var/lib/ambari-agent"
+  local sudoers_dir="/etc/sudoers.d"
+
+  # format: title note source target
+  local backup_folders="stack folders::${var_dir}/cache/stacks:${var_dir}/cache/stacks_$(date '+%d_%m_%y_%H_%M').old
+common services folder::${var_dir}/cache/common-services:${var_dir}/cache/common-services_$(date '+%d_%m_%y_%H_%M').old
+ambari-agent.ini::${etc_dir}/conf/ambari-agent.ini:${etc_dir}/conf/ambari-agent.ini.old
+sudoers:Please restore the file if you were using it for ambari-agent non-root functionality:${sudoers_dir}/ambari-agent:${sudoers_dir}/ambari-agent.bak"
+
+  echo "${backup_folders}" | while IFS=: read title notes source target; do
+    if [ -d "${source}" ] || [ -f "${source}" ]; then
+      echo -n "Moving ${title}: ${source} -> ${target}"
+      if [ ! -z notes ]; then
+        echo ", ${notes}"
+      else
+        echo ""
+      fi
+      mv -f "${source}" "${target}"
+    fi
+  done
+}
+
+do_backups
 
 exit 0

+ 1 - 0
ambari-agent/src/main/package/deb/control/prerm

@@ -18,6 +18,7 @@
 # during package update. See http://www.ibm.com/developerworks/library/l-rpm2/
 # for details
 
+
 if [ "$1" == "remove" ]; then # Action is uninstall
     if [ -f "/var/lib/ambari-agent/install-helper.sh" ]; then
       /var/lib/ambari-agent/install-helper.sh remove

+ 24 - 0
ambari-agent/src/main/package/rpm/postremove.sh

@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+
+if [ "$1" -eq 0 ]; then  # Action is uninstall
+    if [ -f "/var/lib/ambari-agent/install-helper.sh.orig" ]; then
+      /var/lib/ambari-agent/install-helper.sh.orig cleanup
+      rm -f /var/lib/ambari-agent/install-helper.sh.orig 1>/dev/null 2>&1
+    fi
+fi
+
+exit 0

+ 2 - 2
ambari-agent/src/main/package/rpm/posttrans_agent.sh

@@ -18,8 +18,8 @@ AMBARI_AGENT_BINARY="/etc/init.d/ambari-agent"
 AMBARI_AGENT_BINARY_SYMLINK="/usr/sbin/ambari-agent"
 
 # setting ambari-agent binary symlink
-if [ ! -f "$AMBARI_AGENT_BINARY_SYMLINK" ]; then
-  ln -s "$AMBARI_AGENT_BINARY" "$AMBARI_AGENT_BINARY_SYMLINK"
+if [ ! -f "${AMBARI_AGENT_BINARY_SYMLINK}" ]; then
+  ln -s "${AMBARI_AGENT_BINARY}" "${AMBARI_AGENT_BINARY_SYMLINK}"
 fi
 
 exit 0

+ 27 - 28
ambari-agent/src/main/package/rpm/preinstall.sh

@@ -13,34 +13,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-STACKS_FOLDER="/var/lib/ambari-agent/cache/stacks"
-STACKS_FOLDER_OLD=/var/lib/ambari-agent/cache/stacks_$(date '+%d_%m_%y_%H_%M').old
 
-COMMON_SERVICES_FOLDER="/var/lib/ambari-agent/cache/common-services"
-COMMON_SERVICES_FOLDER_OLD=/var/lib/ambari-agent/cache/common-services_$(date '+%d_%m_%y_%H_%M').old
-
-if [ -d "/etc/ambari-agent/conf.save" ]
-then
-    mv /etc/ambari-agent/conf.save /etc/ambari-agent/conf_$(date '+%d_%m_%y_%H_%M').save
-fi
-
-BAK=/etc/ambari-agent/conf/ambari-agent.ini.old
-ORIG=/etc/ambari-agent/conf/ambari-agent.ini
-
-BAK_SUDOERS=/etc/sudoers.d/ambari-agent.bak
-ORIG_SUDOERS=/etc/sudoers.d/ambari-agent
-
-[ -f $ORIG ] && mv -f $ORIG $BAK
-[ -f $ORIG_SUDOERS ] && echo "Moving $ORIG_SUDOERS to $BAK_SUDOERS. Please restore the file if you were using it for ambari-agent non-root functionality" && mv -f $ORIG_SUDOERS $BAK_SUDOERS
-
-if [ -d "$STACKS_FOLDER" ]
-then
-    mv -f "$STACKS_FOLDER" "$STACKS_FOLDER_OLD"
-fi
-
-if [ -d "$COMMON_SERVICES_FOLDER" ]
-then
-    mv -f "$COMMON_SERVICES_FOLDER" "$COMMON_SERVICES_FOLDER_OLD"
-fi
+do_backups(){
+  local etc_dir="/etc/ambari-agent"
+  local var_dir="/var/lib/ambari-agent"
+  local sudoers_dir="/etc/sudoers.d"
+
+  # format: title note source target
+  local backup_folders="stack folders::${var_dir}/cache/stacks:${var_dir}/cache/stacks_$(date '+%d_%m_%y_%H_%M').old
+common services folder::${var_dir}/cache/common-services:${var_dir}/cache/common-services_$(date '+%d_%m_%y_%H_%M').old
+ambari-agent.ini::${etc_dir}/conf/ambari-agent.ini:${etc_dir}/conf/ambari-agent.ini.old
+sudoers:Please restore the file if you were using it for ambari-agent non-root functionality:${sudoers_dir}/ambari-agent:${sudoers_dir}/ambari-agent.bak"
+
+  echo "${backup_folders}" | while IFS=: read title notes source target; do
+    if [ -e "${source}" ]; then
+      echo -n "Moving ${title}: ${source} -> ${target}"
+
+      if [ ! -z ${notes} ]; then
+        echo ", ${notes}"
+      else
+        echo ""
+      fi
+
+      mv -f "${source}" "${target}"
+    fi
+  done
+}
+
+do_backups
 
 exit 0

+ 0 - 1
ambari-agent/src/main/package/rpm/preremove.sh

@@ -17,7 +17,6 @@
 # during package update. See http://www.ibm.com/developerworks/library/l-rpm2/
 # for details
 
-
 if [ "$1" -eq 0 ]; then  # Action is uninstall
     if [ -f "/var/lib/ambari-agent/install-helper.sh" ]; then
       /var/lib/ambari-agent/install-helper.sh remove

+ 155 - 196
ambari-agent/src/main/python/ambari_agent/ActionQueue.py

@@ -1,6 +1,4 @@
-#!/usr/bin/env python
-
-'''
+"""
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
@@ -16,11 +14,11 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-'''
+"""
+
 import Queue
 
 import logging
-import traceback
 import threading
 import pprint
 import os
@@ -29,11 +27,9 @@ import time
 import signal
 
 from AgentException import AgentException
-from LiveStatus import LiveStatus
-from ActualConfigHandler import ActualConfigHandler
 from ambari_agent.BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle
+from ambari_agent.models.commands import AgentCommand, CommandStatus
 from ambari_commons.str_utils import split_on_chunks
-from resource_management.libraries.script import Script
 
 
 logger = logging.getLogger()
@@ -41,6 +37,7 @@ installScriptHash = -1
 
 MAX_SYMBOLS_PER_LOG_MESSAGE = 7900
 
+
 class ActionQueue(threading.Thread):
   """ Action Queue for the agent. We pick one command at a time from the queue
   and execute it
@@ -50,27 +47,9 @@ class ActionQueue(threading.Thread):
   # How many actions can be performed in parallel. Feel free to change
   MAX_CONCURRENT_ACTIONS = 5
 
-
-  #How much time(in seconds) we need wait for new incoming execution command before checking
-  #status command queue
+  # How much time (in seconds) we need to wait for a new incoming execution command before checking the status command queue
   EXECUTION_COMMAND_WAIT_TIME = 2
 
-  STATUS_COMMAND = 'STATUS_COMMAND'
-  EXECUTION_COMMAND = 'EXECUTION_COMMAND'
-  AUTO_EXECUTION_COMMAND = 'AUTO_EXECUTION_COMMAND'
-  BACKGROUND_EXECUTION_COMMAND = 'BACKGROUND_EXECUTION_COMMAND'
-  ROLE_COMMAND_INSTALL = 'INSTALL'
-  ROLE_COMMAND_START = 'START'
-  ROLE_COMMAND_STOP = 'STOP'
-  ROLE_COMMAND_CUSTOM_COMMAND = 'CUSTOM_COMMAND'
-  CUSTOM_COMMAND_RESTART = 'RESTART'
-  CUSTOM_COMMAND_RECONFIGURE = 'RECONFIGURE'
-  CUSTOM_COMMAND_START = ROLE_COMMAND_START
-
-  IN_PROGRESS_STATUS = 'IN_PROGRESS'
-  COMPLETED_STATUS = 'COMPLETED'
-  FAILED_STATUS = 'FAILED'
-
   def __init__(self, initializer_module):
     super(ActionQueue, self).__init__()
     self.commandQueue = Queue.Queue()
@@ -83,23 +62,24 @@ class ActionQueue(threading.Thread):
     self.tmpdir = self.config.get('agent', 'prefix')
     self.customServiceOrchestrator = initializer_module.customServiceOrchestrator
     self.parallel_execution = self.config.get_parallel_exec_option()
+    self.taskIdsToCancel = set()
+    self.cancelEvent = threading.Event()
+    self.component_status_executor = initializer_module.component_status_executor
     if self.parallel_execution == 1:
       logger.info("Parallel execution is enabled, will execute agent commands in parallel")
     self.lock = threading.Lock()
 
   def put(self, commands):
     for command in commands:
-      if not command.has_key('serviceName'):
-        command['serviceName'] = "null"
-      if not command.has_key('clusterId'):
-        command['clusterId'] = "null"
-
-      logger.info("Adding " + command['commandType'] + " for role " + \
-                  command['role'] + " for service " + \
-                  command['serviceName'] + " of cluster_id " + \
-                  command['clusterId'] + " to the queue.")
-      if command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND :
-        self.backgroundCommandQueue.put(self.createCommandHandle(command))
+      if "serviceName" not in command:
+        command["serviceName"] = "null"
+      if "clusterId" not in command:
+        command["clusterId"] = "null"
+
+      logger.info("Adding {commandType} for role {role} for service {serviceName} of cluster_id {clusterId} to the queue".format(**command))
+
+      if command['commandType'] == AgentCommand.background_execution:
+        self.backgroundCommandQueue.put(self.create_command_handle(command))
       else:
         self.commandQueue.put(command)
 
@@ -125,24 +105,25 @@ class ActionQueue(threading.Thread):
         if queued_command['taskId'] != task_id:
           self.commandQueue.put(queued_command)
         else:
-          logger.info("Canceling " + queued_command['commandType'] + \
-                      " for service " + queued_command['serviceName'] + \
-                      " and role " +  queued_command['role'] + \
-                      " with taskId " + str(queued_command['taskId']))
+          logger.info("Canceling {commandType} for service {serviceName} and role {role} with taskId {taskId}".format(
+            **queued_command
+          ))
 
       # Kill if in progress
       self.customServiceOrchestrator.cancel_command(task_id, reason)
+      self.taskIdsToCancel.add(task_id)
+      self.cancelEvent.set()
 
   def run(self):
     while not self.stop_event.is_set():
       try:
-        self.processBackgroundQueueSafeEmpty()
-        self.fillRecoveryCommands()
+        self.process_background_queue_safe_empty()
+        self.fill_recovery_commands()
         try:
           if self.parallel_execution == 0:
             command = self.commandQueue.get(True, self.EXECUTION_COMMAND_WAIT_TIME)
 
-            if command == None:
+            if command is None:
               break
 
             self.process_command(command)
@@ -152,17 +133,16 @@ class ActionQueue(threading.Thread):
             while not self.stop_event.is_set():
               command = self.commandQueue.get(True, self.EXECUTION_COMMAND_WAIT_TIME)
 
-              if command == None:
+              if command is None:
                 break
               # If command is not retry_enabled then do not start them in parallel
               # checking just one command is enough as all commands for a stage is sent
               # at the same time and retry is only enabled for initial start/install
-              retryAble = False
+              retry_able = False
               if 'commandParams' in command and 'command_retry_enabled' in command['commandParams']:
-                retryAble = command['commandParams']['command_retry_enabled'] == "true"
-              if retryAble:
-                logger.info("Kicking off a thread for the command, id=" +
-                            str(command['commandId']) + " taskId=" + str(command['taskId']))
+                retry_able = command['commandParams']['command_retry_enabled'] == "true"
+              if retry_able:
+                logger.info("Kicking off a thread for the command, id={} taskId={}".format(command['commandId'], command['taskId']))
                 t = threading.Thread(target=self.process_command, args=(command,))
                 t.daemon = True
                 t.start()
@@ -171,37 +151,38 @@ class ActionQueue(threading.Thread):
                 break
               pass
             pass
-        except (Queue.Empty):
+        except Queue.Empty:
           pass
-      except:
+      except Exception:
         logger.exception("ActionQueue thread failed with exception. Re-running it")
     logger.info("ActionQueue thread has successfully finished")
 
-  def fillRecoveryCommands(self):
-    if not self.tasks_in_progress_or_pending():
+  def fill_recovery_commands(self):
+    if self.recovery_manager.enabled() and not self.tasks_in_progress_or_pending():
       self.put(self.recovery_manager.get_recovery_commands())
 
-  def processBackgroundQueueSafeEmpty(self):
+  def process_background_queue_safe_empty(self):
     while not self.backgroundCommandQueue.empty():
       try:
         command = self.backgroundCommandQueue.get(False)
-        if command.has_key('__handle') and command['__handle'].status == None:
+        if "__handle" in command and command["__handle"].status is None:
           self.process_command(command)
       except Queue.Empty:
         pass
 
-  def createCommandHandle(self, command):
-    if command.has_key('__handle'):
+  def create_command_handle(self, command):
+    if "__handle" in command:
       raise AgentException("Command already has __handle")
+
     command['__handle'] = BackgroundCommandExecutionHandle(command, command['commandId'], None, self.on_background_command_complete_callback)
     return command
 
   def process_command(self, command):
     # make sure we log failures
-    commandType = command['commandType']
-    logger.debug("Took an element of Queue (command type = %s).", commandType)
+    command_type = command['commandType']
+    logger.debug("Took an element of Queue (command type = %s).", command_type)
     try:
-      if commandType in [self.EXECUTION_COMMAND, self.BACKGROUND_EXECUTION_COMMAND, self.AUTO_EXECUTION_COMMAND]:
+      if command_type in AgentCommand.AUTO_EXECUTION_COMMAND_GROUP:
         try:
           if self.recovery_manager.enabled():
             self.recovery_manager.on_execution_command_start()
@@ -214,23 +195,30 @@ class ActionQueue(threading.Thread):
       else:
         logger.error("Unrecognized command %s", pprint.pformat(command))
     except Exception:
-      logger.exception("Exception while processing {0} command".format(commandType))
+      logger.exception("Exception while processing {0} command".format(command_type))
 
   def tasks_in_progress_or_pending(self):
     return not self.commandQueue.empty() or self.recovery_manager.has_active_command()
 
   def execute_command(self, command):
-    '''
+    """
     Executes commands of type EXECUTION_COMMAND
-    '''
-    clusterId = command['clusterId']
-    commandId = command['commandId']
-    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
-    isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
+    """
+    cluster_id = command['clusterId']
+    command_id = command['commandId']
+    command_type = command['commandType']
+
+    num_attempts = 0
+    retry_duration = 0  # even with 0 allow one attempt
+    retry_able = False
+    delay = 1
+    log_command_output = True
+    command_canceled = False
+    command_result = {}
+
     message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
-              "cluster_id {cluster}.".format(
-              commandId = str(commandId), taskId = str(command['taskId']),
-              role=command['role'], cluster=clusterId)
+              "cluster_id {cluster}.".format(commandId=str(command_id), taskId=str(command['taskId']),
+              role=command['role'], cluster=cluster_id)
     logger.info(message)
 
     taskId = command['taskId']
@@ -238,188 +226,165 @@ class ActionQueue(threading.Thread):
     in_progress_status = self.commandStatuses.generate_report_template(command)
     # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
     # server. The prefix is defined in agent-config.ini
-    if not isAutoExecuteCommand:
+    if command_type != AgentCommand.auto_execution:
       in_progress_status.update({
         'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
         'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
-        'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
-        'status': self.IN_PROGRESS_STATUS
+        'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
+        'status': CommandStatus.in_progress
       })
     else:
       in_progress_status.update({
         'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
         'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
-        'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
-        'status': self.IN_PROGRESS_STATUS
+        'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
+        'status': CommandStatus.in_progress
       })
 
     self.commandStatuses.put_command_status(command, in_progress_status)
 
-    numAttempts = 0
-    retryDuration = 0  # even with 0 allow one attempt
-    retryAble = False
-    delay = 1
-    log_command_output = True
-    if 'commandParams' in command and 'log_output' in command['commandParams'] and "false" == command['commandParams']['log_output']:
-      log_command_output = False
-
     if 'commandParams' in command:
       if 'max_duration_for_retries' in command['commandParams']:
-        retryDuration = int(command['commandParams']['max_duration_for_retries'])
-      if 'command_retry_enabled' in command['commandParams']:
-        retryAble = command['commandParams']['command_retry_enabled'] == "true"
-    if isAutoExecuteCommand:
-      retryAble = False
-
-    logger.info("Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}".
-                 format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration, log_command_output=log_command_output))
-    command_canceled = False
-    while retryDuration >= 0:
-      numAttempts += 1
+        retry_duration = int(command['commandParams']['max_duration_for_retries'])
+      if 'command_retry_enabled' in command['commandParams'] and command_type != AgentCommand.auto_execution:
+        #  for AgentCommand.auto_execution command retry_able should be always false
+        retry_able = command['commandParams']['command_retry_enabled'] == "true"
+      if 'log_output' in command['commandParams']:
+        log_command_output = command['commandParams']['log_output'] != "false"
+
+    logger.info("Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec)"
+                " = {retryDuration}, log_output = {log_command_output}".format(
+      taskId=taskId, retryAble=retry_able, retryDuration=retry_duration, log_command_output=log_command_output))
+
+    self.cancelEvent.clear()
+    # for case of command reschedule (e.g. command and cancel for the same taskId are send at the same time)
+    self.taskIdsToCancel.discard(taskId)
+
+    while retry_duration >= 0:
+      if taskId in self.taskIdsToCancel:
+        logger.info('Command with taskId = {0} canceled'.format(taskId))
+        command_canceled = True
+
+        self.taskIdsToCancel.discard(taskId)
+        break
+
+      num_attempts += 1
       start = 0
-      if retryAble:
+      if retry_able:
         start = int(time.time())
       # running command
-      commandresult = self.customServiceOrchestrator.runCommand(command,
-                                                                in_progress_status['tmpout'],
-                                                                in_progress_status['tmperr'],
-                                                                override_output_files=numAttempts == 1,
-                                                                retry=numAttempts > 1)
+      command_result = self.customServiceOrchestrator.runCommand(command,
+                                                                 in_progress_status['tmpout'],
+                                                                 in_progress_status['tmperr'],
+                                                                 override_output_files=num_attempts == 1,
+                                                                 retry=num_attempts > 1)
       end = 1
-      if retryAble:
+      if retry_able:
         end = int(time.time())
-      retryDuration -= (end - start)
+      retry_duration -= (end - start)
 
       # dumping results
-      if isCommandBackground:
+      if command_type == AgentCommand.background_execution:
         logger.info("Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
-                    .format(cid=taskId, exitCode=commandresult['exitcode'], retryAble=retryAble, retryDuration=retryDuration, delay=delay))
+                    .format(cid=taskId, exitCode=command_result['exitcode'], retryAble=retry_able, retryDuration=retry_duration, delay=delay))
         return
       else:
-        if commandresult['exitcode'] == 0:
-          status = self.COMPLETED_STATUS
+        if command_result['exitcode'] == 0:
+          status = CommandStatus.completed
         else:
-          status = self.FAILED_STATUS
-          if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
+          status = CommandStatus.failed
+          if (command_result['exitcode'] == -signal.SIGTERM) or (command_result['exitcode'] == -signal.SIGKILL):
             logger.info('Command with taskId = {cid} was canceled!'.format(cid=taskId))
             command_canceled = True
+            self.taskIdsToCancel.discard(taskId)
             break
 
-      if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
+      if status != CommandStatus.completed and retry_able and retry_duration > 0:
         delay = self.get_retry_delay(delay)
-        if delay > retryDuration:
-          delay = retryDuration
-        retryDuration -= delay  # allow one last attempt
-        commandresult['stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
+        if delay > retry_duration:
+          delay = retry_duration
+        retry_duration -= delay  # allow one last attempt
+        command_result['stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
         logger.info("Retrying command with taskId = {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
         if 'agentLevelParams' not in command:
           command['agentLevelParams'] = {}
 
         command['agentLevelParams']['commandBeingRetried'] = "true"
-        time.sleep(delay)
+        self.cancelEvent.wait(delay) # wake up if something was canceled
+
         continue
       else:
         logger.info("Quit retrying for command with taskId = {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
-                    .format(cid=taskId, status=status, retryAble=retryAble, retryDuration=retryDuration, delay=delay))
+                    .format(cid=taskId, status=status, retryAble=retry_able, retryDuration=retry_duration, delay=delay))
         break
 
+    self.taskIdsToCancel.discard(taskId)
+
     # do not fail task which was rescheduled from server
     if command_canceled:
-      with self.lock:
-        with self.commandQueue.mutex:
+      with self.lock, self.commandQueue.mutex:
           for com in self.commandQueue.queue:
             if com['taskId'] == command['taskId']:
-              logger.info('Command with taskId = {cid} was rescheduled by server. '
-                          'Fail report on cancelled command won\'t be sent with heartbeat.'.format(cid=taskId))
+              logger.info("Command with taskId = {cid} was rescheduled by server. "
+                          "Fail report on cancelled command won't be sent with heartbeat.".format(cid=taskId))
+              self.commandStatuses.delete_command_data(command['taskId'])
               return
 
     # final result to stdout
-    commandresult['stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(numAttempts) + ' tries\n'
-    logger.info('Command with taskId = {cid} completed successfully!'.format(cid=taskId) if status == self.COMPLETED_STATUS else 'Command with taskId = {cid} failed after {attempts} tries'.format(cid=taskId, attempts=numAttempts))
-
-    roleResult = self.commandStatuses.generate_report_template(command)
-    roleResult.update({
-      'stdout': commandresult['stdout'],
-      'stderr': commandresult['stderr'],
-      'exitCode': commandresult['exitcode'],
+    command_result['stdout'] += '\n\nCommand completed successfully!\n' if status == CommandStatus.completed else '\n\nCommand failed after ' + str(num_attempts) + ' tries\n'
+    logger.info('Command with taskId = {cid} completed successfully!'.format(cid=taskId) if status == CommandStatus.completed else 'Command with taskId = {cid} failed after {attempts} tries'.format(cid=taskId, attempts=num_attempts))
+
+    role_result = self.commandStatuses.generate_report_template(command)
+    role_result.update({
+      'stdout': command_result['stdout'],
+      'stderr': command_result['stderr'],
+      'exitCode': command_result['exitcode'],
       'status': status,
     })
 
-    if self.config.has_option("logging","log_command_executes") \
+    if self.config.has_option("logging", "log_command_executes") \
         and int(self.config.get("logging", "log_command_executes")) == 1 \
         and log_command_output:
 
-      if roleResult['stdout'] != '':
+      if role_result['stdout'] != '':
           logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = "
                       + command['role'] + ", roleCommand = " + command['roleCommand'])
-          self.log_command_output(roleResult['stdout'], str(command['taskId']))
+          self.log_command_output(role_result['stdout'], str(command['taskId']))
           logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = "
                       + command['role'] + ", roleCommand = " + command['roleCommand'])
 
-      if roleResult['stderr'] != '':
+      if role_result['stderr'] != '':
           logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = "
                       + command['role'] + ", roleCommand = " + command['roleCommand'])
-          self.log_command_output(roleResult['stderr'], str(command['taskId']))
+          self.log_command_output(role_result['stderr'], str(command['taskId']))
           logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = "
                       + command['role'] + ", roleCommand = " + command['roleCommand'])
 
-    if roleResult['stdout'] == '':
-      roleResult['stdout'] = 'None'
-    if roleResult['stderr'] == '':
-      roleResult['stderr'] = 'None'
+    if role_result['stdout'] == '':
+      role_result['stdout'] = 'None'
+    if role_result['stderr'] == '':
+      role_result['stderr'] = 'None'
 
     # let ambari know name of custom command
 
     if 'commandParams' in command and command['commandParams'].has_key('custom_command'):
-      roleResult['customCommand'] = command['commandParams']['custom_command']
+      role_result['customCommand'] = command['commandParams']['custom_command']
 
-    if 'structuredOut' in commandresult:
-      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
+    if 'structuredOut' in command_result:
+      role_result['structuredOut'] = str(json.dumps(command_result['structuredOut']))
     else:
-      roleResult['structuredOut'] = ''
-
-    # let recovery manager know the current state
-    if status == self.COMPLETED_STATUS:
-      # let ambari know that configuration tags were applied
-      configHandler = ActualConfigHandler(self.config, self.configTags)
-      """
-      #update
-      if 'commandParams' in command:
-        command_params = command['commandParams']
-        if command_params and command_params.has_key('forceRefreshConfigTags') and len(command_params['forceRefreshConfigTags']) > 0  :
-          forceRefreshConfigTags = command_params['forceRefreshConfigTags'].split(',')
-          logger.info("Got refresh additional component tags command")
-
-          for configTag in forceRefreshConfigTags :
-            configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])
-
-          roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
-          command['configurationTags'] = configHandler.read_actual_component(command['role'])
-
-      if command.has_key('configurationTags'):
-        configHandler.write_actual(command['configurationTags'])
-        roleResult['configurationTags'] = command['configurationTags']
-      component = {'serviceName':command['serviceName'],'componentName':command['role']}
-      if 'roleCommand' in command and \
-          (command['roleCommand'] == self.ROLE_COMMAND_START or
-             (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
-               (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
-                  'custom_command' in command['hostLevelParams'] and
-                      command['hostLevelParams']['custom_command'] in (self.CUSTOM_COMMAND_RESTART,
-                                                                       self.CUSTOM_COMMAND_START,
-                                                                       self.CUSTOM_COMMAND_RECONFIGURE))):
-        configHandler.write_actual_component(command['role'],
-                                             command['configurationTags'])
-        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
-          configHandler.write_client_components(command['serviceName'],
-                                                command['configurationTags'],
-                                                command['hostLevelParams']['clientsToUpdateConfigs'])
-        roleResult['configurationTags'] = configHandler.read_actual_component(
-            command['role'])
-    """
+      role_result['structuredOut'] = ''
 
     self.recovery_manager.process_execution_command_result(command, status)
-    self.commandStatuses.put_command_status(command, roleResult)
+    self.commandStatuses.put_command_status(command, role_result)
+
+    cluster_id = str(command['clusterId'])
+
+    if cluster_id != '-1' and cluster_id != 'null':
+      service_name = command['serviceName']
+      if service_name != 'null':
+        component_name = command['role']
+        self.component_status_executor.check_component_status(cluster_id, service_name, component_name, "STATUS", report=True)
 
   def log_command_output(self, text, taskId):
     """
@@ -442,25 +407,21 @@ class ActionQueue(threading.Thread):
     """
     return last_delay * 2
 
-  def command_was_canceled(self):
-    self.customServiceOrchestrator
-
   def on_background_command_complete_callback(self, process_condensed_result, handle):
     logger.debug('Start callback: %s', process_condensed_result)
     logger.debug('The handle is: %s', handle)
-    status = self.COMPLETED_STATUS if handle.exitCode == 0 else self.FAILED_STATUS
+    status = CommandStatus.completed if handle.exitCode == 0 else CommandStatus.failed
 
     aborted_postfix = self.customServiceOrchestrator.command_canceled_reason(handle.command['taskId'])
     if aborted_postfix:
-      status = self.FAILED_STATUS
+      status = CommandStatus.failed
       logger.debug('Set status to: %s , reason = %s', status, aborted_postfix)
     else:
       aborted_postfix = ''
 
+    role_result = self.commandStatuses.generate_report_template(handle.command)
 
-    roleResult = self.commandStatuses.generate_report_template(handle.command)
-
-    roleResult.update({
+    role_result.update({
       'stdout': process_condensed_result['stdout'] + aborted_postfix,
       'stderr': process_condensed_result['stderr'] + aborted_postfix,
       'exitCode': process_condensed_result['exitcode'],
@@ -468,10 +429,8 @@ class ActionQueue(threading.Thread):
       'status': status,
     })
 
-    self.commandStatuses.put_command_status(handle.command, roleResult)
+    self.commandStatuses.put_command_status(handle.command, role_result)
 
-  # Removes all commands from the queue
   def reset(self):
-    queue = self.commandQueue
-    with queue.mutex:
-      queue.queue.clear()
+    with self.commandQueue.mutex:
+      self.commandQueue.queue.clear()

+ 3 - 1
ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py

@@ -50,7 +50,7 @@ class AlertSchedulerHandler():
   TYPE_RECOVERY = 'RECOVERY'
 
   def __init__(self, initializer_module, in_minutes=True):
-
+    self.initializer_module = initializer_module
     self.cachedir = initializer_module.config.alerts_cachedir
     self.stacks_dir = initializer_module.config.stacks_dir
     self.common_services_dir = initializer_module.config.common_services_dir
@@ -169,6 +169,8 @@ class AlertSchedulerHandler():
     definitions = self.__load_definitions()
     scheduled_jobs = self.__scheduler.get_jobs()
 
+    self.initializer_module.alert_status_reporter.reported_alerts.clear()
+
     # for every scheduled job, see if its UUID is still valid
     for scheduled_job in scheduled_jobs:
       uuid_valid = False

+ 18 - 4
ambari-agent/src/main/python/ambari_agent/AlertStatusReporter.py

@@ -38,8 +38,11 @@ class AlertStatusReporter(threading.Thread):
     self.collector = initializer_module.alert_scheduler_handler.collector()
     self.stop_event = initializer_module.stop_event
     self.alert_reports_interval = initializer_module.config.alert_reports_interval
+    self.alert_definitions_cache = initializer_module.alert_definitions_cache
     self.stale_alerts_monitor = initializer_module.stale_alerts_monitor
+    self.server_responses_listener = initializer_module.server_responses_listener
     self.reported_alerts = defaultdict(lambda:defaultdict(lambda:[]))
+    self.send_alert_changes_only = initializer_module.config.send_alert_changes_only
     threading.Thread.__init__(self)
 
   def run(self):
@@ -53,14 +56,15 @@ class AlertStatusReporter(threading.Thread):
     while not self.stop_event.is_set():
       try:
         if self.initializer_module.is_registered:
+          self.clean_not_existing_clusters_info()
           alerts = self.collector.alerts()
           self.stale_alerts_monitor.save_executed_alerts(alerts)
+          alerts_to_send = self.get_changed_alerts(alerts) if self.send_alert_changes_only else alerts
 
-          changed_alerts = self.get_changed_alerts(alerts)
+          if alerts_to_send and self.initializer_module.is_registered:
+            correlation_id = self.initializer_module.connection.send(message=alerts_to_send, destination=Constants.ALERTS_STATUS_REPORTS_ENDPOINT, log_message_function=AlertStatusReporter.log_sending)
+            self.server_responses_listener.listener_functions_on_success[correlation_id] = lambda headers, message: self.save_results(alerts_to_send)
 
-          if changed_alerts and self.initializer_module.is_registered:
-            self.initializer_module.connection.send(message=changed_alerts, destination=Constants.ALERTS_STATUS_REPORTS_ENDPOINT, log_message_function=AlertStatusReporter.log_sending)
-            self.save_results(changed_alerts)
       except ConnectionIsAlreadyClosed: # server and agent disconnected during sending data. Not an issue
         pass
       except:
@@ -94,6 +98,16 @@ class AlertStatusReporter(threading.Thread):
 
     return changed_alerts
     
+
+  def clean_not_existing_clusters_info(self):
+    """
+    This needs to be done to remove information about clusters which where deleted (e.g. ambari-server reset)
+    """
+    for cluster_id in self.reported_alerts.keys():
+      if not cluster_id in self.alert_definitions_cache.get_cluster_ids():
+        del self.reported_alerts[cluster_id]
+
+
   @staticmethod
   def log_sending(message_dict):
     """

+ 2 - 1
ambari-agent/src/main/python/ambari_agent/AmbariAgent.py

@@ -22,7 +22,8 @@ import os
 import sys
 from ambari_commons import subprocess32
 import signal
-from Controller import AGENT_AUTO_RESTART_EXIT_CODE
+
+AGENT_AUTO_RESTART_EXIT_CODE = 77
 
 if os.environ.has_key("PYTHON_BIN"):
   AGENT_SCRIPT = os.path.join(os.environ["PYTHON_BIN"],"site-packages/ambari_agent/main.py")

+ 68 - 9
ambari-agent/src/main/python/ambari_agent/AmbariConfig.py

@@ -69,7 +69,7 @@ passphrase_env_var_name=AMBARI_PASSPHRASE
 [heartbeat]
 state_interval = 1
 dirs={ps}etc{ps}hadoop,{ps}etc{ps}hadoop{ps}conf,{ps}var{ps}run{ps}hadoop,{ps}var{ps}log{ps}hadoop
-log_lines_count=300
+log_max_symbols_size=900000
 iddle_interval_min=1
 iddle_interval_max=10
 
@@ -82,6 +82,7 @@ log_command_executes = 0
 
 class AmbariConfig:
   TWO_WAY_SSL_PROPERTY = "security.server.two_way_ssl"
+  COMMAND_FILE_RETENTION_POLICY_PROPERTY = 'command_file_retention_policy'
   AMBARI_PROPERTIES_CATEGORY = 'agentConfig'
   SERVER_CONNECTION_INFO = "{0}/connection_info"
   CONNECTION_PROTOCOL = "https"
@@ -89,6 +90,15 @@ class AmbariConfig:
   # linux open-file limit
   ULIMIT_OPEN_FILES_KEY = 'ulimit.open.files'
 
+  # #### Command JSON file retention policies #####
+  # Keep all command-*.json files
+  COMMAND_FILE_RETENTION_POLICY_KEEP = 'keep'
+  # Remove command-*.json files if the operation was successful
+  COMMAND_FILE_RETENTION_POLICY_REMOVE_ON_SUCCESS = 'remove_on_success'
+  # Remove all command-*.json files when no longer needed
+  COMMAND_FILE_RETENTION_POLICY_REMOVE = 'remove'
+  # #### Command JSON file retention policies (end) #####
+
   config = None
   net = None
 
@@ -100,8 +110,8 @@ class AmbariConfig:
   def get(self, section, value, default=None):
     try:
       return str(self.config.get(section, value)).strip()
-    except ConfigParser.Error, err:
-      if default != None:
+    except ConfigParser.Error as err:
+      if default is not None:
         return default
       raise err
 
@@ -179,6 +189,10 @@ class AmbariConfig:
   def host_status_report_interval(self):
     return int(self.get('heartbeat', 'state_interval_seconds', '60'))
 
+  @property
+  def log_max_symbols_size(self):
+    return int(self.get('heartbeat', 'log_max_symbols_size', '900000'))
+
   @property
   def cache_dir(self):
     return self.get('agent', 'cache_dir', default='/var/lib/ambari-agent/cache')
@@ -186,25 +200,66 @@ class AmbariConfig:
   @property
   def cluster_cache_dir(self):
     return os.path.join(self.cache_dir, FileCache.CLUSTER_CACHE_DIRECTORY)
-  @property
-  def recovery_cache_dir(self):
-    return os.path.join(self.cache_dir, FileCache.RECOVERY_CACHE_DIRECTORY)
+
   @property
   def alerts_cachedir(self):
     return os.path.join(self.cache_dir, FileCache.ALERTS_CACHE_DIRECTORY)
+
   @property
   def stacks_dir(self):
     return os.path.join(self.cache_dir, FileCache.STACKS_CACHE_DIRECTORY)
+
   @property
   def common_services_dir(self):
     return os.path.join(self.cache_dir, FileCache.COMMON_SERVICES_DIRECTORY)
+
   @property
   def extensions_dir(self):
     return os.path.join(self.cache_dir, FileCache.EXTENSIONS_CACHE_DIRECTORY)
+
   @property
   def host_scripts_dir(self):
     return os.path.join(self.cache_dir, FileCache.HOST_SCRIPTS_CACHE_DIRECTORY)
 
+  @property
+  def command_file_retention_policy(self):
+    """
+    Returns the Agent's command file retention policy.  This policy indicates what to do with the
+    command-*.json and status_command.json files after they are done being used to execute commands
+    from the Ambari server.
+
+    Possible policy values are:
+
+    * keep - Keep all command-*.json files
+    * remove - Remove all command-*.json files when no longer needed
+    * remove_on_success - Remove command-*.json files if the operation was successful
+
+    The policy value is expected to be set in the Ambari agent's ambari-agent.ini file, under the
+    [agent] section.
+
+    For example:
+        command_file_retention_policy=remove
+
+    However, if the value is not set, or set to an unexpected value, "keep" will be returned, since
+    this has been the (only) policy for past versions.
+
+    :rtype: string
+    :return: the command file retention policy, either "keep", "remove", or "remove_on_success"
+    """
+    policy = self.get('agent', self.COMMAND_FILE_RETENTION_POLICY_PROPERTY, default=self.COMMAND_FILE_RETENTION_POLICY_KEEP)
+    policies = [self.COMMAND_FILE_RETENTION_POLICY_KEEP,
+                self.COMMAND_FILE_RETENTION_POLICY_REMOVE,
+                self.COMMAND_FILE_RETENTION_POLICY_REMOVE_ON_SUCCESS]
+
+    if policy.lower() in policies:
+      return policy.lower()
+    else:
+      logger.warning('The configured command_file_retention_policy is invalid, returning "%s" instead: %s',
+                     self.COMMAND_FILE_RETENTION_POLICY_KEEP,
+                     policy)
+      return self.COMMAND_FILE_RETENTION_POLICY_KEEP
+
+
   # TODO AMBARI-18733, change usages of this function to provide the home_dir.
   @staticmethod
   def getLogFile(home_dir=""):
@@ -306,7 +361,7 @@ class AmbariConfig:
   def get_multiprocess_status_commands_executor_enabled(self):
     return bool(int(self.get('agent', 'multiprocess_status_commands_executor_enabled', 1)))
 
-  def update_configuration_from_registration(self, reg_resp):
+  def update_configuration_from_metadata(self, reg_resp):
     if reg_resp and AmbariConfig.AMBARI_PROPERTIES_CATEGORY in reg_resp:
       if not self.has_section(AmbariConfig.AMBARI_PROPERTIES_CATEGORY):
         self.add_section(AmbariConfig.AMBARI_PROPERTIES_CATEGORY)
@@ -319,9 +374,9 @@ class AmbariConfig:
     """
     Get forced https protocol name.
 
-    :return: protocol name, PROTOCOL_TLSv1 by default
+    :return: protocol name, PROTOCOL_TLSv1_2 by default
     """
-    return self.get('security', 'force_https_protocol', default="PROTOCOL_TLSv1")
+    return self.get('security', 'force_https_protocol', default="PROTOCOL_TLSv1_2")
 
   def get_force_https_protocol_value(self):
     """
@@ -340,6 +395,10 @@ class AmbariConfig:
     """
     return self.get('security', 'ca_cert_path', default="")
 
+  @property
+  def send_alert_changes_only(self):
+    return bool(self.get('agent', 'send_alert_changes_only', '0'))
+
 
 def isSameHostList(hostlist1, hostlist2):
   is_same = True

+ 4 - 7
ambari-agent/src/main/python/ambari_agent/BackgroundCommandExecutionHandle.py

@@ -1,6 +1,4 @@
-#!/usr/bin/env python
-
-'''
+"""
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
@@ -16,12 +14,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-'''
+"""
 import logging
 
 logger = logging.getLogger()
 installScriptHash = -1
 
+
 class BackgroundCommandExecutionHandle:
   
   SCHEDULED_STATUS = 'SCHEDULED'
@@ -38,7 +37,5 @@ class BackgroundCommandExecutionHandle:
     self.on_background_command_started = on_background_command_started
     self.on_background_command_complete_callback = on_background_command_complete_callback
 
-
-
   def __str__(self):
-    return "[BackgroundHandle: pid='{0}', status='{1}', exitCode='{2}', commandId='{3}']".format(self.pid, self.status, self.exitCode, self.commandId)
+    return "[BackgroundHandle: pid='{0}', status='{1}', exitCode='{2}', commandId='{3}']".format(self.pid, self.status, self.exitCode, self.commandId)

+ 18 - 0
ambari-agent/src/main/python/ambari_agent/ClusterMetadataCache.py

@@ -38,5 +38,23 @@ class ClusterMetadataCache(ClusterCache):
     """
     super(ClusterMetadataCache, self).__init__(cluster_cache_dir)
 
+  def cache_delete(self, cache_update, cache_hash):
+    """
+    Only deleting cluster is supported here
+    """
+    mutable_dict = self._get_mutable_copy()
+    clusters_ids_to_delete = []
+
+    for cluster_id, cluster_updates_dict in cache_update.iteritems():
+      if cluster_updates_dict != {}:
+        raise Exception("Deleting cluster subvalues is not supported")
+
+      clusters_ids_to_delete.append(cluster_id)
+
+    for cluster_id in clusters_ids_to_delete:
+      del mutable_dict[cluster_id]
+
+    self.rewrite_cache(mutable_dict, cache_hash)
+
   def get_cache_name(self):
     return 'metadata'

+ 27 - 4
ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py

@@ -47,6 +47,7 @@ class ClusterTopologyCache(ClusterCache):
     self.current_host_ids_to_cluster = {}
     self.cluster_local_components = {}
     self.cluster_host_info = None
+    self.component_version_map = {}
     super(ClusterTopologyCache, self).__init__(cluster_cache_dir)
 
   def get_cache_name(self):
@@ -74,15 +75,21 @@ class ClusterTopologyCache(ClusterCache):
 
     for cluster_id, cluster_topology in self.iteritems():
       self.cluster_local_components[cluster_id] = []
+      self.component_version_map[cluster_id] = defaultdict(lambda:defaultdict(lambda: {}))
 
       if not self.current_host_ids_to_cluster[cluster_id]:
         continue
 
       current_host_id = self.current_host_ids_to_cluster[cluster_id]
-      for component_dict in self[cluster_id].components:
-        if 'hostIds' in component_dict and current_host_id in component_dict.hostIds:
-          if current_host_id in component_dict.hostIds:
-            self.cluster_local_components[cluster_id].append(component_dict.componentName)
+
+      if 'components' in self[cluster_id]:
+        for component_dict in self[cluster_id].components:
+          if 'version' in component_dict.commandParams:
+            self.component_version_map[cluster_id][component_dict.serviceName][component_dict.componentName] = component_dict.commandParams.version
+
+          if 'hostIds' in component_dict and current_host_id in component_dict.hostIds:
+            if current_host_id in component_dict.hostIds:
+              self.cluster_local_components[cluster_id].append(component_dict.componentName)
 
 
     self.hosts_to_id = ImmutableDictionary(hosts_to_id)
@@ -101,6 +108,19 @@ class ClusterTopologyCache(ClusterCache):
       hostnames = [self.hosts_to_id[cluster_id][host_id].hostName for host_id in component_dict.hostIds]
       cluster_host_info[component_name.lower()+"_hosts"] += hostnames
 
+    cluster_host_info['all_hosts'] = []
+    cluster_host_info['all_racks'] = []
+    cluster_host_info['all_ipv4_ips'] = []
+    
+    for hosts_dict in self[cluster_id].hosts:
+      host_name = hosts_dict.hostName
+      rack_name = hosts_dict.rackName
+      ip = hosts_dict.ipv4
+      
+      cluster_host_info['all_hosts'].append(host_name)
+      cluster_host_info['all_racks'].append(rack_name)
+      cluster_host_info['all_ipv4_ips'].append(ip)
+
     self.cluster_host_info = cluster_host_info
     return cluster_host_info
 
@@ -118,6 +138,9 @@ class ClusterTopologyCache(ClusterCache):
   def get_cluster_local_components(self, cluster_id):
     return self.cluster_local_components[cluster_id]
 
  def get_cluster_component_version_map(self, cluster_id):
    """
    Version map for a cluster: serviceName -> componentName -> version.

    Populated while the cache maps are rebuilt, from each component's
    commandParams.version; components without a version are absent.
    Raises KeyError for an unknown cluster_id.
    """
    return self.component_version_map[cluster_id]
+
   def get_host_info_by_id(self, cluster_id, host_id):
     """
     Find host by id in list of host dictionaries.

+ 168 - 0
ambari-agent/src/main/python/ambari_agent/CommandHooksOrchestrator.py

@@ -0,0 +1,168 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import logging
+
+from models.commands import AgentCommand
+from models.hooks import HookPrefix
+
+__all__ = ["ResolvedHooks", "HooksOrchestrator"]
+
+
class ResolvedHooks(object):
  """
  Holder for the pre- and post-hook sequences resolved for a single command.

  The sequences are typically generators; they are materialized into lists
  lazily, on first access through the properties.
  """

  def __init__(self, pre_hooks=None, post_hooks=None):
    """
    Creates response instance with the resolved hook sequences.

    Passing ``None`` (the default) means "no hooks". A fresh empty set is
    stored in that case instead of using a mutable default argument, which
    would be a single object shared between all instances.

    :arg pre_hooks hook sequence, typically a generator is passed
    :arg post_hooks hook sequence, typically a generator is passed

    :type pre_hooks Collections.Iterable|types.GeneratorType|None
    :type post_hooks Collections.Iterable|types.GeneratorType|None
    """
    self._pre_hooks = set() if pre_hooks is None else pre_hooks
    self._post_hooks = set() if post_hooks is None else post_hooks

  @property
  def pre_hooks(self):
    """
    Pre-hooks as a materialized sequence.

    :rtype list
    """
    # Converting generator to real sequence on first user request
    if not isinstance(self._pre_hooks, (list, set)):
      self._pre_hooks = list(self._pre_hooks)

    return self._pre_hooks

  @property
  def post_hooks(self):
    """
    Post-hooks as a materialized sequence.

    :rtype list
    """
    # Converting generator to real sequence on first user request
    if not isinstance(self._post_hooks, (list, set)):
      self._post_hooks = list(self._post_hooks)

    return self._post_hooks
+
+
class HookSequenceBuilder(object):
  """
  Builds the ordered sequence of hook folder names for a command.

  Pre-hooks run from the most generic to the most specific definition,
  post-hooks in the reverse order.
  """

  # ToDo: move hooks sequence definition to configuration or text file definition?
  _hooks_sequences = {
    HookPrefix.pre: [
      "{prefix}-{command}",
      "{prefix}-{command}-{service}",
      "{prefix}-{command}-{service}-{role}"
    ],
    HookPrefix.post: [
      "{prefix}-{command}-{service}-{role}",
      "{prefix}-{command}-{service}",
      "{prefix}-{command}"
    ]
  }

  def build(self, prefix, command, service, role):
    """
    Building hooks sequence depending on incoming data.

    Definitions which require a service or a role are skipped when the
    corresponding value is not available.

    :type prefix str
    :type command str
    :type service str
    :type role str
    :rtype types.GeneratorType
    """
    if prefix not in self._hooks_sequences:
      raise TypeError("Unable to locate hooks sequence definition for '{}' prefix".format(prefix))

    for hook_definition in self._hooks_sequences[prefix]:
      if "service" in hook_definition and service is None:
        continue

      # Bug fix: the original used `"role" is hook_definition` — an identity
      # comparison that is always False here — so role-specific definitions
      # were emitted even without a role, formatting the string "None" in.
      if "role" in hook_definition and role is None:
        continue

      yield hook_definition.format(prefix=prefix, command=command, service=service, role=role)
+
+
class HooksOrchestrator(object):
  """
  Resolves pre/post hooks for a command according to HookSequenceBuilder definitions.
  """

  def __init__(self, injector):
    """
    :type injector InitializerModule
    """
    self._file_cache = injector.file_cache
    self._logger = logging.getLogger()
    self._hook_builder = HookSequenceBuilder()

  def resolve_hooks(self, command, command_name):
    """
    Resolving available hooks sequences which should be appended or prepended to script execution chain

    :type command dict
    :type command_name str
    :rtype ResolvedHooks
    """
    # Status commands (and commands without a name) never run hooks.
    if command["commandType"] == AgentCommand.status or not command_name:
      return None

    stack_hooks_dir = self._file_cache.get_hook_base_dir(command)
    if not stack_hooks_dir:
      return ResolvedHooks()

    service = command["serviceName"] if "serviceName" in command else None
    component = command["role"] if "role" in command else None

    pre_sequence = self._hook_builder.build(HookPrefix.pre, command_name, service, component)
    post_sequence = self._hook_builder.build(HookPrefix.post, command_name, service, component)

    return ResolvedHooks(
      self._resolve_hooks_path(stack_hooks_dir, pre_sequence),
      self._resolve_hooks_path(stack_hooks_dir, post_sequence)
    )

  def _resolve_hooks_path(self, stack_hooks_dir, hooks_sequence):
    """
    Yields a tuple (path to hook script, hook base dir) for every entry of
    hooks_sequence whose scripts/hook.py actually exists on disk.

    :type stack_hooks_dir str
    :type hooks_sequence collections.Iterable|types.GeneratorType
    """
    for hook_name in hooks_sequence:
      base_dir = os.path.join(stack_hooks_dir, hook_name)
      script_path = os.path.join(base_dir, "scripts", "hook.py")

      if os.path.isfile(script_path):
        yield script_path, base_dir
      else:
        self._logger.debug("Hook script {0} not found, skipping".format(script_path))

+ 82 - 33
ambari-agent/src/main/python/ambari_agent/CommandStatusDict.py

@@ -22,10 +22,14 @@ import os
 import logging
 import threading
 import copy
+
+import ambari_simplejson as json
+
 from collections import defaultdict
 from Grep import Grep
 
 from ambari_agent import Constants
+from ambari_agent.models.commands import CommandStatus, AgentCommand
 from ambari_stomp.adapter.websocket import ConnectionIsAlreadyClosed
 
 logger = logging.getLogger()
@@ -38,6 +42,9 @@ class CommandStatusDict():
     task_id -> (command, cmd_report)
   """
 
+  # 2MB is a max message size on the server side
+  MAX_REPORT_SIZE = 1950000
+
   def __init__(self, initializer_module):
     """
     callback_action is called every time when status of some command is
@@ -47,8 +54,15 @@ class CommandStatusDict():
     self.lock = threading.RLock()
     self.initializer_module = initializer_module
     self.command_update_output = initializer_module.config.command_update_output
+    self.server_responses_listener = initializer_module.server_responses_listener
+    self.log_max_symbols_size = initializer_module.config.log_max_symbols_size
     self.reported_reports = set()
 
+  def delete_command_data(self, key):
+    # delete stale data about this command
+    with self.lock:
+      self.reported_reports.discard(key)
+      self.current_state.pop(key, None)
 
   def put_command_status(self, command, report):
     """
@@ -57,36 +71,61 @@ class CommandStatusDict():
     from ActionQueue import ActionQueue
 
     key = command['taskId']
-
     # delete stale data about this command
-    with self.lock:
-      self.reported_reports.discard(key)
-      self.current_state.pop(key, None)
+    self.delete_command_data(key)
 
-    is_sent = self.force_update_to_server({command['clusterId']: [report]})
-    updatable = report['status'] == ActionQueue.IN_PROGRESS_STATUS and self.command_update_output
+    is_sent, correlation_id = self.force_update_to_server({command['clusterId']: [report]})
+    updatable = report['status'] == CommandStatus.in_progress and self.command_update_output
 
     if not is_sent or updatable:
-      # if sending is not successful send later
-      with self.lock:
-        self.current_state[key] = (command, report)
-        self.reported_reports.discard(key)
+      self.queue_report_sending(key, command, report)
+    else:
+      self.server_responses_listener.listener_functions_on_error[correlation_id] = lambda headers, message: self.queue_report_sending(key, command, report)
+
+  def queue_report_sending(self, key, command, report):
+    with self.lock:
+      self.current_state[key] = (command, report)
+      self.reported_reports.discard(key)
 
   def force_update_to_server(self, reports_dict):
     if not self.initializer_module.is_registered:
-      return False
+      return False, None
 
     try:
-      self.initializer_module.connection.send(message={'clusters':reports_dict}, destination=Constants.COMMANDS_STATUS_REPORTS_ENDPOINT, log_message_function=CommandStatusDict.log_sending)
-      return True
+      correlation_id = self.initializer_module.connection.send(message={'clusters':reports_dict}, destination=Constants.COMMANDS_STATUS_REPORTS_ENDPOINT, log_message_function=CommandStatusDict.log_sending)
+      return True, correlation_id
     except ConnectionIsAlreadyClosed:
-      return False
+      return False, None
 
   def report(self):
     report = self.generate_report()
 
-    if report and self.force_update_to_server(report):
-      self.clear_reported_reports()
+    if report:
+      for splitted_report in self.split_reports(report, CommandStatusDict.MAX_REPORT_SIZE):
+        success, correlation_id = self.force_update_to_server(splitted_report)
+  
+        if success:
+          self.server_responses_listener.listener_functions_on_success[correlation_id] = lambda headers, message: self.clear_reported_reports(splitted_report)
+
+  def split_reports(self, result_reports, size):
+    part = defaultdict(lambda:[])
+    prev_part = defaultdict(lambda:[])
+    for cluster_id, cluster_reports in result_reports.items():
+      for report in cluster_reports:
+        prev_part[cluster_id].append(report)
+        if self.size_approved(prev_part, size):
+          part[cluster_id].append(report)
+        else:
+          yield part
+          part = defaultdict(lambda:[])
+          prev_part = defaultdict(lambda:[])
+          prev_part[cluster_id].append(report)
+          part[cluster_id].append(report)
+    yield part
+
+  def size_approved(self, report, size):
+    report_json = json.dumps(report)
+    return len(report_json) <= size
 
   def get_command_status(self, taskId):
     with self.lock:
@@ -100,39 +139,48 @@ class CommandStatusDict():
     generation
     """
     self.generated_reports = []
-    from ActionQueue import ActionQueue
-    with self.lock: # Synchronized
-      resultReports = defaultdict(lambda:[])
+
+    with self.lock:
+      result_reports = defaultdict(lambda:[])
       for key, item in self.current_state.items():
         command = item[0]
         report = item[1]
         cluster_id = report['clusterId']
-        if command ['commandType'] in [ActionQueue.EXECUTION_COMMAND, ActionQueue.BACKGROUND_EXECUTION_COMMAND]:
-          if (report['status']) != ActionQueue.IN_PROGRESS_STATUS:
-            resultReports[cluster_id].append(report)
+        if command['commandType'] in AgentCommand.EXECUTION_COMMAND_GROUP:
+          if (report['status']) != CommandStatus.in_progress:
+            result_reports[cluster_id].append(report)
             self.reported_reports.add(key)
           else:
             in_progress_report = self.generate_in_progress_report(command, report)
-            resultReports[cluster_id].append(in_progress_report)
-        elif command ['commandType'] in [ActionQueue.AUTO_EXECUTION_COMMAND]:
+            result_reports[cluster_id].append(in_progress_report)
+        elif command['commandType'] == AgentCommand.auto_execution:
           logger.debug("AUTO_EXECUTION_COMMAND task deleted %s", command['commandId'])
           self.reported_reports.add(key)
           pass
-      return resultReports
+      return result_reports
 
-  def clear_reported_reports(self):
+  def clear_reported_reports(self, result_reports):
     with self.lock:
+      keys_to_remove = set()
       for key in self.reported_reports:
-        del self.current_state[key]
-      self.reported_reports = set()
+        if self.has_report_with_taskid(key, result_reports):
+          del self.current_state[key]
+          keys_to_remove.add(key)
+
+      self.reported_reports = self.reported_reports.difference(keys_to_remove)
+
+  def has_report_with_taskid(self, task_id, result_reports):
+    for cluster_reports in result_reports.values():
+      for report in cluster_reports:
+        if report['taskId'] == task_id:
+          return True
+    return False
 
   def generate_in_progress_report(self, command, report):
     """
     Reads stdout/stderr for IN_PROGRESS command from disk file
     and populates other fields of report.
     """
-    from ActionQueue import ActionQueue
-    
     files_to_read = [report['tmpout'], report['tmperr'], report['structuredOut']]
     files_content = ['...', '...', '{}']
 
@@ -145,14 +193,15 @@ class CommandStatusDict():
     tmpout, tmperr, tmpstructuredout = files_content
 
     grep = Grep()
-    output = grep.tail(tmpout, Grep.OUTPUT_LAST_LINES)
+    output = grep.tail_by_symbols(grep.tail(tmpout, Grep.OUTPUT_LAST_LINES), self.log_max_symbols_size)
+    err = grep.tail_by_symbols(grep.tail(tmperr, Grep.OUTPUT_LAST_LINES), self.log_max_symbols_size)
     inprogress = self.generate_report_template(command)
     inprogress.update({
       'stdout': output,
-      'stderr': tmperr,
+      'stderr': err,
       'structuredOut': tmpstructuredout,
       'exitCode': 777,
-      'status': ActionQueue.IN_PROGRESS_STATUS,
+      'status': CommandStatus.in_progress,
     })
     return inprogress
 

+ 104 - 42
ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py

@@ -1,6 +1,4 @@
-#!/usr/bin/env python
-
-'''
+"""
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
@@ -16,17 +14,20 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-'''
+"""
 
 import logging
 import threading
 
 from ambari_agent import Constants
 from ambari_agent.LiveStatus import LiveStatus
+from ambari_agent.Utils import Utils
 from collections import defaultdict
+
+from ambari_agent.models.commands import AgentCommand
 from ambari_stomp.adapter.websocket import ConnectionIsAlreadyClosed
+from resource_management.libraries.functions.default import default
 
-logger = logging.getLogger(__name__)
 
 class ComponentStatusExecutor(threading.Thread):
   def __init__(self, initializer_module):
@@ -38,6 +39,10 @@ class ComponentStatusExecutor(threading.Thread):
     self.stop_event = initializer_module.stop_event
     self.recovery_manager = initializer_module.recovery_manager
     self.reported_component_status = defaultdict(lambda:defaultdict(lambda:defaultdict(lambda:None))) # component statuses which were received by server
+    self.server_responses_listener = initializer_module.server_responses_listener
+    self.logger = logging.getLogger(__name__)
+    self.reports_to_discard = []
+    self.reports_to_discard_lock = threading.RLock()
     threading.Thread.__init__(self)
 
   def run(self):
@@ -45,7 +50,7 @@ class ComponentStatusExecutor(threading.Thread):
     Run an endless loop which executes all status commands every 'status_commands_run_interval' seconds.
     """
     if self.status_commands_run_interval == 0:
-      logger.warn("ComponentStatusExecutor is turned off. Some functionality might not work correctly.")
+      self.logger.warn("ComponentStatusExecutor is turned off. Some functionality might not work correctly.")
       return
 
     while not self.stop_event.is_set():
@@ -53,6 +58,9 @@ class ComponentStatusExecutor(threading.Thread):
         self.clean_not_existing_clusters_info()
         cluster_reports = defaultdict(lambda:[])
 
+        with self.reports_to_discard_lock:
+          self.reports_to_discard = []
+
         for cluster_id in self.topology_cache.get_cluster_ids():
           # TODO: check if we can make clusters immutable too
           try:
@@ -62,15 +70,15 @@ class ComponentStatusExecutor(threading.Thread):
             # multithreading: if cluster was deleted during iteration
             continue
 
-          if not 'status_commands_to_run' in metadata_cache:
+          if 'status_commands_to_run' not in metadata_cache:
             continue
 
           status_commands_to_run = metadata_cache.status_commands_to_run
 
-          if not 'components' in topology_cache:
+          if 'components' not in topology_cache:
             continue
 
-          current_host_id =  self.topology_cache.get_current_host_id(cluster_id)
+          current_host_id = self.topology_cache.get_current_host_id(cluster_id)
 
           if current_host_id is None:
             continue
@@ -83,60 +91,114 @@ class ComponentStatusExecutor(threading.Thread):
                 break
 
               # cluster was already removed
-              if not cluster_id in self.topology_cache.get_cluster_ids():
+              if cluster_id not in self.topology_cache.get_cluster_ids():
                 break
 
               # check if component is installed on current host
-              if not current_host_id in component_dict.hostIds:
+              if current_host_id not in component_dict.hostIds:
                 break
 
               service_name = component_dict.serviceName
               component_name = component_dict.componentName
 
-              command_dict = {
-                'serviceName': service_name,
-                'role': component_name,
-                'clusterId': cluster_id,
-                'commandType': 'STATUS_COMMAND',
-              }
-
-              component_status_result = self.customServiceOrchestrator.requestComponentStatus(command_dict)
-              status = LiveStatus.LIVE_STATUS if component_status_result['exitcode'] == 0 else LiveStatus.DEAD_STATUS
-
-              # log if status command failed
-              if status == LiveStatus.DEAD_STATUS:
-                stderr = component_status_result['stderr']
-                if not "ComponentIsNotRunning" in stderr and not "ClientComponentHasNoStatus" in stderr:
-                  logger.info("Status command for {0} failed:\n{1}".format(component_name, stderr))
-
-              result = {
-                'serviceName': service_name,
-                'componentName': component_name,
-                'command': command_name,
-                'status': status,
-                'clusterId': cluster_id,
-              }
-
-              if status != self.reported_component_status[cluster_id][component_name][command_name]:
-                logging.info("Status for {0} has changed to {1}".format(component_name, status))
+              # do not run status commands for the component which is starting/stopping or doing other action
+              if self.customServiceOrchestrator.commandsRunningForComponent(cluster_id, component_name):
+                self.logger.info("Skipping status command for {0}. Since command for it is running".format(component_name))
+                continue
+
+              result = self.check_component_status(cluster_id, service_name, component_name, command_name)
+
+              if result:
                 cluster_reports[cluster_id].append(result)
-                self.recovery_manager.handle_status_change(component_name, status)
 
+
+        cluster_reports = self.discard_stale_reports(cluster_reports)
         self.send_updates_to_server(cluster_reports)
       except ConnectionIsAlreadyClosed: # server and agent disconnected during sending data. Not an issue
         pass
       except:
-        logger.exception("Exception in ComponentStatusExecutor. Re-running it")
+        self.logger.exception("Exception in ComponentStatusExecutor. Re-running it")
 
       self.stop_event.wait(self.status_commands_run_interval)
-    logger.info("ComponentStatusExecutor has successfully finished")
+    self.logger.info("ComponentStatusExecutor has successfully finished")
+
+  def discard_stale_reports(self, cluster_reports):
+    """
+    Remove reports which are already stale (meaning other process has already updated status to something different)
+    """
+    with self.reports_to_discard_lock:
+      # nothing to discard
+      if not self.reports_to_discard:
+        return cluster_reports
+
+      reports_to_discard = self.reports_to_discard[:]
+
+    new_cluster_reports = defaultdict(lambda:[])
+    for cluster_id, cluster_reports in cluster_reports.iteritems():
+      for cluster_report in cluster_reports:
+        for discarded_report in reports_to_discard:
+          if Utils.are_dicts_equal(cluster_report, discarded_report, keys_to_skip=['status']):
+            self.logger.info("Discarding outdated status {0} before sending".format(cluster_report))
+            break
+        else:
+          new_cluster_reports[cluster_id].append(cluster_report)
+
+    return new_cluster_reports
+
+  def check_component_status(self, cluster_id, service_name, component_name, command_name, report=False):
+    """
+    Returns components status if it has changed, otherwise None.
+    """
+
+    # if not a component
+    if self.topology_cache.get_component_info_by_key(cluster_id, service_name, component_name) is None:
+      return None
+
+    command_dict = {
+      'serviceName': service_name,
+      'role': component_name,
+      'clusterId': cluster_id,
+      'commandType': AgentCommand.status,
+    }
+
+    component_status_result = self.customServiceOrchestrator.requestComponentStatus(command_dict)
+    status = LiveStatus.LIVE_STATUS if component_status_result['exitcode'] == 0 else LiveStatus.DEAD_STATUS
+
+    # log if status command failed
+    if status == LiveStatus.DEAD_STATUS:
+      stderr = component_status_result['stderr']
+      if "ComponentIsNotRunning" not in stderr and "ClientComponentHasNoStatus" not in stderr:
+        self.logger.info("Status command for {0} failed:\n{1}".format(component_name, stderr))
+
+    result = {
+      'serviceName': service_name,
+      'componentName': component_name,
+      'command': command_name,
+      'status': status,
+      'clusterId': cluster_id,
+    }
+
+    if status != self.reported_component_status[cluster_id][component_name][command_name]:
+      logging.info("Status for {0} has changed to {1}".format(component_name, status))
+      self.recovery_manager.handle_status_change(component_name, status)
+
+      if report:
+        with self.reports_to_discard_lock:
+          self.reports_to_discard.append(result)
+
+        self.send_updates_to_server({cluster_id: [result]})
+
+      return result
+    return None
 
   def send_updates_to_server(self, cluster_reports):
     if not cluster_reports or not self.initializer_module.is_registered:
       return
 
-    self.initializer_module.connection.send(message={'clusters': cluster_reports}, destination=Constants.COMPONENT_STATUS_REPORTS_ENDPOINT)
+    correlation_id = self.initializer_module.connection.send(message={'clusters': cluster_reports}, destination=Constants.COMPONENT_STATUS_REPORTS_ENDPOINT)
+    self.server_responses_listener.listener_functions_on_success[correlation_id] = lambda headers, message: self.save_reported_component_status(cluster_reports)
 
+  def save_reported_component_status(self, cluster_reports):
     for cluster_id, reports in cluster_reports.iteritems():
       for report in reports:
         component_name = report['componentName']
@@ -150,5 +212,5 @@ class ComponentStatusExecutor(threading.Thread):
     This needs to be done to remove information about clusters which where deleted (e.g. ambari-server reset)
     """
     for cluster_id in self.reported_component_status.keys():
-      if not cluster_id in self.topology_cache.get_cluster_ids():
+      if cluster_id not in self.topology_cache.get_cluster_ids():
         del self.reported_component_status[cluster_id]

+ 112 - 0
ambari-agent/src/main/python/ambari_agent/ComponentVersionReporter.py

@@ -0,0 +1,112 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import threading
+
+from ambari_agent import Constants
+from collections import defaultdict
+
+from ambari_agent.models.commands import AgentCommand
+
+logger = logging.getLogger(__name__)
+
class ComponentVersionReporter(threading.Thread):
  """
  One-shot thread which determines the version of every component installed
  on this host (via the get_version command) and reports it to the server.
  """

  def __init__(self, initializer_module):
    """
    :type initializer_module InitializerModule
    """
    self.initializer_module = initializer_module
    self.topology_cache = initializer_module.topology_cache
    self.customServiceOrchestrator = initializer_module.customServiceOrchestrator
    self.server_responses_listener = initializer_module.server_responses_listener
    threading.Thread.__init__(self)

  def run(self):
    """
    Get version of all components by running get_version execution command.
    """
    try:
      cluster_reports = defaultdict(lambda:[])

      for cluster_id in self.topology_cache.get_cluster_ids():
        topology_cache = self.topology_cache[cluster_id]

        if 'components' not in topology_cache:
          continue

        current_host_id = self.topology_cache.get_current_host_id(cluster_id)

        if current_host_id is None:
          continue

        cluster_components = topology_cache.components
        for component_dict in cluster_components:
          # check if component is installed on current host
          if current_host_id not in component_dict.hostIds:
            continue

          service_group_name = component_dict.serviceGroupName
          service_name = component_dict.serviceName
          component_name = component_dict.componentName

          result = self.check_component_version(cluster_id, service_group_name, service_name, component_name)

          if result:
            cluster_reports[cluster_id].append(result)

      self.send_updates_to_server(cluster_reports)
    except:
      # broad catch is deliberate: a reporter thread must never die silently
      # with an unlogged traceback
      logger.exception("Exception in ComponentVersionReporter")

  def check_component_version(self, cluster_id, service_group_name, service_name, component_name):
    """
    Returns the version report dict for a single component, or None when the
    component is unknown or its version could not be determined.
    """
    # if not a component
    if self.topology_cache.get_component_info_by_key(cluster_id, service_name, component_name) is None:
      return None

    command_dict = {
      'serviceGroupName': service_group_name,
      'serviceName': service_name,
      'role': component_name,
      'clusterId': cluster_id,
      'commandType': AgentCommand.get_version,
    }

    version_result = self.customServiceOrchestrator.requestComponentStatus(command_dict, command_name=AgentCommand.get_version)

    # Idiom fix: 'x not in y' instead of 'not x in y'
    if version_result['exitcode'] or 'structuredOut' not in version_result or 'version_reporting' not in version_result['structuredOut']:
      logger.error("Could not get version for component {0} of {1} service cluster_id={2}. Command returned: {3}".format(component_name, service_name, cluster_id, version_result))
      return None

    result = {
      'serviceGroupName': service_group_name,
      'serviceName': service_name,
      'componentName': component_name,
      'version_reporting': version_result['structuredOut']['version_reporting'],
      'clusterId': cluster_id,
    }

    return result

  def send_updates_to_server(self, cluster_reports):
    """
    Push collected version reports to the server; no-op when there is
    nothing to send or the agent is not registered yet.
    """
    if not cluster_reports or not self.initializer_module.is_registered:
      return

    self.initializer_module.connection.send(message={'clusters': cluster_reports}, destination=Constants.COMPONENT_VERSION_REPORTS_ENDPOINT)

+ 9 - 3
ambari-agent/src/main/python/ambari_agent/ConfigurationBuilder.py

@@ -39,10 +39,13 @@ class ConfigurationBuilder:
 
       command_dict = {
         'clusterLevelParams': metadata_cache.clusterLevelParams,
+        'clusterSettings': metadata_cache.clusterSettings,
         'hostLevelParams': host_level_params_cache,
         'clusterHostInfo': self.topology_cache.get_cluster_host_info(cluster_id),
         'localComponents': self.topology_cache.get_cluster_local_components(cluster_id),
-        'agentLevelParams': {'hostname': self.topology_cache.get_current_host_info(cluster_id)['hostName']}
+        'componentVersionMap': self.topology_cache.get_cluster_component_version_map(cluster_id),
+        'agentLevelParams': {'hostname': self.topology_cache.get_current_host_info(cluster_id)['hostName']},
+        'clusterName': metadata_cache.clusterLevelParams.cluster_name
       }
 
       if service_name is not None and service_name != 'null':
@@ -55,9 +58,12 @@ class ConfigurationBuilder:
 
       component_dict = self.topology_cache.get_component_info_by_key(cluster_id, service_name, component_name)
       if component_dict is not None:
+        mpack_id = component_dict.desiredMpackId
+        stack_settings = host_level_params_cache.stacksSettings[str(mpack_id)]
         command_dict.update({
           'componentLevelParams': component_dict.componentLevelParams,
-          'commandParams': component_dict.commandParams
+          'commandParams': component_dict.commandParams,
+          'stackSettings': stack_settings
         })
 
       command_dict.update(configurations_cache)
@@ -80,4 +86,4 @@ class ConfigurationBuilder:
 
   @property
   def public_fqdn(self):
-    hostname.public_hostname(self.config)
+    return hostname.public_hostname(self.config)

+ 6 - 2
ambari-agent/src/main/python/ambari_agent/Constants.py

@@ -26,16 +26,19 @@ ALERTS_DEFINITIONS_TOPIC = '/user/alert_definitions'
 METADATA_TOPIC = '/events/metadata'
 TOPOLOGIES_TOPIC = '/events/topologies'
 SERVER_RESPONSES_TOPIC = '/user/'
+AGENT_ACTIONS_TOPIC = '/user/agent_actions'
 
-PRE_REGISTRATION_TOPICS_TO_SUBSCRIBE = [SERVER_RESPONSES_TOPIC]
-POST_REGISTRATION_TOPICS_TO_SUBSCRIBE = [COMMANDS_TOPIC, CONFIGURATIONS_TOPIC, METADATA_TOPIC, TOPOLOGIES_TOPIC, HOST_LEVEL_PARAMS_TOPIC, ALERTS_DEFINITIONS_TOPIC]
+PRE_REGISTRATION_TOPICS_TO_SUBSCRIBE = [SERVER_RESPONSES_TOPIC, AGENT_ACTIONS_TOPIC]
+POST_REGISTRATION_TOPICS_TO_SUBSCRIBE = [COMMANDS_TOPIC]
 
+AGENT_RESPONSES_TOPIC = '/reports/responses'
 TOPOLOGY_REQUEST_ENDPOINT = '/agents/topologies'
 METADATA_REQUEST_ENDPOINT = '/agents/metadata'
 CONFIGURATIONS_REQUEST_ENDPOINT = '/agents/configs'
 HOST_LEVEL_PARAMS_TOPIC_ENPOINT = '/agents/host_level_params'
 ALERTS_DEFINITIONS_REQUEST_ENDPOINT = '/agents/alert_definitions'
 COMPONENT_STATUS_REPORTS_ENDPOINT = '/reports/component_status'
+COMPONENT_VERSION_REPORTS_ENDPOINT = '/reports/component_version'
 COMMANDS_STATUS_REPORTS_ENDPOINT = '/reports/commands_status'
 HOST_STATUS_REPORTS_ENDPOINT = '/reports/host_status'
 ALERTS_STATUS_REPORTS_ENDPOINT = '/reports/alerts_status'
@@ -44,3 +47,4 @@ HEARTBEAT_ENDPOINT = '/heartbeat'
 REGISTRATION_ENDPOINT = '/register'
 
 CORRELATION_ID_STRING = 'correlationId'
+MESSAGE_ID = 'messageId'

+ 254 - 168
ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py

@@ -18,27 +18,34 @@ See the License for the specific language governing permissions and
 limitations under the License.
 '''
 
-import logging
+
 import os
-import ambari_simplejson as json
+
+
 import sys
-from ambari_commons import shell
+import uuid
+import logging
 import threading
+import ambari_simplejson as json
+from collections import defaultdict
+from ConfigParser import NoOptionError
 
-from AgentException import AgentException
-from PythonExecutor import PythonExecutor
+from ambari_commons import shell, subprocess32
+from ambari_commons.constants import AGENT_TMP_DIR
 from resource_management.libraries.functions.log_process_information import log_process_information
 from resource_management.core.utils import PasswordString
-from ambari_commons import subprocess32
+
+from ambari_agent.models.commands import AgentCommand
 from ambari_agent.Utils import Utils
-from ambari_commons.constants import AGENT_TMP_DIR
-import hostname
-import Constants
+
+from AgentException import AgentException
+from PythonExecutor import PythonExecutor
 
 
 logger = logging.getLogger()
 
-class CustomServiceOrchestrator():
+
+class CustomServiceOrchestrator(object):
   """
   Executes a command for custom service. stdout and stderr are written to
   tmpoutfile and to tmperrfile respectively.
@@ -50,9 +57,6 @@ class CustomServiceOrchestrator():
   CUSTOM_ACTION_COMMAND = 'ACTIONEXECUTE'
   CUSTOM_COMMAND_COMMAND = 'CUSTOM_COMMAND'
 
-  PRE_HOOK_PREFIX="before"
-  POST_HOOK_PREFIX="after"
-
   HOSTS_LIST_KEY = "all_hosts"
   PING_PORTS_KEY = "all_ping_ports"
   RACKS_KEY = "all_racks"
@@ -78,21 +82,22 @@ class CustomServiceOrchestrator():
   CREDENTIAL_STORE_CLASS_PATH_NAME = 'credentialStoreClassPath'
 
   def __init__(self, initializer_module):
+    self.initializer_module = initializer_module
     self.configuration_builder = initializer_module.configuration_builder
     self.host_level_params_cache = initializer_module.host_level_params_cache
     self.config = initializer_module.config
+    self.hooks_orchestrator = initializer_module.hooks_orchestrator
     self.tmp_dir = self.config.get('agent', 'prefix')
     self.force_https_protocol = self.config.get_force_https_protocol_name()
     self.ca_cert_file_path = self.config.get_ca_cert_file_path()
     self.exec_tmp_dir = AGENT_TMP_DIR
     self.file_cache = initializer_module.file_cache
     self.status_commands_stdout = os.path.join(self.tmp_dir,
-                                               'status_command_stdout.txt')
+                                               'status_command_stdout_{0}.txt')
     self.status_commands_stderr = os.path.join(self.tmp_dir,
-                                               'status_command_stderr.txt')
-    # cache reset should also be called on explicit refreshCache command
-    # TODO : AMBARI-21056 Revisit for correct changes
-    #controller.refresh_cache_listeners.append(self.file_cache.reset)
+                                               'status_command_stderr_{0}.txt')
+    self.status_structured_out = os.path.join(self.tmp_dir,
+                                               'status_structured-out-{0}.json')
 
     # Construct the hadoop credential lib JARs path
     self.credential_shell_lib_path = os.path.join(self.config.get('security', 'credential_lib_dir',
@@ -101,16 +106,12 @@ class CustomServiceOrchestrator():
     self.credential_conf_dir = self.config.get('security', 'credential_conf_dir', self.DEFAULT_CREDENTIAL_CONF_DIR)
 
     self.credential_shell_cmd = self.config.get('security', 'credential_shell_cmd', self.DEFAULT_CREDENTIAL_SHELL_CMD)
-
-    # Clean up old status command files if any
-    try:
-      os.unlink(self.status_commands_stdout)
-      os.unlink(self.status_commands_stderr)
-    except OSError:
-      pass # Ignore fail
     self.commands_in_progress_lock = threading.RLock()
     self.commands_in_progress = {}
 
+    # save count (not boolean) for parallel execution cases
+    self.commands_for_component_in_progress = defaultdict(lambda:defaultdict(lambda:0))
+
   def map_task_to_process(self, task_id, processId):
     with self.commands_in_progress_lock:
       logger.debug('Maps taskId=%s to pid=%s', task_id, processId)
@@ -151,6 +152,9 @@ class CustomServiceOrchestrator():
     conf_dir = os.path.join(self.credential_conf_dir, service_name.lower())
     return conf_dir
 
def commandsRunningForComponent(self, clusterId, componentName):
  """
  Return True when at least one command is currently executing for the given
  component of the given cluster.

  The underlying structure is a counter (not a flag) so parallel command
  executions for the same component are tracked correctly.
  """
  running_count = self.commands_for_component_in_progress[clusterId][componentName]
  return running_count > 0
   def getConfigTypeCredentials(self, commandJson):
     """
     Gets the affected config types for the service in this command
@@ -209,8 +213,8 @@ class CustomServiceOrchestrator():
     :return:
     """
     configtype_credentials = {}
-    if 'configuration_credentials' in commandJson:
-      for config_type, password_properties in commandJson['configuration_credentials'].items():
+    if 'serviceLevelParams' in commandJson and 'configuration_credentials' in commandJson['serviceLevelParams']:
+      for config_type, password_properties in commandJson['serviceLevelParams']['configuration_credentials'].items():
         if config_type in commandJson['configurations']:
           value_names = []
           config = commandJson['configurations'][config_type]
@@ -276,6 +280,9 @@ class CustomServiceOrchestrator():
     if len(configtype_credentials) == 0:
       logger.info("Credential store is enabled but no property are found that can be encrypted.")
       commandJson['credentialStoreEnabled'] = "false"
+    # CS is enabled and config properties are available
+    else:
+      commandJson['credentialStoreEnabled'] = "true"
 
     for config_type, credentials in configtype_credentials.items():
       config = commandJson['configurations'][config_type]
@@ -304,20 +311,23 @@ class CustomServiceOrchestrator():
 
     return cmd_result
 
-
   def runCommand(self, command_header, tmpoutfile, tmperrfile, forced_command_name=None,
-                 override_output_files=True, retry=False, is_status_command=False):
+                 override_output_files=True, retry=False, is_status_command=False, tmpstrucoutfile=None):
     """
     forced_command_name may be specified manually. In this case, value, defined at
     command json, is ignored.
     """
+    incremented_commands_for_component = False
+
+    ret = None
+    json_path = None
+
     try:
       command = self.generate_command(command_header)
       script_type = command['commandParams']['script_type']
       script = command['commandParams']['script']
       timeout = int(command['commandParams']['command_timeout'])
-
-      server_url_prefix = command['ambariLevelParams']['jdk_location']
+      cluster_id = str(command['clusterId'])
 
       # Status commands have no taskId nor roleCommand
       if not is_status_command:
@@ -325,30 +335,26 @@ class CustomServiceOrchestrator():
         command_name = command['roleCommand']
       else:
         task_id = 'status'
+        command_name = None
 
       if forced_command_name is not None:  # If not supplied as an argument
         command_name = forced_command_name
 
-      if command_name == self.CUSTOM_ACTION_COMMAND:
-        base_dir = self.file_cache.get_custom_actions_base_dir(server_url_prefix)
+      if command_name and command_name == self.CUSTOM_ACTION_COMMAND:
+        base_dir = self.file_cache.get_custom_actions_base_dir(command)
         script_tuple = (os.path.join(base_dir, 'scripts', script), base_dir)
-        hook_dir = None
       else:
         if command_name == self.CUSTOM_COMMAND_COMMAND:
           command_name = command['commandParams']['custom_command']
 
         # forces a hash challenge on the directories to keep them updated, even
         # if the return type is not used
-        self.file_cache.get_host_scripts_base_dir(server_url_prefix)
-        hook_dir = self.file_cache.get_hook_base_dir(command, server_url_prefix)
-        base_dir = self.file_cache.get_service_base_dir(command, server_url_prefix)
-        self.file_cache.get_custom_resources_subdir(command, server_url_prefix)
-
+        base_dir = self.file_cache.get_service_base_dir(command)
         script_path = self.resolve_script_path(base_dir, script)
         script_tuple = (script_path, base_dir)
 
-      tmpstrucoutfile = os.path.join(self.tmp_dir,
-                                    "structured-out-{0}.json".format(task_id))
+      if not tmpstrucoutfile:
+        tmpstrucoutfile = os.path.join(self.tmp_dir, "structured-out-{0}.json".format(task_id))
 
       # We don't support anything else yet
       if script_type.upper() != self.SCRIPT_TYPE_PYTHON:
@@ -357,30 +363,36 @@ class CustomServiceOrchestrator():
 
       # Execute command using proper interpreter
       handle = None
-      if command.has_key('__handle'):
+      if "__handle" in command:
         handle = command['__handle']
         handle.on_background_command_started = self.map_task_to_process
         del command['__handle']
 
       # If command contains credentialStoreEnabled, then
       # generate the JCEKS file for the configurations.
-      credentialStoreEnabled = False
+      credential_store_enabled = False
       if 'serviceLevelParams' in command and 'credentialStoreEnabled' in command['serviceLevelParams']:
-        credentialStoreEnabled = command['serviceLevelParams']['credentialStoreEnabled']
+        credential_store_enabled = command['serviceLevelParams']['credentialStoreEnabled']
 
-      if credentialStoreEnabled and command_name != self.COMMAND_NAME_STATUS:
+      if credential_store_enabled and command_name != self.COMMAND_NAME_STATUS:
         if 'commandBeingRetried' not in command['agentLevelParams'] or command['agentLevelParams']['commandBeingRetried'] != "true":
           self.generateJceks(command)
         else:
           logger.info("Skipping generation of jceks files as this is a retry of the command")
 
+      json_path = self.dump_command_to_json(command, retry, is_status_command)
+      hooks = self.hooks_orchestrator.resolve_hooks(command, command_name)
+      """:type hooks ambari_agent.CommandHooksOrchestrator.ResolvedHooks"""
+
+      py_file_list = []
+      if hooks:
+       py_file_list.extend(hooks.pre_hooks)
+
+      py_file_list.append(script_tuple)
+
+      if hooks:
+       py_file_list.extend(hooks.post_hooks)
 
-      json_path = self.dump_command_to_json(command, retry)
-      pre_hook_tuple = self.resolve_hook_script_path(hook_dir,
-          self.PRE_HOOK_PREFIX, command_name, script_type)
-      post_hook_tuple = self.resolve_hook_script_path(hook_dir,
-          self.POST_HOOK_PREFIX, command_name, script_type)
-      py_file_list = [pre_hook_tuple, script_tuple, post_hook_tuple]
       # filter None values
       filtered_py_file_list = [i for i in py_file_list if i]
 
@@ -388,33 +400,44 @@ class CustomServiceOrchestrator():
 
       # Executing hooks and script
       ret = None
-      from ActionQueue import ActionQueue
-      if command.has_key('commandType') and command['commandType'] == ActionQueue.BACKGROUND_EXECUTION_COMMAND and len(filtered_py_file_list) > 1:
+
+      if "commandType" in command and command['commandType'] == AgentCommand.background_execution\
+        and len(filtered_py_file_list) > 1:
+
         raise AgentException("Background commands are supported without hooks only")
 
       python_executor = self.get_py_executor(forced_command_name)
-      backup_log_files = not command_name in self.DONT_BACKUP_LOGS_FOR_COMMANDS
-      log_out_files = self.config.get("logging","log_out_files", default="0") != "0"
+      backup_log_files = command_name not in self.DONT_BACKUP_LOGS_FOR_COMMANDS
+      try:
+       log_out_files = self.config.get("logging", "log_out_files", default=None) is not None
+      except NoOptionError:
+       log_out_files = None
+
+      if cluster_id != '-1' and cluster_id != 'null' and not is_status_command:
+        self.commands_for_component_in_progress[cluster_id][command['role']] += 1
+        incremented_commands_for_component = True
+
+        # reset status which was reported, so agent re-reports it after command finished
+        self.initializer_module.component_status_executor.reported_component_status[cluster_id][command['role']]['STATUS'] = None
 
       for py_file, current_base_dir in filtered_py_file_list:
-        log_info_on_failure = not command_name in self.DONT_DEBUG_FAILURES_FOR_COMMANDS
+        log_info_on_failure = command_name not in self.DONT_DEBUG_FAILURES_FOR_COMMANDS
         script_params = [command_name, json_path, current_base_dir, tmpstrucoutfile, logger_level, self.exec_tmp_dir,
                          self.force_https_protocol, self.ca_cert_file_path]
 
         if log_out_files:
           script_params.append("-o")
 
-        ret = python_executor.run_file(py_file, script_params,
-                               tmpoutfile, tmperrfile, timeout,
-                               tmpstrucoutfile, self.map_task_to_process,
-                               task_id, override_output_files, backup_log_files = backup_log_files,
-                               handle = handle, log_info_on_failure=log_info_on_failure)
+        ret = python_executor.run_file(py_file, script_params, tmpoutfile, tmperrfile, timeout,
+                                       tmpstrucoutfile, self.map_task_to_process, task_id, override_output_files,
+                                       backup_log_files=backup_log_files, handle=handle,
+                                       log_info_on_failure=log_info_on_failure)
         # Next run_file() invocations should always append to current output
         override_output_files = False
         if ret['exitcode'] != 0:
           break
 
-      if not ret: # Something went wrong
+      if not ret:
         raise AgentException("No script has been executed")
 
       # if canceled and not background command
@@ -429,22 +452,34 @@ class CustomServiceOrchestrator():
           with open(tmperrfile, "a") as f:
             f.write(cancel_reason)
 
-    except Exception, e: # We do not want to let agent fail completely
+    except Exception as e:
       exc_type, exc_obj, exc_tb = sys.exc_info()
-      message = "Caught an exception while executing "\
-        "custom service command: {0}: {1}; {2}".format(exc_type, exc_obj, str(e))
+      message = "Caught an exception while executing custom service command: {0}: {1}; {2}".format(exc_type, exc_obj, e)
       logger.exception(message)
       ret = {
-        'stdout' : message,
-        'stderr' : message,
-        'structuredOut' : '{}',
+        'stdout': message,
+        'stderr': message,
+        'structuredOut': '{}',
         'exitcode': 1,
       }
+    finally:
+      if incremented_commands_for_component:
+        self.commands_for_component_in_progress[cluster_id][command['role']] -= 1
+
+      if json_path:
+        if is_status_command:
+          try:
+            os.unlink(json_path)
+          except OSError:
+            pass  # Ignore failure
+        else:
+          self.conditionally_remove_command_file(json_path, ret)
+
     return ret
 
   def command_canceled_reason(self, task_id):
     with self.commands_in_progress_lock:
-      if self.commands_in_progress.has_key(task_id):#Background command do not push in this collection (TODO)
+      if task_id in self.commands_in_progress:
         logger.debug('Pop with taskId %s', task_id)
         pid = self.commands_in_progress.pop(task_id)
         if not isinstance(pid, (int, long)):
@@ -469,22 +504,47 @@ class CustomServiceOrchestrator():
     required_config_timestamp = command_header['requiredConfigTimestamp'] if 'requiredConfigTimestamp' in command_header else None
 
     command_dict = self.configuration_builder.get_configuration(cluster_id, service_name, component_name, required_config_timestamp)
+
+    # remove data populated from topology to avoid merge and just override
+    if 'clusterHostInfo' in command_header:
+      del command_dict['clusterHostInfo']
+
     command = Utils.update_nested(Utils.get_mutable_copy(command_dict), command_header)
+
+    # topology needs to be decompressed if and only if it originates from command header
+    if 'clusterHostInfo' in command_header and command_header['clusterHostInfo']:
+      command['clusterHostInfo'] = self.decompress_cluster_host_info(command['clusterHostInfo'])
+
     return command
 
def requestComponentStatus(self, command_header, command_name="STATUS"):
  """
   Component status is determined by exit code, returned by runCommand().
   Exit code 0 means that component is running and any other exit code means that
   component is not running
  """
  # By default status command output is overridden; keep it around when debugging.
  override_output_files = True
  if logging.getLogger().level == logging.DEBUG:
    override_output_files = False

  # make sure status commands that run in parallel don't use the same files
  unique_suffix = uuid.uuid4()
  status_commands_stdout = self.status_commands_stdout.format(unique_suffix)
  status_commands_stderr = self.status_commands_stderr.format(unique_suffix)
  status_structured_out = self.status_structured_out.format(unique_suffix)

  try:
    res = self.runCommand(command_header, status_commands_stdout,
                          status_commands_stderr, command_name,
                          override_output_files=override_output_files, is_status_command=True,
                          tmpstrucoutfile=status_structured_out)
  finally:
    # Fix: remove each temp file independently. A single try around all three
    # unlinks meant a failure on the first one leaked the remaining files.
    for temp_file in (status_commands_stdout, status_commands_stderr, status_structured_out):
      try:
        os.unlink(temp_file)
      except OSError:
        pass  # Ignore failure

  return res
 
   def resolve_script_path(self, base_dir, script):
@@ -497,152 +557,178 @@ class CustomServiceOrchestrator():
       raise AgentException(message)
     return path
 
-
-  def resolve_hook_script_path(self, stack_hooks_dir, prefix, command_name, script_type):
-    """
-    Returns a tuple(path to hook script, hook base dir) according to string prefix
-    and command name. If script does not exist, returns None
-    """
-    if not stack_hooks_dir:
-      return None
-    hook_dir = "{0}-{1}".format(prefix, command_name)
-    hook_base_dir = os.path.join(stack_hooks_dir, hook_dir)
-    hook_script_path = os.path.join(hook_base_dir, "scripts", "hook.py")
-    if not os.path.isfile(hook_script_path):
-      logger.debug("Hook script {0} not found, skipping".format(hook_script_path))
-      return None
-    return hook_script_path, hook_base_dir
-
-
def dump_command_to_json(self, command, retry=False, is_status_command=False):
  """
  Converts command to json file and returns file path
  """
  command_type = command['commandType']

  if is_status_command:
    # status commands may run in parallel; use a unique file name per invocation
    file_name = "status_command_{0}.json".format(uuid.uuid4())
  else:
    task_id = command['taskId']
    if command_type == AgentCommand.auto_execution:
      file_name = "auto_command-{0}.json".format(task_id)
    else:
      file_name = "command-{0}.json".format(task_id)
  file_path = os.path.join(self.tmp_dir, file_name)

  # Json may contain passwords, that's why we need proper permissions
  if os.path.isfile(file_path):
    os.unlink(file_path)
  fd = os.open(file_path, os.O_WRONLY | os.O_CREAT, 0o600)
  with os.fdopen(fd, 'w') as f:
    f.write(json.dumps(command, sort_keys=False, indent=4))
  return file_path
 
def decompress_cluster_host_info(self, cluster_host_info):
  """
  Expand the compressed clusterHostInfo structure sent by the server.

  Role -> host mappings arrive as index ranges into the all_hosts list
  (e.g. "1-3,5"), while per-host attribute lists (ping ports, racks, IPv4
  addresses) arrive as value-to-range mappings (e.g. "8670:0-2").
  Returns a fully expanded copy; the input dict is not modified.
  """
  info = cluster_host_info.copy()

  # Pull out every entry that is not a role -> host-index mapping.
  hosts_list = info.pop(self.HOSTS_LIST_KEY)
  ping_ports = info.pop(self.PING_PORTS_KEY)
  racks = info.pop(self.RACKS_KEY)
  ipv4_addresses = info.pop(self.IPV4_ADDRESSES_KEY)
  ambari_server_host = info.pop(self.AMBARI_SERVER_HOST)
  ambari_server_port = info.pop(self.AMBARI_SERVER_PORT)
  ambari_server_use_ssl = info.pop(self.AMBARI_SERVER_USE_SSL)

  decompressed_map = {}

  # Expand each role's "1-3,5"-style index range into the real host names.
  for role, compressed_indexes in info.items():
    host_indexes = self.convert_range_to_list(compressed_indexes)
    decompressed_map[role] = [hosts_list[i] for i in host_indexes]

  # Expand the value:range encoded per-host lists; ping ports become strings.
  decompressed_map[self.PING_PORTS_KEY] = map(str, self.convert_mapped_range_to_list(ping_ports))
  decompressed_map[self.HOSTS_LIST_KEY] = hosts_list
  decompressed_map[self.RACKS_KEY] = self.convert_mapped_range_to_list(racks)
  decompressed_map[self.IPV4_ADDRESSES_KEY] = self.convert_mapped_range_to_list(ipv4_addresses)
  decompressed_map[self.AMBARI_SERVER_HOST] = ambari_server_host
  decompressed_map[self.AMBARI_SERVER_PORT] = ambari_server_port
  decompressed_map[self.AMBARI_SERVER_USE_SSL] = ambari_server_use_ssl

  return decompressed_map
 
def convert_range_to_list(self, range_to_convert):
  """
  Converts from 1-3,5,6-8 to [1,2,3,5,6,7,8]

  :type range_to_convert list
  """
  result_list = []

  for compressed_entry in range_to_convert:
    # each entry is a comma-separated mix of single indexes and m-n spans
    for token in compressed_entry.split(','):
      bounds = token.split('-')

      if len(bounds) == 1:
        result_list.append(int(bounds[0]))
      elif len(bounds) == 2:
        if not bounds[0] or not bounds[1]:
          raise AgentException("Broken data in given range, expected - ""m-n"" or ""m"", got: " + str(token))
        result_list.extend(range(int(bounds[0]), int(bounds[1]) + 1))
      else:
        raise AgentException("Broken data in given range, expected - ""m-n"" or ""m"", got: " + str(token))

  return result_list
 
def convert_mapped_range_to_list(self, range_to_convert):
  """
  Converts from ['1:0-2,4', '42:3,5-7'] to [1,1,1,42,1,42,42,42]

  :type range_to_convert list
  """
  result_dict = {}

  for mapping_entry in range_to_convert:
    parts = mapping_entry.split(":")
    if len(parts) != 2:
      raise AgentException("Broken data in given value to range, expected format - ""value:m-n"", got - " + str(mapping_entry))
    raw_value, ranges_token = parts
    # numeric-looking values are stored as ints, everything else as-is
    typed_value = int(raw_value) if raw_value.isdigit() else raw_value

    for token in ranges_token.split(','):
      bounds = token.split('-')

      if len(bounds) == 2:
        if not bounds[0] or not bounds[1]:
          raise AgentException("Broken data in given value to range, expected format - ""value:m-n"", got - " + str(token))
        for index in range(int(bounds[0]), int(bounds[1]) + 1):
          result_dict[index] = typed_value
      elif len(bounds) == 1:
        result_dict[int(bounds[0])] = typed_value

  # values ordered by their (sorted) index positions
  return dict(sorted(result_dict.items())).values()
def conditionally_remove_command_file(self, command_json_path, command_result):
  """
  Conditionally remove the specified command JSON file if it exists and if the configured
  agent/command_file_retention_policy indicates to do so.

  :param command_json_path:  the absolute path to the command JSON file
  :param command_result: the result structure containing the exit code for the command execution
  :rtype: bool
  :return: True, if the command JSON file was removed; False otherwise
  """
  log = logging.getLogger()
  removed_command_file = False

  if os.path.exists(command_json_path):
    policy = self.config.command_file_retention_policy
    remove_command_file = False

    if policy == self.config.COMMAND_FILE_RETENTION_POLICY_REMOVE:
      # unconditional removal
      remove_command_file = True
      log.info(
        'Removing %s due to the command_file_retention_policy, %s',
        command_json_path, policy
      )
    elif policy == self.config.COMMAND_FILE_RETENTION_POLICY_REMOVE_ON_SUCCESS:
      # remove only when the command reported a zero exit code
      if command_result and ('exitcode' in command_result):
        exit_code = command_result['exitcode']
        if exit_code == 0:
          remove_command_file = True
          log.info(
            'Removing %s due to the command_file_retention_policy, %s, and exit code, %d',
            command_json_path, policy, exit_code
          )
        else:
          log.info(
            'Not removing %s due to the command_file_retention_policy, %s, and exit code, %d',
            command_json_path, policy, exit_code
          )
      else:
        log.info(
          'Not Removing %s due to the command_file_retention_policy, %s, and a missing exit code value',
          command_json_path, policy
        )

    if remove_command_file:
      try:
        os.remove(command_json_path)
        removed_command_file = True
      except OSError as e:
        log.error("Failed to remove %s due to error: %s", command_json_path, str(e))

  return removed_command_file
 

+ 4 - 3
ambari-agent/src/main/python/ambari_agent/ExitHelper.py

@@ -48,6 +48,7 @@ class ExitHelper(object):
   def __init__(self):
     self.exit_functions = []
     self.exit_functions_executed = False
+    self.exitcode = 0
     atexit.register(self.execute_cleanup)
 
   def execute_cleanup(self):
@@ -65,10 +66,10 @@ class ExitHelper(object):
   def register(self, func, *args, **kwargs):
     self.exit_functions.append((func, args, kwargs))
 
-  def exit(self, code):
+  def exit(self):
     self.execute_cleanup()
-    logger.info("Cleanup finished, exiting with code:" + str(code))
-    os._exit(code)
+    logger.info("Cleanup finished, exiting with code:" + str(self.exitcode))
+    os._exit(self.exitcode)
 
 
 if __name__ == '__main__':

+ 36 - 44
ambari-agent/src/main/python/ambari_agent/FileCache.py

@@ -73,8 +73,18 @@ class FileCache():
   def reset(self):
     self.uptodate_paths = [] # Paths that already have been recently checked
 
+  def get_server_url_prefix(self, command):
+    """
+    Returns the server URL prefix if it exists; empty string otherwise
+
+    :type command: dict
+    """
+    try:
+      return command['ambariLevelParams']['jdk_location']
+    except KeyError:
+      return ""
 
-  def get_service_base_dir(self, command, server_url_prefix):
+  def get_service_base_dir(self, command):
     """
     Returns a base directory for service
     """
@@ -82,11 +92,9 @@ class FileCache():
       service_subpath = command['commandParams']['service_package_folder']
     else:
       service_subpath = command['serviceLevelParams']['service_package_folder']
-    return self.provide_directory(self.cache_dir, service_subpath,
-                                  server_url_prefix)
+    return self.provide_directory(self.cache_dir, service_subpath, self.get_server_url_prefix(command))
 
-
-  def get_hook_base_dir(self, command, server_url_prefix):
+  def get_hook_base_dir(self, command):
     """
     Returns a base directory for hooks
     """
@@ -94,20 +102,18 @@ class FileCache():
       hooks_path = command['clusterLevelParams']['hooks_folder']
     except KeyError:
       return None
-    return self.provide_directory(self.cache_dir, hooks_path,
-                                  server_url_prefix)
-
+    return self.provide_directory(self.cache_dir, hooks_path, self.get_server_url_prefix(command))
 
-  def get_custom_actions_base_dir(self, server_url_prefix):
+  def get_custom_actions_base_dir(self, command):
     """
     Returns a base directory for custom action scripts
     """
     return self.provide_directory(self.cache_dir,
                                   self.CUSTOM_ACTIONS_CACHE_DIRECTORY,
-                                  server_url_prefix)
+                                  self.get_server_url_prefix(command))
 
 
-  def get_custom_resources_subdir(self, command, server_url_prefix):
+  def get_custom_resources_subdir(self, command):
     """
     Returns a custom directory which must be a subdirectory of the resources dir
     """
@@ -116,20 +122,16 @@ class FileCache():
     except KeyError:
       return None
 
-    return self.provide_directory(self.cache_dir,
-                                  custom_dir,
-                                  server_url_prefix)
-
+    return self.provide_directory(self.cache_dir, custom_dir, self.get_server_url_prefix(command))
 
-  def get_host_scripts_base_dir(self, server_url_prefix):
+  def get_host_scripts_base_dir(self, command):
     """
     Returns a base directory for host scripts (host alerts, etc) which
     are scripts that are not part of the main agent code
     """
     return self.provide_directory(self.cache_dir,
                                   self.HOST_SCRIPTS_CACHE_DIRECTORY,
-                                  server_url_prefix)
-
+                                  self.get_server_url_prefix(command))
 
   def auto_cache_update_enabled(self):
     from AmbariConfig import AmbariConfig
@@ -182,7 +184,7 @@ class FileCache():
                                                  subdirectory, self.ARCHIVE_NAME)
           membuffer = self.fetch_url(download_url)
           # extract only when the archive is not zero sized
-          if (membuffer.getvalue().strip()):
+          if membuffer.getvalue().strip():
             self.invalidate_directory(full_path)
             self.unpack_archive(membuffer, full_path)
             self.write_hash_sum(full_path, remote_hash)
@@ -193,7 +195,7 @@ class FileCache():
             pass
         # Finally consider cache directory up-to-date
         self.uptodate_paths.append(full_path)
-    except CachingException, e:
+    except CachingException as e:
       if self.tolerate_download_failures:
         # ignore
         logger.warn("Error occurred during cache update. "
@@ -203,14 +205,13 @@ class FileCache():
       else:
         raise # we are not tolerant to exceptions, command execution will fail
     finally:
-      self.currently_providing[full_path].set()
-      del self.currently_providing[full_path]
+      with self.currently_providing_dict_lock:
+        self.currently_providing[full_path].set()
+        del self.currently_providing[full_path]
 
     return full_path
 
-
-  def build_download_url(self, server_url_prefix,
-                         directory, filename):
+  def build_download_url(self, server_url_prefix, directory, filename):
     """
     Builds up a proper download url for file. Used for downloading files
     from the server.
@@ -220,7 +221,6 @@ class FileCache():
     return "{0}/{1}/{2}".format(server_url_prefix,
                                 urllib.pathname2url(directory), filename)
 
-
   def fetch_url(self, url):
     """
     Fetches content on url to in-memory buffer and returns the resulting buffer.
@@ -241,10 +241,8 @@ class FileCache():
         if not buff:
           break
       return memory_buffer
-    except Exception, err:
-      raise CachingException("Can not download file from"
-                             " url {0} : {1}".format(url, str(err)))
-
+    except Exception as err:
+      raise CachingException("Can not download file from url {0} : {1}".format(url, str(err)))
 
   def read_hash_sum(self, directory):
     """
@@ -256,8 +254,7 @@ class FileCache():
       with open(hash_file) as fh:
         return fh.readline().strip()
     except:
-      return None # We don't care
-
+      return None
 
   def write_hash_sum(self, directory, new_hash):
     """
@@ -269,10 +266,8 @@ class FileCache():
       with open(hash_file, "w") as fh:
         fh.write(new_hash)
       os.chmod(hash_file, 0o644)
-    except Exception, err:
-      raise CachingException("Can not write to file {0} : {1}".format(hash_file,
-                                                                 str(err)))
-
+    except Exception as err:
+      raise CachingException("Can not write to file {0} : {1}".format(hash_file, str(err)))
 
   def invalidate_directory(self, directory):
     """
@@ -286,7 +281,7 @@ class FileCache():
     logger.debug("Invalidating directory {0}".format(directory))
     try:
       if os.path.exists(directory):
-        if os.path.isfile(directory): # It would be a strange situation
+        if os.path.isfile(directory):  # It would be a strange situation
           os.unlink(directory)
         elif os.path.isdir(directory):
           """
@@ -297,12 +292,11 @@ class FileCache():
           execute_with_retries(CLEAN_DIRECTORY_TRIES, CLEAN_DIRECTORY_TRY_SLEEP, OSError, shutil.rmtree, directory)
         # create directory itself and any parent directories
       os.makedirs(directory)
-    except Exception, err:
+    except Exception as err:
       logger.exception("Can not invalidate cache directory {0}".format(directory))
       raise CachingException("Can not invalidate cache directory {0}: {1}",
                              directory, str(err))
 
-
   def unpack_archive(self, mem_buffer, target_directory):
     """
     Unpacks contents of in-memory buffer to file system.
@@ -316,9 +310,7 @@ class FileCache():
         if not os.path.isdir(concrete_dir):
           os.makedirs(concrete_dir)
         logger.debug("Unpacking file {0} to {1}".format(name, concrete_dir))
-        if filename!='':
+        if filename != '':
           zfile.extract(name, target_directory)
-    except Exception, err:
-      raise CachingException("Can not unpack zip file to "
-                             "directory {0} : {1}".format(
-                            target_directory, str(err)))
+    except Exception as err:
+      raise CachingException("Can not unpack zip file to directory {0} : {1}".format(target_directory, str(err)))

+ 14 - 0
ambari-agent/src/main/python/ambari_agent/Grep.py

@@ -74,3 +74,17 @@ class Grep:
       length = len(lines)
       tailed = lines[length - n:]
       return "".join(tailed)
+
+  def tail_by_symbols(self, string, n):
+    """
+    Returns up to the last n characters of string, trimmed to whole lines; the string is stripped of surrounding whitespace first.
+    """
+    stripped_string = string.strip()
+    lines = stripped_string.splitlines(True)
+    tailed = []
+    for line in reversed(lines):
+      if len("".join(tailed) + line) <= n:
+        tailed[:0] = line
+      else:
+        break
+    return "".join(tailed)

+ 5 - 5
ambari-agent/src/main/python/ambari_agent/Hardware.py

@@ -40,7 +40,7 @@ class Hardware:
   CHECK_REMOTE_MOUNTS_KEY = 'agent.check.remote.mounts'
   CHECK_REMOTE_MOUNTS_TIMEOUT_KEY = 'agent.check.mounts.timeout'
   CHECK_REMOTE_MOUNTS_TIMEOUT_DEFAULT = '10'
-  IGNORE_ROOT_MOUNTS = ["proc", "dev", "sys", "boot"]
+  IGNORE_ROOT_MOUNTS = ["proc", "dev", "sys", "boot", "home"]
   IGNORE_DEVICES = ["proc", "tmpfs", "cgroup", "mqueue", "shm"]
   LINUX_PATH_SEP = "/"
 
@@ -103,11 +103,11 @@ class Hardware:
   def _check_remote_mounts(self):
     """Verify if remote mount allowed to be processed or not"""
     if self.config and self.config.has_option(AmbariConfig.AMBARI_PROPERTIES_CATEGORY, Hardware.CHECK_REMOTE_MOUNTS_KEY) and \
-      self.config.get(AmbariConfig.AMBARI_PROPERTIES_CATEGORY, Hardware.CHECK_REMOTE_MOUNTS_KEY).lower() == "false":
+      self.config.get(AmbariConfig.AMBARI_PROPERTIES_CATEGORY, Hardware.CHECK_REMOTE_MOUNTS_KEY).lower() == "true":
 
-      return False
+      return True
 
-    return True
+    return False
 
   def _is_mount_blacklisted(self, blacklist, mount_point):
     """
@@ -174,7 +174,7 @@ class Hardware:
        - mount path or a part of mount path is not in the blacklist
       """
       if mount["device"] not in self.IGNORE_DEVICES and\
-         mount["mountpoint"].split("/")[0] not in self.IGNORE_ROOT_MOUNTS and\
+         mount["mountpoint"].strip()[1:].split("/")[0] not in self.IGNORE_ROOT_MOUNTS and\
          self._chk_writable_mount(mount['mountpoint']) and\
          not path_isfile(mount["mountpoint"]) and\
          not self._is_mount_blacklisted(blacklisted_mount_points, mount["mountpoint"]):

+ 36 - 21
ambari-agent/src/main/python/ambari_agent/HeartbeatThread.py

@@ -27,9 +27,11 @@ from ambari_agent import Constants
 from ambari_agent.Register import Register
 from ambari_agent.Utils import BlockingDictionary
 from ambari_agent.Utils import Utils
+from ambari_agent.ComponentVersionReporter import ComponentVersionReporter
 from ambari_agent.listeners.ServerResponsesListener import ServerResponsesListener
 from ambari_agent.listeners.TopologyEventListener import TopologyEventListener
 from ambari_agent.listeners.ConfigurationEventListener import ConfigurationEventListener
+from ambari_agent.listeners.AgentActionsListener import AgentActionsListener
 from ambari_agent.listeners.MetadataEventListener import MetadataEventListener
 from ambari_agent.listeners.CommandsEventListener import CommandsEventListener
 from ambari_agent.listeners.HostLevelParamsEventListener import HostLevelParamsEventListener
@@ -57,25 +59,29 @@ class HeartbeatThread(threading.Thread):
     self.config = initializer_module.config
 
     # listeners
-    self.server_responses_listener = ServerResponsesListener()
-    self.commands_events_listener = CommandsEventListener(initializer_module.action_queue)
-    self.metadata_events_listener = MetadataEventListener(initializer_module.metadata_cache)
-    self.topology_events_listener = TopologyEventListener(initializer_module.topology_cache)
-    self.configuration_events_listener = ConfigurationEventListener(initializer_module.configurations_cache)
-    self.host_level_params_events_listener = HostLevelParamsEventListener(initializer_module.host_level_params_cache, initializer_module.recovery_manager)
-    self.alert_definitions_events_listener = AlertDefinitionsEventListener(initializer_module.alert_definitions_cache, initializer_module.alert_scheduler_handler)
-    self.listeners = [self.server_responses_listener, self.commands_events_listener, self.metadata_events_listener, self.topology_events_listener, self.configuration_events_listener, self.host_level_params_events_listener, self.alert_definitions_events_listener]
+    self.server_responses_listener = initializer_module.server_responses_listener
+    self.commands_events_listener = CommandsEventListener(initializer_module)
+    self.metadata_events_listener = MetadataEventListener(initializer_module)
+    self.topology_events_listener = TopologyEventListener(initializer_module)
+    self.configuration_events_listener = ConfigurationEventListener(initializer_module)
+    self.host_level_params_events_listener = HostLevelParamsEventListener(initializer_module)
+    self.alert_definitions_events_listener = AlertDefinitionsEventListener(initializer_module)
+    self.agent_actions_events_listener = AgentActionsListener(initializer_module)
+    self.listeners = [self.server_responses_listener, self.commands_events_listener, self.metadata_events_listener, self.topology_events_listener, self.configuration_events_listener, self.host_level_params_events_listener, self.alert_definitions_events_listener, self.agent_actions_events_listener]
 
     self.post_registration_requests = [
-    (Constants.TOPOLOGY_REQUEST_ENDPOINT, initializer_module.topology_cache, self.topology_events_listener),
-    (Constants.METADATA_REQUEST_ENDPOINT, initializer_module.metadata_cache, self.metadata_events_listener),
-    (Constants.CONFIGURATIONS_REQUEST_ENDPOINT, initializer_module.configurations_cache, self.configuration_events_listener),
-    (Constants.HOST_LEVEL_PARAMS_TOPIC_ENPOINT, initializer_module.host_level_params_cache, self.host_level_params_events_listener),
-    (Constants.ALERTS_DEFINITIONS_REQUEST_ENDPOINT, initializer_module.alert_definitions_cache, self.alert_definitions_events_listener)
+    (Constants.TOPOLOGY_REQUEST_ENDPOINT, initializer_module.topology_cache, self.topology_events_listener, Constants.TOPOLOGIES_TOPIC),
+    (Constants.METADATA_REQUEST_ENDPOINT, initializer_module.metadata_cache, self.metadata_events_listener, Constants.METADATA_TOPIC),
+    (Constants.CONFIGURATIONS_REQUEST_ENDPOINT, initializer_module.configurations_cache, self.configuration_events_listener, Constants.CONFIGURATIONS_TOPIC),
+    (Constants.HOST_LEVEL_PARAMS_TOPIC_ENPOINT, initializer_module.host_level_params_cache, self.host_level_params_events_listener, Constants.HOST_LEVEL_PARAMS_TOPIC),
+    (Constants.ALERTS_DEFINITIONS_REQUEST_ENDPOINT, initializer_module.alert_definitions_cache, self.alert_definitions_events_listener, Constants.ALERTS_DEFINITIONS_TOPIC)
     ]
     self.responseId = 0
     self.file_cache = initializer_module.file_cache
     self.stale_alerts_monitor = initializer_module.stale_alerts_monitor
+    self.post_registration_actions = [self.file_cache.reset, initializer_module.component_status_executor.clean_not_existing_clusters_info,
+                                      initializer_module.alert_status_reporter.clean_not_existing_clusters_info, initializer_module.host_status_reporter.clean_cache]
+
 
 
   def run(self):
@@ -127,7 +133,7 @@ class HeartbeatThread(threading.Thread):
 
     self.handle_registration_response(response)
 
-    for endpoint, cache, listener in self.post_registration_requests:
+    for endpoint, cache, listener, subscribe_to in self.post_registration_requests:
       # should not hang forever on these requests
       response = self.blocking_request({'hash': cache.hash}, endpoint, log_handler=listener.get_log_message)
       try:
@@ -136,12 +142,25 @@ class HeartbeatThread(threading.Thread):
         logger.exception("Exception while handing response to request at {0}. {1}".format(endpoint, response))
         raise
 
+      self.subscribe_to_topics([subscribe_to])
+
     self.subscribe_to_topics(Constants.POST_REGISTRATION_TOPICS_TO_SUBSCRIBE)
-    self.file_cache.reset()
+
+    self.run_post_registration_actions()
+
     self.initializer_module.is_registered = True
     # now when registration is done we can expose connection to other threads.
     self.initializer_module._connection = self.connection
 
+    self.report_components_initial_versions()
+
+  def run_post_registration_actions(self):
+    for post_registration_action in self.post_registration_actions:
+      post_registration_action()
+
+  def report_components_initial_versions(self):
+    ComponentVersionReporter(self.initializer_module).start()
+
   def unregister(self):
     """
     Disconnect and remove connection object from initializer_module so other threads cannot use it
@@ -189,10 +208,6 @@ class HeartbeatThread(threading.Thread):
     else:
       self.responseId = serverId
 
-    if 'restartAgent' in response and response['restartAgent'].lower() == "true":
-      logger.warn("Restarting the agent by the request from server")
-      Utils.restartAgent(self.stop_event)
-
   def get_heartbeat_body(self):
     """
     Heartbeat body to be send to server
@@ -237,7 +252,7 @@ class HeartbeatThread(threading.Thread):
     """
     def presend_hook(correlation_id):
       if log_handler:
-        self.server_responses_listener.logging_handlers[str(correlation_id)] = log_handler 
+        self.server_responses_listener.logging_handlers[correlation_id] = log_handler
            
     try:
       correlation_id = self.connection.send(message=message, destination=destination, presend_hook=presend_hook)
@@ -247,6 +262,6 @@ class HeartbeatThread(threading.Thread):
       raise
 
     try:
-      return self.server_responses_listener.responses.blocking_pop(str(correlation_id), timeout=timeout)
+      return self.server_responses_listener.responses.blocking_pop(correlation_id, timeout=timeout)
     except BlockingDictionary.DictionaryPopTimeout:
       raise Exception("{0} seconds timeout expired waiting for response from server at {1} to message from {2}".format(timeout, Constants.SERVER_RESPONSES_TOPIC, destination))

+ 7 - 2
ambari-agent/src/main/python/ambari_agent/HostCleanup.py

@@ -511,8 +511,13 @@ class HostCleanup:
     for folder in folders:
       for filename in os.listdir(folder):
         fileToCheck = os.path.join(folder, filename)
-        stat = os.stat(fileToCheck)
-        if stat.st_uid in userIds:
+        try:
+          stat = os.stat(fileToCheck)
+        except OSError:
+          stat = None
+          logger.warn("Cannot stat file, skipping: " + fileToCheck)
+
+        if stat and stat.st_uid in userIds:
           self.do_erase_dir_silent([fileToCheck])
           logger.info("Deleting file/folder: " + fileToCheck)
 

+ 3 - 1
ambari-agent/src/main/python/ambari_agent/HostInfo.py

@@ -147,6 +147,8 @@ def get_ntp_service():
     return ("ntpd", "ntp",)
   elif OSCheck.is_ubuntu_family():
     return ("ntp", "chrony",)
+  else:
+    return ("ntpd",)
 
 
 @OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT)
@@ -158,7 +160,7 @@ class HostInfoLinux(HostInfo):
     "storm", "hive-hcatalog", "tez", "falcon", "ambari_qa", "hadoop_deploy",
     "rrdcached", "hcat", "ambari-qa", "sqoop-ambari-qa", "sqoop-ambari_qa",
     "webhcat", "hadoop-hdfs", "hadoop-yarn", "hadoop-mapreduce",
-    "knox", "yarn", "hive-webhcat", "kafka", "slider", "storm-slider-client",
+    "knox", "yarn", "hive-webhcat", "kafka",
     "mahout", "spark", "pig", "phoenix", "ranger", "accumulo",
     "ambari-metrics-collector", "ambari-metrics-monitor", "atlas", "zeppelin"
   ]

+ 10 - 6
ambari-agent/src/main/python/ambari_agent/HostStatusReporter.py

@@ -38,6 +38,7 @@ class HostStatusReporter(threading.Thread):
     self.config = initializer_module.config
     self.host_info = HostInfo(initializer_module.config)
     self.last_report = {}
+    self.server_responses_listener = initializer_module.server_responses_listener
     self.hardware = Hardware(config=initializer_module.config, cache_info=False)
     threading.Thread.__init__(self)
 
@@ -48,12 +49,9 @@ class HostStatusReporter(threading.Thread):
           report = self.get_report()
 
           if self.initializer_module.is_registered and not Utils.are_dicts_equal(report, self.last_report, keys_to_skip=["agentTimeStampAtReporting"]):
-            self.initializer_module.connection.send(message=report, destination=Constants.HOST_STATUS_REPORTS_ENDPOINT)
-            self.last_report = report
+            correlation_id = self.initializer_module.connection.send(message=report, destination=Constants.HOST_STATUS_REPORTS_ENDPOINT)
+            self.server_responses_listener.listener_functions_on_success[correlation_id] = lambda headers, message: self.save_last_report(report)
 
-        # don't use else to avoid race condition
-        if not self.initializer_module.is_registered:
-          self.last_report = {}
       except ConnectionIsAlreadyClosed: # server and agent disconnected during sending data. Not an issue
         pass
       except:
@@ -63,6 +61,9 @@ class HostStatusReporter(threading.Thread):
 
     logger.info("HostStatusReporter has successfully finished")
 
+  def save_last_report(self, report):
+    self.last_report = report
+
   def get_report(self):
     host_info_dict = {}
     self.host_info.register(host_info_dict)
@@ -72,4 +73,7 @@ class HostStatusReporter(threading.Thread):
       'mounts': self.hardware.osdisks(),
     }
 
-    return report
+    return report
+
+  def clean_cache(self):
+    self.last_report = {}

+ 44 - 6
ambari-agent/src/main/python/ambari_agent/InitializerModule.py

@@ -21,6 +21,7 @@ limitations under the License.
 import threading
 import logging
 
+from ambari_agent.CommandHooksOrchestrator import HooksOrchestrator
 from ambari_agent.FileCache import FileCache
 from ambari_agent.AmbariConfig import AmbariConfig
 from ambari_agent.ClusterConfigurationCache import ClusterConfigurationCache
@@ -36,9 +37,17 @@ from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler
 from ambari_agent.ConfigurationBuilder import ConfigurationBuilder
 from ambari_agent.StaleAlertsMonitor import StaleAlertsMonitor
 from ambari_stomp.adapter.websocket import ConnectionIsAlreadyClosed
+from ambari_agent.listeners.ServerResponsesListener import ServerResponsesListener
+
+from ambari_agent import HeartbeatThread
+from ambari_agent.ComponentStatusExecutor import ComponentStatusExecutor
+from ambari_agent.CommandStatusReporter import CommandStatusReporter
+from ambari_agent.HostStatusReporter import HostStatusReporter
+from ambari_agent.AlertStatusReporter import AlertStatusReporter
 
 logger = logging.getLogger(__name__)
 
+
 class InitializerModule:
   """
   - Instantiate some singleton classes or widely used instances along with providing their dependencies.
@@ -48,14 +57,31 @@ class InitializerModule:
   """
   def __init__(self):
     self.stop_event = threading.Event()
+    self.config = AmbariConfig.get_resolved_config()
+
+    self.is_registered = None
+    self.metadata_cache = None
+    self.topology_cache = None
+    self.host_level_params_cache = None
+    self.configurations_cache = None
+    self.alert_definitions_cache = None
+    self.configuration_builder = None
+    self.stale_alerts_monitor = None
+    self.server_responses_listener = None
+    self.file_cache = None
+    self.customServiceOrchestrator = None
+    self.hooks_orchestrator = None
+    self.recovery_manager = None
+    self.commandStatuses = None
+    self.action_queue = None
+    self.alert_scheduler_handler = None
+
     self.init()
 
   def init(self):
     """
     Initialize properties
     """
-    self.config = AmbariConfig.get_resolved_config()
-
     self.is_registered = False
 
     self.metadata_cache = ClusterMetadataCache(self.config.cluster_cache_dir)
@@ -65,15 +91,27 @@ class InitializerModule:
     self.alert_definitions_cache = ClusterAlertDefinitionsCache(self.config.cluster_cache_dir)
     self.configuration_builder = ConfigurationBuilder(self)
     self.stale_alerts_monitor = StaleAlertsMonitor(self)
-
+    self.server_responses_listener = ServerResponsesListener(self)
     self.file_cache = FileCache(self.config)
-
     self.customServiceOrchestrator = CustomServiceOrchestrator(self)
-
-    self.recovery_manager = RecoveryManager(self.config.recovery_cache_dir)
+    self.hooks_orchestrator = HooksOrchestrator(self)
+    self.recovery_manager = RecoveryManager(self)
     self.commandStatuses = CommandStatusDict(self)
+
+    self.init_threads()
+
+
+  def init_threads(self):
+    """
+    Initialize thread objects
+    """
+    self.component_status_executor = ComponentStatusExecutor(self)
     self.action_queue = ActionQueue(self)
     self.alert_scheduler_handler = AlertSchedulerHandler(self)
+    self.command_status_reporter = CommandStatusReporter(self)
+    self.host_status_reporter = HostStatusReporter(self)
+    self.alert_status_reporter = AlertStatusReporter(self)
+    self.heartbeat_thread = HeartbeatThread.HeartbeatThread(self)
 
   @property
   def connection(self):

+ 5 - 8
ambari-agent/src/main/python/ambari_agent/LiveStatus.py

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-'''
+"""
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
@@ -16,13 +16,11 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-'''
+"""
 
 import logging
 from ActualConfigHandler import ActualConfigHandler
 
-logger = logging.getLogger()
-
 
 class LiveStatus:
 
@@ -33,8 +31,8 @@ class LiveStatus:
   LIVE_STATUS = "STARTED"
   DEAD_STATUS = "INSTALLED"
 
-  def __init__(self, cluster, service, component, globalConfig, config,
-               configTags):
+  def __init__(self, cluster, service, component, globalConfig, config, configTags):
+    self.logger = logging.getLogger()
     self.cluster = cluster
     self.service = service
     self.component = component
@@ -47,7 +45,6 @@ class LiveStatus:
     :param component_status: component status to include into report
     :return: populated livestatus dict
     """
-    global LIVE_STATUS, DEAD_STATUS
 
     livestatus = {"componentName": self.component,
                   "msg": "",
@@ -62,5 +59,5 @@ class LiveStatus:
     if active_config is not None:
       livestatus['configurationTags'] = active_config
 
-    logger.debug("The live status for component %s of service %s is %s", self.component, self.service, livestatus)
+    self.logger.debug("The live status for component %s of service %s is %s", self.component, self.service, livestatus)
     return livestatus

+ 3 - 3
ambari-agent/src/main/python/ambari_agent/NetUtil.py

@@ -17,7 +17,7 @@
 from urlparse import urlparse
 import logging
 import httplib
-import sys
+import ssl
 from ssl import SSLError
 from ambari_agent.AmbariConfig import AmbariConfig
 from ambari_commons.inet_utils import ensure_ssl_using_protocol
@@ -66,8 +66,8 @@ class NetUtil:
     try:
       parsedurl = urlparse(url)
 
-      if sys.version_info >= (2,7,9) and not ssl_verify_cert:
-          import ssl
+      # hasattr being true means that current python version has default cert verification enabled.
+      if hasattr(ssl, '_create_unverified_context') and not ssl_verify_cert:
           ca_connection = httplib.HTTPSConnection(parsedurl[1], context=ssl._create_unverified_context())
       else:
           ca_connection = httplib.HTTPSConnection(parsedurl[1])

+ 4 - 1
ambari-agent/src/main/python/ambari_agent/PingPortListener.py

@@ -25,7 +25,7 @@ import socket
 from ambari_commons import subprocess32
 
 logger = logging.getLogger(__name__)
-FUSER_CMD = "fuser {0}/tcp 2>/dev/null | awk '{1}'"
+FUSER_CMD = "timeout 10 fuser {0}/tcp 2>/dev/null | awk '{1}'"
 PSPF_CMD = "ps -fp {0}"
 PORT_IN_USE_MESSAGE = "Could not open port {0} because port already used by another process:\n{1}"
 
@@ -38,6 +38,9 @@ class PingPortListener(threading.Thread):
     self.config = config
     self.host = '0.0.0.0'
     self.port = int(self.config.get('agent','ping_port'))
+
+    logger.debug("Checking Ping port listener port {0}".format(self.port))
+
     if not self.port == None and not self.port == 0:
       (stdoutdata, stderrdata) = self.run_os_command_in_shell(FUSER_CMD.format(str(self.port), "{print $1}"))
       if stdoutdata.strip() and stdoutdata.strip().isdigit():

+ 92 - 128
ambari-agent/src/main/python/ambari_agent/PythonExecutor.py

@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-'''
+"""
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
@@ -16,26 +16,23 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-'''
-import ambari_simplejson as json
+"""
+
 import logging
 import os
-from ambari_commons import subprocess32
 import pprint
 import threading
-import platform
-from threading import Thread
-import time
-from BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle
-from resource_management.libraries.functions.log_process_information import log_process_information
-from ambari_commons.os_check import OSConst, OSCheck
-from Grep import Grep
 import sys
+
+import ambari_simplejson as json
+
+from ambari_commons import subprocess32
 from ambari_commons import shell
-from ambari_commons.shell import shellRunner
 
+from Grep import Grep
+from BackgroundCommandExecutionHandle import BackgroundCommandExecutionHandle
+from resource_management.libraries.functions.log_process_information import log_process_information
 
-logger = logging.getLogger()
 
 class PythonExecutor(object):
   """
@@ -45,43 +42,39 @@ class PythonExecutor(object):
   """
   NO_ERROR = "none"
 
-  def __init__(self, tmpDir, config):
+  def __init__(self, tmp_dir, config):
+    self.logger = logging.getLogger()
     self.grep = Grep()
     self.event = threading.Event()
     self.python_process_has_been_killed = False
-    self.tmpDir = tmpDir
+    self.tmpDir = tmp_dir
     self.config = config
-    pass
+    self.log_max_symbols_size = self.config.log_max_symbols_size
+
+  def open_subprocess32_files(self, tmp_out_file, tmp_err_file, override_output_files, backup_log_files=True):
+    mode = "w" if override_output_files else "a"
 
+    if override_output_files and backup_log_files:
+      self.back_up_log_file_if_exists(tmp_out_file)
+      self.back_up_log_file_if_exists(tmp_err_file)
 
-  def open_subprocess32_files(self, tmpoutfile, tmperrfile, override_output_files, backup_log_files = True):
-    if override_output_files: # Recreate files, existing files are backed up if backup_log_files is True
-      if backup_log_files:
-        self.back_up_log_file_if_exists(tmpoutfile)
-        self.back_up_log_file_if_exists(tmperrfile)
-      tmpout =  open(tmpoutfile, 'w')
-      tmperr =  open(tmperrfile, 'w')
-    else: # Append to files
-      tmpout =  open(tmpoutfile, 'a')
-      tmperr =  open(tmperrfile, 'a')
-    return tmpout, tmperr
+    return open(tmp_out_file, mode), open(tmp_err_file, mode)
 
   def back_up_log_file_if_exists(self, file_path):
     if os.path.isfile(file_path):
       counter = 0
       while True:
-        # Find backup name that is not used yet (saves logs
-        # from multiple command retries)
+        # Find backup name that is not used yet (saves logs from multiple command retries)
         backup_name = file_path + "." + str(counter)
         if not os.path.isfile(backup_name):
           break
         counter += 1
       os.rename(file_path, backup_name)
 
-  def run_file(self, script, script_params, tmpoutfile, tmperrfile,
-               timeout, tmpstructedoutfile, callback, task_id,
-               override_output_files = True, backup_log_files = True, handle = None,
-               log_info_on_failure = True):
+  def run_file(self, script, script_params, tmp_out_file, tmp_err_file,
+               timeout, tmp_structed_outfile, callback, task_id,
+               override_output_files=True, backup_log_files=True, handle=None,
+               log_info_on_failure=True):
     """
     Executes the specified python file in a separate subprocess32.
     Method returns only when the subprocess32 is finished.
@@ -92,55 +85,75 @@ class PythonExecutor(object):
     recreated or appended.
     The structured out file, however, is preserved during multiple invocations that use the same file.
     """
-    pythonCommand = self.python_command(script, script_params)
-    if logger.isEnabledFor(logging.DEBUG):
-      logger.debug("Running command %s", pprint.pformat(pythonCommand))
+    python_command = self.python_command(script, script_params)
+    if self.logger.isEnabledFor(logging.DEBUG):
+      self.logger.debug("Running command %s", pprint.pformat(python_command))
+
+    def background_executor():
+      logger = logging.getLogger()
+      process_out, process_err = self.open_subprocess32_files(tmp_out_file, tmp_err_file, True)
+
+      logger.debug("Starting process command %s", python_command)
+      p = self.launch_python_subprocess32(python_command, process_out, process_err)
+
+      logger.debug("Process has been started. Pid = %s", p.pid)
+
+      handle.pid = p.pid
+      handle.status = BackgroundCommandExecutionHandle.RUNNING_STATUS
+      handle.on_background_command_started(handle.command['taskId'], p.pid)
+
+      p.communicate()
+
+      handle.exitCode = p.returncode
+      process_condensed_result = self.prepare_process_result(p.returncode, tmp_out_file, tmp_err_file, tmp_structed_outfile)
+      logger.debug("Calling callback with args %s", process_condensed_result)
+      handle.on_background_command_complete_callback(process_condensed_result, handle)
+      logger.debug("Exiting from thread for holder pid %s", handle.pid)
 
     if handle is None:
-      tmpout, tmperr = self.open_subprocess32_files(tmpoutfile, tmperrfile, override_output_files, backup_log_files)
+      tmpout, tmperr = self.open_subprocess32_files(tmp_out_file, tmp_err_file, override_output_files, backup_log_files)
 
-      process = self.launch_python_subprocess32(pythonCommand, tmpout, tmperr)
+      process = self.launch_python_subprocess32(python_command, tmpout, tmperr)
       # map task_id to pid
       callback(task_id, process.pid)
-      logger.debug("Launching watchdog thread")
+      self.logger.debug("Launching watchdog thread")
       self.event.clear()
       self.python_process_has_been_killed = False
-      thread = Thread(target =  self.python_watchdog_func, args = (process, timeout))
+      thread = threading.Thread(target=self.python_watchdog_func, args=(process, timeout))
       thread.start()
       # Waiting for the process to be either finished or killed
       process.communicate()
       self.event.set()
       thread.join()
-      result = self.prepare_process_result(process.returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout=timeout)
+      result = self.prepare_process_result(process.returncode, tmp_out_file, tmp_err_file, tmp_structed_outfile, timeout=timeout)
 
       if log_info_on_failure and result['exitcode']:
-        self.on_failure(pythonCommand, result)
+        self.on_failure(python_command, result)
 
       return result
     else:
-      holder = Holder(pythonCommand, tmpoutfile, tmperrfile, tmpstructedoutfile, handle)
-
-      background = BackgroundThread(holder, self)
+      background = threading.Thread(target=background_executor, args=())
       background.start()
       return {"exitcode": 777}
 
-  def on_failure(self, pythonCommand, result):
+  def on_failure(self, python_command, result):
     """
     Log some useful information after task failure.
     """
-    pass
-    #logger.info("Command %s failed with exitcode=%s", pprint.pformat(pythonCommand), result['exitcode'])
-    #log_process_information(logger)
+    self.logger.info("Command %s failed with exitcode=%s", pprint.pformat(python_command), result['exitcode'])
+    log_process_information(self.logger)
 
   def prepare_process_result(self, returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout=None):
     out, error, structured_out = self.read_result_from_files(tmpoutfile, tmperrfile, tmpstructedoutfile)
 
     if self.python_process_has_been_killed:
-      error = str(error) + "\n Python script has been killed due to timeout" + \
-              (" after waiting %s secs" % str(timeout) if timeout else "")
+      error = "{error}\nPython script has been killed due to timeout{timeout_details}".format(
+        error=error,
+        timeout_details="" if not timeout else " after waiting {} secs".format(timeout)
+      )
       returncode = 999
-    result = self.condenseOutput(out, error, returncode, structured_out)
-    logger.debug("Result: %s", result)
+    result = self.condense_output(out, error, returncode, structured_out)
+    self.logger.debug("Result: %s", result)
     return result
 
   def read_result_from_files(self, out_path, err_path, structured_out_path):
@@ -149,95 +162,46 @@ class PythonExecutor(object):
     try:
       with open(structured_out_path, 'r') as fp:
         structured_out = json.load(fp)
-    except Exception:
-      if os.path.exists(structured_out_path):
-        errMsg = 'Unable to read structured output from ' + structured_out_path
-        structured_out = {
-          'msg' : errMsg
-        }
-        logger.warn(structured_out)
-      else:
-        structured_out = {}
+    except (TypeError, ValueError):
+      structured_out = {
+        "msg": "Unable to read structured output from " + structured_out_path
+      }
+      self.logger.warn(structured_out)
+    except (OSError, IOError):
+      structured_out = {}
     return out, error, structured_out
 
-  def preexec_fn(self):
-    os.setpgid(0, 0)
-
   def launch_python_subprocess32(self, command, tmpout, tmperr):
     """
     Creates subprocess32 with given parameters. This functionality was moved to separate method
     to make possible unit testing
     """
-    close_fds = None if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else True
     command_env = dict(os.environ)
-    if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
-      command_env["PYTHONPATH"] = os.pathsep.join(sys.path)
-      for k, v in command_env.iteritems():
-        command_env[k] = str(v)
-
-    return subprocess32.Popen(command,
-      stdout=tmpout,
-      stderr=tmperr, close_fds=close_fds, env=command_env, preexec_fn=self.preexec_fn)
+    return subprocess32.Popen(command, stdout=tmpout, stderr=tmperr, close_fds=True, env=command_env,
+                              preexec_fn=lambda: os.setpgid(0, 0))
 
-  def isSuccessfull(self, returncode):
-    return not self.python_process_has_been_killed and returncode == 0
+  def is_successful(self, return_code):
+    return not self.python_process_has_been_killed and return_code == 0
 
   def python_command(self, script, script_params):
-    #we need manually pass python executable on windows because sys.executable will return service wrapper
-    python_binary = os.environ['PYTHON_EXE'] if 'PYTHON_EXE' in os.environ else sys.executable
-    python_command = [python_binary, script] + script_params
+    """
+    :type script str
+    :type script_params list|set
+    """
+    python_command = [sys.executable, script] + script_params
     return python_command
 
-  def condenseOutput(self, stdout, stderr, retcode, structured_out):
-    log_lines_count = self.config.get('heartbeat', 'log_lines_count')
-
-    result = {
-      "exitcode": retcode,
-      "stdout": self.grep.tail(stdout, log_lines_count) if log_lines_count else stdout,
-      "stderr": self.grep.tail(stderr, log_lines_count) if log_lines_count else stderr,
-      "structuredOut" : structured_out
+  def condense_output(self, stdout, stderr, ret_code, structured_out):
+    return {
+      "exitcode": ret_code,
+      "stdout": self.grep.tail_by_symbols(stdout, self.log_max_symbols_size) if self.log_max_symbols_size else stdout,
+      "stderr": self.grep.tail_by_symbols(stderr, self.log_max_symbols_size) if self.log_max_symbols_size else stderr,
+      "structuredOut": structured_out
     }
 
-    return result
-
-  def python_watchdog_func(self, python, timeout):
+  def python_watchdog_func(self, process, timeout):
     self.event.wait(timeout)
-    if python.returncode is None:
-      logger.error("subprocess32 timed out and will be killed")
-      shell.kill_process_with_children(python.pid)
+    if process.returncode is None:
+      self.logger.error("Executed command with pid {} timed out and will be killed".format(process.pid))
+      shell.kill_process_with_children(process.pid)
       self.python_process_has_been_killed = True
-    pass
-
-class Holder:
-  def __init__(self, command, out_file, err_file, structured_out_file, handle):
-    self.command = command
-    self.out_file = out_file
-    self.err_file = err_file
-    self.structured_out_file = structured_out_file
-    self.handle = handle
-
-class BackgroundThread(threading.Thread):
-  def __init__(self, holder, pythonExecutor):
-    threading.Thread.__init__(self)
-    self.holder = holder
-    self.pythonExecutor = pythonExecutor
-
-  def run(self):
-    process_out, process_err = self.pythonExecutor.open_subprocess32_files(self.holder.out_file, self.holder.err_file, True)
-
-    logger.debug("Starting process command %s", self.holder.command)
-    process = self.pythonExecutor.launch_python_subprocess32(self.holder.command, process_out, process_err)
-
-    logger.debug("Process has been started. Pid = %s", process.pid)
-
-    self.holder.handle.pid = process.pid
-    self.holder.handle.status = BackgroundCommandExecutionHandle.RUNNING_STATUS
-    self.holder.handle.on_background_command_started(self.holder.handle.command['taskId'], process.pid)
-
-    process.communicate()
-
-    self.holder.handle.exitCode = process.returncode
-    process_condensed_result = self.pythonExecutor.prepare_process_result(process.returncode, self.holder.out_file, self.holder.err_file, self.holder.structured_out_file)
-    logger.debug("Calling callback with args %s", process_condensed_result)
-    self.holder.handle.on_background_command_complete_callback(process_condensed_result, self.holder.handle)
-    logger.debug("Exiting from thread for holder pid %s", self.holder.handle.pid)

+ 6 - 6
ambari-agent/src/main/python/ambari_agent/PythonReflectiveExecutor.py

@@ -38,11 +38,11 @@ class PythonReflectiveExecutor(PythonExecutor):
  Running the commands reflectively, rather than in a new process, makes this really fast.
   """
   
-  def __init__(self, tmpDir, config):
-    super(PythonReflectiveExecutor, self).__init__(tmpDir, config)
+  def __init__(self, tmp_dir, config):
+    super(PythonReflectiveExecutor, self).__init__(tmp_dir, config)
     
-  def run_file(self, script, script_params, tmpoutfile, tmperrfile,
-               timeout, tmpstructedoutfile, callback, task_id,
+  def run_file(self, script, script_params, tmp_out_file, tmp_err_file,
+               timeout, tmp_structed_outfile, callback, task_id,
                override_output_files = True, backup_log_files = True,
                handle = None, log_info_on_failure=True):
     pythonCommand = self.python_command(script, script_params)
@@ -50,7 +50,7 @@ class PythonReflectiveExecutor(PythonExecutor):
       logger.debug("Running command reflectively %s", pprint.pformat(pythonCommand))
     
     script_dir = os.path.dirname(script)
-    self.open_subprocess32_files(tmpoutfile, tmperrfile, override_output_files, backup_log_files)
+    self.open_subprocess32_files(tmp_out_file, tmp_err_file, override_output_files, backup_log_files)
     returncode = 1
 
     try:
@@ -69,7 +69,7 @@ class PythonReflectiveExecutor(PythonExecutor):
     else: 
       returncode = 0
       
-    return self.prepare_process_result(returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout=timeout)
+    return self.prepare_process_result(returncode, tmp_out_file, tmp_err_file, tmp_structed_outfile, timeout=timeout)
   
 class PythonContext:
   """

+ 182 - 270
ambari-agent/src/main/python/ambari_agent/RecoveryManager.py

@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -15,31 +13,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 import copy
-import os
 import time
 import threading
 import pprint
 
 from ambari_agent.ActionQueue import ActionQueue
 from ambari_agent.LiveStatus import LiveStatus
-
+from ambari_agent.models.commands import CommandStatus, RoleCommand, CustomCommand, AgentCommand
 
 logger = logging.getLogger()
 
-"""
-RecoveryManager has the following capabilities:
-* Store data needed for execution commands extracted from STATUS command
-* Generate INSTALL command
-* Generate START command
-"""
-
 
 class RecoveryManager:
+  """
+  RecoveryManager has the following capabilities:
+  * Store data needed for execution commands extracted from STATUS command
+  * Generate INSTALL command
+  * Generate START command
+  """
+  BLUEPRINT_STATE_IN_PROGRESS = 'IN_PROGRESS'
   COMMAND_TYPE = "commandType"
   PAYLOAD_LEVEL = "payloadLevel"
+  SERVICE_NAME = "serviceName"
   COMPONENT_NAME = "componentName"
   ROLE = "role"
   TASK_ID = "taskId"
@@ -57,7 +54,7 @@ class RecoveryManager:
   INIT = "INIT"  # TODO: What is the state when machine is reset
   INSTALL_FAILED = "INSTALL_FAILED"
   COMPONENT_UPDATE_KEY_FORMAT = "{0}_UPDATE_TIME"
-  COMMAND_REFRESH_DELAY_SEC = 600 #10 minutes
+  COMMAND_REFRESH_DELAY_SEC = 600
 
   FILENAME = "recovery.json"
 
@@ -77,13 +74,15 @@ class RecoveryManager:
     "stale_config": False
   }
 
-  def __init__(self, cache_dir, recovery_enabled=False, auto_start_only=False, auto_install_start=False):
+  def __init__(self, initializer_module, recovery_enabled=False, auto_start_only=False, auto_install_start=False):
     self.recovery_enabled = recovery_enabled
     self.auto_start_only = auto_start_only
     self.auto_install_start = auto_install_start
     self.max_count = 6
     self.window_in_min = 60
     self.retry_gap = 5
+    self.window_in_sec = self.window_in_min * 60
+    self.retry_gap_in_sec = self.retry_gap * 60
     self.max_lifetime_count = 12
 
     self.id = int(time.time())
@@ -91,36 +90,34 @@ class RecoveryManager:
     self.allowed_current_states = [self.INIT, self.INSTALLED]
     self.enabled_components = []
     self.statuses = {}
+    self.__component_to_service_map = {}   # component => service map. TODO: fix this later (temporary hack)
     self.__status_lock = threading.RLock()
     self.__command_lock = threading.RLock()
     self.__active_command_lock = threading.RLock()
     self.__cache_lock = threading.RLock()
     self.active_command_count = 0
     self.cluster_id = None
-
-    if not os.path.exists(cache_dir):
-      try:
-        os.makedirs(cache_dir)
-      except:
-        logger.critical("[RecoveryManager] Could not create the cache directory {0}".format(cache_dir))
-
-    self.__actions_json_file = os.path.join(cache_dir, self.FILENAME)
+    self.initializer_module = initializer_module
+    self.host_level_params_cache = initializer_module.host_level_params_cache
 
     self.actions = {}
-
-    self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, auto_install_start, "")
-
-    pass
+    self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, auto_install_start)
 
   def on_execution_command_start(self):
     with self.__active_command_lock:
       self.active_command_count += 1
-    pass
 
   def on_execution_command_finish(self):
     with self.__active_command_lock:
       self.active_command_count -= 1
-    pass
+
+  def is_blueprint_provisioning_for_component(self, component_name):
+    try:
+      blueprint_state = self.host_level_params_cache[self.cluster_id]['blueprint_provisioning_state'][component_name]
+    except KeyError:
+      blueprint_state = 'NONE'
+
+    return blueprint_state == RecoveryManager.BLUEPRINT_STATE_IN_PROGRESS
 
   def has_active_command(self):
     return self.active_command_count > 0
@@ -131,12 +128,10 @@ class RecoveryManager:
   def get_current_status(self, component):
     if component in self.statuses:
       return self.statuses[component]["current"]
-    pass
 
   def get_desired_status(self, component):
     if component in self.statuses:
       return self.statuses[component]["desired"]
-    pass
 
   def update_config_staleness(self, component, is_config_stale):
     """
@@ -154,17 +149,10 @@ class RecoveryManager:
       pass
 
     self.statuses[component]["stale_config"] = is_config_stale
-    pass
 
   def handle_status_change(self, component, component_status):
-    if not self.enabled() or not self.configured_for_recovery(component):
-      return
-
-    if component_status == LiveStatus.LIVE_STATUS:
-        self.update_current_status(component, component_status)
-    else:
-        if (self.get_current_status(component) != self.INSTALL_FAILED):
-          self.update_current_status(component, component_status)
+    if component_status == LiveStatus.LIVE_STATUS or self.get_current_status(component) != self.INSTALL_FAILED:
+      self.update_current_status(component, component_status)
 
   def update_current_status(self, component, state):
     """
@@ -184,9 +172,8 @@ class RecoveryManager:
 
     if self.statuses[component]["current"] != state:
       logger.info("current status is set to %s for %s", state, component)
-    self.statuses[component]["current"] = state
-    pass
 
+    self.statuses[component]["current"] = state
 
   def update_desired_status(self, component, state):
     """
@@ -202,21 +189,16 @@ class RecoveryManager:
           logger.info("New status, desired status is set to %s for %s", self.statuses[component]["desired"], component)
       finally:
         self.__status_lock.release()
-      pass
 
     if self.statuses[component]["desired"] != state:
       logger.info("desired status is set to %s for %s", state, component)
     self.statuses[component]["desired"] = state
-    pass
 
-  """
-  Whether specific components are enabled for recovery.
-  """
   def configured_for_recovery(self, component):
-    if len(self.enabled_components) > 0 and component in self.enabled_components:
-      return True
-
-    return False
+    """
+    Whether specific components are enabled for recovery.
+    """
+    return len(self.enabled_components) > 0 and component in self.enabled_components
 
   def requires_recovery(self, component):
     """
@@ -225,23 +207,15 @@ class RecoveryManager:
     INIT --> INSTALLED --> STARTED
     RE-INSTALLED (if configs do not match)
     """
-    if not self.enabled():
-      return False
-
-    if not self.configured_for_recovery(component):
-      return False
-
-    if component not in self.statuses:
+    if not self.enabled() or not self.configured_for_recovery(component) or component not in self.statuses:
       return False
 
     status = self.statuses[component]
     if self.auto_start_only or self.auto_install_start:
-      if status["current"] == status["desired"]:
-        return False
-      if status["desired"] not in self.allowed_desired_states:
+      if status["current"] == status["desired"] or status["desired"] not in self.allowed_desired_states:
         return False
     else:
-      if status["current"] == status["desired"] and status['stale_config'] == False:
+      if status["current"] == status["desired"] and status['stale_config'] is False:
         return False
 
     if status["desired"] not in self.allowed_desired_states or status["current"] not in self.allowed_current_states:
@@ -249,9 +223,6 @@ class RecoveryManager:
 
     logger.info("%s needs recovery, desired = %s, and current = %s.", component, status["desired"], status["current"])
     return True
-    pass
-
-
 
   def get_recovery_status(self):
     """
@@ -268,8 +239,7 @@ class RecoveryManager:
       ]
     }
     """
-    report = {}
-    report["summary"] = "DISABLED"
+    report = {"summary": "DISABLED"}
     if self.enabled():
       report["summary"] = "RECOVERABLE"
       num_limits_reached = 0
@@ -279,24 +249,23 @@ class RecoveryManager:
       try:
         for component in self.actions.keys():
           action = self.actions[component]
-          recovery_state = {}
-          recovery_state["name"] = component
-          recovery_state["numAttempts"] = action["lifetimeCount"]
-          recovery_state["limitReached"] = self.max_lifetime_count <= action["lifetimeCount"]
+          recovery_state = {
+            "name": component,
+            "numAttempts": action["lifetimeCount"],
+            "limitReached": self.max_lifetime_count <= action["lifetimeCount"]
+          }
           recovery_states.append(recovery_state)
-          if recovery_state["limitReached"] == True:
+          if recovery_state["limitReached"] is True:
             num_limits_reached += 1
-          pass
       finally:
         self.__status_lock.release()
 
-      if num_limits_reached > 0:
+      if num_limits_reached > 0 and num_limits_reached == len(recovery_states):
+        report["summary"] = "UNRECOVERABLE"
+      elif num_limits_reached > 0:
         report["summary"] = "PARTIALLY_RECOVERABLE"
-        if num_limits_reached == len(recovery_states):
-          report["summary"] = "UNRECOVERABLE"
 
     return report
-    pass
 
   def get_recovery_commands(self):
     """
@@ -308,39 +277,34 @@ class RecoveryManager:
     """
     commands = []
     for component in self.statuses.keys():
-      if self.requires_recovery(component) and self.may_execute(component):
+      if self.configured_for_recovery(component) and self.requires_recovery(component) and self.may_execute(component):
         status = copy.deepcopy(self.statuses[component])
         command = None
         if self.auto_start_only:
-          if status["desired"] == self.STARTED:
-            if status["current"] == self.INSTALLED:
-              command = self.get_start_command(component)
+          if status["desired"] == self.STARTED and status["current"] == self.INSTALLED:
+            command = self.get_start_command(component)
         elif self.auto_install_start:
-          if status["desired"] == self.STARTED:
-            if status["current"] == self.INSTALLED:
-              command = self.get_start_command(component)
-            elif status["current"] == self.INSTALL_FAILED:
-              command = self.get_install_command(component)
-          elif status["desired"] == self.INSTALLED:
-            if status["current"] == self.INSTALL_FAILED:
+          if status["desired"] == self.STARTED and status["current"] == self.INSTALLED:
+            command = self.get_start_command(component)
+          elif status["desired"] == self.STARTED and status["current"] == self.INSTALL_FAILED:
+            command = self.get_install_command(component)
+          elif status["desired"] == self.INSTALLED and status["current"] == self.INSTALL_FAILED:
               command = self.get_install_command(component)
         else:
           # START, INSTALL, RESTART
           if status["desired"] != status["current"]:
-            if status["desired"] == self.STARTED:
-              if status["current"] == self.INSTALLED:
-                command = self.get_start_command(component)
-              elif status["current"] == self.INIT:
-                command = self.get_install_command(component)
-              elif status["current"] == self.INSTALL_FAILED:
-                command = self.get_install_command(component)
-            elif status["desired"] == self.INSTALLED:
-              if status["current"] == self.INIT:
-                command = self.get_install_command(component)
-              elif status["current"] == self.INSTALL_FAILED:
-                command = self.get_install_command(component)
-              elif status["current"] == self.STARTED:
-                command = self.get_stop_command(component)
+            if status["desired"] == self.STARTED and status["current"] == self.INSTALLED:
+              command = self.get_start_command(component)
+            elif status["desired"] == self.STARTED and status["current"] == self.INIT:
+              command = self.get_install_command(component)
+            elif status["desired"] == self.STARTED and status["current"] == self.INSTALL_FAILED:
+              command = self.get_install_command(component)
+            elif status["desired"] == self.INSTALLED and status["current"] == self.INIT:
+              command = self.get_install_command(component)
+            elif status["desired"] == self.INSTALLED and status["current"] == self.INSTALL_FAILED:
+              command = self.get_install_command(component)
+            elif status["desired"] == self.INSTALLED and status["current"] == self.STARTED:
+              command = self.get_stop_command(component)
           else:
             if status["current"] == self.INSTALLED:
               command = self.get_install_command(component)
@@ -349,11 +313,10 @@ class RecoveryManager:
 
         if command:
           self.execute(component)
-          logger.info("Created recovery command %s for component %s",
-                    command[self.ROLE_COMMAND], command[self.ROLE])
+          logger.info("Created recovery command %s for component %s", command[self.ROLE_COMMAND], command[self.ROLE])
           commands.append(command)
-    return commands
 
+    return commands
 
   def may_execute(self, action):
     """
@@ -369,8 +332,6 @@ class RecoveryManager:
       finally:
         self.__status_lock.release()
     return self._execute_action_chk_only(action)
-    pass
-
 
   def execute(self, action):
     """
@@ -386,8 +347,6 @@ class RecoveryManager:
       finally:
         self.__status_lock.release()
     return self._execute_action_(action)
-    pass
-
 
   def _execute_action_(self, action_name):
     """
@@ -398,7 +357,7 @@ class RecoveryManager:
     executed = False
     seconds_since_last_attempt = now - action_counter["lastAttempt"]
     if action_counter["lifetimeCount"] < self.max_lifetime_count:
-      #reset if window_in_sec seconds passed since last attempt
+      # reset if window_in_sec seconds passed since last attempt
       if seconds_since_last_attempt > self.window_in_sec:
         action_counter["count"] = 0
         action_counter["lastReset"] = now
@@ -406,7 +365,7 @@ class RecoveryManager:
       if action_counter["count"] < self.max_count:
         if seconds_since_last_attempt > self.retry_gap_in_sec:
           action_counter["count"] += 1
-          action_counter["lifetimeCount"] +=1
+          action_counter["lifetimeCount"] += 1
           if self.retry_gap > 0:
             action_counter["lastAttempt"] = now
           action_counter["warnedLastAttempt"] = False
@@ -414,28 +373,27 @@ class RecoveryManager:
             action_counter["lastReset"] = now
           executed = True
         else:
-          if action_counter["warnedLastAttempt"] == False:
+          if action_counter["warnedLastAttempt"] is False:
             action_counter["warnedLastAttempt"] = True
             logger.warn(
               "%s seconds has not passed since last occurrence %s seconds back for %s. " +
               "Will silently skip execution without warning till retry gap is passed",
               self.retry_gap_in_sec, seconds_since_last_attempt, action_name)
           else:
-            logger.debug(
-              "%s seconds has not passed since last occurrence %s seconds back for %s",
-              self.retry_gap_in_sec, seconds_since_last_attempt, action_name)
+            logger.debug("%s seconds has not passed since last occurrence %s seconds back for %s",
+                         self.retry_gap_in_sec, seconds_since_last_attempt, action_name)
       else:
         sec_since_last_reset = now - action_counter["lastReset"]
         if sec_since_last_reset > self.window_in_sec:
           action_counter["count"] = 1
-          action_counter["lifetimeCount"] +=1
+          action_counter["lifetimeCount"] += 1
           if self.retry_gap > 0:
             action_counter["lastAttempt"] = now
           action_counter["lastReset"] = now
           action_counter["warnedLastReset"] = False
           executed = True
         else:
-          if action_counter["warnedLastReset"] == False:
+          if action_counter["warnedLastReset"] is False:
             action_counter["warnedLastReset"] = True
             logger.warn("%s occurrences in %s minutes reached the limit for %s. " +
                         "Will silently skip execution without warning till window is reset",
@@ -444,7 +402,7 @@ class RecoveryManager:
             logger.debug("%s occurrences in %s minutes reached the limit for %s",
                          action_counter["count"], self.window_in_min, action_name)
     else:
-      if action_counter["warnedThresholdReached"] == False:
+      if action_counter["warnedThresholdReached"] is False:
         action_counter["warnedThresholdReached"] = True
         logger.warn("%s occurrences in agent life time reached the limit for %s. " +
                     "Will silently skip execution without warning till window is reset",
@@ -452,47 +410,7 @@ class RecoveryManager:
       else:
         logger.error("%s occurrences in agent life time reached the limit for %s",
                      action_counter["lifetimeCount"], action_name)
-    self._dump_actions()
     return executed
-    pass
-
-
-  def _dump_actions(self):
-    """
-    Dump recovery actions to FS
-    """
-    self.__cache_lock.acquire()
-    try:
-      with open(self.__actions_json_file, 'w') as f:
-        json.dump(self.actions, f, indent=2)
-    except Exception, exception:
-      logger.exception("Unable to dump actions to {0}".format(self.__actions_json_file))
-      return False
-    finally:
-      self.__cache_lock.release()
-
-    return True
-    pass
-
-
-  def _load_actions(self):
-    """
-    Loads recovery actions from FS
-    """
-    self.__cache_lock.acquire()
-
-    try:
-      if os.path.isfile(self.__actions_json_file):
-        with open(self.__actions_json_file, 'r') as fp:
-          return json.load(fp)
-    except Exception, exception:
-      logger.warning("Unable to load recovery actions from {0}.".format(self.__actions_json_file))
-    finally:
-      self.__cache_lock.release()
-
-    return {}
-    pass
-
 
   def get_actions_copy(self):
     """
@@ -503,8 +421,6 @@ class RecoveryManager:
       return copy.deepcopy(self.actions)
     finally:
       self.__status_lock.release()
-    pass
-
 
   def is_action_info_stale(self, action_name):
     """
@@ -518,7 +434,6 @@ class RecoveryManager:
       seconds_since_last_attempt = now - action_counter["lastAttempt"]
       return seconds_since_last_attempt > self.window_in_sec
     return False
-    pass
 
   def _execute_action_chk_only(self, action_name):
     """
@@ -532,31 +447,41 @@ class RecoveryManager:
       if action_counter["count"] < self.max_count:
         if seconds_since_last_attempt > self.retry_gap_in_sec:
           return True
+        else:
+          logger.info("Not running recovery command due to retry_gap = {0} (seconds)".format(self.retry_gap_in_sec))
       else:
         sec_since_last_reset = now - action_counter["lastReset"]
         if sec_since_last_reset > self.window_in_sec:
           return True
 
     return False
-    pass
 
   def _now_(self):
     return int(time.time())
-    pass
-
 
   def update_recovery_config(self, dictionary):
-    """
-    TODO: Server sends the recovery configuration - call update_config after parsing
-    "recoveryConfig": {
-      "type" : "DEFAULT|AUTO_START|AUTO_INSTALL_START|FULL",
-      "maxCount" : 10,
-      "windowInMinutes" : 60,
-      "retryGap" : 0,
-      "components" : "a,b"
-      }
-    """
-
+    if dictionary and "recoveryConfig" in dictionary:
+      if logger.isEnabledFor(logging.INFO):
+        logger.info("RecoverConfig = %s", pprint.pformat(dictionary["recoveryConfig"]))
+      config = dictionary["recoveryConfig"]
+      if 'components' in config:
+        enabled_components = config['components']
+        enabled_components_list = []
+
+        components = [(item["service_name"], item["component_name"], item["desired_state"]) for item in enabled_components]
+        for service, component, state in components:
+          enabled_components_list.append(component)
+          self.update_desired_status(component, state)
+          # Recovery Manager is component-oriented, but the Agent requires both the service name and the
+          # component name to build commands properly. As a workaround, the server pushes the service name
+          # and the agent keeps the service <-> component relation here.
+          #
+          # It is important to keep this map up to date: the relation may be overwritten if the server
+          # pushes a different service <-> component mapping.
+          self.__component_to_service_map[component] = service
+          
+        self.enabled_components = enabled_components_list
+
+  def on_config_update(self):
     recovery_enabled = False
     auto_start_only = False
     auto_install_start = False
@@ -564,56 +489,51 @@ class RecoveryManager:
     window_in_min = 60
     retry_gap = 5
     max_lifetime_count = 12
-    enabled_components = ""
 
+    cluster_cache = self.initializer_module.configurations_cache[self.cluster_id]
 
-    if dictionary and "recoveryConfig" in dictionary:
-      if logger.isEnabledFor(logging.INFO):
-        logger.info("RecoverConfig = %s", pprint.pformat(dictionary["recoveryConfig"]))
-      config = dictionary["recoveryConfig"]
-      if "type" in config:
-        if config["type"] in ["AUTO_INSTALL_START", "AUTO_START", "FULL"]:
+    if 'configurations' in cluster_cache and 'cluster-env' in cluster_cache['configurations']:
+      config = cluster_cache['configurations']['cluster-env']
+      if "recovery_type" in config:
+        if config["recovery_type"] in ["AUTO_INSTALL_START", "AUTO_START", "FULL"]:
           recovery_enabled = True
-          if config["type"] == "AUTO_START":
+          if config["recovery_type"] == "AUTO_START":
             auto_start_only = True
-          elif config["type"] == "AUTO_INSTALL_START":
+          elif config["recovery_type"] == "AUTO_INSTALL_START":
             auto_install_start = True
 
-      if "maxCount" in config:
-        max_count = self._read_int_(config["maxCount"], max_count)
-      if "windowInMinutes" in config:
-        window_in_min = self._read_int_(config["windowInMinutes"], window_in_min)
-      if "retryGap" in config:
-        retry_gap = self._read_int_(config["retryGap"], retry_gap)
-      if 'maxLifetimeCount' in config:
-        max_lifetime_count = self._read_int_(config['maxLifetimeCount'], max_lifetime_count)
+      if "recovery_enabled" in config:
+        recovery_enabled = self._read_bool_(config, "recovery_enabled", recovery_enabled)
 
-      if 'components' in config:
-        enabled_components = config['components']
+      if "recovery_max_count" in config:
+        max_count = self._read_int_(config, "recovery_max_count", max_count)
+      if "recovery_window_in_minutes" in config:
+        window_in_min = self._read_int_(config, "recovery_window_in_minutes", window_in_min)
+      if "recovery_retry_interval" in config:
+        retry_gap = self._read_int_(config, "recovery_retry_interval", retry_gap)
+      if 'recovery_lifetime_max_count' in config:
+        max_lifetime_count = self._read_int_(config, 'recovery_lifetime_max_count', max_lifetime_count)
 
     self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only,
-                       auto_install_start, enabled_components)
-    pass
+                       auto_install_start)
 
-  """
-  Update recovery configuration with the specified values.
-
-  max_count - Configured maximum count of recovery attempt allowed per host component in a window.
-  window_in_min - Configured window size in minutes.
-  retry_gap - Configured retry gap between tries per host component
-  max_lifetime_count - Configured maximum lifetime count of recovery attempt allowed per host component.
-  recovery_enabled - True or False. Indicates whether recovery is enabled or not.
-  auto_start_only - True if AUTO_START recovery type was specified. False otherwise.
-  auto_install_start - True if AUTO_INSTALL_START recovery type was specified. False otherwise.
-  enabled_components - CSV of componenents enabled for auto start.
-  """
   def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled,
-                    auto_start_only, auto_install_start, enabled_components):
+                    auto_start_only, auto_install_start):
     """
+    Update recovery configuration with the specified values.
+
+    max_count - Configured maximum count of recovery attempt allowed per host component in a window.
+    window_in_min - Configured window size in minutes.
+    retry_gap - Configured retry gap between tries per host component
+    max_lifetime_count - Configured maximum lifetime count of recovery attempt allowed per host component.
+    recovery_enabled - True or False. Indicates whether recovery is enabled or not.
+    auto_start_only - True if AUTO_START recovery type was specified. False otherwise.
+    auto_install_start - True if AUTO_INSTALL_START recovery type was specified. False otherwise.
+
     Update recovery configuration, recovery is disabled if configuration values
     are not correct
     """
-    self.recovery_enabled = False;
+    self.recovery_enabled = False
     if max_count <= 0:
       logger.warn("Recovery disabled: max_count must be a non-negative number")
       return
@@ -640,7 +560,6 @@ class RecoveryManager:
     self.auto_start_only = auto_start_only
     self.auto_install_start = auto_install_start
     self.max_lifetime_count = max_lifetime_count
-    self.enabled_components = []
 
     self.allowed_desired_states = [self.STARTED, self.INSTALLED]
     self.allowed_current_states = [self.INIT, self.INSTALL_FAILED, self.INSTALLED, self.STARTED]
@@ -652,21 +571,7 @@ class RecoveryManager:
       self.allowed_desired_states = [self.INSTALLED, self.STARTED]
       self.allowed_current_states = [self.INSTALL_FAILED, self.INSTALLED]
 
-    if enabled_components is not None and len(enabled_components) > 0:
-      components = enabled_components.split(",")
-      for component in components:
-        if len(component.strip()) > 0:
-          self.enabled_components.append(component.strip())
-
     self.recovery_enabled = recovery_enabled
-    if self.recovery_enabled:
-      logger.info(
-        "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and"
-        " lifetime max being %s. Enabled components - %s",
-        self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count,
-        ', '.join(self.enabled_components))
-    pass
-
 
   def get_unique_task_id(self):
     self.id += 1
@@ -679,33 +584,31 @@ class RecoveryManager:
     if not self.enabled():
       return
 
-    if not command.has_key(self.ROLE_COMMAND) or not self.configured_for_recovery(command['role']):
+    if self.ROLE_COMMAND not in command or not self.configured_for_recovery(command['role']):
       return
 
-    if status == ActionQueue.COMPLETED_STATUS:
-      if command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
+    if status == CommandStatus.completed:
+      if command[self.ROLE_COMMAND] == RoleCommand.start:
         self.update_current_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
-        #self.update_config_staleness(command['role'], False)
-        logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
-                    ", current state of " + command[self.ROLE] + " to " +
-                     self.get_current_status(command[self.ROLE]) )
-      elif command['roleCommand'] == ActionQueue.ROLE_COMMAND_STOP or command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_INSTALL:
+        logger.info("After EXECUTION_COMMAND (START), with taskId={}, current state of {} to {}".format(
+          command['taskId'], command[self.ROLE], self.get_current_status(command[self.ROLE])))
+
+      elif command['roleCommand'] == RoleCommand.stop or command[self.ROLE_COMMAND] == RoleCommand.install:
         self.update_current_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
-        logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
-                    ", current state of " + command[self.ROLE] + " to " +
-                     self.get_current_status(command[self.ROLE]) )
-      elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_CUSTOM_COMMAND:
-        if command.has_key('custom_command') and command['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART:
+        logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId={}, current state of {} to {}".format(
+          command['taskId'], command[self.ROLE], self.get_current_status(command[self.ROLE])))
+
+      elif command[self.ROLE_COMMAND] == RoleCommand.custom_command:
+        if 'custom_command' in command and command['custom_command'] == CustomCommand.restart:
           self.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
-          #self.update_config_staleness(command['role'], False)
-          logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command[self.ROLE] + " to " +
-                       self.get_current_status(command[self.ROLE]) )
-    elif status == ActionQueue.FAILED_STATUS:
-      if command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_INSTALL:
+          logger.info("After EXECUTION_COMMAND (RESTART), current state of {} to {}".format(
+            command[self.ROLE], self.get_current_status(command[self.ROLE])))
+
+    elif status == CommandStatus.failed:
+      if command[self.ROLE_COMMAND] == RoleCommand.install:
         self.update_current_status(command[self.ROLE], self.INSTALL_FAILED)
-        logger.info("After EXECUTION_COMMAND (INSTALL), with taskId=" + str(command['taskId']) +
-                    ", current state of " + command[self.ROLE] + " to " +
-                    self.get_current_status(command[self.ROLE]))
+        logger.info("After EXECUTION_COMMAND (INSTALL), with taskId={}, current state of {} to {}".format(
+          command['taskId'], command[self.ROLE], self.get_current_status(command[self.ROLE])))
 
   def process_execution_command(self, command):
     """
@@ -714,28 +617,30 @@ class RecoveryManager:
     if not self.enabled():
       return
 
-    if not self.COMMAND_TYPE in command or not command[self.COMMAND_TYPE] == ActionQueue.EXECUTION_COMMAND:
+    if self.COMMAND_TYPE not in command or not command[self.COMMAND_TYPE] == AgentCommand.execution:
       return
 
-    if not self.ROLE in command:
+    if self.ROLE not in command:
       return
 
-    if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP) \
+    if command[self.ROLE_COMMAND] in (RoleCommand.install, RoleCommand.stop) \
         and self.configured_for_recovery(command[self.ROLE]):
+
       self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
-      logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of " + command[self.ROLE] + " to " +
-                   self.get_desired_status(command[self.ROLE]) )
-    elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START \
-        and self.configured_for_recovery(command[self.ROLE]):
+      logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of {} to {}".format(
+        command[self.ROLE], self.get_desired_status(command[self.ROLE])))
+
+    elif command[self.ROLE_COMMAND] == RoleCommand.start and self.configured_for_recovery(command[self.ROLE]):
       self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
-      logger.info("Received EXECUTION_COMMAND (START), desired state of " + command[self.ROLE] + " to " +
-                   self.get_desired_status(command[self.ROLE]) )
-    elif command.has_key('custom_command') and \
-            command['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART \
+      logger.info("Received EXECUTION_COMMAND (START), desired state of {} to {}".format(
+        command[self.ROLE], self.get_desired_status(command[self.ROLE])))
+
+    elif 'custom_command' in command and command['custom_command'] == CustomCommand.restart \
             and self.configured_for_recovery(command[self.ROLE]):
+
       self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
-      logger.info("Received EXECUTION_COMMAND (RESTART), desired state of " + command[self.ROLE] + " to " +
-                   self.get_desired_status(command[self.ROLE]) )
+      logger.info("Received EXECUTION_COMMAND (RESTART), desired state of {} to {}".format(
+        command[self.ROLE], self.get_desired_status(command[self.ROLE])))
 
   def get_command(self, component, command_name):
     """
@@ -745,16 +650,24 @@ class RecoveryManager:
       logger.info("Recovery is paused, tasks waiting in pipeline for this host.")
       return None
 
+    if self.is_blueprint_provisioning_for_component(component):
+      logger.info("Recovery is paused, blueprint is being provisioned.")
+      return None
+
     if self.enabled():
       command_id = self.get_unique_task_id()
       command = {
         self.CLUSTER_ID: self.cluster_id,
         self.ROLE_COMMAND: command_name,
-        self.COMMAND_TYPE: ActionQueue.AUTO_EXECUTION_COMMAND,
+        self.COMMAND_TYPE: AgentCommand.auto_execution,
         self.TASK_ID: command_id,
         self.ROLE: component,
         self.COMMAND_ID: command_id
       }
+
+      if component in self.__component_to_service_map:
+        command[self.SERVICE_NAME] = self.__component_to_service_map[component]
+
       return command
     else:
       logger.info("Recovery is not enabled. START command will not be computed.")
@@ -779,19 +692,18 @@ class RecoveryManager:
   def get_start_command(self, component):
     return self.get_command(component, "START")
 
-  def _read_int_(self, value, default_value=0):
+  def _read_int_(self, config, key, default_value=0):
     int_value = default_value
     try:
-      int_value = int(value)
-    except ValueError:
+      int_value = int(config[key])
+    except (ValueError, KeyError):
       pass
     return int_value
 
-
-def main(argv=None):
-  cmd_mgr = RecoveryManager('/tmp')
-  pass
-
-
-if __name__ == '__main__':
-  main()
+  def _read_bool_(self, config, key, default_value=False):
+    bool_value = default_value
+    try:
+      bool_value = (config[key].lower() == "true")
+    except KeyError:
+      pass
+    return bool_value

+ 6 - 2
ambari-agent/src/main/python/ambari_agent/Utils.py

@@ -21,6 +21,7 @@ import os
 import time
 import threading
 import collections
+import traceback
 from functools import wraps
 from ambari_agent.ExitHelper import ExitHelper
 
@@ -155,13 +156,16 @@ class Utils(object):
 
   @staticmethod
   def restartAgent(stop_event, graceful_stop_timeout=30):
-    from ambari_agent import main
-    main.EXIT_CODE_ON_STOP = AGENT_AUTO_RESTART_EXIT_CODE
+    ExitHelper().exitcode = AGENT_AUTO_RESTART_EXIT_CODE
     stop_event.set()
 
     t = threading.Timer( graceful_stop_timeout, ExitHelper().exit, [AGENT_AUTO_RESTART_EXIT_CODE])
     t.start()
 
+  @staticmethod
+  def get_traceback_as_text(ex):
+    return ''.join(traceback.format_exception(etype=type(ex), value=ex, tb=ex.__traceback__))
+
 class ImmutableDictionary(dict):
   def __init__(self, dictionary):
     """

+ 31 - 20
ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py

@@ -433,6 +433,7 @@ class BaseAlert(object):
     ha_alias_key = alert_uri_lookup_keys.ha_alias_key
     ha_http_pattern = alert_uri_lookup_keys.ha_http_pattern
     ha_https_pattern = alert_uri_lookup_keys.ha_https_pattern
+    ha_nameservice_aliases = {}
 
     # if HA alias key is not defined then it's not HA environment
     if ha_alias_key is None:
@@ -444,18 +445,25 @@ class BaseAlert(object):
         return None
 
       # convert dfs.ha.namenodes.{{ha-nameservice}} into dfs.ha.namenodes.c1ha
-      ha_alias_key = ha_alias_key.replace(self.HA_NAMESERVICE_PARAM, ha_nameservice)
-      ha_nameservice_alias = self._get_configuration_value(configurations, ha_alias_key)
+      ha_nameservices = filter(None, ha_nameservice.split(','))
 
-      if ha_nameservice_alias is None:
+      for nameservice in ha_nameservices:
+        ha_alias_key_nameservice = ha_alias_key.replace(self.HA_NAMESERVICE_PARAM, nameservice)
+        ha_nameservice_alias = self._get_configuration_value(configurations, ha_alias_key_nameservice)
+
+
+        if ha_nameservice_alias:
+          ha_nameservice_aliases[nameservice] = ha_nameservice_alias
+
+      if not ha_nameservice_aliases:
         logger.warning("[Alert][{0}] HA nameservice value is present but there are no aliases for {1}".format(
           self.get_name(), ha_alias_key))
         return None
     else:
-      ha_nameservice_alias = self._get_configuration_value(configurations, ha_alias_key)
+      ha_nameservice_aliases = {None: self._get_configuration_value(configurations, ha_alias_key)}
 
       # if HA nameservice is not defined then the fact that the HA alias_key could not be evaluated shows that it's not HA environment
-      if ha_nameservice_alias is None:
+      if ha_nameservice_aliases[None] is None:
         return None
 
     # determine which pattern to use (http or https)
@@ -477,22 +485,25 @@ class BaseAlert(object):
 
       return None
 
-    # convert dfs.namenode.http-address.{{ha-nameservice}}.{{alias}} into
-    # dfs.namenode.http-address.c1ha.{{alias}}
-    if ha_nameservice is not None:
-      ha_pattern = ha_pattern.replace(self.HA_NAMESERVICE_PARAM, ha_nameservice)
-
     # for each alias, grab it and check to see if this host matches
-    for alias in ha_nameservice_alias.split(','):
-      # convert dfs.namenode.http-address.c1ha.{{alias}} into
-      # dfs.namenode.http-address.c1ha.nn1
-      key = ha_pattern.replace(self.HA_ALIAS_PARAM, alias.strip())
-
-      # get the host for dfs.namenode.http-address.c1ha.nn1 and see if it's
-      # this host
-      value = self._get_configuration_value(configurations, key)
-      if value is not None and (self.host_name.lower() in value.lower() or self.public_host_name.lower() in value.lower()):
-        return AlertUri(uri=value, is_ssl_enabled=is_ssl_enabled)
+    for nameservice, aliases in ha_nameservice_aliases.iteritems():
+      for alias in aliases.split(','):
+
+        # convert dfs.namenode.http-address.{{ha-nameservice}}.{{alias}} into
+        # dfs.namenode.http-address.c1ha.{{alias}}
+        ha_pattern_current = ha_pattern
+        if nameservice is not None:
+          ha_pattern_current = ha_pattern_current.replace(self.HA_NAMESERVICE_PARAM, nameservice)
+        # convert dfs.namenode.http-address.c1ha.{{alias}} into
+        # dfs.namenode.http-address.c1ha.nn1
+        key = ha_pattern_current.replace(self.HA_ALIAS_PARAM, alias.strip())
+
+        # get the host for dfs.namenode.http-address.c1ha.nn1 and see if it's
+        # this host
+        value = self._get_configuration_value(configurations, key)
+
+        if value is not None and (self.host_name.lower() in value.lower() or self.public_host_name.lower() in value.lower()):
+          return AlertUri(uri=value, is_ssl_enabled=is_ssl_enabled)
 
     return None
 

+ 66 - 0
ambari-agent/src/main/python/ambari_agent/listeners/AgentActionsListener.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import logging
+import ambari_stomp
+
+from ambari_agent.listeners import EventListener
+from ambari_agent.Utils import Utils
+from ambari_agent import Constants
+
+logger = logging.getLogger(__name__)
+
+class AgentActionsListener(EventListener):
+  """
+  Listener of Constants.AGENT_ACTIONS_TOPIC events from server.
+  """
+  ACTION_NAME = 'actionName'
+  RESTART_AGENT_ACTION = 'RESTART_AGENT'
+  
+  def __init__(self, initializer_module):
+    super(AgentActionsListener, self).__init__(initializer_module)
+    self.stop_event = initializer_module.stop_event
+
+  def on_event(self, headers, message):
+    """
+    Is triggered when an event to Constants.AGENT_ACTIONS_TOPIC topic is received from server.
+    It contains some small actions which server can ask agent to do.
+
+    For bigger actions that carry a lot of information and require a special workflow, a dedicated
+    topic would be required. Small actions like restart_agent/clean_cache make sense in a general event.
+
+    @param headers: headers dictionary
+    @param message: message payload dictionary
+    """
+    action_name = message[self.ACTION_NAME]
+
+    if action_name == self.RESTART_AGENT_ACTION:
+      self.restart_agent()
+    else:
+      logger.warn("Unknown action '{0}' requested by server. Ignoring it".format(action_name))
+
+  def restart_agent(self):
+    logger.warn("Restarting the agent by the request from server")
+    Utils.restartAgent(self.stop_event)
+
+  def get_handled_path(self):
+    return Constants.AGENT_ACTIONS_TOPIC
+
+

+ 4 - 3
ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py

@@ -30,9 +30,10 @@ class AlertDefinitionsEventListener(EventListener):
   """
   Listener of Constants.ALERTS_DEFINITIONS_TOPIC events from server.
   """
-  def __init__(self, alert_definitions_cache, alert_scheduler_handler):
-    self.alert_definitions_cache = alert_definitions_cache
-    self.alert_scheduler_handler = alert_scheduler_handler
+  def __init__(self, initializer_module):
+    super(AlertDefinitionsEventListener, self).__init__(initializer_module)
+    self.alert_definitions_cache = initializer_module.alert_definitions_cache
+    self.alert_scheduler_handler = initializer_module.alert_scheduler_handler
 
   def on_event(self, headers, message):
     """

+ 3 - 2
ambari-agent/src/main/python/ambari_agent/listeners/CommandsEventListener.py

@@ -30,8 +30,9 @@ class CommandsEventListener(EventListener):
   """
   Listener of Constants.CONFIGURATIONS_TOPIC events from server.
   """
-  def __init__(self, action_queue):
-    self.action_queue = action_queue
+  def __init__(self, initializer_module):
+    super(CommandsEventListener, self).__init__(initializer_module)
+    self.action_queue = initializer_module.action_queue
 
   def on_event(self, headers, message):
     """

+ 11 - 4
ambari-agent/src/main/python/ambari_agent/listeners/ConfigurationEventListener.py

@@ -30,8 +30,10 @@ class ConfigurationEventListener(EventListener):
   """
   Listener of Constants.CONFIGURATIONS_TOPIC events from server.
   """
-  def __init__(self, configuration_cache):
-    self.configuration_cache = configuration_cache
+  def __init__(self, initializer_module):
+    super(ConfigurationEventListener, self).__init__(initializer_module)
+    self.configurations_cache = initializer_module.configurations_cache
+    self.recovery_manager = initializer_module.recovery_manager
 
   def on_event(self, headers, message):
     """
@@ -40,13 +42,18 @@ class ConfigurationEventListener(EventListener):
     @param headers: headers dictionary
     @param message: message payload dictionary
     """
-    self.configuration_cache.timestamp = message.pop('timestamp')
+    self.configurations_cache.timestamp = message.pop('timestamp')
 
     # this kind of response is received if hash was identical. And server does not need to change anything
     if message == {}:
       return
 
-    self.configuration_cache.rewrite_cache(message['clusters'], message['hash'])
+    self.configurations_cache.rewrite_cache(message['clusters'], message['hash'])
+
+    if message['clusters']:
+      # FIXME: Recovery manager does not support multiple cluster as of now.
+      self.recovery_manager.cluster_id = message['clusters'].keys()[0]
+      self.recovery_manager.on_config_update()
 
   def get_handled_path(self):
     return Constants.CONFIGURATIONS_TOPIC

+ 6 - 5
ambari-agent/src/main/python/ambari_agent/listeners/HostLevelParamsEventListener.py

@@ -30,9 +30,10 @@ class HostLevelParamsEventListener(EventListener):
   """
   Listener of Constants.HOST_LEVEL_PARAMS_TOPIC events from server.
   """
-  def __init__(self, host_level_params_cache, recovery_manager):
-    self.host_level_params_cache = host_level_params_cache
-    self.recovery_manager = recovery_manager
+  def __init__(self, initializer_module):
+    super(HostLevelParamsEventListener, self).__init__(initializer_module)
+    self.host_level_params_cache = initializer_module.host_level_params_cache
+    self.recovery_manager = initializer_module.recovery_manager
 
   def on_event(self, headers, message):
     """
@@ -52,9 +53,9 @@ class HostLevelParamsEventListener(EventListener):
       cluster_id = message['clusters'].keys()[0]
 
       if 'recoveryConfig' in message['clusters'][cluster_id]:
-        logging.info("Updating recoveryConfig from metadata")
-        self.recovery_manager.update_recovery_config(self.host_level_params_cache[cluster_id])
+        logging.info("Updating recoveryConfig from hostLevelParams")
         self.recovery_manager.cluster_id = cluster_id
+        self.recovery_manager.update_recovery_config(self.host_level_params_cache[cluster_id])
 
   def get_handled_path(self):
     return Constants.HOST_LEVEL_PARAMS_TOPIC

+ 19 - 3
ambari-agent/src/main/python/ambari_agent/listeners/MetadataEventListener.py

@@ -32,8 +32,10 @@ class MetadataEventListener(EventListener):
   """
   Listener of Constants.METADATA_TOPIC events from server.
   """
-  def __init__(self, metadata_cache):
-    self.metadata_cache = metadata_cache
+  def __init__(self, initializer_module):
+    super(MetadataEventListener, self).__init__(initializer_module)
+    self.metadata_cache = initializer_module.metadata_cache
+    self.config = initializer_module.config
 
   def on_event(self, headers, message):
     """
@@ -46,7 +48,21 @@ class MetadataEventListener(EventListener):
     if message == {}:
       return
 
-    self.metadata_cache.cache_update(message['clusters'], message['hash'])
+    event_type = message['eventType']
+
+    if event_type == 'CREATE':
+      self.metadata_cache.rewrite_cache(message['clusters'], message['hash'])
+    elif event_type == 'UPDATE':
+      self.metadata_cache.cache_update(message['clusters'], message['hash'])
+    elif event_type == 'DELETE':
+      self.metadata_cache.cache_delete(message['clusters'], message['hash'])
+    else:
+      logger.error("Unknown event type '{0}' for metadata event")
+
+    try:
+      self.config.update_configuration_from_metadata(message['clusters']['-1']['agentConfigs'])
+    except KeyError:
+      pass
 
   def get_handled_path(self):
     return Constants.METADATA_TOPIC

+ 21 - 5
ambari-agent/src/main/python/ambari_agent/listeners/ServerResponsesListener.py

@@ -31,9 +31,11 @@ class ServerResponsesListener(EventListener):
   """
   Listener of Constants.SERVER_RESPONSES_TOPIC events from server.
   """
-  def __init__(self):
-    self.listener_functions = {}
-    self.logging_handlers = {}
+  RESPONSE_STATUS_STRING = 'status'
+  RESPONSE_STATUS_SUCCESS = 'OK'
+
+  def __init__(self, initializer_module):
+    super(ServerResponsesListener, self).__init__(initializer_module)
     self.reset_responses()
 
   def on_event(self, headers, message):
@@ -46,12 +48,21 @@ class ServerResponsesListener(EventListener):
     @param message: message payload dictionary
     """
     if Constants.CORRELATION_ID_STRING in headers:
-      correlation_id = headers[Constants.CORRELATION_ID_STRING]
+      correlation_id = int(headers[Constants.CORRELATION_ID_STRING])
       self.responses.put(correlation_id, message)
 
       if correlation_id in self.listener_functions:
         self.listener_functions[correlation_id](headers, message)
         del self.listener_functions[correlation_id]
+
+      if self.RESPONSE_STATUS_STRING in message and message[self.RESPONSE_STATUS_STRING] == self.RESPONSE_STATUS_SUCCESS:
+        if correlation_id in self.listener_functions_on_success:
+          self.listener_functions_on_success[correlation_id](headers, message)
+          del self.listener_functions_on_success[correlation_id]
+      else:
+        if correlation_id in self.listener_functions_on_error:
+          self.listener_functions_on_error[correlation_id](headers, message)
+          del self.listener_functions_on_error[correlation_id]
     else:
       logger.warn("Received a message from server without a '{0}' header. Ignoring the message".format(Constants.CORRELATION_ID_STRING))
 
@@ -76,8 +87,13 @@ class ServerResponsesListener(EventListener):
 
   def reset_responses(self):
     """
-    Clear responses dictionary
+    Resets data saved on per-response basis.
+    Should be called when correlactionIds are reset to 0 aka. re-registration case.
     """
     self.responses = Utils.BlockingDictionary()
+    self.listener_functions_on_success = {}
+    self.listener_functions_on_error = {}
+    self.listener_functions = {}
+    self.logging_handlers = {}
 
 

+ 3 - 2
ambari-agent/src/main/python/ambari_agent/listeners/TopologyEventListener.py

@@ -30,8 +30,9 @@ class TopologyEventListener(EventListener):
   """
   Listener of Constants.TOPOLOGIES_TOPIC events from server.
   """
-  def __init__(self, topology_cache):
-    self.topology_cache = topology_cache
+  def __init__(self, initializer_module):
+    super(TopologyEventListener, self).__init__(initializer_module)
+    self.topology_cache = initializer_module.topology_cache
 
   def on_event(self, headers, message):
     """

+ 40 - 2
ambari-agent/src/main/python/ambari_agent/listeners/__init__.py

@@ -20,7 +20,11 @@ limitations under the License.
 import ambari_simplejson as json
 import ambari_stomp
 import logging
+import traceback
 import copy
+from ambari_stomp.adapter.websocket import ConnectionIsAlreadyClosed
+from ambari_agent import Constants
+from ambari_agent.Utils import Utils
 
 logger = logging.getLogger(__name__)
 
@@ -28,6 +32,9 @@ class EventListener(ambari_stomp.ConnectionListener):
   """
   Base abstract class for event listeners on specific topics.
   """
+  def __init__(self, initializer_module):
+    self.initializer_module = initializer_module
+
   def on_message(self, headers, message):
     """
     This method is triggered by stomp when message from serve is received.
@@ -42,15 +49,46 @@ class EventListener(ambari_stomp.ConnectionListener):
     if destination.rstrip('/') == self.get_handled_path().rstrip('/'):
       try:
         message_json = json.loads(message)
-      except ValueError:
+      except ValueError as ex:
         logger.exception("Received from server event is not a valid message json. Message is:\n{0}".format(message))
+        self.report_status_to_sender(headers, message, ex)
         return
 
       logger.info("Event from server at {0}{1}".format(destination, self.get_log_message(headers, copy.deepcopy(message_json))))
       try:
         self.on_event(headers, message_json)
-      except:
+      except Exception as ex:
         logger.exception("Exception while handing event from {0} {1}".format(destination, headers, message))
+        self.report_status_to_sender(headers, message, ex)
+      else:
+        self.report_status_to_sender(headers, message)
+
+  def report_status_to_sender(self, headers, message, ex=None):
+    """
+    Reports the status of delivery of the message to a sender
+
+    @param headers: headers dictionary
+    @param message: message payload dictionary
+    @params ex: optional exception object for errors
+    """
+    if not Constants.MESSAGE_ID in headers:
+      return
+
+    if ex:
+      confirmation_of_received = {Constants.MESSAGE_ID:headers[Constants.MESSAGE_ID], 'status':'ERROR', 'reason':Utils.get_traceback_as_text(ex)}
+    else:
+      confirmation_of_received = {Constants.MESSAGE_ID:headers[Constants.MESSAGE_ID], 'status':'OK'}
+
+    try:
+      connection = self.initializer_module.connection
+    except ConnectionIsAlreadyClosed:
+      # access early copy of connection before it is exposed globally
+      connection = self.initializer_module.heartbeat_thread.connection
+
+    try:
+      connection.send(message=confirmation_of_received, destination=Constants.AGENT_RESPONSES_TOPIC)
+    except:
+      logger.exception("Could not send a confirmation '{0}' to server".format(confirmation_of_received))
 
   def on_event(self, headers, message):
     """

+ 11 - 29
ambari-agent/src/main/python/ambari_agent/main.py

@@ -106,12 +106,7 @@ from resource_management.core.logger import Logger
 #from resource_management.core.resources.system import File
 #from resource_management.core.environment import Environment
 
-from ambari_agent import HeartbeatThread
 from ambari_agent.InitializerModule import InitializerModule
-from ambari_agent.ComponentStatusExecutor import ComponentStatusExecutor
-from ambari_agent.CommandStatusReporter import CommandStatusReporter
-from ambari_agent.HostStatusReporter import HostStatusReporter
-from ambari_agent.AlertStatusReporter import AlertStatusReporter
 
 #logging.getLogger('ambari_agent').propagate = False
 
@@ -142,8 +137,6 @@ SYSLOG_FORMATTER = logging.Formatter(SYSLOG_FORMAT_STRING)
 
 _file_logging_handlers ={}
 
-EXIT_CODE_ON_STOP = 0
-
 def setup_logging(logger, filename, logging_level):
   logger.propagate = False
   formatter = logging.Formatter(formatstr)
@@ -362,22 +355,11 @@ MAX_RETRIES = 10
 
 def run_threads(initializer_module):
   initializer_module.alert_scheduler_handler.start()
-
-  heartbeat_thread = HeartbeatThread.HeartbeatThread(initializer_module)
-  heartbeat_thread.start()
-
-  component_status_executor = ComponentStatusExecutor(initializer_module)
-  component_status_executor.start()
-
-  command_status_reporter = CommandStatusReporter(initializer_module)
-  command_status_reporter.start()
-
-  host_status_reporter = HostStatusReporter(initializer_module)
-  host_status_reporter.start()
-
-  alert_status_reporter = AlertStatusReporter(initializer_module)
-  alert_status_reporter.start()
-
+  initializer_module.heartbeat_thread.start()
+  initializer_module.component_status_executor.start()
+  initializer_module.command_status_reporter.start()
+  initializer_module.host_status_reporter.start()
+  initializer_module.alert_status_reporter.start()
   initializer_module.action_queue.start()
 
   while not initializer_module.stop_event.is_set():
@@ -385,11 +367,11 @@ def run_threads(initializer_module):
 
   initializer_module.action_queue.interrupt()
 
-  command_status_reporter.join()
-  component_status_executor.join()
-  host_status_reporter.join()
-  alert_status_reporter.join()
-  heartbeat_thread.join()
+  initializer_module.command_status_reporter.join()
+  initializer_module.component_status_executor.join()
+  initializer_module.host_status_reporter.join()
+  initializer_module.alert_status_reporter.join()
+  initializer_module.heartbeat_thread.join()
   initializer_module.action_queue.join()
 
 # event - event, that will be passed to Controller and NetUtil to make able to interrupt loops form outside process
@@ -518,7 +500,7 @@ def main(initializer_module, heartbeat_stop_callback=None):
       # Clean up if not Windows OS
       #
       if connected or stopped:
-        ExitHelper().exit(EXIT_CODE_ON_STOP)
+        ExitHelper().exit()
         logger.info("finished")
         break
     pass # for server_hostname in server_hostnames

+ 17 - 0
ambari-agent/src/main/python/ambari_agent/models/__init__.py

@@ -0,0 +1,17 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""

+ 47 - 0
ambari-agent/src/main/python/ambari_agent/models/commands.py

@@ -0,0 +1,47 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class AgentCommand(object):
+  status = "STATUS_COMMAND"
+  get_version = "GET_VERSION"
+  execution = "EXECUTION_COMMAND"
+  auto_execution = "AUTO_EXECUTION_COMMAND"
+  background_execution = "BACKGROUND_EXECUTION_COMMAND"
+
+  AUTO_EXECUTION_COMMAND_GROUP = [execution, auto_execution, background_execution]
+  EXECUTION_COMMAND_GROUP = [execution, background_execution]
+
+
+class RoleCommand(object):
+  install = 'INSTALL'
+  start = 'START'
+  stop = 'STOP'
+  custom_command = 'CUSTOM_COMMAND'
+
+
+class CustomCommand(object):
+  restart = 'RESTART'
+  reconfigure = 'RECONFIGURE'
+  start = RoleCommand.start
+
+
+class CommandStatus(object):
+  in_progress = 'IN_PROGRESS'
+  completed = 'COMPLETED'
+  failed = 'FAILED'

+ 22 - 0
ambari-agent/src/main/python/ambari_agent/models/hooks.py

@@ -0,0 +1,22 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class HookPrefix(object):
+  pre = "before"
+  post = "after"

+ 20 - 3
ambari-agent/src/main/python/ambari_agent/security.py

@@ -37,7 +37,7 @@ from socket import error as socket_error
 
 logger = logging.getLogger(__name__)
 
-GEN_AGENT_KEY = 'openssl req -new -newkey rsa:1024 -nodes -keyout "%(keysdir)s' \
+GEN_AGENT_KEY = 'openssl req -new -newkey rsa -nodes -keyout "%(keysdir)s' \
                 + os.sep + '%(hostname)s.key" -subj /OU=%(hostname)s/ ' \
                 '-out "%(keysdir)s' + os.sep + '%(hostname)s.csr"'
 KEY_FILENAME = '%(hostname)s.key'
@@ -63,6 +63,10 @@ class VerifiedHTTPSConnection:
 
     if not self.two_way_ssl_required:
       conn = AmbariStompConnection(self.connection_url)
+      self.establish_connection(conn)
+      logger.info('SSL connection established. Two-way SSL authentication is '
+                  'turned off on the server.')
+      return conn
     else:
       self.certMan = CertificateManager(self.config, self.host)
       self.certMan.initSecurity()
@@ -79,8 +83,21 @@ class VerifiedHTTPSConnection:
 
       conn = AmbariStompConnection(self.connection_url, ssl_options=ssl_options)
 
-    self.establish_connection(conn)
-    return conn
+      try:
+        self.establish_connection(conn)
+        logger.info('SSL connection established. Two-way SSL authentication '
+                    'completed successfully.')
+      except ssl.SSLError:
+        logger.error('Two-way SSL authentication failed. Ensure that '
+                     'server and agent certificates were signed by the same CA '
+                     'and restart the agent. '
+                     '\nIn order to receive a new agent certificate, remove '
+                     'existing certificate file from keys directory. As a '
+                     'workaround you can turn off two-way SSL authentication in '
+                     'server configuration(ambari.properties) '
+                     '\nExiting..')
+        raise
+      return conn
 
   def establish_connection(self, conn):
     """

+ 47 - 211
ambari-agent/src/test/python/ambari_agent/TestActionQueue.py

@@ -29,6 +29,7 @@ from threading import Thread
 import copy
 import signal
 
+from ambari_agent.models.commands import CommandStatus, AgentCommand
 from mock.mock import patch, MagicMock, call
 from ambari_agent.CustomServiceOrchestrator import CustomServiceOrchestrator
 from ambari_agent.PythonExecutor import PythonExecutor
@@ -45,7 +46,7 @@ import logging
 
 CLUSTER_ID = '0'
 
-class TestActionQueue:#(TestCase):
+class TestActionQueue(TestCase):
   def setUp(self):
     # save original open() method for later use
     self.original_open = open
@@ -60,6 +61,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 3,
     'clusterName': u'cc',
@@ -77,6 +79,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 3,
     'clusterName': u'cc',
@@ -94,6 +97,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'AUTO_EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'START',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 3,
     'clusterName': u'cc',
@@ -128,6 +132,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'NAMENODE',
     'roleCommand': u'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 4,
     'clusterName': u'cc',
@@ -140,6 +145,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'SECONDARY_NAMENODE',
     'roleCommand': u'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 5,
     'clusterName': u'cc',
@@ -152,6 +158,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'HBASE',
     'roleCommand': u'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 7,
     'clusterName': u'cc',
@@ -177,6 +184,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 9,
     'clusterName': u'cc',
@@ -191,6 +199,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 9,
     'clusterName': u'cc',
@@ -208,6 +217,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 9,
     'clusterName': u'cc',
@@ -223,6 +233,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'DATANODE',
     'roleCommand': u'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 9,
     'clusterName': u'cc',
@@ -237,6 +248,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': u'RESOURCEMANAGER',
     'roleCommand': u'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 9,
     'clusterName': u'cc',
@@ -262,6 +274,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': 'NAMENODE',
     'roleCommand': 'INSTALL',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 19,
     'clusterName': 'c1',
@@ -285,19 +298,22 @@ class TestActionQueue:#(TestCase):
     'commandType': 'BACKGROUND_EXECUTION_COMMAND',
     'role': 'NAMENODE',
     'roleCommand': 'CUSTOM_COMMAND',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 19,
     'clusterName': 'c1',
     'serviceName': 'HDFS',
     'configurations':{'global' : {}},
     'configurationTags':{'global' : { 'tag': 'v123' }},
-    'hostLevelParams':{'custom_command': 'REBALANCE_HDFS'},
     'commandParams' :  {
       'script_type' : 'PYTHON',
       'script' : 'script.py',
       'command_timeout' : '600',
-      'jdk_location' : '.',
-      'service_package_folder' : '.'
+      'service_package_folder' : '.',
+      'custom_command': 'REBALANCE_HDFS',
+      },
+      'ambariLevelParams': {
+        'jdk_location': 'test'
       },
       'clusterId': CLUSTER_ID,
   }
@@ -305,6 +321,7 @@ class TestActionQueue:#(TestCase):
     'commandType': 'EXECUTION_COMMAND',
     'role': 'NAMENODE',
     'roleCommand': 'ACTIONEXECUTE',
+    'mpackId': 1,
     'commandId': '1-1',
     'taskId': 20,
     'clusterName': 'c1',
@@ -363,10 +380,10 @@ class TestActionQueue:#(TestCase):
     
     actionQueue = ActionQueue(initializer_module)
     execution_command = {
-      'commandType' : ActionQueue.EXECUTION_COMMAND,
+      'commandType' : AgentCommand.execution,
     }
     status_command = {
-      'commandType' : ActionQueue.STATUS_COMMAND,
+      'commandType' : AgentCommand.status,
     }
     wrong_command = {
       'commandType' : "SOME_WRONG_COMMAND",
@@ -438,6 +455,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'HDFS',
                 'role': u'DATANODE',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'exitCode': 0}
@@ -472,6 +490,7 @@ class TestActionQueue:#(TestCase):
     initializer_module = InitializerModule()
     initializer_module.init()
     
+
     actionQueue = ActionQueue(initializer_module)
     actionQueue.execute_command(self.datanode_restart_command_no_logging)
     reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
@@ -483,6 +502,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'HDFS',
                 'role': u'DATANODE',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'exitCode': 0}
@@ -504,8 +524,8 @@ class TestActionQueue:#(TestCase):
     initializer_module = InitializerModule()
     initializer_module.init()
     initializer_module.config = config
-    initializer_module.recovery_manager = RecoveryManager(tempfile.mktemp())
-    initializer_module.recovery_manager.update_config(5, 5, 1, 11, True, False, False, "")
+    initializer_module.recovery_manager = RecoveryManager(MagicMock())
+    initializer_module.recovery_manager.update_config(5, 5, 1, 11, True, False, False)
 
     with patch("__builtin__.open") as open_mock:
       # Make file read calls visible
@@ -576,7 +596,7 @@ class TestActionQueue:#(TestCase):
     config.set('agent', 'prefix', tempdir)
     config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
     config.set('agent', 'tolerate_download_failures', "true")
-    
+    config.set('heartbeat', 'log_symbols_count', "900000")
     initializer_module = InitializerModule()
     initializer_module.init()
     initializer_module.config = config
@@ -631,10 +651,10 @@ class TestActionQueue:#(TestCase):
                   'roleCommand': u'INSTALL',
                   'serviceName': u'HDFS',
                   'role': u'DATANODE',
+                  'mpackId': 1,
                   'actionId': '1-1',
                   'taskId': 3,
                   'exitCode': 777}
-      self.assertEqual(reports[0], expected)
   
     # Continue command execution
       unfreeze_flag.set()
@@ -652,6 +672,7 @@ class TestActionQueue:#(TestCase):
                   'roleCommand': u'INSTALL',
                   'serviceName': u'HDFS',
                   'role': u'DATANODE',
+                  'mpackId': 1,
                   'actionId': '1-1',
                   'taskId': 3,
                   'exitCode': 0}
@@ -659,7 +680,7 @@ class TestActionQueue:#(TestCase):
       self.assertEqual(reports[0], expected)
   
       # now should not have reports (read complete/failed reports are deleted)
-      actionQueue.commandStatuses.clear_reported_reports()
+      actionQueue.commandStatuses.clear_reported_reports({CLUSTER_ID: reports})
       reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
       self.assertEqual(len(reports), 0)
   
@@ -678,7 +699,7 @@ class TestActionQueue:#(TestCase):
                       reports[0]['status'] == 'IN_PROGRESS':
         time.sleep(0.1)
         reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
-        actionQueue.commandStatuses.clear_reported_reports()
+        actionQueue.commandStatuses.clear_reported_reports({CLUSTER_ID: reports})
         
         # check report
       expected = {'status': 'FAILED',
@@ -689,6 +710,7 @@ class TestActionQueue:#(TestCase):
                   'roleCommand': u'INSTALL',
                   'serviceName': u'HDFS',
                   'role': u'DATANODE',
+                  'mpackId': 1,
                   'actionId': '1-1',
                   'taskId': 3,
                   'exitCode': 13}
@@ -696,7 +718,7 @@ class TestActionQueue:#(TestCase):
       self.assertEqual(reports[0], expected)
   
       # now should not have reports (read complete/failed reports are deleted)
-      actionQueue.commandStatuses.clear_reported_reports()
+      actionQueue.commandStatuses.clear_reported_reports({CLUSTER_ID: reports})
       reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
       self.assertEqual(len(reports), 0)
   
@@ -713,7 +735,7 @@ class TestActionQueue:#(TestCase):
                       reports[0]['status'] == 'IN_PROGRESS':
         time.sleep(0.1)
         reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
-        actionQueue.commandStatuses.clear_reported_reports()
+        actionQueue.commandStatuses.clear_reported_reports({CLUSTER_ID: reports})
       # check report
       expected = {'status': 'COMPLETED',
                   'stderr': 'stderr',
@@ -723,6 +745,7 @@ class TestActionQueue:#(TestCase):
                   'roleCommand': 'UPGRADE',
                   'serviceName': 'serviceName',
                   'role': 'role',
+                  'mpackId': None,
                   'actionId': 17,
                   'taskId': 'taskId',
                   'exitCode': 0}
@@ -730,52 +753,10 @@ class TestActionQueue:#(TestCase):
       self.assertEqual(reports[0], expected)
   
       # now should not have reports (read complete/failed reports are deleted)
-      actionQueue.commandStatuses.clear_reported_reports()
+      actionQueue.commandStatuses.clear_reported_reports({CLUSTER_ID: reports})
       reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
       self.assertEqual(len(reports), 0)
 
-  def test_cancel_with_reschedule_command(self):
-    config = AmbariConfig()
-    tempdir = tempfile.gettempdir()
-    config.set('agent', 'prefix', tempdir)
-    config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
-    config.set('agent', 'tolerate_download_failures', "true")
-    dummy_controller = MagicMock()
-    
-    initializer_module = InitializerModule()
-    initializer_module.init()
-    
-    actionQueue = ActionQueue(initializer_module)
-    unfreeze_flag = threading.Event()
-    python_execution_result_dict = {
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut' : '',
-      'status' : '',
-      'exitcode' :-signal.SIGTERM
-    }
-
-    def side_effect(command, tmpoutfile, tmperrfile, override_output_files=True, retry=False):
-      unfreeze_flag.wait()
-      return python_execution_result_dict
-    def patched_aq_execute_command(command):
-      # We have to perform patching for separate thread in the same thread
-      with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
-        runCommand_mock.side_effect = side_effect
-        actionQueue.execute_command(command)
-
-    # We call method in a separate thread
-    execution_thread = Thread(target=patched_aq_execute_command ,
-                              args=(self.datanode_install_command,))
-    execution_thread.start()
-    #  check in progress report
-    # wait until ready
-    while True:
-      time.sleep(0.1)
-      reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
-      if len(reports) != 0:
-        break
-
 
   @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
   @patch.object(CustomServiceOrchestrator, "runCommand")
@@ -810,6 +791,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'HDFS',
                 'role': u'DATANODE',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'clusterId': CLUSTER_ID,
@@ -853,6 +835,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'HDFS',
                 'role': u'DATANODE',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'exitCode': 0}
@@ -895,6 +878,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'YARN',
                 'role': u'RESOURCEMANAGER',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'exitCode': 0}
@@ -933,6 +917,7 @@ class TestActionQueue:#(TestCase):
                 'roleCommand': u'CUSTOM_COMMAND',
                 'serviceName': u'HDFS',
                 'role': u'DATANODE',
+                'mpackId': 1,
                 'actionId': '1-1',
                 'taskId': 9,
                 'exitCode': 0,
@@ -959,6 +944,7 @@ class TestActionQueue:#(TestCase):
       'commandType': 'EXECUTION_COMMAND',
       'role': u'TEZ_CLIENT',
       'roleCommand': u'INSTALL',
+      'mpackId': 1,
       'commandId': '1-1',
       'taskId': 9,
       'clusterName': u'cc',
@@ -991,7 +977,7 @@ class TestActionQueue:#(TestCase):
                                 get_mock, process_command_mock, gpeo_mock):
     CustomServiceOrchestrator_mock.return_value = None
     dummy_controller = MagicMock()
-    dummy_controller.recovery_manager = RecoveryManager(tempfile.mktemp())
+    dummy_controller.recovery_manager = RecoveryManager(MagicMock())
     config = MagicMock()
     gpeo_mock.return_value = 0
     config.get_parallel_exec_option = gpeo_mock
@@ -1100,156 +1086,6 @@ class TestActionQueue:#(TestCase):
     self.assertEqual(0, threading_mock.call_count)
     process_command_mock.assert_any_calls([call(self.datanode_install_command), call(self.hbase_install_command)])
 
-  @not_for_platform(PLATFORM_LINUX)
-  @patch("time.sleep")
-  @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
-  @patch.object(CustomServiceOrchestrator, "__init__")
-  def test_execute_retryable_command(self, CustomServiceOrchestrator_mock,
-                                     sleep_mock
-  ):
-    CustomServiceOrchestrator_mock.return_value = None
-    dummy_controller = MagicMock()
-    actionQueue = ActionQueue(AmbariConfig(), dummy_controller)
-    python_execution_result_dict = {
-      'exitcode': 1,
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut': '',
-      'status': 'FAILED'
-    }
-
-    def side_effect(command, tmpoutfile, tmperrfile, override_output_files=True, retry=False):
-      return python_execution_result_dict
-
-    command = copy.deepcopy(self.retryable_command)
-    with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
-      runCommand_mock.side_effect = side_effect
-      actionQueue.execute_command(command)
-
-    # assert that python executor start
-    self.assertTrue(runCommand_mock.called)
-    self.assertEqual(3, runCommand_mock.call_count)
-    self.assertEqual(2, sleep_mock.call_count)
-    sleep_mock.assert_has_calls([call(2), call(3)], False)
-    runCommand_mock.assert_has_calls([
-      call(command, os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'output-19.txt',
-           os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'errors-19.txt', override_output_files=True, retry=False),
-      call(command, os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'output-19.txt',
-           os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'errors-19.txt', override_output_files=False, retry=True),
-      call(command, os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'output-19.txt',
-           os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'errors-19.txt', override_output_files=False, retry=True)])
-
-
-  @patch("time.time")
-  @patch("time.sleep")
-  @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
-  @patch.object(CustomServiceOrchestrator, "__init__")
-  def test_execute_retryable_command_with_time_lapse(self, CustomServiceOrchestrator_mock,
-                                     sleep_mock, time_mock
-  ):
-    CustomServiceOrchestrator_mock.return_value = None
-    initializer_module = InitializerModule()
-    initializer_module.init()
-    actionQueue = ActionQueue(initializer_module)
-    python_execution_result_dict = {
-      'exitcode': 1,
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut': '',
-      'status': 'FAILED'
-    }
-
-    times_arr = [8, 10, 14, 18, 22, 26, 30, 34]
-    if self.logger.isEnabledFor(logging.INFO):
-      times_arr.insert(0, 4)
-    time_mock.side_effect = times_arr
-
-    def side_effect(command, tmpoutfile, tmperrfile, override_output_files=True, retry=False):
-      return python_execution_result_dict
-
-    command = copy.deepcopy(self.retryable_command)
-    with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
-      runCommand_mock.side_effect = side_effect
-      actionQueue.execute_command(command)
-
-    # assert that python executor start
-    self.assertTrue(runCommand_mock.called)
-    self.assertEqual(2, runCommand_mock.call_count)
-    self.assertEqual(1, sleep_mock.call_count)
-    sleep_mock.assert_has_calls([call(2)], False)
-    runCommand_mock.assert_has_calls([
-      call(command, os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'output-19.txt',
-           os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'errors-19.txt', override_output_files=True, retry=False),
-      call(command, os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'output-19.txt',
-           os.sep + 'tmp' + os.sep + 'ambari-agent' + os.sep + 'errors-19.txt', override_output_files=False, retry=True)])
-
-  # retryable_command
-  @not_for_platform(PLATFORM_LINUX)
-  @patch("time.sleep")
-  @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
-  @patch.object(CustomServiceOrchestrator, "__init__")
-  def test_execute_retryable_command_fail_and_succeed(self, CustomServiceOrchestrator_mock,
-                                                      sleep_mock
-  ):
-    CustomServiceOrchestrator_mock.return_value = None
-    dummy_controller = MagicMock()
-    actionQueue = ActionQueue(AmbariConfig(), dummy_controller)
-    execution_result_fail_dict = {
-      'exitcode': 1,
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut': '',
-      'status': 'FAILED'
-    }
-    execution_result_succ_dict = {
-      'exitcode': 0,
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut': '',
-      'status': 'COMPLETED'
-    }
-
-    command = copy.deepcopy(self.retryable_command)
-    self.assertFalse('commandBeingRetried' in command)
-    with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
-      runCommand_mock.side_effect = [execution_result_fail_dict, execution_result_succ_dict]
-      actionQueue.execute_command(command)
-
-    # assert that python executor start
-    self.assertTrue(runCommand_mock.called)
-    self.assertEqual(2, runCommand_mock.call_count)
-    self.assertEqual(1, sleep_mock.call_count)
-    self.assertEqual(command['commandBeingRetried'], "true")
-    sleep_mock.assert_any_call(2)
-
-  @not_for_platform(PLATFORM_LINUX)
-  @patch("time.sleep")
-  @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
-  @patch.object(CustomServiceOrchestrator, "__init__")
-  def test_execute_retryable_command_succeed(self, CustomServiceOrchestrator_mock,
-                                             sleep_mock
-  ):
-    CustomServiceOrchestrator_mock.return_value = None
-    dummy_controller = MagicMock()
-    actionQueue = ActionQueue(AmbariConfig(), dummy_controller)
-    execution_result_succ_dict = {
-      'exitcode': 0,
-      'stdout': 'out',
-      'stderr': 'stderr',
-      'structuredOut': '',
-      'status': 'COMPLETED'
-    }
-
-    command = copy.deepcopy(self.retryable_command)
-    with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
-      runCommand_mock.side_effect = [execution_result_succ_dict]
-      actionQueue.execute_command(command)
-
-    # assert that python executor start
-    self.assertTrue(runCommand_mock.called)
-    self.assertFalse(sleep_mock.called)
-    self.assertEqual(1, runCommand_mock.call_count)
-
   @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
   @patch.object(CustomServiceOrchestrator, "runCommand")
   @patch.object(CustomServiceOrchestrator, "__init__")
@@ -1268,14 +1104,14 @@ class TestActionQueue:#(TestCase):
 
     execute_command = copy.deepcopy(self.background_command)
     actionQueue.put([execute_command])
-    actionQueue.processBackgroundQueueSafeEmpty();
+    actionQueue.process_background_queue_safe_empty()
     # actionQueue.controller.statusCommandExecutor.process_results();
     
     # assert that python execturor start
     self.assertTrue(runCommand_mock.called)
     runningCommand = actionQueue.commandStatuses.current_state.get(execute_command['taskId'])
     self.assertTrue(runningCommand is not None)
-    self.assertEqual(runningCommand[1]['status'], ActionQueue.IN_PROGRESS_STATUS)
+    self.assertEqual(runningCommand[1]['status'], CommandStatus.in_progress)
     
     reports = actionQueue.commandStatuses.generate_report()[CLUSTER_ID]
     self.assertEqual(len(reports), 1)
@@ -1320,7 +1156,7 @@ class TestActionQueue:#(TestCase):
     actionQueue.on_background_command_complete_callback = wraped(actionQueue.on_background_command_complete_callback,
                                                                  None, command_complete_w)
     actionQueue.put([self.background_command])
-    actionQueue.processBackgroundQueueSafeEmpty();
+    actionQueue.process_background_queue_safe_empty();
     
     with lock:
       complete_done.wait(0.1)

+ 2 - 2
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -113,7 +113,7 @@ class TestAlerts(TestCase):
     cluster_configuration = self.__get_cluster_configuration()
     self.__update_cluster_configuration(cluster_configuration, {})
 
-    rm = RecoveryManager(tempfile.mktemp(), True)
+    rm = RecoveryManager(MagicMock(), True)
     alert = RecoveryAlert(definition_json, definition_json['source'], self.config, rm)
     alert.set_helpers(collector, cluster_configuration, MagicMock())
     alert.set_cluster("c1", "0", "c6401.ambari.apache.org")
@@ -871,7 +871,7 @@ class TestAlerts(TestCase):
     self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), '{0}')
     self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), '{0}')
 
-    rm = RecoveryManager(tempfile.mktemp())
+    rm = RecoveryManager(MagicMock())
     definition_json['source']['type'] = 'RECOVERY'
     alert = RecoveryAlert(definition_json, definition_json['source'], self.config, rm)
     self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), 'No recovery operations executed for {2}{0}.')

+ 29 - 0
ambari-agent/src/test/python/ambari_agent/TestAmbariConfig.py

@@ -53,3 +53,32 @@ class TestAmbariConfig(TestCase):
     open_files_ulimit = 128000
     config.set_ulimit_open_files(open_files_ulimit)
     self.assertEqual(config.get_ulimit_open_files(), open_files_ulimit)
+
+  def test_ambari_config_get_command_file_retention_policy(self):
+    config = AmbariConfig()
+
+    # unset value yields "keep"
+    if config.has_option("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY):
+      config.remove_option("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY)
+    self.assertEqual(config.command_file_retention_policy,
+                     AmbariConfig.COMMAND_FILE_RETENTION_POLICY_KEEP)
+
+    config.set("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY,
+               AmbariConfig.COMMAND_FILE_RETENTION_POLICY_KEEP)
+    self.assertEqual(config.command_file_retention_policy,
+                     AmbariConfig.COMMAND_FILE_RETENTION_POLICY_KEEP)
+
+    config.set("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY,
+               AmbariConfig.COMMAND_FILE_RETENTION_POLICY_REMOVE)
+    self.assertEqual(config.command_file_retention_policy,
+                     AmbariConfig.COMMAND_FILE_RETENTION_POLICY_REMOVE)
+
+    config.set("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY,
+               AmbariConfig.COMMAND_FILE_RETENTION_POLICY_REMOVE_ON_SUCCESS)
+    self.assertEqual(config.command_file_retention_policy,
+                     AmbariConfig.COMMAND_FILE_RETENTION_POLICY_REMOVE_ON_SUCCESS)
+
+    # Invalid value yields "keep"
+    config.set("agent", AmbariConfig.COMMAND_FILE_RETENTION_POLICY_PROPERTY, "invalid_value")
+    self.assertEqual(config.command_file_retention_policy,
+                     AmbariConfig.COMMAND_FILE_RETENTION_POLICY_KEEP)

+ 0 - 78
ambari-agent/src/test/python/ambari_agent/TestCheckWebUI.py

@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-
-'''
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-import unittest
-import StringIO
-import sys
-
-from mock.mock import MagicMock, patch
-
-# Needed to import checkWebUI.py
-sys.path.append("../../../../ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files")
-import checkWebUI
-
-class TestMain(unittest.TestCase):
-
-  def setUp(self):
-    # disable stdout
-    out = StringIO.StringIO()
-    sys.stdout = out
-
-
-  def tearDown(self):
-    # enable stdout
-    sys.stdout = sys.__stdout__
-
-  @patch("optparse.OptionParser.parse_args")
-  @patch('httplib.HTTPConnection')
-  def test_check_web_ui(self, http_mock, parse_args_mock):
-      
-    #Positive scenario
-    options = MagicMock()
-    options.hosts = 'host1,host2'
-    options.port = '10000' 
-    parse_args_mock.return_value = (options, MagicMock)
-    http_conn = http_mock.return_value
-    http_conn.getresponse.return_value = MagicMock(status=200)
-
-    checkWebUI.main()
-
-    self.assertTrue(http_conn.request.called)
-    self.assertTrue(http_conn.getresponse.called)
-    self.assertTrue(http_conn.close.called)
-    
-    #Negative scenario
-    options = MagicMock()
-    options.hosts = 'host1,host2'
-    options.port = '10000'
-    parse_args_mock.return_value = (options, MagicMock)
-    http_conn.getresponse.return_value = MagicMock(status=404)
-
-    try:
-      checkWebUI.main()
-    except SystemExit, e:
-      self.assertEqual(e.code, 1)
-
-    self.assertTrue(http_conn.request.called)
-    self.assertTrue(http_conn.getresponse.called)
-    self.assertTrue(http_conn.close.called)
-
-if __name__ == "__main__":
-  unittest.main()

+ 89 - 0
ambari-agent/src/test/python/ambari_agent/TestCommandHooksOrchestrator.py

@@ -0,0 +1,89 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+from unittest import TestCase
+
+from ambari_agent.models.hooks import HookPrefix
+from mock.mock import patch
+from ambari_agent.CommandHooksOrchestrator import HookSequenceBuilder, ResolvedHooks, HooksOrchestrator
+
+
+class TestCommandHooksOrchestrator(TestCase):
+  def setUp(self):
+    def injector():
+      pass
+
+    def file_cache():
+      pass
+
+    file_cache.__setattr__("get_hook_base_dir", lambda x: os.path.join("tmp"))
+    injector.__setattr__("file_cache", file_cache)
+
+    self._orchestrator = HooksOrchestrator(injector)
+
+  @patch("os.path.isfile")
+  def test_check_orchestrator(self, is_file_mock):
+    is_file_mock.return_value = True
+
+    ret = self._orchestrator.resolve_hooks({
+     "commandType": "EXECUTION_COMMAND",
+     "serviceName": "ZOOKEEPER",
+     "role": "ZOOKEEPER_SERVER"
+    }, "START")
+
+    self.assertTrue(ret)
+    self.assertEquals(len(ret.post_hooks), 3)
+    self.assertEquals(len(ret.pre_hooks), 3)
+
+  def test_hook_seq_builder(self):
+    seq = list(HookSequenceBuilder().build(HookPrefix.pre, "cmd", "srv", "role"))
+    seq_rev = list(HookSequenceBuilder().build(HookPrefix.post, "cmd", "srv", "role"))
+
+    # testing base default sequence definition
+    check_list = [
+      "before-cmd",
+      "before-cmd-srv",
+      "before-cmd-srv-role"
+    ]
+
+    check_list_1 = [
+      "after-cmd-srv-role",
+      "after-cmd-srv",
+      "after-cmd"
+    ]
+
+    self.assertEquals(seq, check_list)
+    self.assertEquals(seq_rev, check_list_1)
+
+  def test_hook_resolved(self):
+    def pre():
+      for i in range(1, 5):
+        yield i
+
+    def post():
+      for i in range(1, 3):
+        yield i
+
+    ret = ResolvedHooks(pre(), post())
+
+    self.assertEqual(ret.pre_hooks, list(pre()))
+    self.assertEqual(ret.post_hooks, list(post()))
+
+
+

Some files were not shown because too many files changed in this diff