#!/usr/bin/env python

'''
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import sys
# For compatibility with different OSes
# Edit PYTHONPATH to be able to import common_functions
sys.path.append("/usr/lib/python2.6/site-packages/")

import os
import string
import subprocess
import logging
import shutil
import platform
import fnmatch
import ConfigParser
import optparse
import shlex
import datetime
from AmbariConfig import AmbariConfig
from ambari_commons import OSCheck, OSConst

if OSCheck.get_os_family() != OSConst.WINSRV_FAMILY:
  from pwd import getpwnam

logger = logging.getLogger()

PACKAGE_ERASE_CMD = {
  "redhat": "yum erase -y {0}",
  "suse": "zypper -n -q remove {0}",
  "ubuntu": "/usr/bin/apt-get -y -q remove {0}"
}

USER_ERASE_CMD = "userdel -rf {0}"
GROUP_ERASE_CMD = "groupdel {0}"
PROC_KILL_CMD = "kill -9 {0}"
ALT_DISP_CMD = "alternatives --display {0}"
ALT_ERASE_CMD = "alternatives --remove {0} {1}"

REPO_PATH_RHEL = "/etc/yum.repos.d"
REPO_PATH_SUSE = "/etc/zypp/repos.d/"
SKIP_LIST = []
HOST_CHECK_FILE_NAME = "hostcheck.result"
OUTPUT_FILE_NAME = "hostcleanup.result"

PACKAGE_SECTION = "packages"
PACKAGE_KEY = "pkg_list"
USER_SECTION = "users"
USER_KEY = "usr_list"
USER_HOMEDIR_KEY = "usr_homedir_list"
USER_HOMEDIR_SECTION = "usr_homedir"
REPO_SECTION = "repositories"
REPOS_KEY = "repo_list"
DIR_SECTION = "directories"
ADDITIONAL_DIRS = "additional_directories"
DIR_KEY = "dir_list"
CACHE_FILES_PATTERN = {
  'alerts': ['*.json']
}
PROCESS_SECTION = "processes"
PROCESS_KEY = "proc_list"
ALT_SECTION = "alternatives"
ALT_KEYS = ["symlink_list", "target_list"]
HADOOP_GROUP = "hadoop"
FOLDER_LIST = ["/tmp"]
# Additional path patterns to find existing directory
DIRNAME_PATTERNS = [
  "/tmp/hadoop-", "/tmp/hsperfdata_"
]
# resources that should not be cleaned
REPOSITORY_BLACK_LIST = ["ambari.repo"]
PACKAGES_BLACK_LIST = ["ambari-server", "ambari-agent"]

class HostCleanup:

  def resolve_ambari_config(self):
    try:
      config = AmbariConfig()
      if os.path.exists(AmbariConfig.getConfigFile()):
        config.read(AmbariConfig.getConfigFile())
      else:
        raise Exception("No config found, use default")
    except Exception, err:
      logger.warn(err)
    return config

  def get_additional_dirs(self):
    resultList = []
    dirList = set()
    for patern in DIRNAME_PATTERNS:
      dirList.add(os.path.dirname(patern))
    for folder in dirList:
      for dirs in os.walk(folder):
        for dir in dirs:
          for patern in DIRNAME_PATTERNS:
            if patern in dir:
              resultList.append(dir)
    return resultList
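
  # Entry point for the actual cleanup: removes the resources listed in
  # argMap (as read from hostcheck.result) unless their section name is
  # present in SKIP_LIST.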
  def do_cleanup(self, argMap=None):
    if argMap:
      packageList = argMap.get(PACKAGE_SECTION)
      userList = argMap.get(USER_SECTION)
      homeDirList = argMap.get(USER_HOMEDIR_SECTION)
      dirList = argMap.get(DIR_SECTION)
      repoList = argMap.get(REPO_SECTION)
      procList = argMap.get(PROCESS_SECTION)
      alt_map = argMap.get(ALT_SECTION)
      additionalDirList = self.get_additional_dirs()

      if userList and not USER_SECTION in SKIP_LIST:
        userIds = self.get_user_ids(userList)
      if procList and not PROCESS_SECTION in SKIP_LIST:
        logger.info("\n" + "Killing pid's: " + str(procList) + "\n")
        self.do_kill_processes(procList)
      if packageList and not PACKAGE_SECTION in SKIP_LIST:
        logger.info("Deleting packages: " + str(packageList) + "\n")
        self.do_erase_packages(packageList)
      if userList and not USER_SECTION in SKIP_LIST:
        logger.info("\n" + "Deleting users: " + str(userList))
        self.do_delete_users(userList)
        self.do_erase_dir_silent(homeDirList)
        self.do_delete_by_owner(userIds, FOLDER_LIST)
      if dirList and not DIR_SECTION in SKIP_LIST:
        logger.info("\n" + "Deleting directories: " + str(dirList))
        self.do_erase_dir_silent(dirList)
      if additionalDirList and not ADDITIONAL_DIRS in SKIP_LIST:
        logger.info("\n" + "Deleting additional directories: " + str(additionalDirList))
        self.do_erase_dir_silent(additionalDirList)
      if repoList and not REPO_SECTION in SKIP_LIST:
        repoFiles = self.find_repo_files_for_repos(repoList)
        logger.info("\n" + "Deleting repo files: " + str(repoFiles))
        self.do_erase_files_silent(repoFiles)
      if alt_map and not ALT_SECTION in SKIP_LIST:
        logger.info("\n" + "Erasing alternatives:" + str(alt_map) + "\n")
        self.do_erase_alternatives(alt_map)

    return 0
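
  # Parses the hostcheck.result file (INI format) into a map keyed by
  # section name; any missing or unreadable section is logged and skipped.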
  def read_host_check_file(self, config_file_path):
    propertyMap = {}

    try:
      with open(config_file_path, 'r'):
        pass
    except Exception, e:
      logger.error("Host check result not found at: " + str(config_file_path))
      return None

    try:
      config = ConfigParser.RawConfigParser()
      config.read(config_file_path)
    except Exception, e:
      logger.error("Cannot read host check result: " + str(e))
      return None

    # Initialize map from file
    try:
      if config.has_option(PACKAGE_SECTION, PACKAGE_KEY):
        propertyMap[PACKAGE_SECTION] = config.get(PACKAGE_SECTION, PACKAGE_KEY).split(',')
    except:
      logger.warn("Cannot read package list: " + str(sys.exc_info()[0]))

    try:
      if config.has_option(PROCESS_SECTION, PROCESS_KEY):
        propertyMap[PROCESS_SECTION] = config.get(PROCESS_SECTION, PROCESS_KEY).split(',')
    except:
      logger.warn("Cannot read process list: " + str(sys.exc_info()[0]))

    try:
      if config.has_option(USER_SECTION, USER_KEY):
        propertyMap[USER_SECTION] = config.get(USER_SECTION, USER_KEY).split(',')
    except:
      logger.warn("Cannot read user list: " + str(sys.exc_info()[0]))

    try:
      if config.has_option(USER_SECTION, USER_HOMEDIR_KEY):
        propertyMap[USER_HOMEDIR_SECTION] = config.get(USER_SECTION, USER_HOMEDIR_KEY).split(',')
    except:
      logger.warn("Cannot read user homedir list: " + str(sys.exc_info()[0]))

    try:
      if config.has_option(REPO_SECTION, REPOS_KEY):
        propertyMap[REPO_SECTION] = config.get(REPO_SECTION, REPOS_KEY).split(',')
    except:
      logger.warn("Cannot read repositories list: " + str(sys.exc_info()[0]))

    try:
      if config.has_option(DIR_SECTION, DIR_KEY):
        propertyMap[DIR_SECTION] = config.get(DIR_SECTION, DIR_KEY).split(',')
    except:
      logger.warn("Cannot read dir list: " + str(sys.exc_info()[0]))

    try:
      alt_map = {}
      if config.has_option(ALT_SECTION, ALT_KEYS[0]):
        alt_map[ALT_KEYS[0]] = config.get(ALT_SECTION, ALT_KEYS[0]).split(',')
      if config.has_option(ALT_SECTION, ALT_KEYS[1]):
        alt_map[ALT_KEYS[1]] = config.get(ALT_SECTION, ALT_KEYS[1]).split(',')
      if alt_map:
        propertyMap[ALT_SECTION] = alt_map
    except:
      logger.warn("Cannot read alternates list: " + str(sys.exc_info()[0]))

    return propertyMap
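
  # Returns the "priority" lines of `alternatives --display <alt_name>`, i.e.
  # one line per registered alternative path, or None/empty output if the
  # lookup fails.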
  def get_alternatives_desc(self, alt_name):
    command = ALT_DISP_CMD.format(alt_name)
    out = None
    try:
      p1 = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
      p2 = subprocess.Popen(["grep", "priority"], stdin=p1.stdout, stdout=subprocess.PIPE)
      p1.stdout.close()
      out = p2.communicate()[0]
      logger.debug('alternatives --display ' + alt_name + '\n, out = ' + out)
    except:
      logger.warn('Cannot process alternative named: ' + alt_name + ',' +
                  'error: ' + str(sys.exc_info()[0]))
    return out

  def do_clear_cache(self, cache_root, dir_map=None):
    """
    Clear cache dir according to provided root directory

    cache_root - root dir for cache directory
    dir_map - should be used only for recursive calls
    """
    global CACHE_FILES_PATTERN
    file_map = CACHE_FILES_PATTERN if dir_map is None else dir_map
    remList = []

    # Build remove list according to masks
    for folder in file_map:
      if isinstance(file_map[folder], list):  # here is list of file masks/files
        for mask in file_map[folder]:
          remList += self.get_files_in_dir("%s/%s" % (cache_root, folder), mask)
      elif isinstance(file_map[folder], dict):  # here described sub-folder
        remList += self.do_clear_cache("%s/%s" % (cache_root, folder), file_map[folder])

    if dir_map is not None:  # push result list back as this is call from stack
      return remList
    else:  # root call, so we have final list
      self.do_erase_files_silent(remList)

  # Alternatives exist as a stack of symlinks under /var/lib/alternatives/$name
  # Script expects names of the alternatives as input
  # We find all the symlinks using command, #] alternatives --display $name
  # and delete them using command, #] alternatives --remove $name $path.
  def do_erase_alternatives(self, alt_map):
    if alt_map:
      alt_list = alt_map.get(ALT_KEYS[0])
      if alt_list:
        for alt_name in alt_list:
          if alt_name:
            out = self.get_alternatives_desc(alt_name)
            if not out:
              logger.warn('No alternatives found for: ' + alt_name)
              continue
            else:
              alternates = out.split('\n')
              if alternates:
                for entry in alternates:
                  if entry:
                    alt_path = entry.split()[0]
                    logger.debug('Erasing alternative named: ' + alt_name + ', '
                                 'path: ' + alt_path)
                    command = ALT_ERASE_CMD.format(alt_name, alt_path)
                    (returncode, stdoutdata, stderrdata) = self.run_os_command(command)
                    if returncode != 0:
                      logger.warn('Failed to remove alternative: ' + alt_name +
                                  ", path: " + alt_path + ", error: " + stderrdata)

      # Remove directories - configs
      dir_list = alt_map.get(ALT_KEYS[1])
      if dir_list:
        self.do_erase_dir_silent(dir_list)

    return 0
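
  # Sends SIGKILL (kill -9) to every pid in pidList; failures are logged but
  # do not abort the cleanup.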
  def do_kill_processes(self, pidList):
    if pidList:
      for pid in pidList:
        if pid:
          command = PROC_KILL_CMD.format(pid)
          (returncode, stdoutdata, stderrdata) = self.run_os_command(command)
          if returncode != 0:
            logger.error("Unable to kill process with pid: " + pid + ", " + stderrdata)
    return 0
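
  # Lists regular files (no symlinks, no sub-directories) directly under
  # dirPath, optionally filtered by an fnmatch-style filemask.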
  def get_files_in_dir(self, dirPath, filemask=None):
    fileList = []
    if dirPath:
      if os.path.exists(dirPath):
        listdir = os.listdir(dirPath)
        if listdir:
          for link in listdir:
            path = dirPath + os.sep + link
            if not os.path.islink(path) and not os.path.isdir(path):
              if filemask is not None:
                if fnmatch.fnmatch(path, filemask):
                  fileList.append(path)
              else:
                fileList.append(path)
    return fileList
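
  # Scans the OS-specific repository directory (/etc/yum.repos.d or
  # /etc/zypp/repos.d) and returns the repo files that define any of the
  # given repository names.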
  def find_repo_files_for_repos(self, repoNames):
    repoFiles = []
    osType = OSCheck.get_os_family()
    repoNameList = []
    for repoName in repoNames:
      if len(repoName.strip()) > 0:
        repoNameList.append("[" + repoName + "]")
        repoNameList.append("name=" + repoName)
    if repoNameList:
      # get list of files
      if osType == 'suse':
        fileList = self.get_files_in_dir(REPO_PATH_SUSE)
      elif osType == "redhat":
        fileList = self.get_files_in_dir(REPO_PATH_RHEL)
      else:
        logger.warn("Unsupported OS type, cannot get repository location.")
        return []

      if fileList:
        for filePath in fileList:
          with open(filePath, 'r') as file:
            content = file.readline()
            while content != "":
              for repoName in repoNameList:
                if content.find(repoName) == 0 and filePath not in repoFiles:
                  repoFiles.append(filePath)
                  break
              content = file.readline()
    return repoFiles
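
  # Uninstalls the given packages in a single command using the package
  # manager that matches the detected OS family (yum, zypper or apt-get).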
  def do_erase_packages(self, packageList):
    packageStr = None
    if packageList:
      packageStr = ' '.join(packageList)
      logger.debug("Erasing packages: " + packageStr)
    if packageStr is not None and packageStr:
      os_name = OSCheck.get_os_family()
      command = ''
      if os_name in PACKAGE_ERASE_CMD:
        command = PACKAGE_ERASE_CMD[os_name].format(packageStr)
      else:
        logger.warn("Unsupported OS type, cannot remove package.")

      if command != '':
        logger.debug('Executing: ' + str(command))
        (returncode, stdoutdata, stderrdata) = self.run_os_command(command)
        if returncode != 0:
          logger.warn("Erasing packages failed: " + stderrdata)
        else:
          logger.info("Erased packages successfully.\n" + stdoutdata)
    return 0
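
  # Recursively removes each path in pathList; plain files are delegated to
  # do_erase_files_silent and missing paths are only logged.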
  def do_erase_dir_silent(self, pathList):
    if pathList:
      for path in pathList:
        if path and os.path.exists(path):
          if os.path.isdir(path):
            try:
              shutil.rmtree(path)
            except:
              logger.warn("Failed to remove dir: " + path + ", error: " + str(sys.exc_info()[0]))
          else:
            logger.info(path + " is a file and not a directory, deleting file")
            self.do_erase_files_silent([path])
        else:
          logger.info("Path doesn't exist: " + path)
    return 0

  def do_erase_files_silent(self, pathList):
    if pathList:
      for path in pathList:
        if path and os.path.exists(path):
          try:
            os.remove(path)
          except:
            logger.warn("Failed to delete file: " + path + ", error: " + str(sys.exc_info()[0]))
        else:
          logger.info("File doesn't exist: " + path)
    return 0
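
  # Removes the shared "hadoop" group once the service users have been
  # deleted.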
  def do_delete_group(self):
    groupDelCommand = GROUP_ERASE_CMD.format(HADOOP_GROUP)
    (returncode, stdoutdata, stderrdata) = self.run_os_command(groupDelCommand)
    if returncode != 0:
      logger.warn("Cannot delete group : " + HADOOP_GROUP + ", " + stderrdata)
    else:
      logger.info("Successfully deleted group: " + HADOOP_GROUP)
  def do_delete_by_owner(self, userIds, folders):
    for folder in folders:
      for filename in os.listdir(folder):
        fileToCheck = os.path.join(folder, filename)
        stat = os.stat(fileToCheck)
        if stat.st_uid in userIds:
          self.do_erase_dir_silent([fileToCheck])
          logger.info("Deleting file/folder: " + fileToCheck)
  def get_user_ids(self, userList):
    userIds = []
    if userList:
      for user in userList:
        if user:
          try:
            userIds.append(getpwnam(user).pw_uid)
          except Exception:
            logger.warn("Cannot find user : " + user)
    return userIds
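
  # Deletes each user together with its home directory (userdel -rf), then
  # drops the shared hadoop group.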
  def do_delete_users(self, userList):
    if userList:
      for user in userList:
        if user:
          command = USER_ERASE_CMD.format(user)
          (returncode, stdoutdata, stderrdata) = self.run_os_command(command)
          if returncode != 0:
            logger.warn("Cannot delete user : " + user + ", " + stderrdata)
          else:
            logger.info("Successfully deleted user: " + user)
      self.do_delete_group()
    return 0

  def is_current_user_root(self):
    return os.getuid() == 0

  # Run command as sudoer by default, if root no issues
  def run_os_command(self, cmd, runWithSudo=True):
    if runWithSudo:
      cmd = 'sudo ' + cmd
    logger.info('Executing command: ' + str(cmd))
    if type(cmd) == str:
      cmd = shlex.split(cmd)
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stdin=subprocess.PIPE,
                               stderr=subprocess.PIPE
                               )
    (stdoutdata, stderrdata) = process.communicate()
    return process.returncode, stdoutdata, stderrdata

  def search_file(self, filename, search_path, pathsep=os.pathsep):
    """ Given a search path, find file with requested name """
    for path in string.split(search_path, pathsep):
      candidate = os.path.join(path, filename)
      if os.path.exists(candidate): return os.path.abspath(candidate)
    return None


# Copy file and save with file.# (timestamp)
def backup_file(filePath):
  if filePath is not None and os.path.exists(filePath):
    timestamp = datetime.datetime.now()
    format = '%Y%m%d%H%M%S'
    try:
      shutil.copyfile(filePath, filePath + "." + timestamp.strftime(format))
    except Exception, e:
      logger.warn('Could not backup file "%s": %s' % (str(filePath), str(e)))
  return 0
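
# Prompt helpers: interpret a yes/no answer from the user, falling back to
# the provided default when only Enter is pressed.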
def get_YN_input(prompt, default):
  yes = set(['yes', 'ye', 'y'])
  no = set(['no', 'n'])
  return get_choice_string_input(prompt, default, yes, no)

def get_choice_string_input(prompt, default, firstChoice, secondChoice):
  choice = raw_input(prompt).lower()
  if choice in firstChoice:
    return True
  elif choice in secondChoice:
    return False
  elif choice == "":  # Just enter pressed
    return default
  else:
    print "input not recognized, please try again: "
    return get_choice_string_input(prompt, default, firstChoice, secondChoice)
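
# Parses the command line options, sets up logging to the output file and
# drives the cleanup from the hostcheck.result contents; must be run as root.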
def main():
  h = HostCleanup()
  config = h.resolve_ambari_config()
  hostCheckFileDir = config.get('agent', 'prefix')
  hostCheckFilePath = os.path.join(hostCheckFileDir, HOST_CHECK_FILE_NAME)
  hostCheckResultPath = os.path.join(hostCheckFileDir, OUTPUT_FILE_NAME)

  parser = optparse.OptionParser()
  parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                    default=False, help="output verbosity.")
  parser.add_option("-f", "--file", dest="inputfile",
                    default=hostCheckFilePath,
                    help="host check result file to read.", metavar="FILE")
  parser.add_option("-o", "--out", dest="outputfile",
                    default=hostCheckResultPath,
                    help="log file to store results.", metavar="FILE")
  parser.add_option("-k", "--skip", dest="skip",
                    help="(packages|users|directories|repositories|processes|alternatives)." +
                         " Use , as separator.")
  parser.add_option("-s", "--silent",
                    action="store_true", dest="silent", default=False,
                    help="Silently accepts default prompt values")

  (options, args) = parser.parse_args()

  # set output file
  backup_file(options.outputfile)
  global logger
  logger = logging.getLogger('HostCleanup')
  handler = logging.FileHandler(options.outputfile)
  formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
  handler.setFormatter(formatter)
  logger.addHandler(handler)

  # set verbose
  if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
  else:
    logging.basicConfig(level=logging.INFO)

  if options.skip is not None:
    global SKIP_LIST
    SKIP_LIST = options.skip.split(',')

  is_root = h.is_current_user_root()
  if not is_root:
    raise RuntimeError('HostCleanup needs to be run as root.')

  if not options.silent:
    if "users" not in SKIP_LIST:
      delete_users = get_YN_input('You have elected to remove all users as well. If it is not intended then use '
                                  'option --skip "users". Do you want to continue [y/n] (y)', True)
      if not delete_users:
        print 'Exiting. Use option --skip="users" to skip deleting users'
        sys.exit(1)

  hostcheckfile = options.inputfile
  propMap = h.read_host_check_file(hostcheckfile)

  if propMap:
    h.do_cleanup(propMap)

  if os.path.exists(config.get('agent', 'cache_dir')):
    h.do_clear_cache(config.get('agent', 'cache_dir'))

  logger.info('Clean-up completed. The output is at %s' % (str(options.outputfile)))

if __name__ == '__main__':
  main()