diff options
author | xiubuzhe <xiubuzhe@sina.com> | 2023-10-08 20:59:00 +0800 |
---|---|---|
committer | xiubuzhe <xiubuzhe@sina.com> | 2023-10-08 20:59:00 +0800 |
commit | 1dac2263372df2b85db5d029a45721fa158a5c9d (patch) | |
tree | 0365f9c57df04178a726d7584ca6a6b955a7ce6a /sbin/insert-ethers | |
parent | b494be364bb39e1de128ada7dc576a729d99907e (diff) | |
download | sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.gz sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.bz2 sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.zip |
first add files
Diffstat (limited to 'sbin/insert-ethers')
-rwxr-xr-x | sbin/insert-ethers | 742 |
1 files changed, 742 insertions, 0 deletions
diff --git a/sbin/insert-ethers b/sbin/insert-ethers new file mode 100755 index 0000000..b15aad1 --- /dev/null +++ b/sbin/insert-ethers @@ -0,0 +1,742 @@ +#!/opt/sunpy3/bin/python3 +#coding:utf-8 +import os +import sys +import time +import snack +import getopt +import syslog +import sunhpc +import signal +import sqlite3 +import logging +import sunhpc.invoke +from sunhpc.core.utils import InsertError +from sunhpc.core.utils import InsertDone +from sunhpc.core.utils import DumpError + +logging.basicConfig(filename="/tmp/sunhpc.log", level=logging.INFO) +log = logging.getLogger("Insertnodes ") + +try: + from rhpl.translate import _, N_ + import rhpl.translate as translate + translate.textdomain('insert-ethers') +except: + from gettext import gettext as _ + +class ServiceController(object): + """Handler system services functions""" + + def __init__(self): + self.services = {} + self.ignoreList = [] + + self.plugins = [] + self.plugindir = os.path.abspath( + '/opt/sunhpc/var/plugins') + + def igore(self, service): + if service not in self.ignoreList: + self.ignoreList.append(service) + + def isIgnored(self, service): + return service in self.ignoreList + + def restart(self, service): + for name in self.services[service]: + if service not in self.ignoreList: + eval('self.restart_%s()' % name) + + def loadPlugins(self, app): + # load plug in /opt/sunhpc/var/plugins dirctorys + if not os.path.exists(self.plugindir): + return + + # 将plugin目录添加到模块自动导入. + if self.plugindir not in sys.path: + sys.path.append(self.plugindir) + + info = _("insert-ethers loading plugins: ") + + # 只载入insertnodes相关的模块. + modlist = os.listdir(self.plugindir + '/insertnodes') + modlist.sort() + for f in modlist: + modname, ext = os.path.splitext(f) + if modname == '__init__' or \ + modname == '__pycache__' or ext != '.py': + continue + + info += "%s " % modname + mods = __import__('insertnodes.%s' % modname) + mod = getattr(mods, modname) + try: + # 导入这个模块中的Plugin类. + plugin_class = getattr(mod, 'Plugin') + # 将app类以参数形式传入这个类使用. + # p是这个Plugin类,包含其方法和函数. + p = plugin_class(app) + self.plugins.append(p) + except: + info += _("(invalid, skipping) ") + + # 将模块导入信息输出到系统日志. + log.info('Load KS Plugins: %s' % info) + syslog.syslog(info) + + def logError(self, o=''): + "Logs the last execption to syslog" + oops = "%s threw exception '%s'" % (o, sys.exc_info()) + syslog.syslog(oops) + + def added(self, nodename): + """Tell all plugins this node has been added""" + for p in self.plugins: + try: + p.added(nodename) + except: + self.logError(p) + + def removed(self, nodename): + """Tell all plugins this node has been removed""" + for p in self.plugins: + try: + p.removed(nodename) + except: + self.logError(p) + + def done(self): + """Tell all plugins we are finished""" + for p in self.plugins: + try: + p.done() + except: + self.logError(p) + + def update(self): + """Tell all plugins we to reload""" + for p in self.plugins: + try: + p.update() + except: + self.logError(p) + +class GUI(object): + """Use the snack gui class""" + + def __init__(self): + self.screen = None + + def startGUI(self): + self.screen = snack.SnackScreen() + + def endGUI(self): + self.screen.finish() + + def errorGUI(self, message, l1=_("Quit"), l2=None): + return self.modalGUI(str(message), _("Error"), l1, l2) + + def warningGUI(self, message, l1=_("OK"), l2=None): + return self.modalGUI(str(message), _("Warning"), l1, l2) + + def infoGUI(self, message, l1=_("OK"), l2=None): + return self.modalGUI(str(message), _("Information"), l1, l2) + + def modalGUI(self, message, title, l1, l2): + form = snack.GridForm(self.screen, title, 2, 2) + + textbox = snack.TextboxReflowed(40, message) + form.add(textbox, 0, 0) + if not l2: + b1 = snack.Button(l1) + form.add(b1, 0, 1) + else: + b1 = snack.Button(l1) + b2 = snack.Button(l2) + form.add(b1, 0, 1) + form.add(b2, 1, 1) + + if form.runOnce() == b1: + return 0 + else: + return 1 + + +class InsertEthers(GUI): + + def __init__(self, app): + super(InsertEthers, self).__init__() + + self.sql = app + self.cmd = None + self.controller = ServiceController() + self.cabinet = 0 + self.rank = -1 + self.replace = '' + self.maxNew = -1 + self.remove = 0 + self.membership = None + self.basename = None + self.restart_srvs = 0 + self.inserted = [] + self.kickstarted = {} + self.excludeMacList = [] + self.dist_lockFile = '/var/lock/sunhpc-dist' + self.osname = 'linux' + + + self.doRestart = 1 + # 排除的mac地址 + self.subnet = 'private' # Internal Network + self.hostname = None + self.kickstartable = True + + def setMembershipName(self, membership_name): + self.membership = membership_name + + def setRemove(self, host): + self.replace = host + self.remove = 1 + + def startGUI(self): + + GUI.startGUI(self) + self.form = snack.GridForm(self.screen, _("Install the system using pxelinux"), 1, 1) + + self.textbox = snack.Textbox(50, 4, "", scroll=1) + self.form.add(self.textbox, 0, 0) + + self.screen.drawRootText(0, 0, _("SunHPC(%s) -- version %s") % + (self.sql.usage_name, + self.sql.usage_version)) + + self.screen.drawRootText(0, 1, _("Opened kickstart access to %s network") % + self.sql.getPrivateNet()) + + self.screen.pushHelpLine(' ') + + def statusGUI(self): + """Updates the list of nodes in 'Inserted Appliances' windows""" + macs_n_names = '' + ks = '' + for (mac, name) in self.inserted: + if name not in self.kickstarted: + ks = '' + elif self.kickstarted[name] == 0: + ks = '( )' + elif self.kickstarted[name] == 200: + ks = '(*)' + else: # An error + ks = '(%s)' % self.kickstarted[name] + macs_n_names += '%s\t%s\t%s\n' % (mac, name, ks) + + self.textbox.setText(_(macs_n_names)) + + self.form.draw() + self.screen.refresh() + + def waitGUI(self): + + not_done = '' + hosts = list(self.kickstarted.keys()) + hosts.sort() + for name in hosts: + status = self.kickstarted[name] + if status != 200: + ks = '( )' + if status: + ks = '(%s)' % status + not_done += '%s \t %s\n' % (name, ks) + + form = snack.GridForm(self.screen, + _("Not kickstarted, please wait..."), 1, 1) + textbox = snack.Textbox(35, 4, not_done, scroll=1) + form.add(textbox, 0,0) + + form.draw() + self.screen.refresh() + time.sleep(1) + self.screen.popWindow() + + def membershipGUI(self): + self.kickstartable = True + self.basename = 'compute' + self.setMembershipName(self.basename) + + def initializeRank(self): + query = 'select rank,max(rank) from nodes where rack = %d group by rack' % (self.cabinet) + + if self.sql.search(query) > 0: + (rank, max_rank) = self.sql.fetchone() + self.rank = max_rank + 1 + else: + self.rank = 0 + + def getnextIP(self, subnet): + + args = [ subnet ] + if self.sql.ipIncrement != -1: + args.append('increment=%d' % self.sql.ipIncrement) + + text = self.cmd.command('report.nextip', args) + if len(text) == 0: + raise Exception("Unable to get next IP address") + + return text.strip() + + def addit(self, mac, nodename, ip): + + self.cmd.command('add.host', [nodename, 'os=' + self.osname, + 'rack=' + str(self.cabinet), 'rank=' + str(self.rank)]) + + self.cmd.command('add.host.interface', [nodename, 'eth0', + 'ip=' + ip, 'mac=' + mac, 'subnet=' + self.subnet]) + + self.sql.commit() + + self.controller.added(nodename) + self.restart_srvs = 1 + + self.sql.commit() + + list = [(mac, nodename)] + list.extend(self.inserted) + self.inserted = list + self.kickstarted[nodename] = 0 + + def discover(self, mac, dev): + """如果存在数据库中返回真""" + retval = False + query = 'select mac from networks where mac="%s"' % (mac) + if not self.sql.search(query): + nodename = self.getNodename() + log.info('GetNodename: %s' % nodename) + + ipaddr = self.getnextIP(self.subnet) + self.addit(mac, nodename, ipaddr) + log.info('Addit Host: %s/%s/%s' % (nodename, ipaddr, mac)) + self.printDiscovered(mac) + + retval = True + return retval + + def printDiscovered(self, mac): + + form = snack.GridForm(self.screen, + _("Discovered New Appliance"), 1, 1) + + new_app = _("Discovered a new appliance with MAC (%s)") % (mac) + textbox = snack.Textbox(len(new_app), 1, new_app) + form.add(textbox, 0, 0) + + form.draw() + self.screen.refresh() + time.sleep(2) + self.screen.popWindow() + + def getNodename(self): + if self.hostname is not None: + return self.hostname + else: + return '%s-%d-%d' % (self.basename, self.cabinet, self.rank) + + def listenDHCP(self, line): + + tokens = line.split()[:-1] + if len(tokens) > 9 and tokens[4] == 'dhcpd:' and \ + (tokens[5] in ['DHCPDISCOVER', 'BOOTREQUEST']): + + Dev = tokens[9].replace(':','').strip() + Mac = tokens[7].strip() + + # 在DHCPDISCOVER from macaddr via eth0,这里面的eth0 + # 是指主节点开启了dhcpd的网卡名称,也是private网卡名称. + # 但这并非是计算节点的网卡名称. + self.sql.execute("""select networks.device from + networks, subnets, nodes where + subnets.name='%s' and nodes.name='%s' and + networks.subnet=subnets.id and networks.node=nodes.id""" % ( + self.subnet, self.sql.newdb.getFrontendName())) + + # 如果有需要排除的Mac地址则在这里配置. + if Mac in self.excludeMacList: return + + # 如果不匹配主节点DHCP服务的网卡名称, + subnet_dev = self.sql.fetchone()[0] + if Dev != subnet_dev: return + + # 如果已经完成添加的mac地址,放弃这次请求. + if not self.discover(Mac, Dev): return + + log.info('Discover New MAC: %s' % Mac) + self.statusGUI() + + if self.maxNew > 0: + self.maxNew -= 1 + if self.maxNew == 0: + raise InsertDone(_("Suggest Done")) + + # 自动增加主机名称的Rank号. + self.rank = self.rank + 1 + + elif len(tokens) > 6 and tokens[4] == 'last' and \ + tokens[5] == 'message' and tokens[6] == 'repeated': + + shortname = os.uname()[1].split('.')[0] + if tokens[3] == shortname: + os.system('/usr/bin/systemctl restart syslog >/dev/null 2>&1') + + def monitoring(self): + # 监控日志 + mslog = open('/var/log/messages', 'r') + mslog.seek(0, 2) + + kslog = open('/var/log/httpd/access_log', 'r') + kslog.seek(0, 2) + + self.screen.pushHelpLine( + _(" Press <F8> to quit, press <F9> to force quit")) + self.form.addHotKey('F8') + self.form.addHotKey('F9') + self.form.setTimer(1000) + + self.statusGUI() + + result = self.form.run() + suggest_done = 0 + done = 0 + log.info('Monitoring Log: OK') + while not done: + + # 监控系统日志中的dhcpd信息. + syslog_line = mslog.readline() + if syslog_line and not suggest_done: + try: + self.listenDHCP(syslog_line) + except InsertDone: + suggest_done = 1 + + except (sunhpc.core.utils.CommandError, InsertError) as msg: + self.warningGUI(msg) + continue + + # 监控日志中的pxelinux信息. + access_line = kslog.readline() + if access_line: + try: + self.listenKS(access_line) + except InsertError as msg: + self.warningGUI(msg) + continue + # + result = self.form.run() + done = self.checkDone(result, suggest_done) + + log.info('Restarting services status: %s' % self.restart_srvs) + if self.restart_srvs: + log.info('Start restart services ...') + form = snack.GridForm(self.screen, _("Restarting Services"), 1, 1) + message = _("Restarting Services...") + textbox = snack.Textbox(len(message), 1, message) + form.add(textbox, 0, 0) + form.draw() + + self.screen.refresh() + self.controller.done() + self.screen.popWindow() + + mslog.close() + self.endGUI() + + def listenKS(self, line): + """Look in log line for a kickstart request.""" + + # Track accesses both with and without local certs. + interesting = line.count('install/sbin/kickstart.cgi') \ + or line.count('install/sbin/public/kickstart.cgi') \ + or line.count('install/sbin/public/jumpstart.cgi') + if not interesting: + return + + fields = line.split() + try: + status = int(fields[8]) + log.info('Kickstart Code: %s' % status) + except: + raise InsertError(_("Apache log file not well formed!")) + + nodeid = int(self.sql.getNodeId(fields[0])) + self.sql.execute('select name from nodes where id=%d' % nodeid) + try: + name, = self.sql.fetchone() + except: + if status == 200: + raise InsertError( _("Unknown node %s got a kickstart file!") % fields[0]) + return + + log.info('Kickstart NodeID %s->%s' % (name, nodeid)) + if name not in self.kickstarted: + return + + log.info('Change KS Status %s->%s' % (name, status)) + self.kickstarted[name] = status + self.statusGUI() + + def checkDone(self, result, suggest_done): + + if result == 'TIMER' and not suggest_done: + return 0 + + if result == 'F9': return 1 + + if not self.kickstartable: return 1 + + ok = 1 + for status in self.kickstarted.values(): + if status != 200: + ok = 0 + break + + if not ok: + if result == 'F8': + self.waitGUI() + else: + if suggest_done or result == 'F8': + return 1 + return 0 + + def distDone(self): + if os.path.exists(self.dist_lockFile): + self.warningGUI(_("Sunhpc distribution is not ready\n\n") + + _("Please wait for 'sunhpc create distro' to complete\n")) + return 0 + return 1 + + def run(self): + + self.cmd = sunhpc.commands.Command(self.sql.newdb) + try: + self.cmd.command('check.services', []) + log.info('Check services: OK') + except sunhpc.core.utils.CommandError as err: + sys.stderr.write('error - ' + str(err) + '\n') + return + + # 开始启动界面 + self.startGUI() + log.info('Start Daemon GUI: OK') + + # make sure 'sunhpc create distro' is build finished. + if not self.distDone(): + self.endGUI() + return + + self.controller.loadPlugins(self.sql) + try: + if self.remove: + self.endGUI() + self.controller.done() + print ('Removed node %s' % self.replace) + return + + # 初始化Member界面信息 + self.membershipGUI() + # 初始化Rank信息 + self.initializeRank() + + if self.hostname: + # 检查给与的主机名是否有效. + self.checkHostNameValidity(self.hostname) + + except (sunhpc.core.utils.CommandError, InsertError) as msg: + self.errorGUI(msg) + self.endGUI() + sys.stderr.write(_("%s\n") % str(msg)) + return + + log.info('Start Monitoring ...') + self.monitoring() + +class App(sunhpc.core.sql.Application): + + def __init__(self, argv=None): + sunhpc.core.sql.Application.__init__(self, argv) + + if not argv: + argv = sys.argv + + self.args = [] + self.caller_args = argv[1:] + self.usage_name = 'Kamaitachi' + self.usage_version = '1.0.0' + self.usage_command = os.path.basename(argv[0]) + + self.getopt = sunhpc.core.utils.Struct() + # 短参数 + self.getopt.s = [] + # 长参数 + self.getopt.l = [ ('help', 'display the command help infomation'), + ('version', 'Display the sunhpc version') + ] + + try: + # unset our locale + del os.environ['LANG'] + except KeyError: + pass + + self.dist = None + self.doUpdate = 0 + self.lockFile = '/var/lock/insert-ethers' + self.insertor = InsertEthers(self) + self.controller = ServiceController() + self.ipIncrement = -1 + self.doPublicMode = 0 + + self.getopt.l.extend([ + ('remove=', 'remove an hostname') + ]) + + def getArgs(self): + return self.args + + def setArgs(self, list): + self.args = list + + def getPrivateNet(self): + net = self.getHostAttr('localhost', 'Kickstart_PrivateNetwork') + mask = self.getHostAttr('localhost', 'Kickstart_PrivateNetmask') + return "%s/%s" % (net, mask) + + def parseArgs(self, rcbase=None): + """解析参数""" + + args = self.getArgs() + + # 设置参数 + self.setArgs(self.caller_args) + + # 开始解析参数 + self.parseCommandLine() + + def parseCommandLine(self): + + # 使用 getopt 类中的 parse函数解析命令行 + + # 解析短参数形式 + short = '' + for e in self.getopt.s: + if type(e) == type(()): + # 取参数左值 + short = short + e[0] + else: + short = short + e + + # 解析长参数形式 + long = [] + for e in self.getopt.l: + if type(e) == type(()): + # 取参数左值 + long.append(e[0]) + else: + long.append(e) + + try: + opts, args = getopt.getopt(self.args, short, long) + except getopt.GetoptError as msg: + sys.stderr.write('error - %s\n' % msg) + self.usage() + sys.exit(1) + + for c in opts: + self.parseArg(c) + + def parseArg(self, c): + + if c[0] in ('-h', '--help'): + self.usage() + sys.exit(0) + + elif c[0] in ('version', '--version'): + print (self.getClusterVersion()) + elif c[0] == '--remove': + self.insertor.setRemove(c[1]) + return 0 + + def getClusterVersion(self): + return "SunhpcOS (%s) for version - %s" % ( + self.cmd.release, self.cmd.version) + + def usage(self): + argDict = {} + for e in self.getopt.s: + if type(e) == type(()): + argDict['-%s' % e[0]] = e[1] + else: + argDict['-%s' % e] = '' + + for l in self.getopt.l: + if type(l) == type(()): + argDict['--%s' % l[0]] = l[1] + else: + argDict['--%s' % l] = '' + + if not argDict: return + maxlen = max(map(len, argDict.keys())) + print ('\nUsage: %s [options] command infomations' % self.usage_command) + for k in argDict: + keys = k.ljust(maxlen) + vals = argDict[k] + print (' %s\t%s' % (keys, vals)) + print ('If you have any questions, please contact info@sunhpc.com') + + def run(self): + self.connect() + + if os.path.isfile(self.lockFile): + self.cmd.abort('lock file %s exists.' % self.lockFile) + else: + os.system('touch %s' % self.lockFile) + + if self.doPublicMode: + self.insertor.runPublicOnly() + else: + self.insertor.run() + + self.cleanup() + + def cleanup(self): + try: + os.unlink(self.lockFile) + except: + pass + +if __name__ == "__main__": + + try: + (width, heigh) = shutil.get_terminal_size() + except: + width = 80 + os.environ['COLUMNS'] = str(width) + log.info('starting insert-node ...') + + app = App(sys.argv) + app.parseArgs() + try: + app.run() + except Exception as msg: + app.cleanup() + if app.insertor and app.insertor.screen: + app.insertor.endGUI() + sys.stderr.write('error - ' + str(msg) + '\n') + import traceback + traceback.print_exc() + sys.exit(1) + + finally: + if os.path.exists(app.lockFile): + os.unlink(app.lockFile) + + + |