#!/opt/sunpy3/bin/python3 #coding:utf-8 import os import sys import time import snack import getopt import syslog import sunhpc import signal import sqlite3 import logging import sunhpc.invoke from sunhpc.core.utils import InsertError from sunhpc.core.utils import InsertDone from sunhpc.core.utils import DumpError logging.basicConfig(filename="/tmp/sunhpc.log", level=logging.INFO) log = logging.getLogger("Insertnodes ") try: from rhpl.translate import _, N_ import rhpl.translate as translate translate.textdomain('insert-ethers') except: from gettext import gettext as _ class ServiceController(object): """Handler system services functions""" def __init__(self): self.services = {} self.ignoreList = [] self.plugins = [] self.plugindir = os.path.abspath( '/opt/sunhpc/var/plugins') def igore(self, service): if service not in self.ignoreList: self.ignoreList.append(service) def isIgnored(self, service): return service in self.ignoreList def restart(self, service): for name in self.services[service]: if service not in self.ignoreList: eval('self.restart_%s()' % name) def loadPlugins(self, app): # load plug in /opt/sunhpc/var/plugins dirctorys if not os.path.exists(self.plugindir): return # 将plugin目录添加到模块自动导入. if self.plugindir not in sys.path: sys.path.append(self.plugindir) info = _("insert-ethers loading plugins: ") # 只载入insertnodes相关的模块. modlist = os.listdir(self.plugindir + '/insertnodes') modlist.sort() for f in modlist: modname, ext = os.path.splitext(f) if modname == '__init__' or \ modname == '__pycache__' or ext != '.py': continue info += "%s " % modname mods = __import__('insertnodes.%s' % modname) mod = getattr(mods, modname) try: # 导入这个模块中的Plugin类. plugin_class = getattr(mod, 'Plugin') # 将app类以参数形式传入这个类使用. # p是这个Plugin类,包含其方法和函数. p = plugin_class(app) self.plugins.append(p) except: info += _("(invalid, skipping) ") # 将模块导入信息输出到系统日志. log.info('Load KS Plugins: %s' % info) syslog.syslog(info) def logError(self, o=''): "Logs the last execption to syslog" oops = "%s threw exception '%s'" % (o, sys.exc_info()) syslog.syslog(oops) def added(self, nodename): """Tell all plugins this node has been added""" for p in self.plugins: try: p.added(nodename) except: self.logError(p) def removed(self, nodename): """Tell all plugins this node has been removed""" for p in self.plugins: try: p.removed(nodename) except: self.logError(p) def done(self): """Tell all plugins we are finished""" for p in self.plugins: try: p.done() except: self.logError(p) def update(self): """Tell all plugins we to reload""" for p in self.plugins: try: p.update() except: self.logError(p) class GUI(object): """Use the snack gui class""" def __init__(self): self.screen = None def startGUI(self): self.screen = snack.SnackScreen() def endGUI(self): self.screen.finish() def errorGUI(self, message, l1=_("Quit"), l2=None): return self.modalGUI(str(message), _("Error"), l1, l2) def warningGUI(self, message, l1=_("OK"), l2=None): return self.modalGUI(str(message), _("Warning"), l1, l2) def infoGUI(self, message, l1=_("OK"), l2=None): return self.modalGUI(str(message), _("Information"), l1, l2) def modalGUI(self, message, title, l1, l2): form = snack.GridForm(self.screen, title, 2, 2) textbox = snack.TextboxReflowed(40, message) form.add(textbox, 0, 0) if not l2: b1 = snack.Button(l1) form.add(b1, 0, 1) else: b1 = snack.Button(l1) b2 = snack.Button(l2) form.add(b1, 0, 1) form.add(b2, 1, 1) if form.runOnce() == b1: return 0 else: return 1 class InsertEthers(GUI): def __init__(self, app): super(InsertEthers, self).__init__() self.sql = app self.cmd = None self.controller = ServiceController() self.cabinet = 0 self.rank = -1 self.replace = '' self.maxNew = -1 self.remove = 0 self.membership = None self.basename = None self.restart_srvs = 0 self.inserted = [] self.kickstarted = {} self.excludeMacList = [] self.dist_lockFile = '/var/lock/sunhpc-dist' self.osname = 'linux' self.doRestart = 1 # 排除的mac地址 self.subnet = 'private' # Internal Network self.hostname = None self.kickstartable = True def setMembershipName(self, membership_name): self.membership = membership_name def setRemove(self, host): self.replace = host self.remove = 1 def startGUI(self): GUI.startGUI(self) self.form = snack.GridForm(self.screen, _("Install the system using pxelinux"), 1, 1) self.textbox = snack.Textbox(50, 4, "", scroll=1) self.form.add(self.textbox, 0, 0) self.screen.drawRootText(0, 0, _("SunHPC(%s) -- version %s") % (self.sql.usage_name, self.sql.usage_version)) self.screen.drawRootText(0, 1, _("Opened kickstart access to %s network") % self.sql.getPrivateNet()) self.screen.pushHelpLine(' ') def statusGUI(self): """Updates the list of nodes in 'Inserted Appliances' windows""" macs_n_names = '' ks = '' for (mac, name) in self.inserted: if name not in self.kickstarted: ks = '' elif self.kickstarted[name] == 0: ks = '( )' elif self.kickstarted[name] == 200: ks = '(*)' else: # An error ks = '(%s)' % self.kickstarted[name] macs_n_names += '%s\t%s\t%s\n' % (mac, name, ks) self.textbox.setText(_(macs_n_names)) self.form.draw() self.screen.refresh() def waitGUI(self): not_done = '' hosts = list(self.kickstarted.keys()) hosts.sort() for name in hosts: status = self.kickstarted[name] if status != 200: ks = '( )' if status: ks = '(%s)' % status not_done += '%s \t %s\n' % (name, ks) form = snack.GridForm(self.screen, _("Not kickstarted, please wait..."), 1, 1) textbox = snack.Textbox(35, 4, not_done, scroll=1) form.add(textbox, 0,0) form.draw() self.screen.refresh() time.sleep(1) self.screen.popWindow() def membershipGUI(self): self.kickstartable = True self.basename = 'compute' self.setMembershipName(self.basename) def initializeRank(self): query = 'select rank,max(rank) from nodes where rack = %d group by rack' % (self.cabinet) if self.sql.search(query) > 0: (rank, max_rank) = self.sql.fetchone() self.rank = max_rank + 1 else: self.rank = 0 def getnextIP(self, subnet): args = [ subnet ] if self.sql.ipIncrement != -1: args.append('increment=%d' % self.sql.ipIncrement) text = self.cmd.command('report.nextip', args) if len(text) == 0: raise Exception("Unable to get next IP address") return text.strip() def addit(self, mac, nodename, ip): self.cmd.command('add.host', [nodename, 'os=' + self.osname, 'rack=' + str(self.cabinet), 'rank=' + str(self.rank)]) self.cmd.command('add.host.interface', [nodename, 'eth0', 'ip=' + ip, 'mac=' + mac, 'subnet=' + self.subnet]) self.sql.commit() self.controller.added(nodename) self.restart_srvs = 1 self.sql.commit() list = [(mac, nodename)] list.extend(self.inserted) self.inserted = list self.kickstarted[nodename] = 0 def discover(self, mac, dev): """如果存在数据库中返回真""" retval = False query = 'select mac from networks where mac="%s"' % (mac) if not self.sql.search(query): nodename = self.getNodename() log.info('GetNodename: %s' % nodename) ipaddr = self.getnextIP(self.subnet) self.addit(mac, nodename, ipaddr) log.info('Addit Host: %s/%s/%s' % (nodename, ipaddr, mac)) self.printDiscovered(mac) retval = True return retval def printDiscovered(self, mac): form = snack.GridForm(self.screen, _("Discovered New Appliance"), 1, 1) new_app = _("Discovered a new appliance with MAC (%s)") % (mac) textbox = snack.Textbox(len(new_app), 1, new_app) form.add(textbox, 0, 0) form.draw() self.screen.refresh() time.sleep(2) self.screen.popWindow() def getNodename(self): if self.hostname is not None: return self.hostname else: return '%s-%d-%d' % (self.basename, self.cabinet, self.rank) def listenDHCP(self, line): tokens = line.split()[:-1] if len(tokens) > 9 and tokens[4] == 'dhcpd:' and \ (tokens[5] in ['DHCPDISCOVER', 'BOOTREQUEST']): Dev = tokens[9].replace(':','').strip() Mac = tokens[7].strip() # 在DHCPDISCOVER from macaddr via eth0,这里面的eth0 # 是指主节点开启了dhcpd的网卡名称,也是private网卡名称. # 但这并非是计算节点的网卡名称. self.sql.execute("""select networks.device from networks, subnets, nodes where subnets.name='%s' and nodes.name='%s' and networks.subnet=subnets.id and networks.node=nodes.id""" % ( self.subnet, self.sql.newdb.getFrontendName())) # 如果有需要排除的Mac地址则在这里配置. if Mac in self.excludeMacList: return # 如果不匹配主节点DHCP服务的网卡名称, subnet_dev = self.sql.fetchone()[0] if Dev != subnet_dev: return # 如果已经完成添加的mac地址,放弃这次请求. if not self.discover(Mac, Dev): return log.info('Discover New MAC: %s' % Mac) self.statusGUI() if self.maxNew > 0: self.maxNew -= 1 if self.maxNew == 0: raise InsertDone(_("Suggest Done")) # 自动增加主机名称的Rank号. self.rank = self.rank + 1 elif len(tokens) > 6 and tokens[4] == 'last' and \ tokens[5] == 'message' and tokens[6] == 'repeated': shortname = os.uname()[1].split('.')[0] if tokens[3] == shortname: os.system('/usr/bin/systemctl restart syslog >/dev/null 2>&1') def monitoring(self): # 监控日志 mslog = open('/var/log/messages', 'r') mslog.seek(0, 2) kslog = open('/var/log/httpd/access_log', 'r') kslog.seek(0, 2) self.screen.pushHelpLine( _(" Press to quit, press to force quit")) self.form.addHotKey('F8') self.form.addHotKey('F9') self.form.setTimer(1000) self.statusGUI() result = self.form.run() suggest_done = 0 done = 0 log.info('Monitoring Log: OK') while not done: # 监控系统日志中的dhcpd信息. syslog_line = mslog.readline() if syslog_line and not suggest_done: try: self.listenDHCP(syslog_line) except InsertDone: suggest_done = 1 except (sunhpc.core.utils.CommandError, InsertError) as msg: self.warningGUI(msg) continue # 监控日志中的pxelinux信息. access_line = kslog.readline() if access_line: try: self.listenKS(access_line) except InsertError as msg: self.warningGUI(msg) continue # result = self.form.run() done = self.checkDone(result, suggest_done) log.info('Restarting services status: %s' % self.restart_srvs) if self.restart_srvs: log.info('Start restart services ...') form = snack.GridForm(self.screen, _("Restarting Services"), 1, 1) message = _("Restarting Services...") textbox = snack.Textbox(len(message), 1, message) form.add(textbox, 0, 0) form.draw() self.screen.refresh() self.controller.done() self.screen.popWindow() mslog.close() self.endGUI() def listenKS(self, line): """Look in log line for a kickstart request.""" # Track accesses both with and without local certs. interesting = line.count('install/sbin/kickstart.cgi') \ or line.count('install/sbin/public/kickstart.cgi') \ or line.count('install/sbin/public/jumpstart.cgi') if not interesting: return fields = line.split() try: status = int(fields[8]) log.info('Kickstart Code: %s' % status) except: raise InsertError(_("Apache log file not well formed!")) nodeid = int(self.sql.getNodeId(fields[0])) self.sql.execute('select name from nodes where id=%d' % nodeid) try: name, = self.sql.fetchone() except: if status == 200: raise InsertError( _("Unknown node %s got a kickstart file!") % fields[0]) return log.info('Kickstart NodeID %s->%s' % (name, nodeid)) if name not in self.kickstarted: return log.info('Change KS Status %s->%s' % (name, status)) self.kickstarted[name] = status self.statusGUI() def checkDone(self, result, suggest_done): if result == 'TIMER' and not suggest_done: return 0 if result == 'F9': return 1 if not self.kickstartable: return 1 ok = 1 for status in self.kickstarted.values(): if status != 200: ok = 0 break if not ok: if result == 'F8': self.waitGUI() else: if suggest_done or result == 'F8': return 1 return 0 def distDone(self): if os.path.exists(self.dist_lockFile): self.warningGUI(_("Sunhpc distribution is not ready\n\n") + _("Please wait for 'sunhpc create distro' to complete\n")) return 0 return 1 def run(self): self.cmd = sunhpc.commands.Command(self.sql.newdb) try: self.cmd.command('check.services', []) log.info('Check services: OK') except sunhpc.core.utils.CommandError as err: sys.stderr.write('error - ' + str(err) + '\n') return # 开始启动界面 self.startGUI() log.info('Start Daemon GUI: OK') # make sure 'sunhpc create distro' is build finished. if not self.distDone(): self.endGUI() return self.controller.loadPlugins(self.sql) try: if self.remove: self.endGUI() self.controller.done() print ('Removed node %s' % self.replace) return # 初始化Member界面信息 self.membershipGUI() # 初始化Rank信息 self.initializeRank() if self.hostname: # 检查给与的主机名是否有效. self.checkHostNameValidity(self.hostname) except (sunhpc.core.utils.CommandError, InsertError) as msg: self.errorGUI(msg) self.endGUI() sys.stderr.write(_("%s\n") % str(msg)) return log.info('Start Monitoring ...') self.monitoring() class App(sunhpc.core.sql.Application): def __init__(self, argv=None): sunhpc.core.sql.Application.__init__(self, argv) if not argv: argv = sys.argv self.args = [] self.caller_args = argv[1:] self.usage_name = 'Kamaitachi' self.usage_version = '1.0.0' self.usage_command = os.path.basename(argv[0]) self.getopt = sunhpc.core.utils.Struct() # 短参数 self.getopt.s = [] # 长参数 self.getopt.l = [ ('help', 'display the command help infomation'), ('version', 'Display the sunhpc version') ] try: # unset our locale del os.environ['LANG'] except KeyError: pass self.dist = None self.doUpdate = 0 self.lockFile = '/var/lock/insert-ethers' self.insertor = InsertEthers(self) self.controller = ServiceController() self.ipIncrement = -1 self.doPublicMode = 0 self.getopt.l.extend([ ('remove=', 'remove an hostname') ]) def getArgs(self): return self.args def setArgs(self, list): self.args = list def getPrivateNet(self): net = self.getHostAttr('localhost', 'Kickstart_PrivateNetwork') mask = self.getHostAttr('localhost', 'Kickstart_PrivateNetmask') return "%s/%s" % (net, mask) def parseArgs(self, rcbase=None): """解析参数""" args = self.getArgs() # 设置参数 self.setArgs(self.caller_args) # 开始解析参数 self.parseCommandLine() def parseCommandLine(self): # 使用 getopt 类中的 parse函数解析命令行 # 解析短参数形式 short = '' for e in self.getopt.s: if type(e) == type(()): # 取参数左值 short = short + e[0] else: short = short + e # 解析长参数形式 long = [] for e in self.getopt.l: if type(e) == type(()): # 取参数左值 long.append(e[0]) else: long.append(e) try: opts, args = getopt.getopt(self.args, short, long) except getopt.GetoptError as msg: sys.stderr.write('error - %s\n' % msg) self.usage() sys.exit(1) for c in opts: self.parseArg(c) def parseArg(self, c): if c[0] in ('-h', '--help'): self.usage() sys.exit(0) elif c[0] in ('version', '--version'): print (self.getClusterVersion()) elif c[0] == '--remove': self.insertor.setRemove(c[1]) return 0 def getClusterVersion(self): return "SunhpcOS (%s) for version - %s" % ( self.cmd.release, self.cmd.version) def usage(self): argDict = {} for e in self.getopt.s: if type(e) == type(()): argDict['-%s' % e[0]] = e[1] else: argDict['-%s' % e] = '' for l in self.getopt.l: if type(l) == type(()): argDict['--%s' % l[0]] = l[1] else: argDict['--%s' % l] = '' if not argDict: return maxlen = max(map(len, argDict.keys())) print ('\nUsage: %s [options] command infomations' % self.usage_command) for k in argDict: keys = k.ljust(maxlen) vals = argDict[k] print (' %s\t%s' % (keys, vals)) print ('If you have any questions, please contact info@sunhpc.com') def run(self): self.connect() if os.path.isfile(self.lockFile): self.cmd.abort('lock file %s exists.' % self.lockFile) else: os.system('touch %s' % self.lockFile) if self.doPublicMode: self.insertor.runPublicOnly() else: self.insertor.run() self.cleanup() def cleanup(self): try: os.unlink(self.lockFile) except: pass if __name__ == "__main__": try: (width, heigh) = shutil.get_terminal_size() except: width = 80 os.environ['COLUMNS'] = str(width) log.info('starting insert-node ...') app = App(sys.argv) app.parseArgs() try: app.run() except Exception as msg: app.cleanup() if app.insertor and app.insertor.screen: app.insertor.endGUI() sys.stderr.write('error - ' + str(msg) + '\n') import traceback traceback.print_exc() sys.exit(1) finally: if os.path.exists(app.lockFile): os.unlink(app.lockFile)