summaryrefslogtreecommitdiffstats
path: root/sbin/insert-ethers
diff options
context:
space:
mode:
authorxiubuzhe <xiubuzhe@sina.com>2023-10-08 20:59:00 +0800
committerxiubuzhe <xiubuzhe@sina.com>2023-10-08 20:59:00 +0800
commit1dac2263372df2b85db5d029a45721fa158a5c9d (patch)
tree0365f9c57df04178a726d7584ca6a6b955a7ce6a /sbin/insert-ethers
parentb494be364bb39e1de128ada7dc576a729d99907e (diff)
downloadsunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.gz
sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.bz2
sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.zip
first add files
Diffstat (limited to 'sbin/insert-ethers')
-rwxr-xr-xsbin/insert-ethers742
1 files changed, 742 insertions, 0 deletions
diff --git a/sbin/insert-ethers b/sbin/insert-ethers
new file mode 100755
index 0000000..b15aad1
--- /dev/null
+++ b/sbin/insert-ethers
@@ -0,0 +1,742 @@
+#!/opt/sunpy3/bin/python3
+#coding:utf-8
+import os
+import sys
+import time
+import snack
+import getopt
+import syslog
+import sunhpc
+import signal
+import sqlite3
+import logging
+import sunhpc.invoke
+from sunhpc.core.utils import InsertError
+from sunhpc.core.utils import InsertDone
+from sunhpc.core.utils import DumpError
+
+logging.basicConfig(filename="/tmp/sunhpc.log", level=logging.INFO)
+log = logging.getLogger("Insertnodes ")
+
+try:
+ from rhpl.translate import _, N_
+ import rhpl.translate as translate
+ translate.textdomain('insert-ethers')
+except:
+ from gettext import gettext as _
+
+class ServiceController(object):
+ """Handler system services functions"""
+
+ def __init__(self):
+ self.services = {}
+ self.ignoreList = []
+
+ self.plugins = []
+ self.plugindir = os.path.abspath(
+ '/opt/sunhpc/var/plugins')
+
+ def igore(self, service):
+ if service not in self.ignoreList:
+ self.ignoreList.append(service)
+
+ def isIgnored(self, service):
+ return service in self.ignoreList
+
+ def restart(self, service):
+ for name in self.services[service]:
+ if service not in self.ignoreList:
+ eval('self.restart_%s()' % name)
+
+ def loadPlugins(self, app):
+ # load plug in /opt/sunhpc/var/plugins dirctorys
+ if not os.path.exists(self.plugindir):
+ return
+
+ # 将plugin目录添加到模块自动导入.
+ if self.plugindir not in sys.path:
+ sys.path.append(self.plugindir)
+
+ info = _("insert-ethers loading plugins: ")
+
+ # 只载入insertnodes相关的模块.
+ modlist = os.listdir(self.plugindir + '/insertnodes')
+ modlist.sort()
+ for f in modlist:
+ modname, ext = os.path.splitext(f)
+ if modname == '__init__' or \
+ modname == '__pycache__' or ext != '.py':
+ continue
+
+ info += "%s " % modname
+ mods = __import__('insertnodes.%s' % modname)
+ mod = getattr(mods, modname)
+ try:
+ # 导入这个模块中的Plugin类.
+ plugin_class = getattr(mod, 'Plugin')
+ # 将app类以参数形式传入这个类使用.
+ # p是这个Plugin类,包含其方法和函数.
+ p = plugin_class(app)
+ self.plugins.append(p)
+ except:
+ info += _("(invalid, skipping) ")
+
+ # 将模块导入信息输出到系统日志.
+ log.info('Load KS Plugins: %s' % info)
+ syslog.syslog(info)
+
+ def logError(self, o=''):
+ "Logs the last execption to syslog"
+ oops = "%s threw exception '%s'" % (o, sys.exc_info())
+ syslog.syslog(oops)
+
+ def added(self, nodename):
+ """Tell all plugins this node has been added"""
+ for p in self.plugins:
+ try:
+ p.added(nodename)
+ except:
+ self.logError(p)
+
+ def removed(self, nodename):
+ """Tell all plugins this node has been removed"""
+ for p in self.plugins:
+ try:
+ p.removed(nodename)
+ except:
+ self.logError(p)
+
+ def done(self):
+ """Tell all plugins we are finished"""
+ for p in self.plugins:
+ try:
+ p.done()
+ except:
+ self.logError(p)
+
+ def update(self):
+ """Tell all plugins we to reload"""
+ for p in self.plugins:
+ try:
+ p.update()
+ except:
+ self.logError(p)
+
+class GUI(object):
+ """Use the snack gui class"""
+
+ def __init__(self):
+ self.screen = None
+
+ def startGUI(self):
+ self.screen = snack.SnackScreen()
+
+ def endGUI(self):
+ self.screen.finish()
+
+ def errorGUI(self, message, l1=_("Quit"), l2=None):
+ return self.modalGUI(str(message), _("Error"), l1, l2)
+
+ def warningGUI(self, message, l1=_("OK"), l2=None):
+ return self.modalGUI(str(message), _("Warning"), l1, l2)
+
+ def infoGUI(self, message, l1=_("OK"), l2=None):
+ return self.modalGUI(str(message), _("Information"), l1, l2)
+
+ def modalGUI(self, message, title, l1, l2):
+ form = snack.GridForm(self.screen, title, 2, 2)
+
+ textbox = snack.TextboxReflowed(40, message)
+ form.add(textbox, 0, 0)
+ if not l2:
+ b1 = snack.Button(l1)
+ form.add(b1, 0, 1)
+ else:
+ b1 = snack.Button(l1)
+ b2 = snack.Button(l2)
+ form.add(b1, 0, 1)
+ form.add(b2, 1, 1)
+
+ if form.runOnce() == b1:
+ return 0
+ else:
+ return 1
+
+
+class InsertEthers(GUI):
+
+ def __init__(self, app):
+ super(InsertEthers, self).__init__()
+
+ self.sql = app
+ self.cmd = None
+ self.controller = ServiceController()
+ self.cabinet = 0
+ self.rank = -1
+ self.replace = ''
+ self.maxNew = -1
+ self.remove = 0
+ self.membership = None
+ self.basename = None
+ self.restart_srvs = 0
+ self.inserted = []
+ self.kickstarted = {}
+ self.excludeMacList = []
+ self.dist_lockFile = '/var/lock/sunhpc-dist'
+ self.osname = 'linux'
+
+
+ self.doRestart = 1
+ # 排除的mac地址
+ self.subnet = 'private' # Internal Network
+ self.hostname = None
+ self.kickstartable = True
+
+ def setMembershipName(self, membership_name):
+ self.membership = membership_name
+
+ def setRemove(self, host):
+ self.replace = host
+ self.remove = 1
+
+ def startGUI(self):
+
+ GUI.startGUI(self)
+ self.form = snack.GridForm(self.screen, _("Install the system using pxelinux"), 1, 1)
+
+ self.textbox = snack.Textbox(50, 4, "", scroll=1)
+ self.form.add(self.textbox, 0, 0)
+
+ self.screen.drawRootText(0, 0, _("SunHPC(%s) -- version %s") %
+ (self.sql.usage_name,
+ self.sql.usage_version))
+
+ self.screen.drawRootText(0, 1, _("Opened kickstart access to %s network") %
+ self.sql.getPrivateNet())
+
+ self.screen.pushHelpLine(' ')
+
+ def statusGUI(self):
+ """Updates the list of nodes in 'Inserted Appliances' windows"""
+ macs_n_names = ''
+ ks = ''
+ for (mac, name) in self.inserted:
+ if name not in self.kickstarted:
+ ks = ''
+ elif self.kickstarted[name] == 0:
+ ks = '( )'
+ elif self.kickstarted[name] == 200:
+ ks = '(*)'
+ else: # An error
+ ks = '(%s)' % self.kickstarted[name]
+ macs_n_names += '%s\t%s\t%s\n' % (mac, name, ks)
+
+ self.textbox.setText(_(macs_n_names))
+
+ self.form.draw()
+ self.screen.refresh()
+
+ def waitGUI(self):
+
+ not_done = ''
+ hosts = list(self.kickstarted.keys())
+ hosts.sort()
+ for name in hosts:
+ status = self.kickstarted[name]
+ if status != 200:
+ ks = '( )'
+ if status:
+ ks = '(%s)' % status
+ not_done += '%s \t %s\n' % (name, ks)
+
+ form = snack.GridForm(self.screen,
+ _("Not kickstarted, please wait..."), 1, 1)
+ textbox = snack.Textbox(35, 4, not_done, scroll=1)
+ form.add(textbox, 0,0)
+
+ form.draw()
+ self.screen.refresh()
+ time.sleep(1)
+ self.screen.popWindow()
+
+ def membershipGUI(self):
+ self.kickstartable = True
+ self.basename = 'compute'
+ self.setMembershipName(self.basename)
+
+ def initializeRank(self):
+ query = 'select rank,max(rank) from nodes where rack = %d group by rack' % (self.cabinet)
+
+ if self.sql.search(query) > 0:
+ (rank, max_rank) = self.sql.fetchone()
+ self.rank = max_rank + 1
+ else:
+ self.rank = 0
+
+ def getnextIP(self, subnet):
+
+ args = [ subnet ]
+ if self.sql.ipIncrement != -1:
+ args.append('increment=%d' % self.sql.ipIncrement)
+
+ text = self.cmd.command('report.nextip', args)
+ if len(text) == 0:
+ raise Exception("Unable to get next IP address")
+
+ return text.strip()
+
+ def addit(self, mac, nodename, ip):
+
+ self.cmd.command('add.host', [nodename, 'os=' + self.osname,
+ 'rack=' + str(self.cabinet), 'rank=' + str(self.rank)])
+
+ self.cmd.command('add.host.interface', [nodename, 'eth0',
+ 'ip=' + ip, 'mac=' + mac, 'subnet=' + self.subnet])
+
+ self.sql.commit()
+
+ self.controller.added(nodename)
+ self.restart_srvs = 1
+
+ self.sql.commit()
+
+ list = [(mac, nodename)]
+ list.extend(self.inserted)
+ self.inserted = list
+ self.kickstarted[nodename] = 0
+
+ def discover(self, mac, dev):
+ """如果存在数据库中返回真"""
+ retval = False
+ query = 'select mac from networks where mac="%s"' % (mac)
+ if not self.sql.search(query):
+ nodename = self.getNodename()
+ log.info('GetNodename: %s' % nodename)
+
+ ipaddr = self.getnextIP(self.subnet)
+ self.addit(mac, nodename, ipaddr)
+ log.info('Addit Host: %s/%s/%s' % (nodename, ipaddr, mac))
+ self.printDiscovered(mac)
+
+ retval = True
+ return retval
+
+ def printDiscovered(self, mac):
+
+ form = snack.GridForm(self.screen,
+ _("Discovered New Appliance"), 1, 1)
+
+ new_app = _("Discovered a new appliance with MAC (%s)") % (mac)
+ textbox = snack.Textbox(len(new_app), 1, new_app)
+ form.add(textbox, 0, 0)
+
+ form.draw()
+ self.screen.refresh()
+ time.sleep(2)
+ self.screen.popWindow()
+
+ def getNodename(self):
+ if self.hostname is not None:
+ return self.hostname
+ else:
+ return '%s-%d-%d' % (self.basename, self.cabinet, self.rank)
+
+ def listenDHCP(self, line):
+
+ tokens = line.split()[:-1]
+ if len(tokens) > 9 and tokens[4] == 'dhcpd:' and \
+ (tokens[5] in ['DHCPDISCOVER', 'BOOTREQUEST']):
+
+ Dev = tokens[9].replace(':','').strip()
+ Mac = tokens[7].strip()
+
+ # 在DHCPDISCOVER from macaddr via eth0,这里面的eth0
+ # 是指主节点开启了dhcpd的网卡名称,也是private网卡名称.
+ # 但这并非是计算节点的网卡名称.
+ self.sql.execute("""select networks.device from
+ networks, subnets, nodes where
+ subnets.name='%s' and nodes.name='%s' and
+ networks.subnet=subnets.id and networks.node=nodes.id""" % (
+ self.subnet, self.sql.newdb.getFrontendName()))
+
+ # 如果有需要排除的Mac地址则在这里配置.
+ if Mac in self.excludeMacList: return
+
+ # 如果不匹配主节点DHCP服务的网卡名称,
+ subnet_dev = self.sql.fetchone()[0]
+ if Dev != subnet_dev: return
+
+ # 如果已经完成添加的mac地址,放弃这次请求.
+ if not self.discover(Mac, Dev): return
+
+ log.info('Discover New MAC: %s' % Mac)
+ self.statusGUI()
+
+ if self.maxNew > 0:
+ self.maxNew -= 1
+ if self.maxNew == 0:
+ raise InsertDone(_("Suggest Done"))
+
+ # 自动增加主机名称的Rank号.
+ self.rank = self.rank + 1
+
+ elif len(tokens) > 6 and tokens[4] == 'last' and \
+ tokens[5] == 'message' and tokens[6] == 'repeated':
+
+ shortname = os.uname()[1].split('.')[0]
+ if tokens[3] == shortname:
+ os.system('/usr/bin/systemctl restart syslog >/dev/null 2>&1')
+
+ def monitoring(self):
+ # 监控日志
+ mslog = open('/var/log/messages', 'r')
+ mslog.seek(0, 2)
+
+ kslog = open('/var/log/httpd/access_log', 'r')
+ kslog.seek(0, 2)
+
+ self.screen.pushHelpLine(
+ _(" Press <F8> to quit, press <F9> to force quit"))
+ self.form.addHotKey('F8')
+ self.form.addHotKey('F9')
+ self.form.setTimer(1000)
+
+ self.statusGUI()
+
+ result = self.form.run()
+ suggest_done = 0
+ done = 0
+ log.info('Monitoring Log: OK')
+ while not done:
+
+ # 监控系统日志中的dhcpd信息.
+ syslog_line = mslog.readline()
+ if syslog_line and not suggest_done:
+ try:
+ self.listenDHCP(syslog_line)
+ except InsertDone:
+ suggest_done = 1
+
+ except (sunhpc.core.utils.CommandError, InsertError) as msg:
+ self.warningGUI(msg)
+ continue
+
+ # 监控日志中的pxelinux信息.
+ access_line = kslog.readline()
+ if access_line:
+ try:
+ self.listenKS(access_line)
+ except InsertError as msg:
+ self.warningGUI(msg)
+ continue
+ #
+ result = self.form.run()
+ done = self.checkDone(result, suggest_done)
+
+ log.info('Restarting services status: %s' % self.restart_srvs)
+ if self.restart_srvs:
+ log.info('Start restart services ...')
+ form = snack.GridForm(self.screen, _("Restarting Services"), 1, 1)
+ message = _("Restarting Services...")
+ textbox = snack.Textbox(len(message), 1, message)
+ form.add(textbox, 0, 0)
+ form.draw()
+
+ self.screen.refresh()
+ self.controller.done()
+ self.screen.popWindow()
+
+ mslog.close()
+ self.endGUI()
+
+ def listenKS(self, line):
+ """Look in log line for a kickstart request."""
+
+ # Track accesses both with and without local certs.
+ interesting = line.count('install/sbin/kickstart.cgi') \
+ or line.count('install/sbin/public/kickstart.cgi') \
+ or line.count('install/sbin/public/jumpstart.cgi')
+ if not interesting:
+ return
+
+ fields = line.split()
+ try:
+ status = int(fields[8])
+ log.info('Kickstart Code: %s' % status)
+ except:
+ raise InsertError(_("Apache log file not well formed!"))
+
+ nodeid = int(self.sql.getNodeId(fields[0]))
+ self.sql.execute('select name from nodes where id=%d' % nodeid)
+ try:
+ name, = self.sql.fetchone()
+ except:
+ if status == 200:
+ raise InsertError( _("Unknown node %s got a kickstart file!") % fields[0])
+ return
+
+ log.info('Kickstart NodeID %s->%s' % (name, nodeid))
+ if name not in self.kickstarted:
+ return
+
+ log.info('Change KS Status %s->%s' % (name, status))
+ self.kickstarted[name] = status
+ self.statusGUI()
+
+ def checkDone(self, result, suggest_done):
+
+ if result == 'TIMER' and not suggest_done:
+ return 0
+
+ if result == 'F9': return 1
+
+ if not self.kickstartable: return 1
+
+ ok = 1
+ for status in self.kickstarted.values():
+ if status != 200:
+ ok = 0
+ break
+
+ if not ok:
+ if result == 'F8':
+ self.waitGUI()
+ else:
+ if suggest_done or result == 'F8':
+ return 1
+ return 0
+
+ def distDone(self):
+ if os.path.exists(self.dist_lockFile):
+ self.warningGUI(_("Sunhpc distribution is not ready\n\n")
+ + _("Please wait for 'sunhpc create distro' to complete\n"))
+ return 0
+ return 1
+
+ def run(self):
+
+ self.cmd = sunhpc.commands.Command(self.sql.newdb)
+ try:
+ self.cmd.command('check.services', [])
+ log.info('Check services: OK')
+ except sunhpc.core.utils.CommandError as err:
+ sys.stderr.write('error - ' + str(err) + '\n')
+ return
+
+ # 开始启动界面
+ self.startGUI()
+ log.info('Start Daemon GUI: OK')
+
+ # make sure 'sunhpc create distro' is build finished.
+ if not self.distDone():
+ self.endGUI()
+ return
+
+ self.controller.loadPlugins(self.sql)
+ try:
+ if self.remove:
+ self.endGUI()
+ self.controller.done()
+ print ('Removed node %s' % self.replace)
+ return
+
+ # 初始化Member界面信息
+ self.membershipGUI()
+ # 初始化Rank信息
+ self.initializeRank()
+
+ if self.hostname:
+ # 检查给与的主机名是否有效.
+ self.checkHostNameValidity(self.hostname)
+
+ except (sunhpc.core.utils.CommandError, InsertError) as msg:
+ self.errorGUI(msg)
+ self.endGUI()
+ sys.stderr.write(_("%s\n") % str(msg))
+ return
+
+ log.info('Start Monitoring ...')
+ self.monitoring()
+
+class App(sunhpc.core.sql.Application):
+
+ def __init__(self, argv=None):
+ sunhpc.core.sql.Application.__init__(self, argv)
+
+ if not argv:
+ argv = sys.argv
+
+ self.args = []
+ self.caller_args = argv[1:]
+ self.usage_name = 'Kamaitachi'
+ self.usage_version = '1.0.0'
+ self.usage_command = os.path.basename(argv[0])
+
+ self.getopt = sunhpc.core.utils.Struct()
+ # 短参数
+ self.getopt.s = []
+ # 长参数
+ self.getopt.l = [ ('help', 'display the command help infomation'),
+ ('version', 'Display the sunhpc version')
+ ]
+
+ try:
+ # unset our locale
+ del os.environ['LANG']
+ except KeyError:
+ pass
+
+ self.dist = None
+ self.doUpdate = 0
+ self.lockFile = '/var/lock/insert-ethers'
+ self.insertor = InsertEthers(self)
+ self.controller = ServiceController()
+ self.ipIncrement = -1
+ self.doPublicMode = 0
+
+ self.getopt.l.extend([
+ ('remove=', 'remove an hostname')
+ ])
+
+ def getArgs(self):
+ return self.args
+
+ def setArgs(self, list):
+ self.args = list
+
+ def getPrivateNet(self):
+ net = self.getHostAttr('localhost', 'Kickstart_PrivateNetwork')
+ mask = self.getHostAttr('localhost', 'Kickstart_PrivateNetmask')
+ return "%s/%s" % (net, mask)
+
+ def parseArgs(self, rcbase=None):
+ """解析参数"""
+
+ args = self.getArgs()
+
+ # 设置参数
+ self.setArgs(self.caller_args)
+
+ # 开始解析参数
+ self.parseCommandLine()
+
+ def parseCommandLine(self):
+
+ # 使用 getopt 类中的 parse函数解析命令行
+
+ # 解析短参数形式
+ short = ''
+ for e in self.getopt.s:
+ if type(e) == type(()):
+ # 取参数左值
+ short = short + e[0]
+ else:
+ short = short + e
+
+ # 解析长参数形式
+ long = []
+ for e in self.getopt.l:
+ if type(e) == type(()):
+ # 取参数左值
+ long.append(e[0])
+ else:
+ long.append(e)
+
+ try:
+ opts, args = getopt.getopt(self.args, short, long)
+ except getopt.GetoptError as msg:
+ sys.stderr.write('error - %s\n' % msg)
+ self.usage()
+ sys.exit(1)
+
+ for c in opts:
+ self.parseArg(c)
+
+ def parseArg(self, c):
+
+ if c[0] in ('-h', '--help'):
+ self.usage()
+ sys.exit(0)
+
+ elif c[0] in ('version', '--version'):
+ print (self.getClusterVersion())
+ elif c[0] == '--remove':
+ self.insertor.setRemove(c[1])
+ return 0
+
+ def getClusterVersion(self):
+ return "SunhpcOS (%s) for version - %s" % (
+ self.cmd.release, self.cmd.version)
+
+ def usage(self):
+ argDict = {}
+ for e in self.getopt.s:
+ if type(e) == type(()):
+ argDict['-%s' % e[0]] = e[1]
+ else:
+ argDict['-%s' % e] = ''
+
+ for l in self.getopt.l:
+ if type(l) == type(()):
+ argDict['--%s' % l[0]] = l[1]
+ else:
+ argDict['--%s' % l] = ''
+
+ if not argDict: return
+ maxlen = max(map(len, argDict.keys()))
+ print ('\nUsage: %s [options] command infomations' % self.usage_command)
+ for k in argDict:
+ keys = k.ljust(maxlen)
+ vals = argDict[k]
+ print (' %s\t%s' % (keys, vals))
+ print ('If you have any questions, please contact info@sunhpc.com')
+
+ def run(self):
+ self.connect()
+
+ if os.path.isfile(self.lockFile):
+ self.cmd.abort('lock file %s exists.' % self.lockFile)
+ else:
+ os.system('touch %s' % self.lockFile)
+
+ if self.doPublicMode:
+ self.insertor.runPublicOnly()
+ else:
+ self.insertor.run()
+
+ self.cleanup()
+
+ def cleanup(self):
+ try:
+ os.unlink(self.lockFile)
+ except:
+ pass
+
+if __name__ == "__main__":
+
+ try:
+ (width, heigh) = shutil.get_terminal_size()
+ except:
+ width = 80
+ os.environ['COLUMNS'] = str(width)
+ log.info('starting insert-node ...')
+
+ app = App(sys.argv)
+ app.parseArgs()
+ try:
+ app.run()
+ except Exception as msg:
+ app.cleanup()
+ if app.insertor and app.insertor.screen:
+ app.insertor.endGUI()
+ sys.stderr.write('error - ' + str(msg) + '\n')
+ import traceback
+ traceback.print_exc()
+ sys.exit(1)
+
+ finally:
+ if os.path.exists(app.lockFile):
+ os.unlink(app.lockFile)
+
+
+