Files
sunhpc/sbin/insert-ethers
2023-10-08 20:59:00 +08:00

743 lines
22 KiB
Python
Executable File

#!/opt/sunpy3/bin/python3
#coding:utf-8
import os
import sys
import time
import snack
import getopt
import syslog
import sunhpc
import signal
import sqlite3
import logging
import sunhpc.invoke
from sunhpc.core.utils import InsertError
from sunhpc.core.utils import InsertDone
from sunhpc.core.utils import DumpError
logging.basicConfig(filename="/tmp/sunhpc.log", level=logging.INFO)
log = logging.getLogger("Insertnodes ")
try:
from rhpl.translate import _, N_
import rhpl.translate as translate
translate.textdomain('insert-ethers')
except:
from gettext import gettext as _
class ServiceController(object):
"""Handler system services functions"""
def __init__(self):
self.services = {}
self.ignoreList = []
self.plugins = []
self.plugindir = os.path.abspath(
'/opt/sunhpc/var/plugins')
def igore(self, service):
if service not in self.ignoreList:
self.ignoreList.append(service)
def isIgnored(self, service):
return service in self.ignoreList
def restart(self, service):
for name in self.services[service]:
if service not in self.ignoreList:
eval('self.restart_%s()' % name)
def loadPlugins(self, app):
# load plug in /opt/sunhpc/var/plugins dirctorys
if not os.path.exists(self.plugindir):
return
# 将plugin目录添加到模块自动导入.
if self.plugindir not in sys.path:
sys.path.append(self.plugindir)
info = _("insert-ethers loading plugins: ")
# 只载入insertnodes相关的模块.
modlist = os.listdir(self.plugindir + '/insertnodes')
modlist.sort()
for f in modlist:
modname, ext = os.path.splitext(f)
if modname == '__init__' or \
modname == '__pycache__' or ext != '.py':
continue
info += "%s " % modname
mods = __import__('insertnodes.%s' % modname)
mod = getattr(mods, modname)
try:
# 导入这个模块中的Plugin类.
plugin_class = getattr(mod, 'Plugin')
# 将app类以参数形式传入这个类使用.
# p是这个Plugin类,包含其方法和函数.
p = plugin_class(app)
self.plugins.append(p)
except:
info += _("(invalid, skipping) ")
# 将模块导入信息输出到系统日志.
log.info('Load KS Plugins: %s' % info)
syslog.syslog(info)
def logError(self, o=''):
"Logs the last execption to syslog"
oops = "%s threw exception '%s'" % (o, sys.exc_info())
syslog.syslog(oops)
def added(self, nodename):
"""Tell all plugins this node has been added"""
for p in self.plugins:
try:
p.added(nodename)
except:
self.logError(p)
def removed(self, nodename):
"""Tell all plugins this node has been removed"""
for p in self.plugins:
try:
p.removed(nodename)
except:
self.logError(p)
def done(self):
"""Tell all plugins we are finished"""
for p in self.plugins:
try:
p.done()
except:
self.logError(p)
def update(self):
"""Tell all plugins we to reload"""
for p in self.plugins:
try:
p.update()
except:
self.logError(p)
class GUI(object):
"""Use the snack gui class"""
def __init__(self):
self.screen = None
def startGUI(self):
self.screen = snack.SnackScreen()
def endGUI(self):
self.screen.finish()
def errorGUI(self, message, l1=_("Quit"), l2=None):
return self.modalGUI(str(message), _("Error"), l1, l2)
def warningGUI(self, message, l1=_("OK"), l2=None):
return self.modalGUI(str(message), _("Warning"), l1, l2)
def infoGUI(self, message, l1=_("OK"), l2=None):
return self.modalGUI(str(message), _("Information"), l1, l2)
def modalGUI(self, message, title, l1, l2):
form = snack.GridForm(self.screen, title, 2, 2)
textbox = snack.TextboxReflowed(40, message)
form.add(textbox, 0, 0)
if not l2:
b1 = snack.Button(l1)
form.add(b1, 0, 1)
else:
b1 = snack.Button(l1)
b2 = snack.Button(l2)
form.add(b1, 0, 1)
form.add(b2, 1, 1)
if form.runOnce() == b1:
return 0
else:
return 1
class InsertEthers(GUI):
def __init__(self, app):
super(InsertEthers, self).__init__()
self.sql = app
self.cmd = None
self.controller = ServiceController()
self.cabinet = 0
self.rank = -1
self.replace = ''
self.maxNew = -1
self.remove = 0
self.membership = None
self.basename = None
self.restart_srvs = 0
self.inserted = []
self.kickstarted = {}
self.excludeMacList = []
self.dist_lockFile = '/var/lock/sunhpc-dist'
self.osname = 'linux'
self.doRestart = 1
# 排除的mac地址
self.subnet = 'private' # Internal Network
self.hostname = None
self.kickstartable = True
def setMembershipName(self, membership_name):
self.membership = membership_name
def setRemove(self, host):
self.replace = host
self.remove = 1
def startGUI(self):
GUI.startGUI(self)
self.form = snack.GridForm(self.screen, _("Install the system using pxelinux"), 1, 1)
self.textbox = snack.Textbox(50, 4, "", scroll=1)
self.form.add(self.textbox, 0, 0)
self.screen.drawRootText(0, 0, _("SunHPC(%s) -- version %s") %
(self.sql.usage_name,
self.sql.usage_version))
self.screen.drawRootText(0, 1, _("Opened kickstart access to %s network") %
self.sql.getPrivateNet())
self.screen.pushHelpLine(' ')
def statusGUI(self):
"""Updates the list of nodes in 'Inserted Appliances' windows"""
macs_n_names = ''
ks = ''
for (mac, name) in self.inserted:
if name not in self.kickstarted:
ks = ''
elif self.kickstarted[name] == 0:
ks = '( )'
elif self.kickstarted[name] == 200:
ks = '(*)'
else: # An error
ks = '(%s)' % self.kickstarted[name]
macs_n_names += '%s\t%s\t%s\n' % (mac, name, ks)
self.textbox.setText(_(macs_n_names))
self.form.draw()
self.screen.refresh()
def waitGUI(self):
not_done = ''
hosts = list(self.kickstarted.keys())
hosts.sort()
for name in hosts:
status = self.kickstarted[name]
if status != 200:
ks = '( )'
if status:
ks = '(%s)' % status
not_done += '%s \t %s\n' % (name, ks)
form = snack.GridForm(self.screen,
_("Not kickstarted, please wait..."), 1, 1)
textbox = snack.Textbox(35, 4, not_done, scroll=1)
form.add(textbox, 0,0)
form.draw()
self.screen.refresh()
time.sleep(1)
self.screen.popWindow()
def membershipGUI(self):
self.kickstartable = True
self.basename = 'compute'
self.setMembershipName(self.basename)
def initializeRank(self):
query = 'select rank,max(rank) from nodes where rack = %d group by rack' % (self.cabinet)
if self.sql.search(query) > 0:
(rank, max_rank) = self.sql.fetchone()
self.rank = max_rank + 1
else:
self.rank = 0
def getnextIP(self, subnet):
args = [ subnet ]
if self.sql.ipIncrement != -1:
args.append('increment=%d' % self.sql.ipIncrement)
text = self.cmd.command('report.nextip', args)
if len(text) == 0:
raise Exception("Unable to get next IP address")
return text.strip()
def addit(self, mac, nodename, ip):
self.cmd.command('add.host', [nodename, 'os=' + self.osname,
'rack=' + str(self.cabinet), 'rank=' + str(self.rank)])
self.cmd.command('add.host.interface', [nodename, 'eth0',
'ip=' + ip, 'mac=' + mac, 'subnet=' + self.subnet])
self.sql.commit()
self.controller.added(nodename)
self.restart_srvs = 1
self.sql.commit()
list = [(mac, nodename)]
list.extend(self.inserted)
self.inserted = list
self.kickstarted[nodename] = 0
def discover(self, mac, dev):
"""如果存在数据库中返回真"""
retval = False
query = 'select mac from networks where mac="%s"' % (mac)
if not self.sql.search(query):
nodename = self.getNodename()
log.info('GetNodename: %s' % nodename)
ipaddr = self.getnextIP(self.subnet)
self.addit(mac, nodename, ipaddr)
log.info('Addit Host: %s/%s/%s' % (nodename, ipaddr, mac))
self.printDiscovered(mac)
retval = True
return retval
def printDiscovered(self, mac):
form = snack.GridForm(self.screen,
_("Discovered New Appliance"), 1, 1)
new_app = _("Discovered a new appliance with MAC (%s)") % (mac)
textbox = snack.Textbox(len(new_app), 1, new_app)
form.add(textbox, 0, 0)
form.draw()
self.screen.refresh()
time.sleep(2)
self.screen.popWindow()
def getNodename(self):
if self.hostname is not None:
return self.hostname
else:
return '%s-%d-%d' % (self.basename, self.cabinet, self.rank)
def listenDHCP(self, line):
tokens = line.split()[:-1]
if len(tokens) > 9 and tokens[4] == 'dhcpd:' and \
(tokens[5] in ['DHCPDISCOVER', 'BOOTREQUEST']):
Dev = tokens[9].replace(':','').strip()
Mac = tokens[7].strip()
# 在DHCPDISCOVER from macaddr via eth0,这里面的eth0
# 是指主节点开启了dhcpd的网卡名称,也是private网卡名称.
# 但这并非是计算节点的网卡名称.
self.sql.execute("""select networks.device from
networks, subnets, nodes where
subnets.name='%s' and nodes.name='%s' and
networks.subnet=subnets.id and networks.node=nodes.id""" % (
self.subnet, self.sql.newdb.getFrontendName()))
# 如果有需要排除的Mac地址则在这里配置.
if Mac in self.excludeMacList: return
# 如果不匹配主节点DHCP服务的网卡名称,
subnet_dev = self.sql.fetchone()[0]
if Dev != subnet_dev: return
# 如果已经完成添加的mac地址,放弃这次请求.
if not self.discover(Mac, Dev): return
log.info('Discover New MAC: %s' % Mac)
self.statusGUI()
if self.maxNew > 0:
self.maxNew -= 1
if self.maxNew == 0:
raise InsertDone(_("Suggest Done"))
# 自动增加主机名称的Rank号.
self.rank = self.rank + 1
elif len(tokens) > 6 and tokens[4] == 'last' and \
tokens[5] == 'message' and tokens[6] == 'repeated':
shortname = os.uname()[1].split('.')[0]
if tokens[3] == shortname:
os.system('/usr/bin/systemctl restart syslog >/dev/null 2>&1')
def monitoring(self):
# 监控日志
mslog = open('/var/log/messages', 'r')
mslog.seek(0, 2)
kslog = open('/var/log/httpd/access_log', 'r')
kslog.seek(0, 2)
self.screen.pushHelpLine(
_(" Press <F8> to quit, press <F9> to force quit"))
self.form.addHotKey('F8')
self.form.addHotKey('F9')
self.form.setTimer(1000)
self.statusGUI()
result = self.form.run()
suggest_done = 0
done = 0
log.info('Monitoring Log: OK')
while not done:
# 监控系统日志中的dhcpd信息.
syslog_line = mslog.readline()
if syslog_line and not suggest_done:
try:
self.listenDHCP(syslog_line)
except InsertDone:
suggest_done = 1
except (sunhpc.core.utils.CommandError, InsertError) as msg:
self.warningGUI(msg)
continue
# 监控日志中的pxelinux信息.
access_line = kslog.readline()
if access_line:
try:
self.listenKS(access_line)
except InsertError as msg:
self.warningGUI(msg)
continue
#
result = self.form.run()
done = self.checkDone(result, suggest_done)
log.info('Restarting services status: %s' % self.restart_srvs)
if self.restart_srvs:
log.info('Start restart services ...')
form = snack.GridForm(self.screen, _("Restarting Services"), 1, 1)
message = _("Restarting Services...")
textbox = snack.Textbox(len(message), 1, message)
form.add(textbox, 0, 0)
form.draw()
self.screen.refresh()
self.controller.done()
self.screen.popWindow()
mslog.close()
self.endGUI()
def listenKS(self, line):
"""Look in log line for a kickstart request."""
# Track accesses both with and without local certs.
interesting = line.count('install/sbin/kickstart.cgi') \
or line.count('install/sbin/public/kickstart.cgi') \
or line.count('install/sbin/public/jumpstart.cgi')
if not interesting:
return
fields = line.split()
try:
status = int(fields[8])
log.info('Kickstart Code: %s' % status)
except:
raise InsertError(_("Apache log file not well formed!"))
nodeid = int(self.sql.getNodeId(fields[0]))
self.sql.execute('select name from nodes where id=%d' % nodeid)
try:
name, = self.sql.fetchone()
except:
if status == 200:
raise InsertError( _("Unknown node %s got a kickstart file!") % fields[0])
return
log.info('Kickstart NodeID %s->%s' % (name, nodeid))
if name not in self.kickstarted:
return
log.info('Change KS Status %s->%s' % (name, status))
self.kickstarted[name] = status
self.statusGUI()
def checkDone(self, result, suggest_done):
if result == 'TIMER' and not suggest_done:
return 0
if result == 'F9': return 1
if not self.kickstartable: return 1
ok = 1
for status in self.kickstarted.values():
if status != 200:
ok = 0
break
if not ok:
if result == 'F8':
self.waitGUI()
else:
if suggest_done or result == 'F8':
return 1
return 0
def distDone(self):
if os.path.exists(self.dist_lockFile):
self.warningGUI(_("Sunhpc distribution is not ready\n\n")
+ _("Please wait for 'sunhpc create distro' to complete\n"))
return 0
return 1
def run(self):
self.cmd = sunhpc.commands.Command(self.sql.newdb)
try:
self.cmd.command('check.services', [])
log.info('Check services: OK')
except sunhpc.core.utils.CommandError as err:
sys.stderr.write('error - ' + str(err) + '\n')
return
# 开始启动界面
self.startGUI()
log.info('Start Daemon GUI: OK')
# make sure 'sunhpc create distro' is build finished.
if not self.distDone():
self.endGUI()
return
self.controller.loadPlugins(self.sql)
try:
if self.remove:
self.endGUI()
self.controller.done()
print ('Removed node %s' % self.replace)
return
# 初始化Member界面信息
self.membershipGUI()
# 初始化Rank信息
self.initializeRank()
if self.hostname:
# 检查给与的主机名是否有效.
self.checkHostNameValidity(self.hostname)
except (sunhpc.core.utils.CommandError, InsertError) as msg:
self.errorGUI(msg)
self.endGUI()
sys.stderr.write(_("%s\n") % str(msg))
return
log.info('Start Monitoring ...')
self.monitoring()
class App(sunhpc.core.sql.Application):
def __init__(self, argv=None):
sunhpc.core.sql.Application.__init__(self, argv)
if not argv:
argv = sys.argv
self.args = []
self.caller_args = argv[1:]
self.usage_name = 'Kamaitachi'
self.usage_version = '1.0.0'
self.usage_command = os.path.basename(argv[0])
self.getopt = sunhpc.core.utils.Struct()
# 短参数
self.getopt.s = []
# 长参数
self.getopt.l = [ ('help', 'display the command help infomation'),
('version', 'Display the sunhpc version')
]
try:
# unset our locale
del os.environ['LANG']
except KeyError:
pass
self.dist = None
self.doUpdate = 0
self.lockFile = '/var/lock/insert-ethers'
self.insertor = InsertEthers(self)
self.controller = ServiceController()
self.ipIncrement = -1
self.doPublicMode = 0
self.getopt.l.extend([
('remove=', 'remove an hostname')
])
def getArgs(self):
return self.args
def setArgs(self, list):
self.args = list
def getPrivateNet(self):
net = self.getHostAttr('localhost', 'Kickstart_PrivateNetwork')
mask = self.getHostAttr('localhost', 'Kickstart_PrivateNetmask')
return "%s/%s" % (net, mask)
def parseArgs(self, rcbase=None):
"""解析参数"""
args = self.getArgs()
# 设置参数
self.setArgs(self.caller_args)
# 开始解析参数
self.parseCommandLine()
def parseCommandLine(self):
# 使用 getopt 类中的 parse函数解析命令行
# 解析短参数形式
short = ''
for e in self.getopt.s:
if type(e) == type(()):
# 取参数左值
short = short + e[0]
else:
short = short + e
# 解析长参数形式
long = []
for e in self.getopt.l:
if type(e) == type(()):
# 取参数左值
long.append(e[0])
else:
long.append(e)
try:
opts, args = getopt.getopt(self.args, short, long)
except getopt.GetoptError as msg:
sys.stderr.write('error - %s\n' % msg)
self.usage()
sys.exit(1)
for c in opts:
self.parseArg(c)
def parseArg(self, c):
if c[0] in ('-h', '--help'):
self.usage()
sys.exit(0)
elif c[0] in ('version', '--version'):
print (self.getClusterVersion())
elif c[0] == '--remove':
self.insertor.setRemove(c[1])
return 0
def getClusterVersion(self):
return "SunhpcOS (%s) for version - %s" % (
self.cmd.release, self.cmd.version)
def usage(self):
argDict = {}
for e in self.getopt.s:
if type(e) == type(()):
argDict['-%s' % e[0]] = e[1]
else:
argDict['-%s' % e] = ''
for l in self.getopt.l:
if type(l) == type(()):
argDict['--%s' % l[0]] = l[1]
else:
argDict['--%s' % l] = ''
if not argDict: return
maxlen = max(map(len, argDict.keys()))
print ('\nUsage: %s [options] command infomations' % self.usage_command)
for k in argDict:
keys = k.ljust(maxlen)
vals = argDict[k]
print (' %s\t%s' % (keys, vals))
print ('If you have any questions, please contact info@sunhpc.com')
def run(self):
self.connect()
if os.path.isfile(self.lockFile):
self.cmd.abort('lock file %s exists.' % self.lockFile)
else:
os.system('touch %s' % self.lockFile)
if self.doPublicMode:
self.insertor.runPublicOnly()
else:
self.insertor.run()
self.cleanup()
def cleanup(self):
try:
os.unlink(self.lockFile)
except:
pass
if __name__ == "__main__":
try:
(width, heigh) = shutil.get_terminal_size()
except:
width = 80
os.environ['COLUMNS'] = str(width)
log.info('starting insert-node ...')
app = App(sys.argv)
app.parseArgs()
try:
app.run()
except Exception as msg:
app.cleanup()
if app.insertor and app.insertor.screen:
app.insertor.endGUI()
sys.stderr.write('error - ' + str(msg) + '\n')
import traceback
traceback.print_exc()
sys.exit(1)
finally:
if os.path.exists(app.lockFile):
os.unlink(app.lockFile)