first add files

This commit is contained in:
2023-10-08 20:59:00 +08:00
parent b494be364b
commit 1dac226337
991 changed files with 368151 additions and 40 deletions

14
sbin/build-sunhpc Executable file
View File

@@ -0,0 +1,14 @@
#!/bin/bash
# Bootstrap the SunHPC cluster database:
#   first  : initialize the sunhpc database
#   second : add the frontend node's data to the database.
#
# stderr of the two "add" commands is collected in $LOGFILE; the first
# command truncates the log, the second appends to it.
SUNHPC=/opt/sunhpc/bin/sunhpc
LOGFILE=/tmp/sunhpc-initdb.log

# initialize database
/opt/sunhpc/sbin/init-sunhpcDB

# add frontend node to database
MYNAME=$(hostname -s)
# Expansions are quoted so an unusual host name cannot be word-split.
"$SUNHPC" add host "$MYNAME" rack=0 rank=1 member=server 2> "$LOGFILE"
"$SUNHPC" add catindex "$MYNAME" category=host 2>> "$LOGFILE"

49
sbin/calcrollmd5 Executable file
View File

@@ -0,0 +1,49 @@
#!/usr/bin/python
#coding:utf-8
import os
import sys
import hashlib
def GetFileMD5(filename):
    """Return the hexadecimal MD5 digest of the file named *filename*.

    The file is opened in binary mode and consumed in 8096-byte chunks, so
    the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty bytes object, i.e. end of file.
        for chunk in iter(lambda: stream.read(8096), b''):
            digest.update(chunk)
    return digest.hexdigest()
def running(path):
    """Collect one "md5 roll-version-arch relative/rpm/path" line per RPM.

    Walks *path* looking for directories whose path ends in ``RedHat`` and
    that sit at ``.../<roll>/<version>/<arch>/RedHat``.  Every regular file
    in the ``RPMS`` sub-directory is hashed with GetFileMD5().

    The recorded file name stays relative
    (``<roll>/<version>/<arch>/RedHat/RPMS/<file>``), but the file is read
    through the walked directory itself, so the result no longer depends on
    the current working directory.

    Returns a list of strings; entries within one RPMS directory are sorted
    by file name.
    """
    rpmlist = []
    for root, dirs, files in os.walk(path):
        if not root.endswith("RedHat"):
            continue
        parts = root.split(os.path.sep)
        if len(parts) < 4:
            # Too shallow to contain <roll>/<version>/<arch>/RedHat.
            continue
        (roll, version, arch) = parts[-4:-1]
        rollname = "%s/%s/%s" % (roll, version, arch)
        rollvers = "%s-%s-%s" % (roll, version, arch)
        # Relative name written to the list vs. real location on disk.
        baseDirs = os.path.join(rollname, 'RedHat', 'RPMS')
        realDirs = os.path.join(root, 'RPMS')
        if not os.path.isdir(realDirs):
            continue
        for f in sorted(os.listdir(realDirs)):
            realname = os.path.join(realDirs, f)
            if not os.path.isfile(realname):
                # Sub-directories cannot be hashed; skip them.
                continue
            rpmname = os.path.join(baseDirs, f)
            md5sum = GetFileMD5(realname)
            rpmlist.append("%s %s %s" % (md5sum, rollvers, rpmname))
    return rpmlist
if __name__ == "__main__":
    # Usage: calcrollmd5 <path>
    # Writes <path>/rollslist with one line per RPM found by running().
    args = sys.argv
    if len(args) < 2:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(" - Must to be supply an path.")
        # NOTE(review): exit status 0 on a usage error is kept for
        # compatibility with existing callers.
        sys.exit(0)
    path = args[1]
    # The filesystem root is refused explicitly.
    if os.path.isdir(path) and path != '/':
        path = os.path.abspath(path)
        rpmlist = running(path)
        rfile = os.path.join(path, 'rollslist')
        with open(rfile, 'w') as f:
            f.write('\n'.join(rpmlist))
            f.write('\n')
    else:
        # Previously this case was a silent no-op.
        sys.stderr.write(" - Not a usable directory: %s\n" % path)

10
sbin/gen_root_pw Executable file
View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python3
import crypt
import string
import random
def get_pw():
    """Return the crypt(3) hash of a freshly generated random password.

    The plaintext is discarded; only the hash is returned.  The plaintext
    is drawn from the operating system's CSPRNG via ``secrets`` instead of
    the predictable ``random.random()`` generator.

    NOTE(review): the ``crypt`` module is deprecated since Python 3.11 and
    removed in 3.13; this script needs an interpreter that still ships it.
    """
    # Local import keeps the file's top-level import block untouched.
    import secrets
    pw = secrets.token_urlsafe(32)
    return crypt.crypt(pw)


if __name__ == '__main__':
    print(get_pw())

742
sbin/insert-ethers Executable file
View File

@@ -0,0 +1,742 @@
#!/opt/sunpy3/bin/python3
#coding:utf-8
import os
import sys
import time
import snack
import getopt
import syslog
import sunhpc
import signal
import sqlite3
import logging
import sunhpc.invoke
from sunhpc.core.utils import InsertError
from sunhpc.core.utils import InsertDone
from sunhpc.core.utils import DumpError
# All log records of this tool go to /tmp/sunhpc.log at INFO level.
logging.basicConfig(filename="/tmp/sunhpc.log", level=logging.INFO)
log = logging.getLogger("Insertnodes ")

# Prefer the rhpl translation helpers when they are installed; otherwise
# fall back to the standard gettext function so that _() always exists.
try:
    from rhpl.translate import _, N_
    import rhpl.translate as translate
    translate.textdomain('insert-ethers')
except Exception:
    # "except Exception" (not a bare except) so that KeyboardInterrupt and
    # SystemExit are not swallowed here.
    from gettext import gettext as _
class ServiceController(object):
    """Restart services and fan node events out to insert-ethers plugins.

    Plugins are Python modules found in ``<plugindir>/insertnodes``; each
    must expose a ``Plugin`` class that is instantiated with the
    application object.  Plugin methods ``added``, ``removed``, ``done``
    and ``update`` are called on the matching events; an exception raised
    by one plugin is logged to syslog and does not stop the others.
    """

    def __init__(self):
        # service name -> list of handler suffixes; restart() calls
        # self.restart_<suffix>() for each entry.
        self.services = {}
        # Services that restart() must leave alone.
        self.ignoreList = []
        # Instantiated plugin objects, in load order.
        self.plugins = []
        self.plugindir = os.path.abspath(
            '/opt/sunhpc/var/plugins')

    def igore(self, service):
        """Add *service* to the ignore list (no-op if already present).

        The misspelled name is kept for existing callers; ``ignore`` is a
        correctly spelled alias.
        """
        if service not in self.ignoreList:
            self.ignoreList.append(service)

    ignore = igore

    def isIgnored(self, service):
        """Return True when *service* is on the ignore list."""
        return service in self.ignoreList

    def restart(self, service):
        """Call every ``restart_<name>`` handler registered for *service*.

        Raises KeyError for a service missing from ``self.services`` and
        AttributeError when a registered handler method does not exist.
        Does nothing when the service is ignored.
        """
        handlers = self.services[service]
        if service in self.ignoreList:
            return
        for name in handlers:
            # Plain attribute lookup instead of eval() on a built string.
            getattr(self, 'restart_%s' % name)()

    def loadPlugins(self, app):
        """Import and instantiate every plugin module, passing it *app*.

        A module that cannot be imported, has no ``Plugin`` class, or whose
        constructor fails is reported as "(invalid, skipping)" and skipped.
        The summary line is written to the log file and to syslog.
        """
        # Load plugins from the /opt/sunhpc/var/plugins directory.
        if not os.path.exists(self.plugindir):
            return

        # Make the plugin directory importable.
        if self.plugindir not in sys.path:
            sys.path.append(self.plugindir)

        info = _("insert-ethers loading plugins: ")

        # Only the insertnodes plugins are loaded here.
        moddir = os.path.join(self.plugindir, 'insertnodes')
        modlist = sorted(os.listdir(moddir)) if os.path.isdir(moddir) else []
        for f in modlist:
            modname, ext = os.path.splitext(f)
            if modname == '__init__' or \
                    modname == '__pycache__' or ext != '.py':
                continue

            info += "%s " % modname
            try:
                mods = __import__('insertnodes.%s' % modname)
                mod = getattr(mods, modname)
                # Fetch the module's Plugin class and hand it the app.
                plugin_class = getattr(mod, 'Plugin')
                self.plugins.append(plugin_class(app))
            except Exception:
                info += _("(invalid, skipping) ")

        # Report what was loaded.
        log.info('Load KS Plugins: %s' % info)
        syslog.syslog(info)

    def logError(self, o=''):
        """Log the exception currently being handled to syslog.

        *o* identifies the object that raised (usually a plugin).
        """
        oops = "%s threw exception '%s'" % (o, sys.exc_info())
        syslog.syslog(oops)

    def _notify(self, event, *args):
        """Call method *event* with *args* on every plugin, logging failures."""
        for p in self.plugins:
            try:
                getattr(p, event)(*args)
            except Exception:
                self.logError(p)

    def added(self, nodename):
        """Tell all plugins this node has been added"""
        self._notify('added', nodename)

    def removed(self, nodename):
        """Tell all plugins this node has been removed"""
        self._notify('removed', nodename)

    def done(self):
        """Tell all plugins we are finished"""
        self._notify('done')

    def update(self):
        """Tell all plugins to reload"""
        self._notify('update')
class GUI(object):
    """Thin helper around a snack screen offering modal message dialogs."""

    def __init__(self):
        # No screen exists until startGUI() is called.
        self.screen = None

    def startGUI(self):
        """Create the snack screen."""
        self.screen = snack.SnackScreen()

    def endGUI(self):
        """Shut the snack screen down."""
        self.screen.finish()

    def errorGUI(self, message, l1=_("Quit"), l2=None):
        """Show *message* in an "Error" dialog; see modalGUI() for the result."""
        text = str(message)
        return self.modalGUI(text, _("Error"), l1, l2)

    def warningGUI(self, message, l1=_("OK"), l2=None):
        """Show *message* in a "Warning" dialog; see modalGUI() for the result."""
        text = str(message)
        return self.modalGUI(text, _("Warning"), l1, l2)

    def infoGUI(self, message, l1=_("OK"), l2=None):
        """Show *message* in an "Information" dialog; see modalGUI() for the result."""
        text = str(message)
        return self.modalGUI(text, _("Information"), l1, l2)

    def modalGUI(self, message, title, l1, l2):
        """Run a one-shot dialog with one or two buttons.

        *l1* labels the first button; *l2*, when truthy, labels a second
        button placed to its right.  Returns 0 when the first button ended
        the dialog and 1 otherwise.
        """
        dialog = snack.GridForm(self.screen, title, 2, 2)
        dialog.add(snack.TextboxReflowed(40, message), 0, 0)

        first = snack.Button(l1)
        dialog.add(first, 0, 1)
        if l2:
            second = snack.Button(l2)
            dialog.add(second, 1, 1)

        return 0 if dialog.runOnce() == first else 1
class InsertEthers(GUI):
    """Interactive node-discovery loop for PXE/kickstart installs.

    Follows /var/log/messages for DHCP requests from MAC addresses that are
    not yet in the database, registers each one as a new host through the
    sunhpc command layer, and follows the Apache access log to display the
    kickstart status of every host inserted during this session.
    """

    def __init__(self, app):
        super(InsertEthers, self).__init__()
        self.sql = app                  # application object; also the DB handle
        self.cmd = None                 # sunhpc command runner, created in run()
        self.controller = ServiceController()
        self.cabinet = 0                # rack number for new nodes
        self.rank = -1                  # next rank; set by initializeRank()
        self.replace = ''               # host name given to --remove
        self.maxNew = -1                # > 0: stop after that many new nodes
        self.remove = 0
        self.membership = None
        self.basename = None            # host name prefix, e.g. 'compute'
        self.restart_srvs = 0           # set once a node has been added
        self.inserted = []              # (mac, nodename) pairs, newest first
        self.kickstarted = {}           # nodename -> 0 or last HTTP status
        self.excludeMacList = []        # MAC addresses to ignore
        self.dist_lockFile = '/var/lock/sunhpc-dist'
        self.osname = 'linux'
        self.doRestart = 1
        self.subnet = 'private'         # Internal Network
        self.hostname = None            # fixed host name overriding the pattern
        self.kickstartable = True

    def setMembershipName(self, membership_name):
        """Record the membership name used for new nodes."""
        self.membership = membership_name

    def setRemove(self, host):
        """Switch to remove mode for *host*."""
        self.replace = host
        self.remove = 1

    def startGUI(self):
        """Create the screen and the main form listing inserted nodes."""
        GUI.startGUI(self)
        self.form = snack.GridForm(
            self.screen, _("Install the system using pxelinux"), 1, 1)
        self.textbox = snack.Textbox(50, 4, "", scroll=1)
        self.form.add(self.textbox, 0, 0)
        self.screen.drawRootText(
            0, 0, _("SunHPC(%s) -- version %s") %
            (self.sql.usage_name, self.sql.usage_version))
        self.screen.drawRootText(
            0, 1, _("Opened kickstart access to %s network") %
            self.sql.getPrivateNet())
        self.screen.pushHelpLine(' ')

    def statusGUI(self):
        """Redraw the list of inserted nodes with their kickstart state.

        '( )' means no request seen yet, '(*)' means HTTP 200, any other
        status is shown verbatim in parentheses.
        """
        rows = []
        for (mac, name) in self.inserted:
            if name not in self.kickstarted:
                ks = ''
            elif self.kickstarted[name] == 0:
                ks = '( )'
            elif self.kickstarted[name] == 200:
                ks = '(*)'
            else:  # An error
                ks = '(%s)' % self.kickstarted[name]
            rows.append('%s\t%s\t%s\n' % (mac, name, ks))
        self.textbox.setText(_(''.join(rows)))
        self.form.draw()
        self.screen.refresh()

    def waitGUI(self):
        """Show for one second the hosts that have not kickstarted yet."""
        not_done = ''
        for name in sorted(self.kickstarted):
            status = self.kickstarted[name]
            if status != 200:
                ks = '(%s)' % status if status else '( )'
                not_done += '%s \t %s\n' % (name, ks)
        form = snack.GridForm(self.screen,
                              _("Not kickstarted, please wait..."), 1, 1)
        textbox = snack.Textbox(35, 4, not_done, scroll=1)
        form.add(textbox, 0, 0)
        form.draw()
        self.screen.refresh()
        time.sleep(1)
        self.screen.popWindow()

    def membershipGUI(self):
        """Select the membership; currently fixed to 'compute' without a dialog."""
        self.kickstartable = True
        self.basename = 'compute'
        self.setMembershipName(self.basename)

    def initializeRank(self):
        """Set self.rank to one past the highest rank in the current rack.

        Falls back to 0 when the rack has no nodes yet.
        """
        query = 'select rank,max(rank) from nodes where rack = %d group by rack' % (self.cabinet)
        if self.sql.search(query) > 0:
            (rank, max_rank) = self.sql.fetchone()
            self.rank = max_rank + 1
        else:
            self.rank = 0

    def getnextIP(self, subnet):
        """Ask the 'report.nextip' command for the next address in *subnet*.

        Raises Exception when the command returns nothing.
        """
        args = [subnet]
        if self.sql.ipIncrement != -1:
            args.append('increment=%d' % self.sql.ipIncrement)
        text = self.cmd.command('report.nextip', args)
        if not text:
            raise Exception("Unable to get next IP address")
        return text.strip()

    def addit(self, mac, nodename, ip):
        """Register host *nodename* with interface eth0 (*mac*, *ip*).

        Also notifies the plugins and puts the node at the top of the
        on-screen list with kickstart status 0.
        """
        self.cmd.command('add.host', [nodename, 'os=' + self.osname,
            'rack=' + str(self.cabinet), 'rank=' + str(self.rank)])
        self.cmd.command('add.host.interface', [nodename, 'eth0',
            'ip=' + ip, 'mac=' + mac, 'subnet=' + self.subnet])
        self.sql.commit()
        self.controller.added(nodename)
        self.restart_srvs = 1
        # Second commit follows the plugin notification, as in the original.
        self.sql.commit()
        self.inserted = [(mac, nodename)] + list(self.inserted)
        self.kickstarted[nodename] = 0

    def discover(self, mac, dev):
        """Add a node for *mac* unless it is already in the database.

        Returns True when a new node was added, False when the search for
        the MAC address found a match.
        """
        # NOTE(review): mac is interpolated into the SQL text; it originates
        # from a dhcpd log line.
        query = 'select mac from networks where mac="%s"' % (mac)
        if self.sql.search(query):
            return False
        nodename = self.getNodename()
        log.info('GetNodename: %s' % nodename)
        ipaddr = self.getnextIP(self.subnet)
        self.addit(mac, nodename, ipaddr)
        log.info('Addit Host: %s/%s/%s' % (nodename, ipaddr, mac))
        self.printDiscovered(mac)
        return True

    def printDiscovered(self, mac):
        """Flash a two-second notice that a new appliance was found."""
        form = snack.GridForm(self.screen,
                              _("Discovered New Appliance"), 1, 1)
        new_app = _("Discovered a new appliance with MAC (%s)") % (mac)
        textbox = snack.Textbox(len(new_app), 1, new_app)
        form.add(textbox, 0, 0)
        form.draw()
        self.screen.refresh()
        time.sleep(2)
        self.screen.popWindow()

    def getNodename(self):
        """Return the fixed host name if set, else '<basename>-<rack>-<rank>'."""
        if self.hostname is not None:
            return self.hostname
        return '%s-%d-%d' % (self.basename, self.cabinet, self.rank)

    def listenDHCP(self, line):
        """Inspect one syslog line for a DHCP request from an unknown MAC.

        Raises InsertDone once ``maxNew`` new nodes have been inserted.
        """
        # split() already discards the trailing newline.  The previous
        # ``line.split()[:-1]`` dropped the last real token, so a plain
        # "... DHCPDISCOVER from <mac> via <dev>" line was never matched.
        tokens = line.split()
        if len(tokens) > 9 and tokens[4] == 'dhcpd:' and \
                tokens[5] in ('DHCPDISCOVER', 'BOOTREQUEST'):
            Dev = tokens[9].replace(':', '').strip()
            Mac = tokens[7].strip()

            # Excluded MAC addresses are dropped before touching the database.
            if Mac in self.excludeMacList:
                return

            # In "DHCPDISCOVER from <mac> via eth0" the device is the
            # frontend interface serving DHCP (the private network
            # interface), not an interface of the compute node.
            self.sql.execute("""select networks.device from
                networks, subnets, nodes where
                subnets.name='%s' and nodes.name='%s' and
                networks.subnet=subnets.id and networks.node=nodes.id""" % (
                self.subnet, self.sql.newdb.getFrontendName()))

            # Ignore requests that did not arrive on the frontend's
            # interface for this subnet (or when none is configured).
            row = self.sql.fetchone()
            if not row or Dev != row[0]:
                return

            # MAC address already registered: nothing to do.
            if not self.discover(Mac, Dev):
                return

            log.info('Discover New MAC: %s' % Mac)
            self.statusGUI()
            if self.maxNew > 0:
                self.maxNew -= 1
                if self.maxNew == 0:
                    raise InsertDone(_("Suggest Done"))

            # Use up the rank so the next host name is different.
            self.rank = self.rank + 1

        elif len(tokens) > 6 and tokens[4] == 'last' and \
                tokens[5] == 'message' and tokens[6] == 'repeated':
            # A "last message repeated" line from this host triggers a
            # syslog restart.
            shortname = os.uname()[1].split('.')[0]
            if tokens[3] == shortname:
                os.system('/usr/bin/systemctl restart syslog >/dev/null 2>&1')

    def monitoring(self):
        """Main loop: follow both logs until checkDone() says to stop.

        Both log files are closed when the loop ends, including when an
        exception propagates.
        """
        with open('/var/log/messages', 'r') as mslog, \
                open('/var/log/httpd/access_log', 'r') as kslog:
            # Start at the end of both logs; only new lines matter.
            mslog.seek(0, 2)
            kslog.seek(0, 2)

            self.screen.pushHelpLine(
                _(" Press <F8> to quit, press <F9> to force quit"))
            self.form.addHotKey('F8')
            self.form.addHotKey('F9')
            self.form.setTimer(1000)
            self.statusGUI()

            result = self.form.run()
            suggest_done = 0
            done = 0
            log.info('Monitoring Log: OK')
            # NOTE(review): the nesting of the two `continue` statements
            # was not recoverable from the reviewed text; they are placed
            # so that a consumed log line skips the form wait.
            while not done:
                # Watch the system log for dhcpd messages.
                syslog_line = mslog.readline()
                if syslog_line and not suggest_done:
                    try:
                        self.listenDHCP(syslog_line)
                    except InsertDone:
                        suggest_done = 1
                    except (sunhpc.core.utils.CommandError, InsertError) as msg:
                        self.warningGUI(msg)
                    continue

                # Watch the access log for kickstart requests.
                access_line = kslog.readline()
                if access_line:
                    try:
                        self.listenKS(access_line)
                    except InsertError as msg:
                        self.warningGUI(msg)
                    continue

                result = self.form.run()
                done = self.checkDone(result, suggest_done)

            log.info('Restarting services status: %s' % self.restart_srvs)
            if self.restart_srvs:
                log.info('Start restart services ...')
                form = snack.GridForm(self.screen, _("Restarting Services"), 1, 1)
                message = _("Restarting Services...")
                textbox = snack.Textbox(len(message), 1, message)
                form.add(textbox, 0, 0)
                form.draw()
                self.screen.refresh()
                self.controller.done()
                self.screen.popWindow()
        self.endGUI()

    def listenKS(self, line):
        """Look in log line for a kickstart request.

        Updates the kickstart status of hosts inserted in this session.
        Raises InsertError for a malformed line or when an unknown client
        received a kickstart file (status 200).
        """
        # Track accesses both with and without local certs.
        interesting = line.count('install/sbin/kickstart.cgi') \
            or line.count('install/sbin/public/kickstart.cgi') \
            or line.count('install/sbin/public/jumpstart.cgi')
        if not interesting:
            return

        fields = line.split()
        try:
            status = int(fields[8])
        except (IndexError, ValueError):
            raise InsertError(_("Apache log file not well formed!"))
        log.info('Kickstart Code: %s' % status)

        nodeid = int(self.sql.getNodeId(fields[0]))
        self.sql.execute('select name from nodes where id=%d' % nodeid)
        try:
            name, = self.sql.fetchone()
        except Exception:
            if status == 200:
                raise InsertError(_("Unknown node %s got a kickstart file!") % fields[0])
            return

        log.info('Kickstart NodeID %s->%s' % (name, nodeid))
        # Only hosts inserted during this session are tracked.
        if name not in self.kickstarted:
            return
        log.info('Change KS Status %s->%s' % (name, status))
        self.kickstarted[name] = status
        self.statusGUI()

    def checkDone(self, result, suggest_done):
        """Decide whether the monitoring loop may stop.

        *result* is the value returned by the form ('TIMER', 'F8', 'F9').
        Returns 1 to stop, 0 to continue.
        """
        # Normal case: timer tick and nothing asked us to stop.
        if result == 'TIMER' and not suggest_done:
            return 0
        if result == 'F9':
            return 1
        if not self.kickstartable:
            return 1

        all_kickstarted = all(
            status == 200 for status in self.kickstarted.values())
        if not all_kickstarted:
            # F8 with hosts still pending only shows who is pending.
            if result == 'F8':
                self.waitGUI()
            return 0
        if suggest_done or result == 'F8':
            return 1
        return 0

    def distDone(self):
        """Return 1 when the distribution is ready, else warn and return 0."""
        if os.path.exists(self.dist_lockFile):
            self.warningGUI(_("Sunhpc distribution is not ready\n\n")
                + _("Please wait for 'sunhpc create distro' to complete\n"))
            return 0
        return 1

    def run(self):
        """Check services, bring up the GUI and enter the monitoring loop."""
        self.cmd = sunhpc.commands.Command(self.sql.newdb)
        try:
            self.cmd.command('check.services', [])
            log.info('Check services: OK')
        except sunhpc.core.utils.CommandError as err:
            sys.stderr.write('error - ' + str(err) + '\n')
            return

        # Bring up the user interface.
        self.startGUI()
        log.info('Start Daemon GUI: OK')

        # make sure 'sunhpc create distro' is build finished.
        if not self.distDone():
            self.endGUI()
            return

        self.controller.loadPlugins(self.sql)
        try:
            if self.remove:
                self.endGUI()
                self.controller.done()
                print ('Removed node %s' % self.replace)
                return

            # Initialise membership information.
            self.membershipGUI()
            # Initialise the rank.
            self.initializeRank()
            if self.hostname:
                # Check that the supplied host name is valid.
                # NOTE(review): checkHostNameValidity is not defined in
                # this class or in GUI as visible here -- confirm it exists.
                self.checkHostNameValidity(self.hostname)
        except (sunhpc.core.utils.CommandError, InsertError) as msg:
            self.errorGUI(msg)
            self.endGUI()
            sys.stderr.write(_("%s\n") % str(msg))
            return

        log.info('Start Monitoring ...')
        self.monitoring()
class App(sunhpc.core.sql.Application):
    """Command-line application wrapper around InsertEthers.

    Parses the command line, takes the /var/lock/insert-ethers lock and
    runs the discovery loop.
    """

    def __init__(self, argv=None):
        sunhpc.core.sql.Application.__init__(self, argv)
        if not argv:
            argv = sys.argv
        self.args = []
        self.caller_args = argv[1:]
        self.usage_name = 'Kamaitachi'
        self.usage_version = '1.0.0'
        self.usage_command = os.path.basename(argv[0])
        self.getopt = sunhpc.core.utils.Struct()
        # Short options.
        self.getopt.s = []
        # Long options: (name, help text); a trailing '=' takes a value.
        self.getopt.l = [('help', 'display the command help infomation'),
                         ('version', 'Display the sunhpc version')
                         ]
        # unset our locale
        os.environ.pop('LANG', None)

        self.dist = None
        self.doUpdate = 0
        self.lockFile = '/var/lock/insert-ethers'
        self.insertor = InsertEthers(self)
        self.controller = ServiceController()
        self.ipIncrement = -1           # -1: let report.nextip pick its default
        self.doPublicMode = 0
        self.getopt.l.extend([
            ('remove=', 'remove an hostname')
        ])

    def getArgs(self):
        """Return the argument list that will be parsed."""
        return self.args

    def setArgs(self, list):
        """Replace the argument list that will be parsed."""
        self.args = list

    def getPrivateNet(self):
        """Return the private network as '<network>/<netmask>'."""
        net = self.getHostAttr('localhost', 'Kickstart_PrivateNetwork')
        mask = self.getHostAttr('localhost', 'Kickstart_PrivateNetmask')
        return "%s/%s" % (net, mask)

    def parseArgs(self, rcbase=None):
        """Parse the caller's command-line arguments (*rcbase* is unused)."""
        self.setArgs(self.caller_args)
        self.parseCommandLine()

    def parseCommandLine(self):
        """Run getopt over self.args and dispatch each option to parseArg().

        Prints the usage text and exits with status 1 on an invalid option.
        """
        # Entries are either bare option strings or (option, help) tuples.
        short = ''
        for e in self.getopt.s:
            short = short + (e[0] if isinstance(e, tuple) else e)

        long_opts = []
        for e in self.getopt.l:
            long_opts.append(e[0] if isinstance(e, tuple) else e)

        try:
            opts, args = getopt.getopt(self.args, short, long_opts)
        except getopt.GetoptError as msg:
            sys.stderr.write('error - %s\n' % msg)
            self.usage()
            sys.exit(1)

        for c in opts:
            self.parseArg(c)

    def parseArg(self, c):
        """Handle one (option, value) pair from getopt; returns 0."""
        if c[0] in ('-h', '--help'):
            self.usage()
            sys.exit(0)
        elif c[0] in ('version', '--version'):
            print (self.getClusterVersion())
        elif c[0] == '--remove':
            self.insertor.setRemove(c[1])
        return 0

    def getClusterVersion(self):
        """Return the release/version banner taken from self.cmd."""
        return "SunhpcOS (%s) for version - %s" % (
            self.cmd.release, self.cmd.version)

    def usage(self):
        """Print every known option with its help text."""
        argDict = {}
        for e in self.getopt.s:
            if isinstance(e, tuple):
                argDict['-%s' % e[0]] = e[1]
            else:
                argDict['-%s' % e] = ''

        for l in self.getopt.l:
            if isinstance(l, tuple):
                argDict['--%s' % l[0]] = l[1]
            else:
                argDict['--%s' % l] = ''

        if not argDict:
            return
        maxlen = max(map(len, argDict))
        print ('\nUsage: %s [options] command infomations' % self.usage_command)
        for k in argDict:
            keys = k.ljust(maxlen)
            vals = argDict[k]
            print (' %s\t%s' % (keys, vals))
        print ('If you have any questions, please contact info@sunhpc.com')

    def run(self):
        """Connect, take the lock file, run the insertor, release the lock."""
        self.connect()

        # Create the lock atomically: O_EXCL fails when the file already
        # exists, closing the window between "check" and "touch".
        locked_elsewhere = False
        try:
            fd = os.open(self.lockFile,
                         os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o666)
        except FileExistsError:
            locked_elsewhere = True
        except OSError:
            # Best effort, as before: a failed `touch` was ignored too.
            pass
        else:
            os.close(fd)

        if locked_elsewhere:
            self.cmd.abort('lock file %s exists.' % self.lockFile)

        if self.doPublicMode:
            # NOTE(review): runPublicOnly is not defined on InsertEthers as
            # visible here; doPublicMode is never set to a true value.
            self.insertor.runPublicOnly()
        else:
            self.insertor.run()
        self.cleanup()

    def cleanup(self):
        """Remove the lock file; a missing or unremovable file is ignored."""
        try:
            os.unlink(self.lockFile)
        except OSError:
            pass
if __name__ == "__main__":
    # shutil is not imported at the top of this file; without this import
    # the lookup below raised NameError, which the old bare `except`
    # swallowed, so the width was always 80.
    import shutil
    try:
        width = shutil.get_terminal_size().columns
    except Exception:
        width = 80
    os.environ['COLUMNS'] = str(width)

    log.info('starting insert-node ...')
    app = App(sys.argv)
    app.parseArgs()
    try:
        app.run()
    except Exception as msg:
        app.cleanup()
        # Restore the terminal before printing anything.
        if app.insertor and app.insertor.screen:
            app.insertor.endGUI()
        sys.stderr.write('error - ' + str(msg) + '\n')
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Make sure the lock never outlives the process.
        if os.path.exists(app.lockFile):
            os.unlink(app.lockFile)

157
sbin/kgen Executable file
View File

@@ -0,0 +1,157 @@
#!/opt/sunpy3/bin/python3
#coding:utf-8
import os,sys
import getopt
import sunhpc.invoke
from xml.sax._exceptions import SAXParseException
class App(sunhpc.core.database.ApplicationSQL):
    """Kickstart generator: turn a kickstart XML profile into kickstart text.

    The XML is read from the file named by the first positional argument,
    or from standard input when no file is given.
    """

    def __init__(self, argv=None):
        sunhpc.core.database.ApplicationSQL.__init__(self)
        if not argv:
            argv = sys.argv
        self.args = []                  # positional arguments, set by parseCommandLine()
        self.caller_args = argv[1:]
        self.usage_name = 'Kickstart Generator'
        self.usage_version = '1.0'
        self.usage_command = os.path.basename(argv[0])
        self.sections = []              # sections to emit; empty means all
        self.os = os.uname()[0].lower()
        self.arch = os.uname()[4]
        # The generator class is chosen by OS name, e.g. Generator_linux.
        osGenerator = getattr(sunhpc.core.xmlgen, 'Generator_%s' % self.os)
        self.generator = osGenerator()
        self.generator.setArch(self.arch)
        self.generator.setOS(self.os)
        self.getopt = sunhpc.core.utils.Struct()
        # 'a:' -- the architecture option takes a value (parseArg reads it).
        self.getopt.s = [('h', 'help infomation'), ('a:', 'architecture')]
        self.getopt.l = [('arch=', 'architecture'),
                         ('section=', 'name'),
                         ('postonly', 'show post'),
                         ]

    def usage(self):
        """Print every known option with its help text."""
        argDict = {}
        for e in self.getopt.s:
            if isinstance(e, tuple):
                argDict['-%s' % e[0]] = e[1]
            else:
                argDict['-%s' % e] = ''

        for l in self.getopt.l:
            if isinstance(l, tuple):
                argDict['--%s' % l[0]] = l[1]
            else:
                argDict['--%s' % l] = ''

        if not argDict:
            return
        maxlen = max(map(len, argDict))
        print ('\nUsage: %s [options] command infomations' % self.usage_command)
        for k in argDict:
            keys = k.ljust(maxlen)
            vals = argDict[k]
            print (' %s\t%s' % (keys, vals))
        print ('If you have any questions, please contact info@sunhpc.com')

    def parseArg(self, c):
        """Handle one (option, value) pair; return 1 if recognised, else 0."""
        if c[0] in ('-h', '--help'):
            self.usage()
            sys.exit(-1)
        elif c[0] in ('-a', '--arch'):
            self.generator.setArch(c[1])
        elif c[0] == '--section':
            # Several section names may be given, separated by whitespace.
            self.sections += c[1].split()
        elif c[0] == '--postonly':
            self.sections.append('post')
        else:
            return 0
        return 1

    def parseArgs(self):
        """Parse the caller's command-line arguments."""
        self.parseCommandLine()

    def parseCommandLine(self):
        """Run getopt over the caller's arguments.

        Options go to parseArg(); remaining positional arguments are stored
        in self.args so that run() can find the input file.  Prints the
        usage text and exits with status 1 on an invalid option.
        """
        # Entries are either bare option strings or (option, help) tuples.
        short = ''
        for e in self.getopt.s:
            short = short + (e[0] if isinstance(e, tuple) else e)

        long_opts = []
        for e in self.getopt.l:
            long_opts.append(e[0] if isinstance(e, tuple) else e)

        try:
            opts, args = getopt.getopt(self.caller_args, short, long_opts)
        except getopt.GetoptError as msg:
            sys.stderr.write('error - %s\n' % msg)
            self.usage()
            sys.exit(1)

        # Previously discarded, which made run() always read stdin.
        self.args = args
        for c in opts:
            self.parseArg(c)

    def run(self):
        """Parse the XML input and print the requested kickstart sections."""
        if self.args:
            with open(self.args[0], 'r') as fe:
                xml_text = fe.read()
        else:
            xml_text = sys.stdin.read()
        self.generator.parse(xml_text)

        print ('#')
        print ('# %s version %s' % (self.usage_name, self.usage_version))
        print ('#')

        sections = self.sections
        if not sections:
            sections = ['order', 'debug', 'main', 'packages', 'pre', 'post']

        plist = []
        for s in sections:
            plist += self.generator.generate(s)

        for line in plist:
            print (line.rstrip())
if __name__ == "__main__":
    # Build the generator application from the process arguments and run
    # it; both known failure kinds are reported on stderr and end the
    # process with exit status -1.
    generator_app = App(sys.argv)
    generator_app.parseArgs()
    try:
        generator_app.run()
    except sunhpc.core.exceptions.KickstartError as ks_err:
        # Error raised by the kickstart generation itself.
        sys.stderr.write("kgen error - %s\n" % ks_err)
        sys.exit(-1)
    except SAXParseException as xml_err:
        # The input was not well-formed XML.
        sys.stderr.write("kgen XML parse exception: %s\n" % xml_err)
        sys.exit(-1)

BIN
sbin/mksquashfs Executable file

Binary file not shown.

559
sbin/mom_gencfg Executable file
View File

@@ -0,0 +1,559 @@
#!/usr/bin/perl
# *****************************************************************************
#
# Copyright 2011 Zuse Institute Berlin
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Please send comments to kallies@zib.de
#
# *****************************************************************************
# Purpose: - called from /etc/init.d/pbs_mom during start actions.
# - creates /var/spool/torque/mom_priv/mom.layout
# - creates/modifies /dev/cpuset/torque
# Prereq: - hwloc >= 1.1, http://www.open-mpi.org/projects/hwloc/
# - Sys::Hwloc >= 0.09, http://search.cpan.org/~bka/
# Install: Install this script on each UV rack
# /opt/torque/Scripts/mom_gencfg root:root -rwxr-xr-x
# Config: Set MOM_GENCFG=/opt/torque/Scripts/mom_gencfg
# in /etc/init.d/pbs_mom for UV, execute $MOM_GENCFG before
# starting the pbs_mom daemon.
# MOM_GENCFG can be overridden in /etc/sysconfig/pbs_mom.
# *****************************************************************************
# $Id: mom_gencfg,v 1.1.2.1 2011/01/17 10:12:46 acountin Exp $
# *****************************************************************************
#
# *** Instructions for use ***
#
# 1. Install hwloc - see contrib/hwloc_install.sh. This should already be done since
# TORQUE needs hwloc for its cpuset implementation starting in 4.0
# 2. Install Sys::Hwloc from CPAN
# 3. Set $PBS_HOME to the proper value if not already set
# 4. Update the variables in the section 'Config Definitions' Especially update firstNodeId
# and nodesPerBoard if desired.
# firstNodeId should be set above 0 if you have a root cpuset that you wish to exclude
# nodesPerBoard is the number of numa nodes per board. Each node is defined in the
# directory /sys/devices/system/node, in a subdirectory node<node index>
# 5. Backup your current file, just in case a variable is set incorrectly or neglected
# 6. Run this script and enjoy the layout file
#
#
use strict;
use lib qw(
/usr/lib/perl5
/usr/lib/perl5/site_perl
);
use Sys::Hostname;
use File::Basename;
use Getopt::Long qw(:config no_ignore_case);
use autouse 'Pod::Usage' => qw(pod2usage);
use Sys::Hwloc 0.09;
my $progName = basename($0);
my $hostName = hostname();
$SIG{__DIE__} = \&xDie;
# ==============================================================================
# Setup needed before init
# ==============================================================================
BEGIN: {
die "This script needs at least hwloc-1.1\n" unless HWLOC_XSAPI_VERSION() >= 0x00010100;
}
# ==============================================================================
# Config definitions
# ==============================================================================
my $hostNames = undef; # hostname pattern to be run on, undef to skip test
my $cpusetFsName = '/dev/cpuset'; # the name of the cpuset file system
my $cpusetBaseName = '/torque'; # the name of the parent cpuset of a job's cpuset
my $mkdirCmd = '/bin/mkdir'; # the path to the mkdir command
my $catCmd = '/bin/cat'; # the path to the cat command
my $echoCmd = '/bin/echo'; # the path to the echo command
my $momCfgDir = 'mom_priv'; # the directory where MOM configs are stored
my $momLayoutFile = 'mom.layout'; # the name of the MOM layout file
my $firstNodeId = 0; # ID of 1st NUMA node to be used by Torque (start with 0)
my $lastNodeId = undef; # ID of last NUMA node to be used (undef means last available)
my $nodesPerBoard = 1; # number of NUMA nodes per nodeboard
my %cpusetConf = (
cpus => undef, # undef means auto-generate
mems => undef, # undef means auto-generate
cpu_exclusive => 1, #
mem_exclusive => 1, #
);
my %options = (
-doLayout => 1, # generate mom.layout
-withCpus => 1, # include cpus in mom.layout
-withMems => 1, # include mems in mom.layout
-doCpuset => 1, # generate/modify /torque cpuset
-withSmt => 1, # include logical processors running on the same core
-verbose => undef, # be verbose to STDERR
-dryRun => undef, # no actions, just tell what would be done
);
# ==============================================================================
# Command line options
# ==============================================================================
GetOptions(
"layout!" => \$options{-doLayout},
"cpus!" => \$options{-withCpus},
"mems!" => \$options{-withMems},
"smt!" => \$options{-withSmt},
"cpuset!" => \$options{-doCpuset},
"dry-run!" => \$options{-dryRun},
"verbose!" => \$options{-verbose},
"help|?" => sub { usage(0) },
"man" => sub { manPage() },
) or usage(2);
if($options{-dryRun}) {
$options{-verbose} = 1 unless defined $options{-verbose};
xDebug(">>> DryRunDryRunDryRunDryRunDryRun <<<");
}
# ==============================================================================
# Quick exit if not wanted on this host, or if no work to do
# ==============================================================================
#if(defined $hostNames) {
# unless($hostName =~ /$hostNames/) {
# xDebug("--- Don't run on $hostName ---");
# exit 0;
# }
#}
# Neither the layout file nor the cpuset was requested: nothing to do.
if (!$options{-doLayout} && !$options{-doCpuset}) {
    exit 0;
}

# ==============================================================================
# PBS_HOME must be set and must contain the MOM config directory, otherwise
# the script was most likely not called the intended way. The cpuset
# filesystem must be present as well.
# ==============================================================================
if (!exists $ENV{PBS_HOME} || !$ENV{PBS_HOME}) {
    die "\$PBS_HOME not set\n";
}
if (!-d $ENV{PBS_HOME}) {
    die "PBS_HOME=$ENV{PBS_HOME} does not exist\n";
}

# From here on both names are absolute paths below PBS_HOME.
$momCfgDir = join('/', $ENV{PBS_HOME}, $momCfgDir);
-d $momCfgDir or die "MOM config dir $momCfgDir does not exist\n";
$momLayoutFile = join('/', $momCfgDir, $momLayoutFile);

-d $cpusetFsName or die "this system does not support cpusets\n";
# ==============================================================================
# Figure out system topology, collect wanted node objects
# ==============================================================================
# Build the hwloc topology object; init yields undef on failure, while load
# signals failure with a true return value.
my $topology = Sys::Hwloc::Topology->init;
defined $topology or die "Failed to init topology\n";
$topology->set_flags(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
if ($topology->load) {
    die("Failed to load topology\n");
}
# ==============================================================================
# Collect nodesets of wanted NUMA nodes per nodeBoard
# ==============================================================================
# Walk all NUMA node objects in topology order and group them into
# nodeboards of $nodesPerBoard nodes each. Every entry of @nodeBoards is a
# hashref with two bitmaps: {nodeset} (the NUMA nodes of the board) and
# {cpuset} (empty here, filled in by the next section).
my @nodeBoards = ();
my $nodeObj = undef;
# Number of nodes already placed on the board currently being filled;
# 0 means the next accepted node opens a new board.
my $nNodes = 0;
while($nodeObj = $topology->get_next_obj_by_type(HWLOC_OBJ_NODE, $nodeObj)) {
my $nodeId = $nodeObj->logical_index;
# Honour the configured [$firstNodeId, $lastNodeId] window.
next if $nodeId < $firstNodeId;
last if (defined $lastNodeId && $nodeId > $lastNodeId);
if($nNodes) {
# Board already open: merge this node into the newest board's nodeset.
$nodeBoards[$#nodeBoards]->{nodeset}->or($nodeObj->nodeset);
} else {
# Open a new board; dup so the board owns its own copy of the bitmap.
push @nodeBoards, {
cpuset => Sys::Hwloc::Bitmap->new,
nodeset => $nodeObj->nodeset->dup,
};
}
$nNodes++;
# Board is full: restart the counter so the next node opens a new one.
$nNodes = 0 if $nNodes >= $nodesPerBoard;
}
# ==============================================================================
# Assemble cpusets per nodeBoard
# ==============================================================================
# For every nodeboard derive {cpuset} from {nodeset}. With -withSmt set the
# result is used as is; otherwise only the first PU of each core is kept.
foreach my $nodeBoard (@nodeBoards) {
# Fills {cpuset} from {nodeset}.
# NOTE(review): presumably the CPUs local to those NUMA nodes - confirm
# against the Sys::Hwloc documentation of cpuset_from_nodeset_strict.
$topology->cpuset_from_nodeset_strict($nodeBoard->{cpuset}, $nodeBoard->{nodeset});
next if $options{-withSmt};
my $core = undef;
while($core = $topology->get_next_obj_inside_cpuset_by_type($nodeBoard->{cpuset}, HWLOC_OBJ_CORE, $core)) {
# Start at index 1: the PU with index 0 inside the core stays in the
# board cpuset, every further PU of that core is removed from it.
my $j = 1;
while (my $pu = $topology->get_obj_inside_cpuset_by_type($core->cpuset, HWLOC_OBJ_PU, $j++)) {
$nodeBoard->{cpuset}->andnot($pu->cpuset);
}
}
}
# ==============================================================================
# Generate mom.layout
# ==============================================================================
# Write one "nodes=... [cpus=...] [mems=...]" line per nodeboard to
# $momLayoutFile. In dry-run mode the lines are only reported via xDebug()
# and the file is never opened.
if ($options{-doLayout}) {
    xDebug("--- Generating $momLayoutFile ---");

    # Lexical handle and 3-arg open: the path is never interpreted as a
    # mode/command, and the handle cannot clash with another global FILE.
    my $layoutFh;
    if (!$options{-dryRun}) {
        open($layoutFh, '>', $momLayoutFile)
            or die "failed to open $momLayoutFile: $!\n";
    }

    foreach my $nodeBoard (@nodeBoards) {
        my $line = sprintf("nodes=%s", $nodeBoard->{nodeset}->sprintf_list);
        $line .= sprintf(" cpus=%s", $nodeBoard->{cpuset}->sprintf_list) if $options{-withCpus};
        $line .= sprintf(" mems=%s", $nodeBoard->{nodeset}->sprintf_list) if $options{-withMems};
        xDebug(" $line");
        next if $options{-dryRun};
        print {$layoutFh} "$line\n"
            or die "failed to write $momLayoutFile: $!\n";
    }

    # Buffered write errors (e.g. a full disk) only surface at close; a
    # silently truncated layout file would misconfigure pbs_mom.
    if (defined $layoutFh) {
        close($layoutFh) or die "failed to close $momLayoutFile: $!\n";
    }
}
# ==============================================================================
# Create/modify torque cpuset
# ==============================================================================
# Make sure the Torque parent cpuset exists and carries the settings from
# %cpusetConf. Only values that differ from the current content are written.
# A failed creation or read is fatal; a failed write is not.
if($options{-doCpuset}) {
# Create it if it is not there
my $cpusetPath = "${cpusetFsName}${cpusetBaseName}";
if(! -d $cpusetPath) {
xDebug("--- Creating $cpusetPath ---");
# NOTE: the mkdir runs even with -dry-run; there is no dryRun check here.
my $rc = execCmd($mkdirCmd,1,$cpusetPath);
die "Failed to create $cpusetPath\n" unless defined $rc;
}
# Read content
xDebug("--- Reading $cpusetPath ---");
my $cpusetData = readCpuset($cpusetPath);
die "Failed to read $cpusetPath\n" unless defined $cpusetData;
# Assemble changes: %cpusetMod maps file name => value that must be written
my %cpusetMod = ();
foreach my $key (keys %cpusetConf) {
# Skip settings whose file was not found in this cpuset.
next unless exists $cpusetData->{$key};
my $val = $cpusetConf{$key};
# Poor man's switch: auto-generate 'cpus' / 'mems' when configured as undef.
CASE: {
$key eq 'cpus' && do {
if(! defined $val) {
# Union of the cpusets of all nodeboards.
my $cpuset = Sys::Hwloc::Bitmap->new;
foreach my $nodeBoard (@nodeBoards) {
$cpuset->or($nodeBoard->{cpuset});
}
$val = $cpuset->sprintf_list;
$cpuset->free;
}
last CASE;
};
$key eq 'mems' && do {
if(! defined $val) {
# Union of the nodesets of all nodeboards.
my $nodeset = Sys::Hwloc::Bitmap->new;
foreach my $nodeBoard (@nodeBoards) {
$nodeset->or($nodeBoard->{nodeset});
}
$val = $nodeset->sprintf_list;
$nodeset->free;
}
last CASE;
};
}
next unless defined $val;
# Record a change when the file is empty or holds a different value
# (plain string comparison).
if(
(! defined $cpusetData->{$key}) ||
(defined $cpusetData->{$key} && $cpusetData->{$key} ne $val)
) {
$cpusetMod{$key} = $val;
}
}
# Write changes, if any. Don't abort on error, but warn if changes not done
if(%cpusetMod) {
xDebug("--- Modifying $cpusetPath ---");
if($options{-dryRun}) {
# Dry run: only report what would be written.
while(my ($key, $val) = each %cpusetMod) {
xDebug(sprintf(" = cpuset %s: %-25s %s", $cpusetPath, $key, $val));
}
} else {
while(my ($key, $val) = each %cpusetMod) {
# The "> file" part is a shell redirection: execCmd() hands the whole
# string to a piped open. The result is deliberately not checked.
my $out = execCmd($echoCmd, 0, "$val > ${cpusetPath}/$key");
}
if($options{-verbose}) {
# Read back and flag each setting: '=' value took effect, '-' it did not.
$cpusetData = readCpuset($cpusetPath);
die "Failed to read $cpusetPath\n" unless defined $cpusetData;
while(my ($key, $val) = each %cpusetMod) {
xDebug(sprintf(" %s cpuset %s: %-25s %s", $val eq $cpusetData->{$key} ? '=' : '-', $cpusetPath, $key, $val));
}
}
}
}
}
# ==============================================================================
# All done
# ==============================================================================
$topology->destroy;
exit 0;
# #############################################################################
# ==============================================================================
# Read the control files of a cpuset directory.
#   $cpusetPath - path of the cpuset directory
# Returns a hashref mapping file name => first line of the file's content
# (undef if the file produced no output). Files that do not exist in the
# cpuset are left out of the hash, so callers can test with exists().
# Returns undef if the directory is missing or reading a file fails.
# ==============================================================================
sub readCpuset {
    my $cpusetPath = shift;
    my $cpusetData = {};

    # Check if cpuset exists
    unless (-d $cpusetPath) {
        xDebug("ERROR: Cpuset $cpusetPath does not exist.");
        return undef;
    }

    # Read content of cpuset. All files read here are single-valued, so only
    # the first output line is kept.
    foreach my $key (qw(
        cpu_exclusive
        cpus
        mem_exclusive
        mem_hardwall
        memory_migrate
        memory_pressure
        memory_spread_page
        memory_spread_slab
        mems
        notify_on_release
        sched_load_balance
        sched_relax_domain_level
    )) {
        my $f = "${cpusetPath}/$key";
        next unless -e $f;

        my $rc = execCmd($catCmd, 0, $f);
        return undef unless defined $rc; # Command failed

        my $val = @{$rc} ? $rc->[0] : undef;
        xDebug(sprintf(" cpuset %s: %-25s %s", $cpusetPath, $key, defined $val ? $val : "NO DATA"));
        $cpusetData->{$key} = $val;
    }
    return $cpusetData;
}
# ==============================================================================
# Execute a command with args.
#   $cmdBase - path of the executable; must pass the -x test
#   $verbose - if true, announce the command line via xDebug()
#   @cmdArgs - arguments, joined with blanks and appended to $cmdBase
# The command line is run through a piped open with stderr merged into
# stdout, i.e. it is interpreted by the shell; callers rely on this for
# redirections such as "value > file".
# Returns arrayref with chomped output on success.
# On command failure (non-zero exit status or death by signal), print error
# msg via xDebug() and return undef.
# ==============================================================================
sub execCmd {
    my ($cmdBase, $verbose, @cmdArgs) = @_;

    if (! $cmdBase) {
        xDebug("ERROR execCmd: need \$cmdBase.");
        return undef;
    }

    # Check if cmdBase is executable
    if (! -x $cmdBase) {
        xDebug("ERROR: File \"$cmdBase\" does not exist or is not executable.");
        return undef;
    }

    # Execute
    my $cmd = $cmdBase;
    $cmd .= (" " . join(" ", @cmdArgs)) if @cmdArgs;
    xDebug(" About to execute \"$cmd\"") if $verbose;

    # Lexical handle: nested or repeated calls cannot clobber a shared global.
    my $cmdFh;
    unless (open($cmdFh, "$cmd 2>&1 |")) {
        xDebug("ERROR: Failed to execute \"$cmd\": $!");
        return undef;
    }
    my @cmdOut = <$cmdFh>;
    chomp @cmdOut;
    close($cmdFh);

    # Inspect the complete wait status. Looking only at the exit code
    # ($? >> 8) would report a child that was killed by a signal as success.
    my $status = $?;
    if ($status) {
        if ($status == -1) {
            xDebug("ERROR: Command \"$cmd\" could not be waited for: $!");
        } elsif ($status & 127) {
            xDebug("ERROR: Command \"$cmd\" was killed by signal " . ($status & 127));
        } else {
            my $rc = $status >> 8;
            xDebug("ERROR: Command \"$cmd\" returned rc = $rc");
        }
        if (@cmdOut) {
            # Show at most the first three output lines, skipping blank ones.
            xDebug(join("\n", map { " $_" } grep { /\S/ } $#cmdOut < 3 ? @cmdOut : (@cmdOut[0..2], "...")));
        }
        return undef;
    }

    # Return output
    return \@cmdOut;
}
# ==============================================================================
# Usage message
# ==============================================================================
sub usage {
    # Print the synopsis and leave with the given exit code (0 if none given).
    my ($code) = @_;
    $code ||= 0;

    # pod2usage must not exit on its own; the exit code is ours to choose.
    pod2usage(
        -exitval => "NOEXIT",
        -verbose => 0,
    );
    exit $code;
}
# ==============================================================================
# Man page
# ==============================================================================
# Show the full manual page via pod2usage and exit with status 0.
# When started as root, the process first switches to an unprivileged user
# and sanitizes its environment.
sub manPage {
if ($< == 0) { # Cannot invoke perldoc as root
# Pick an unprivileged uid: "nobody", else "nouser", else -2.
my $id = eval { getpwnam("nobody") };
$id = eval { getpwnam("nouser") } unless defined $id;
$id = -2 unless defined $id;
$< = $id;
}
$> = $<; # Disengage setuid
$ENV{PATH} = "/bin:/usr/bin"; # Untaint PATH
# Remove variables that could influence a shell started further down.
delete @ENV{ 'IFS', 'CDPATH', 'ENV', 'BASH_ENV' };
# Accept only word characters, '-', '/' and '.' in the program path.
if ($0 =~ /^([-\/\w\.]+)$/) {
$0 = $1; # Untaint $0
} else {
die "Illegal characters were found in \$0 ($0)\n";
}
# -verbose => 2 prints the whole manual; -exitval => 0 makes pod2usage exit.
pod2usage(
-verbose => 2,
-exitval => 0,
);
}
# ==============================================================================
# Verbose printing
# ==============================================================================
sub xDebug {
    # Print the concatenated arguments to STDERR, one "$progName - ..." line
    # per message line. Does nothing unless -verbose is in effect.
    return unless $options{-verbose};

    my $msg = join("", @_);

    # Test the length, not the truth value: the message "0" is legitimate
    # and must not be replaced by the placeholder text.
    $msg = "something to debug" unless length $msg;

    print STDERR "$progName - $_\n" foreach split("\n", $msg);
}
sub xDie {
    # die with the program name in front of the caller's message parts.
    my @parts = @_;
    die "$progName - ", @parts;
}
__END__
=head1 NAME
mom_gencfg - Create mom.layout and /dev/cpuset/torque, designed to be called from /etc/init.d/pbs_mom
=head1 SYNOPSIS
mom_gencfg --help|-?|--man
mom_gencfg -(no)layout -(no)cpus -(no)mems -(no)cpuset -(no)smt -(no)dry-run -(no)verbose
=head1 DESCRIPTION
This script creates /var/spool/torque/mom_priv/mom.layout and creates/modifies /dev/cpuset/torque
for a pbs_mom that is compiled with --enable-numa-support.
The basic configuration like number and offset of NUMA node IDs per nodeboard,
cpuset settings, and defaults of command line options is hardcoded in the script.
The script checks if I<PBS_HOME> is set in the environment. Usually this should point to
/var/spool/torque.
=head1 OPTIONS
=over 4
=item B<-(no)layout>
Create the mom.layout file or not.
=item B<-(no)cpus>
mom.layout contains cpu IDs per nodeboard or not.
=item B<-(no)mems>
mom.layout contains memory node IDs per nodeboard or not.
=item B<-(no)cpuset>
Create/modify /dev/cpuset/torque or not.
=item B<-(no)smt>
The I<cpus> entry in mom.layout and in /dev/cpuset/torque contain additional
logical processors running on the same core or not.
=item B<-(no)dry-run>
If B<-dry-run> is given, show what would have been done. Switches B<-verbose> on, unless B<-noverbose> was given.
=item B<-(no)verbose>
Verbose printing to STDERR.
=item B<-man>
Prints this man page.
=item B<-help|-?>
Prints synopsis.
=back
=head1 AUTHOR
Bernd Kallies, E<lt>kallies@zib.deE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011 Zuse Institute Berlin
This library is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation.
=cut

34
sbin/restart-anaconda Executable file
View File

@@ -0,0 +1,34 @@
#! /bin/bash
#
# restart-anaconda: Debugging tool to restart stage2 Anaconda.
#
# Copyright (C) 2010
# Red Hat, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Throw away the cached updates; if that fails they will not be downloaded
# again on the next run, so tell the user on stderr and carry on.
if ! rm -rf /tmp/updates; then
    echo "Error removing /tmp/updates. Updates won't be re-downloaded." >&2
fi

# A pid file means iscsid is around. It must not survive into the next run,
# so log out of all iSCSI nodes first.
if [ -f /var/run/iscsid.pid ]; then
    /sbin/iscsiadm -m node --logoutall=all
fi

# Stopping the service kills all programs in the anaconda group. Clean up
# after them, then start the service again without waiting for it.
systemctl stop anaconda.service
anaconda-cleanup
systemctl start --no-block anaconda.service

38
sbin/suncli Executable file
View File

@@ -0,0 +1,38 @@
#!/opt/sunpy3/bin/python3
import os
import sys
import sunhpc
import sunhpc.invoke
import logging.handlers
# Refuse to run on Python 2. The message goes to stderr and the exit status
# is non-zero so that calling scripts can detect the failure; sys.exit is
# used because the bare exit() helper only exists when the site module is
# loaded.
if sys.version_info.major < 3:
    sys.stderr.write("Sunhpc cluster supports only Python3. Rerun application in Python3 environment.\n")
    sys.exit(1)
from sunhpc.console import SunhpcConsole
# The log file lives below $SUNHPCHOME when that variable is set and
# non-empty, otherwise below /opt/sunhpc.
sunhpc_home = os.environ.get('SUNHPCHOME')
if sunhpc_home:
    log_file = os.path.join(sunhpc_home, 'logs', 'runSunhpc.log')
else:
    log_file = os.path.join('/opt/sunhpc', 'logs', 'runSunhpc.log')

# RotatingFileHandler never rolls over while backupCount is 0, no matter
# what maxBytes says, so the file would grow without bound. Keep a few old
# files so the 500000-byte limit actually takes effect.
log_handler = logging.handlers.RotatingFileHandler(
    filename=log_file,
    maxBytes=500000,
    backupCount=5,
)
log_formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
log_handler.setFormatter(log_formatter)

# Everything from DEBUG upwards, from every logger, ends up in the file.
LOGGER = logging.getLogger()
LOGGER.setLevel(logging.DEBUG)
LOGGER.addHandler(log_handler)
def sunhpcApplication(argv):
    """Run the Sunhpc console.

    With at least one command line argument after the program name the
    console handles ``argv`` non-interactively; without any it starts its
    interactive session.
    """
    console = SunhpcConsole()
    if not argv[1:]:
        console.start()
        return
    console.nonInteractive(argv)
if __name__ == "__main__":
    try:
        sunhpcApplication(sys.argv)
    except KeyboardInterrupt:
        # Ctrl-C ends the session quietly, without a traceback.
        pass
    # SystemExit is deliberately NOT caught: swallowing it would turn every
    # sys.exit(n) raised inside the console into exit status 0 and hide
    # failures from calling scripts. A plain sys.exit() still exits with 0.

178
sbin/sunyums Executable file
View File

@@ -0,0 +1,178 @@
#!/usr/bin/python
#coding:utf-8
import os, sys
import yum, pickle
import tempfile
# Help text printed by Application.usages(). Names prefixed with '@' are
# treated as comps groups and names prefixed with '^' as environments.
usages = \
"""
Usage: sunyums [OPTION]... [PACKAGE|@GROUP|^ENVIRONMENT]...
Output and match all dependent installation packages
Example:
    sunyums packname1 packname2
    sunyums packname1 packname2 --config=file --comps=comps.xml
    sunyums packname1 @groupname --mandatory=1 --default=1 --options=0
Options:
    --config=file.conf      supply a yum config file      default: optional
    --comps=comps.xml       supply a parsed comps.xml     default: optional
    --mandatory=True        include mandatory packages    default: True
    --default=True          include default packages      default: True
    --options=False         include optional packages     default: False
"""
class Application(object):
    """Expand package, @group and ^environment names into a package list.

    The names given on the command line are resolved against the yum comps
    data, the dependencies of the resulting packages are added, and the
    final list is printed one name per line. Requested names that could not
    be resolved are printed with a leading '#'.
    """

    def __init__(self, args):
        """Store the command line (without the program name) and defaults."""
        self.args = args[1:]
        self.yums = yum.YumBase()
        self.comps = None        # path given with --comps=
        self.config = None       # path given with --config=
        self.groups = []         # positional names: package, @group, ^environment
        self.mandatory = True    # include a group's mandatory packages
        self.default = True      # include a group's default packages
        self.options = False     # include a group's optional packages
        self.basePacks = []      # resolved package names, in output order
        self.origPacks = []      # every package name that was asked for
        self.packages = []       # all package names known to comps (lazy)

    def str2bool(self, s):
        """Converts an on/off, yes/no, true/false string to True/False."""
        return bool(s) and s.upper() in (
            'ON', 'YES', 'Y', 'TRUE', '1', 'ENABLED', 'ENABLE')

    def usages(self):
        """Print the usage text and exit with status 0."""
        print(usages)
        sys.exit(0)

    def parseArgs(self):
        """Sort self.args into options and positional names.

        Without any argument, or with -h/--help, the usage text is printed
        and the program exits.
        """
        if not self.args:
            self.usages()

        for arg in self.args:
            if arg in ('-h', '--help'):
                self.usages()

            # Split at the FIRST '=' only, so that values which contain '='
            # themselves (e.g. unusual file names) are kept intact.
            key, sep, value = arg.partition('=')
            if sep and key == '--comps':
                self.comps = value
            elif sep and key == '--config':
                self.config = value
            elif sep and key == '--mandatory':
                self.mandatory = self.str2bool(value)
            elif sep and key == '--default':
                self.default = self.str2bool(value)
            elif sep and key == '--options':
                self.options = self.str2bool(value)
            else:
                self.groups.append(arg)

    def depends(self):
        """Extend self.basePacks with the dependencies of its packages.

        findDeps() is repeated until a pass adds no new package name.
        """
        avail = self.yums.pkgSack.returnNewestByNameArch()
        pkgs = [p for p in avail if p.name in self.basePacks]

        done = False
        while not done:
            done = True
            results = self.yums.findDeps(pkgs)
            for pkg in results.keys():
                for req in results[pkg].keys():
                    for r in results[pkg][req]:
                        if r.name not in self.basePacks:
                            self.basePacks.append(r.name)
                            pkgs.append(r)
                            done = False

    def allgroups(self):
        """Append the packages of every comps group to self.packages."""
        for grp in self.yums.comps.groups:
            self.packages.extend(grp.packages)

    def handerPackages(self, name):
        """Register one requested package name.

        The name is always remembered in self.origPacks; it only enters
        self.basePacks when some comps group lists it.
        """
        if not self.packages:
            self.allgroups()

        if name in self.packages and name not in self.basePacks:
            self.basePacks.append(name)

        if name not in self.origPacks:
            self.origPacks.append(name)

    def handerGroups(self, name):
        """Register the packages of comps group *name*; unknown groups are ignored."""
        if not self.yums.comps.has_group(name):
            return

        valid_groups = self.yums.comps.return_group(name.encode('utf-8'))
        wanted = []
        if self.mandatory:
            wanted.extend(valid_groups.mandatory_packages.keys())
        if self.default:
            wanted.extend(valid_groups.default_packages.keys())
        if self.options:
            # yum's comps Group calls this attribute optional_packages; the
            # former 'options_packages' raised AttributeError as soon as
            # --options was switched on.
            wanted.extend(valid_groups.optional_packages.keys())

        for package in wanted:
            self.handerPackages(package)

    def handerEnviron(self, name):
        """Register all groups of environment *name*; unknown ones are ignored."""
        if not self.yums.comps.has_environment(name):
            return

        valid_environ = self.yums.comps.return_environment(name)
        for grp in valid_environ.groups:
            self.handerGroups(grp)

    def run(self):
        """Resolve all requested names and print the resulting package list."""
        # Apply the configuration file before anything touches self.yums.comps.
        # NOTE(review): yum appears to build its configuration on first use
        # and to ignore a later doConfigSetup(); confirm against the yum
        # version in use.
        if self.config and os.path.exists(self.config):
            self.yums.doConfigSetup(fn=self.config, init_plugins=False)
        self.yums.conf.cache = 0

        if self.comps and os.path.exists(self.comps):
            self.yums.comps.add(self.comps)

        for rpm in self.groups:
            if not rpm:
                # An empty argument ('') names nothing; skip it instead of
                # failing on rpm[0].
                continue
            if rpm.startswith('@'):
                self.handerGroups(rpm[1:])
            elif rpm.startswith('^'):
                self.handerEnviron(rpm[1:])
            else:
                self.handerPackages(rpm)

        self.depends()

        # Requested but unresolved names first, marked with '#'.
        for o in self.origPacks:
            if o not in self.basePacks:
                print('#%s' % o)

        for p in self.basePacks:
            print(p)
if __name__ == "__main__":
    app = Application(sys.argv)
    app.parseArgs()
    # No eager app.allgroups() here: handerPackages() loads the comps package
    # list on first use. Loading it before run() would freeze that list and
    # touch yum's comps data before run() had a chance to apply the --config
    # and --comps files.
    app.run()

BIN
sbin/unsquashfs Executable file

Binary file not shown.