---
# Data center / cluster base configuration.
# File-level metadata describing this configuration document itself.
metadata:
  version: "1.0"  # quoted so it stays a string rather than the float 1.0
  last_updated: "2024-01-01"  # quoted so it is not parsed as a date
  description: "数据中心基础设施配置"

# Cluster configuration.
# NOTE(review): indentation was lost in the copy this block was restored from;
# the nesting below (in particular `ntp_servers` under `timezone`) is inferred
# from key grouping — confirm against the original file.
cluster:
  name: "sunhpc-cluster"
  type: "control"
  osname: "Rocky Linux"
  osversion: "9.7"  # quoted so it stays a string, not a float
  location:
    country: "China"
    city: "Beijing"
  timezone:
    name: "Asia/Shanghai"
    offset: "+08:00"
    ntp_servers:
      - "ntp1.aliyun.com"
      - "ntp2.tencent.com"
      - "pool.ntp.org"
  environment:
    type: "production"  # production/staging/development
    region: "华北"
    availability_zone: "AZ-01"
# Network settings: DNS domain, resolvers, and the WAN/LAN interface lists.
# NOTE(review): indentation was lost in the copy this block was restored from;
# it is restored as a top-level key — confirm it was not nested under
# `cluster` in the original file.
network:
  domain: "sunhpc.local"
  dns:
    primary: "8.8.8.8"
    secondary: "114.114.114.114"
  # Public-facing interfaces.
  wan:
    - interface: "eth0"
      address: "202.96.128.86"
      netmask: "255.255.255.0"
      gateway: "202.96.128.1"
      mtu: 1500
      type: "public"
      description: "public network"
  # Internal interfaces.
  lan:
    - interface: "eth1"
      address: "192.168.1.100"
      netmask: "255.255.255.0"
      gateway: ""  # explicitly empty: no gateway is set on this interface
      mtu: 1500
      type: "management"
      description: "management network"
# Disk inventory: one entry per block device, each with its partition table.
# NOTE(review): indentation was lost in the copy this block was restored from;
# it is restored as a top-level key — confirm it was not nested under
# `cluster` in the original file.
# NOTE(review): every partition carries the same `uuid` and both devices carry
# the same `serial`; these look like placeholders — replace with real values.
disks:
  - device: "/dev/sda"
    model: "PowerVault ME484"
    type: "ssd"
    # NOTE(review): sda3 alone is declared as 50TB, so the partitions add up
    # to more than this device size — confirm the intended sizes.
    size: "50TB"
    vendor: "Dell"
    serial: "1234567890"
    status: "online"

    partition:
      - name: "sda1"
        usage: "boot partition"
        mount: "/boot"
        size: "16GB"
        fstype: "ext4"
        filesystem: "ext4"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"

      - name: "sda2"
        usage: "root partition"
        mount: "/"
        size: "100GB"
        fstype: "ext4"
        filesystem: "ext4"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
        options: "defaults,noatime"

      - name: "sda3"
        usage: "home partition"
        mount: "/home"
        size: "50TB"
        fstype: "xfs"
        # Was "ext4", contradicting `fstype` above; every other partition in
        # this file has `fstype` and `filesystem` equal, so it is aligned here.
        # NOTE(review): confirm xfs (not ext4) is the intended filesystem.
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"

      - name: "sda4"
        usage: "var partition"
        mount: "/var"
        size: "150GB"
        fstype: "xfs"
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"

  - device: "/dev/sdb"
    model: "PowerVault ME484"
    type: "ssd"
    size: "50TB"
    vendor: "Dell"
    serial: "1234567890"
    status: "online"

    partition:
      - name: "sdb1"
        usage: "data partition"
        mount: "/data"
        size: "50TB"
        fstype: "xfs"
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"

# Firewall configuration: default chain policies, zones, and explicit rules.
# NOTE(review): indentation was lost in the copy this block was restored from;
# it is restored as a top-level key — confirm it was not nested under
# `cluster` in the original file.
firewall:
  # Default verdict per chain.
  global_policies:
    - name: "默认策略"
      input: "drop"
      output: "accept"
      forward: "drop"

  zones:
    # NOTE(review): `eth1` is declared under `network.lan` with type
    # "management", yet it is listed in this zone, which allows ssh/http/https
    # from 0.0.0.0/0 — confirm the management interface should be public.
    - name: "public"
      interfaces: ["eth0", "eth1"]
      services_allowed: ["ssh", "http", "https"]
      source_ranges: ["0.0.0.0/0"]

    # NOTE(review): `eth2` and `eth3` (used by the two zones below) are not
    # declared in the `network` section of this file — confirm they exist.
    - name: "internal"
      interfaces: ["eth2"]
      services_allowed: ["ssh", "mysql", "redis", "mongodb", "nfs", "samba"]
      source_ranges: ["192.168.0.0/16", "10.0.0.0/8"]

    - name: "storage"
      interfaces: ["eth3"]
      services_allowed: ["iscsi", "nfs", "smb"]
      source_ranges: ["172.16.0.0/12"]

  rules:
    # ICMP accepted from any source to any destination.
    - name: "允许Ping"
      protocol: "icmp"
      action: "accept"
      source: "any"
      destination: "any"

    # TCP/22 accepted only from the 192.168.1.0/24 subnet.
    - name: "限制SSH访问"
      protocol: "tcp"
      port: 22
      action: "accept"
      source: "192.168.1.0/24"
      destination: "any"

# Global service configuration, grouped by category. Each entry gives the
# service name, its port and transport protocol, and whether it is enabled.
services:
  common_services:
    - name: "sshd"
      port: 22
      protocol: "tcp"
      enabled: true
      description: "SSH远程登录服务"

    - name: "ntpd"
      port: 123
      protocol: "udp"
      enabled: true
      description: "时间同步服务"

    - name: "rsyslog"
      port: 514
      protocol: "udp"
      enabled: true
      description: "日志收集服务"

  monitoring_services:
    - name: "prometheus"
      port: 9090
      protocol: "tcp"
      enabled: true
      description: "监控数据采集"

    - name: "grafana"
      port: 3000
      protocol: "tcp"
      enabled: true
      description: "监控数据可视化"

    - name: "node_exporter"
      port: 9100
      protocol: "tcp"
      enabled: true
      description: "节点指标采集"

  # Database entries additionally carry a quoted `version` string.
  database_services:
    - name: "mysql"
      port: 3306
      protocol: "tcp"
      enabled: true
      version: "8.0"
      description: "关系型数据库"

    - name: "redis"
      port: 6379
      protocol: "tcp"
      enabled: true
      version: "6.2"
      description: "缓存数据库"

    - name: "mongodb"
      port: 27017
      protocol: "tcp"
      enabled: true
      version: "5.0"
      description: "文档数据库"

# Node inventory, grouped into compute, storage, and other nodes.
# NOTE(review): a second top-level `nodes:` key (a list containing `frontend`)
# appears later in this file. Duplicate mapping keys are invalid in YAML 1.2
# and most loaders silently keep only the last one, which would discard this
# whole inventory — one of the two keys likely needs renaming; confirm with
# the consumer of this file.
# NOTE(review): indentation was lost in the copy this block was restored from;
# the nesting below is inferred from key grouping — confirm against the
# original file.
nodes:
  # Compute nodes
  compute_nodes:
    - name: "compute-01"
      hostname: "compute01.example.local"
      role: "compute"
      status: "active"

      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Gold 6248R 3.0GHz (48核)"
        memory: "512GB DDR4"
        os: "CentOS 7.9"
        kernel: "3.10.0-1160"
        virtualization: "KVM"

      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.11"
            # The `xx` octets are placeholders, not a usable MAC address.
            mac_address: "00:0c:29:xx:xx:01"
            network_type: "management"
            speed: "1Gbps"

      disk:
        - device: "/dev/sda"
          size: "480GB"
          type: "SSD"
          mount_point: "/"
          filesystem: "xfs"
          usage: "系统盘"

        - device: "/dev/sdb"
          size: "3.6TB"
          type: "NVMe"
          mount_point: "/data/local"
          filesystem: "xfs"
          usage: "本地数据盘"

        - device: "/dev/sdc"
          size: "10TB"
          type: "HDD"
          mount_point: "/data/shared"
          filesystem: "xfs"
          usage: "共享存储挂载"

      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "docker"
          - "kubelet"
          - "node_exporter"
        disabled:
          - "firewalld"
          - "postfix"

      firewall:
        enabled: true
        rules:
          - port: 22
            protocol: "tcp"
            source: "192.168.1.0/24"
            action: "accept"
          - port: 10250
            protocol: "tcp"
            source: "10.10.0.0/16"
            action: "accept"

      hardware:
        manufacturer: "Dell"
        model: "PowerEdge R740xd"
        serial_number: "ABC123XYZ"
        warranty_expiry: "2025-12-31"  # quoted so it is not parsed as a date

      location:
        rack: "RACK-01"
        position: "01U"
        power_consumption: "500W"

    - name: "compute-02"
      hostname: "compute02.example.local"
      role: "compute"
      status: "active"
      # ... similar configuration, with the IP address incremented

  # Storage nodes
  storage_nodes:
    - name: "storage-01"
      hostname: "storage01.example.local"
      role: "storage"
      status: "active"

      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Silver 4210 2.2GHz (20核)"
        memory: "128GB DDR4"
        os: "CentOS 7.9"
        storage_software: "Ceph"

      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.21"
            network_type: "management"
            speed: "1Gbps"

          - name: "eth1"
            ip_address: "172.16.1.21"
            network_type: "storage_frontend"
            speed: "10Gbps"

          - name: "eth2"
            ip_address: "172.16.2.21"
            network_type: "storage_backend"
            speed: "25Gbps"

          - name: "eth3"
            ip_address: "172.16.3.21"
            network_type: "cluster"
            speed: "10Gbps"

      disk:
        - device: "/dev/sda"
          size: "240GB"
          type: "SSD"
          mount_point: "/"
          filesystem: "xfs"
          usage: "系统盘"

        - device: "/dev/sdb"
          size: "480GB"
          type: "SSD"
          mount_point: "/var/lib/ceph/osd/ceph-0"
          filesystem: "xfs"
          usage: "OSD (日志/WAL)"

        - device: "/dev/sdc"
          size: "8TB"
          type: "HDD"
          mount_point: "/var/lib/ceph/osd/ceph-1"
          filesystem: "xfs"
          usage: "OSD (数据)"

        - device: "/dev/sdd"
          size: "8TB"
          type: "HDD"
          mount_point: "/var/lib/ceph/osd/ceph-2"
          filesystem: "xfs"
          usage: "OSD (数据)"

      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "ceph-mon"
          - "ceph-mgr"
          - "ceph-osd"

      ceph_config:
        cluster_name: "ceph-prod"
        fsid: "12345678-1234-1234-1234-123456789012"
        mon_hosts:
          - "192.168.1.21"
          - "192.168.1.22"
          - "192.168.1.23"

    - name: "storage-02"
      # ... similar configuration

  # Other nodes
  other_nodes:
    # Management node
    - name: "management-01"
      hostname: "mgmt01.example.local"
      role: "management"
      status: "active"

      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Bronze 3204 1.9GHz (6核)"
        memory: "64GB DDR4"
        os: "CentOS 7.9"

      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.31"
            network_type: "management"
            speed: "1Gbps"

      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "ansible"
          - "salt-master"
          - "jumpserver"

    # Gateway node
    - name: "gateway-01"
      hostname: "gw01.example.local"
      role: "gateway"
      status: "active"

      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon E-2234 3.6GHz (4核)"
        memory: "32GB DDR4"
        os: "pfSense 2.5.2"

      network:
        interfaces:
          - name: "wan"
            ip_address: "202.96.128.86"
            network_type: "external"
            speed: "1Gbps"

          - name: "lan"
            ip_address: "192.168.1.254"
            network_type: "internal"
            speed: "1Gbps"

          - name: "dmz"
            ip_address: "192.168.100.254"
            network_type: "dmz"
            speed: "1Gbps"

      services:
        enabled:
          - "ssh"
          - "dnsmasq"
          - "nginx"
          - "haproxy"
          - "keepalived"

    # Monitoring node
    - name: "monitoring-01"
      hostname: "mon01.example.local"
      role: "monitoring"
      status: "active"

      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Silver 4208 2.1GHz (8核)"
        memory: "64GB DDR4"
        os: "Ubuntu 20.04 LTS"

      services:
        enabled:
          - "prometheus"
          - "grafana"
          - "alertmanager"
          - "elasticsearch"
          - "kibana"
          - "filebeat"

# Node base (seed) data: a flat list of node records.
# NOTE(review): an earlier top-level `nodes:` key (a mapping with
# `compute_nodes`, `storage_nodes`, `other_nodes`) already exists in this
# file. Duplicate mapping keys are invalid in YAML 1.2 and most loaders
# silently keep only the last one — i.e. this list would replace that
# inventory. One of the two keys likely needs renaming; confirm with the
# consumer of this file.
nodes:
  - name: frontend
    cpus: 4
    memory: 8192  # NOTE(review): unit not stated here — presumably MB; confirm
    disk: 100  # NOTE(review): unit not stated here — presumably GB; confirm
    rack: null
    rank: null
    arch: x86_64
    os: linux
    runaction: os
    installaction: os
    status: active
    description: "管理节点"

# Attribute base (seed) data: key/value attributes attached to nodes.
attributes:
  # Country / region
  - node_name: frontend  # linked to a node record by its name
    attr: country
    value: CN
    shadow: ""
# Software base (seed) data: one record per software package.
# `version` values are quoted so they stay strings (e.g. "23.02" would
# otherwise be read as the float 23.02).
software:
  - name: openssl
    version: "1.1.1k"
    vendor: OpenSSL
    install_method: source
    is_installed: 0
    description: "加密库"

  - name: slurm
    version: "23.02"
    vendor: SchedMD
    install_method: source
    is_installed: 0
    description: "作业调度系统"

  - name: openmpi
    version: "4.1.5"
    vendor: OpenMPI
    install_method: source
    is_installed: 0
    description: "MPI 并行计算库"