---
# Data center / cluster base configuration.
#
# NOTE(review): this file was recovered from a copy whose line breaks and
# indentation had been collapsed. The nesting below is reconstructed from key
# order and section comments; confirm it against the consuming tool's schema
# (in particular that location/timezone/environment/network/disks/firewall
# belong under `cluster`).
metadata:
  version: "1.0"
  last_updated: "2024-01-01"
  description: "数据中心基础设施配置"

# Cluster configuration
cluster:
  name: "sunhpc-cluster"
  type: "control"
  osname: "Rocky Linux"
  osversion: "9.7"

  location:
    country: "China"
    city: "Beijing"

  timezone:
    name: "Asia/Shanghai"
    offset: "+08:00"
    ntp_servers:
      - "ntp1.aliyun.com"
      - "ntp2.tencent.com"
      - "pool.ntp.org"

  environment:
    type: "production"  # production/staging/development
    region: "华北"
    availability_zone: "AZ-01"

  network:
    domain: "sunhpc.local"
    dns:
      primary: "8.8.8.8"
      secondary: "114.114.114.114"
    wan:
      - interface: "eth0"
        address: "202.96.128.86"
        netmask: "255.255.255.0"
        gateway: "202.96.128.1"
        mtu: 1500
        type: "public"
        description: "public network"
    lan:
      - interface: "eth1"
        address: "192.168.1.100"
        netmask: "255.255.255.0"
        gateway: ""
        mtu: 1500
        type: "management"
        description: "management network"

  # NOTE(review): both disks share the same serial and every partition shares
  # the same uuid; these look like placeholders. Replace them with real values
  # before relying on them for identification or mounting.
  disks:
    - device: "/dev/sda"
      model: "PowerVault ME484"
      type: "ssd"
      size: "50TB"
      vendor: "Dell"
      serial: "1234567890"
      status: "online"
      partition:
        - name: "sda1"
          usage: "boot partition"
          mount: "/boot"
          size: "16GB"
          fstype: "ext4"
          filesystem: "ext4"
          uuid: "12345678-90ab-cdef-1234-567890abcdef"
        - name: "sda2"
          usage: "root partition"
          mount: "/"
          size: "100GB"
          fstype: "ext4"
          filesystem: "ext4"
          uuid: "12345678-90ab-cdef-1234-567890abcdef"
          options: "defaults,noatime"
        # NOTE(review): size equals the whole 50TB device even though other
        # partitions live on it; confirm the intended size.
        - name: "sda3"
          usage: "home partition"
          mount: "/home"
          size: "50TB"
          fstype: "xfs"
          # Was "ext4", contradicting fstype above; aligned to fstype.
          # NOTE(review): confirm xfs is the intended filesystem.
          filesystem: "xfs"
          uuid: "12345678-90ab-cdef-1234-567890abcdef"
        - name: "sda4"
          usage: "var partition"
          mount: "/var"
          size: "150GB"
          fstype: "xfs"
          filesystem: "xfs"
          uuid: "12345678-90ab-cdef-1234-567890abcdef"
    - device: "/dev/sdb"
      model: "PowerVault ME484"
      type: "ssd"
      size: "50TB"
      vendor: "Dell"
      serial: "1234567890"
      status: "online"
      partition:
        - name: "sdb1"
          usage: "data partition"
          mount: "/data"
          size: "50TB"
          fstype: "xfs"
          filesystem: "xfs"
          uuid: "12345678-90ab-cdef-1234-567890abcdef"

  firewall:
    global_policies:
      - name: "默认策略"
        input: "drop"
        output: "accept"
        forward: "drop"
    zones:
      # NOTE(review): eth1 is declared above as the management LAN interface
      # but is listed here in the public zone (source 0.0.0.0/0); confirm this
      # is intended.
      - name: "public"
        interfaces: ["eth0", "eth1"]
        services_allowed: ["ssh", "http", "https"]
        source_ranges: ["0.0.0.0/0"]
      - name: "internal"
        interfaces: ["eth2"]
        services_allowed: ["ssh", "mysql", "redis", "mongodb", "nfs", "samba"]
        source_ranges: ["192.168.0.0/16", "10.0.0.0/8"]
      - name: "storage"
        interfaces: ["eth3"]
        services_allowed: ["iscsi", "nfs", "smb"]
        source_ranges: ["172.16.0.0/12"]
    rules:
      - name: "允许Ping"
        protocol: "icmp"
        action: "accept"
        source: "any"
        destination: "any"
      - name: "限制SSH访问"
        protocol: "tcp"
        port: 22
        action: "accept"
        source: "192.168.1.0/24"
        destination: "any"

# Global service configuration
services:
  common_services:
    - name: "sshd"
      port: 22
      protocol: "tcp"
      enabled: true
      description: "SSH远程登录服务"
    - name: "ntpd"
      port: 123
      protocol: "udp"
      enabled: true
      description: "时间同步服务"
    - name: "rsyslog"
      port: 514
      protocol: "udp"
      enabled: true
      description: "日志收集服务"
  monitoring_services:
    - name: "prometheus"
      port: 9090
      protocol: "tcp"
      enabled: true
      description: "监控数据采集"
    - name: "grafana"
      port: 3000
      protocol: "tcp"
      enabled: true
      description: "监控数据可视化"
    - name: "node_exporter"
      port: 9100
      protocol: "tcp"
      enabled: true
      description: "节点指标采集"
  database_services:
    - name: "mysql"
      port: 3306
      protocol: "tcp"
      enabled: true
      version: "8.0"
      description: "关系型数据库"
    - name: "redis"
      port: 6379
      protocol: "tcp"
      enabled: true
      version: "6.2"
      description: "缓存数据库"
    - name: "mongodb"
      port: 27017
      protocol: "tcp"
      enabled: true
      version: "5.0"
      description: "文档数据库"

# Node list (detailed per-node inventory).
#
# Renamed from `nodes`: the file defined `nodes` twice at the top level (this
# mapping and the node base-data list further down). Duplicate keys are
# invalid YAML, and last-wins loaders silently discarded this mapping in
# favour of the later list. The later list keeps the `nodes` name so its
# effective value is unchanged.
# NOTE(review): confirm no consumer expects this section under `nodes`.
node_inventory:
  # Compute nodes
  compute_nodes:
    - name: "compute-01"
      hostname: "compute01.example.local"
      role: "compute"
      status: "active"
      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Gold 6248R 3.0GHz (48核)"
        memory: "512GB DDR4"
        os: "CentOS 7.9"
        kernel: "3.10.0-1160"
        virtualization: "KVM"
      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.11"
            mac_address: "00:0c:29:xx:xx:01"
            network_type: "management"
            speed: "1Gbps"
      disk:
        - device: "/dev/sda"
          size: "480GB"
          type: "SSD"
          mount_point: "/"
          filesystem: "xfs"
          usage: "系统盘"
        - device: "/dev/sdb"
          size: "3.6TB"
          type: "NVMe"
          mount_point: "/data/local"
          filesystem: "xfs"
          usage: "本地数据盘"
        - device: "/dev/sdc"
          size: "10TB"
          type: "HDD"
          mount_point: "/data/shared"
          filesystem: "xfs"
          usage: "共享存储挂载"
      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "docker"
          - "kubelet"
          - "node_exporter"
        disabled:
          - "firewalld"
          - "postfix"
      firewall:
        enabled: true
        rules:
          - port: 22
            protocol: "tcp"
            source: "192.168.1.0/24"
            action: "accept"
          - port: 10250
            protocol: "tcp"
            source: "10.10.0.0/16"
            action: "accept"
      hardware:
        manufacturer: "Dell"
        model: "PowerEdge R740xd"
        serial_number: "ABC123XYZ"
        warranty_expiry: "2025-12-31"
        location:
          rack: "RACK-01"
          position: "01U"
        power_consumption: "500W"
    - name: "compute-02"
      hostname: "compute02.example.local"
      role: "compute"
      status: "active"
      # ... similar configuration, with IP addresses incremented

  # Storage nodes
  storage_nodes:
    - name: "storage-01"
      hostname: "storage01.example.local"
      role: "storage"
      status: "active"
      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Silver 4210 2.2GHz (20核)"
        memory: "128GB DDR4"
        os: "CentOS 7.9"
        storage_software: "Ceph"
      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.21"
            network_type: "management"
            speed: "1Gbps"
          - name: "eth1"
            ip_address: "172.16.1.21"
            network_type: "storage_frontend"
            speed: "10Gbps"
          - name: "eth2"
            ip_address: "172.16.2.21"
            network_type: "storage_backend"
            speed: "25Gbps"
          - name: "eth3"
            ip_address: "172.16.3.21"
            network_type: "cluster"
            speed: "10Gbps"
      disk:
        - device: "/dev/sda"
          size: "240GB"
          type: "SSD"
          mount_point: "/"
          filesystem: "xfs"
          usage: "系统盘"
        - device: "/dev/sdb"
          size: "480GB"
          type: "SSD"
          mount_point: "/var/lib/ceph/osd/ceph-0"
          filesystem: "xfs"
          usage: "OSD (日志/WAL)"
        - device: "/dev/sdc"
          size: "8TB"
          type: "HDD"
          mount_point: "/var/lib/ceph/osd/ceph-1"
          filesystem: "xfs"
          usage: "OSD (数据)"
        - device: "/dev/sdd"
          size: "8TB"
          type: "HDD"
          mount_point: "/var/lib/ceph/osd/ceph-2"
          filesystem: "xfs"
          usage: "OSD (数据)"
      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "ceph-mon"
          - "ceph-mgr"
          - "ceph-osd"
      ceph_config:
        cluster_name: "ceph-prod"
        fsid: "12345678-1234-1234-1234-123456789012"
        mon_hosts:
          - "192.168.1.21"
          - "192.168.1.22"
          - "192.168.1.23"
    - name: "storage-02"
      # ... similar configuration

  # Other nodes
  other_nodes:
    # Management node
    - name: "management-01"
      hostname: "mgmt01.example.local"
      role: "management"
      status: "active"
      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Bronze 3204 1.9GHz (6核)"
        memory: "64GB DDR4"
        os: "CentOS 7.9"
      network:
        interfaces:
          - name: "eth0"
            ip_address: "192.168.1.31"
            network_type: "management"
            speed: "1Gbps"
      services:
        enabled:
          - "sshd"
          - "ntpd"
          - "ansible"
          - "salt-master"
          - "jumpserver"

    # Gateway node
    - name: "gateway-01"
      hostname: "gw01.example.local"
      role: "gateway"
      status: "active"
      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon E-2234 3.6GHz (4核)"
        memory: "32GB DDR4"
        os: "pfSense 2.5.2"
      network:
        interfaces:
          - name: "wan"
            ip_address: "202.96.128.86"
            network_type: "external"
            speed: "1Gbps"
          - name: "lan"
            ip_address: "192.168.1.254"
            network_type: "internal"
            speed: "1Gbps"
          - name: "dmz"
            ip_address: "192.168.100.254"
            network_type: "dmz"
            speed: "1Gbps"
      services:
        enabled:
          - "ssh"
          - "dnsmasq"
          - "nginx"
          - "haproxy"
          - "keepalived"

    # Monitoring node
    - name: "monitoring-01"
      hostname: "mon01.example.local"
      role: "monitoring"
      status: "active"
      basic_info:
        timezone: "Asia/Shanghai"
        cpu: "Intel Xeon Silver 4208 2.1GHz (8核)"
        memory: "64GB DDR4"
        os: "Ubuntu 20.04 LTS"
      services:
        enabled:
          - "prometheus"
          - "grafana"
          - "alertmanager"
          - "elasticsearch"
          - "kibana"
          - "filebeat"

# Node base data
nodes:
  - name: frontend
    cpus: 4
    memory: 8192
    disk: 100
    rack: null
    rank: null
    arch: x86_64
    os: linux
    runaction: os
    installaction: os
    status: active
    description: "管理节点"

# Attribute base data
attributes:
  # Country / region
  - node_name: frontend  # linked to a node by its name
    attr: country
    value: CN
    shadow: ""

# Software base data
software:
  - name: openssl
    version: "1.1.1k"
    vendor: OpenSSL
    install_method: source
    is_installed: 0
    description: "加密库"
  - name: slurm
    version: "23.02"
    vendor: SchedMD
    install_method: source
    is_installed: 0
    description: "作业调度系统"
  - name: openmpi
    version: "4.1.5"
    vendor: OpenMPI
    install_method: source
    is_installed: 0
    description: "MPI 并行计算库"