Files
sunhpc-go/data/confs/frontend/config.yaml
2026-02-20 20:24:02 +08:00

512 lines
11 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Data center / cluster base configuration.
# File-level metadata: schema version, date of last edit, and a short
# human-readable description of what this file holds.
metadata:
  version: "1.0"
  last_updated: "2024-01-01"
  description: "数据中心基础设施配置"
# Cluster configuration: name, role type, and operating system of the cluster.
# NOTE(review): indentation is missing in this copy, so the nesting of the keys
# below cannot be confirmed from here (for example whether timezone sits inside
# location, or whether ntp_servers sits inside timezone) — verify against the
# original file before relying on any path such as cluster.location.timezone.
cluster:
name: "sunhpc-cluster"
type: "control"
osname: "Rocky Linux"
osversion: "9.7"
# Where the cluster is located.
location:
country: "China"
city: "Beijing"
# Time zone name and its UTC offset.
timezone:
name: "Asia/Shanghai"
offset: "+08:00"
# Time servers, one host name per entry.
ntp_servers:
- "ntp1.aliyun.com"
- "ntp2.tencent.com"
- "pool.ntp.org"
# Deployment environment, region label, and availability zone.
environment:
type: "production" # production/staging/development
region: "华北"
availability_zone: "AZ-01"
# Network settings: DNS domain, resolver addresses, and the WAN/LAN
# interface lists.
network:
  domain: "sunhpc.local"
  # Resolver addresses.
  dns:
    primary: "8.8.8.8"
    secondary: "114.114.114.114"
  # Interface entries typed "public".
  wan:
    - interface: "eth0"
      address: "202.96.128.86"
      netmask: "255.255.255.0"
      gateway: "202.96.128.1"
      mtu: 1500
      type: "public"
      description: "public network"
  # Interface entries typed "management".
  lan:
    - interface: "eth1"
      address: "192.168.1.100"
      netmask: "255.255.255.0"
      # Empty string: no gateway is set for this interface.
      gateway: ""
      mtu: 1500
      type: "management"
      description: "management network"
# Disks and their partition layout. Each disk entry lists its device path,
# hardware details, and a list of partitions with mount point and file system.
# NOTE(review): every partition below carries the same uuid, and both disks
# carry the same serial. These look like placeholders — replace them with
# real values before anything resolves devices by UUID or serial.
# NOTE(review): each partition has both "fstype" and "filesystem"; confirm
# which one the consumer reads, and consider keeping only one.
disks:
  - device: "/dev/sda"
    model: "PowerVault ME484"
    type: "ssd"
    size: "50TB"
    vendor: "Dell"
    serial: "1234567890"
    status: "online"
    partition:
      - name: "sda1"
        usage: "boot partition"
        mount: "/boot"
        size: "16GB"
        fstype: "ext4"
        filesystem: "ext4"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
      - name: "sda2"
        usage: "root partition"
        mount: "/"
        size: "100GB"
        fstype: "ext4"
        filesystem: "ext4"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
        options: "defaults,noatime"
      - name: "sda3"
        usage: "home partition"
        mount: "/home"
        # NOTE(review): this partition alone is as large as the whole disk
        # (50TB), leaving no room for sda1/sda2/sda4 — confirm the size.
        size: "50TB"
        fstype: "xfs"
        # Must agree with fstype, as it does on every other partition.
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
      - name: "sda4"
        usage: "var partition"
        mount: "/var"
        size: "150GB"
        fstype: "xfs"
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
  - device: "/dev/sdb"
    model: "PowerVault ME484"
    type: "ssd"
    size: "50TB"
    vendor: "Dell"
    serial: "1234567890"
    status: "online"
    partition:
      - name: "sdb1"
        usage: "data partition"
        mount: "/data"
        size: "50TB"
        fstype: "xfs"
        filesystem: "xfs"
        uuid: "12345678-90ab-cdef-1234-567890abcdef"
# Firewall: default chain policies, per-zone interface bindings, and
# explicit rules.
firewall:
  # Default action per chain.
  global_policies:
    - name: "默认策略"
      input: "drop"
      output: "accept"
      forward: "drop"
  zones:
    # Zone open to any source address. Only the WAN interface (eth0) is bound
    # here; eth1 is declared as the management LAN in the network section, so
    # it is bound to the internal zone instead of being exposed to 0.0.0.0/0.
    # NOTE(review): ssh is allowed from 0.0.0.0/0 in this zone, while the
    # "限制SSH访问" rule below only names 192.168.1.0/24 — confirm which of
    # the two is intended.
    - name: "public"
      interfaces: ["eth0"]
      services_allowed: ["ssh", "http", "https"]
      source_ranges: ["0.0.0.0/0"]
    # Zone limited to private source ranges; 192.168.0.0/16 covers the
    # 192.168.1.x management LAN on eth1.
    # NOTE(review): eth2 and eth3 are referenced by zones but no such
    # interfaces appear in the network section — confirm they exist.
    - name: "internal"
      interfaces: ["eth1", "eth2"]
      services_allowed: ["ssh", "mysql", "redis", "mongodb", "nfs", "samba"]
      source_ranges: ["192.168.0.0/16", "10.0.0.0/8"]
    - name: "storage"
      interfaces: ["eth3"]
      services_allowed: ["iscsi", "nfs", "smb"]
      source_ranges: ["172.16.0.0/12"]
  rules:
    # Accept ICMP from and to any address.
    - name: "允许Ping"
      protocol: "icmp"
      action: "accept"
      source: "any"
      destination: "any"
    # Accept TCP port 22 from the 192.168.1.0/24 subnet.
    - name: "限制SSH访问"
      protocol: "tcp"
      port: 22
      action: "accept"
      source: "192.168.1.0/24"
      destination: "any"
# Global service configuration: services grouped by purpose. Every entry
# gives the service name, its port and transport protocol, whether it is
# enabled, and a description.
services:
  # Base services.
  common_services:
    - name: "sshd"
      port: 22
      protocol: "tcp"
      enabled: true
      description: "SSH远程登录服务"
    # NOTE(review): the cluster OS is declared as Rocky Linux 9.7, where
    # chronyd is the stock time daemon — confirm an "ntpd" service exists
    # on the target hosts.
    - name: "ntpd"
      port: 123
      protocol: "udp"
      enabled: true
      description: "时间同步服务"
    - name: "rsyslog"
      port: 514
      protocol: "udp"
      enabled: true
      description: "日志收集服务"
  # Monitoring stack.
  monitoring_services:
    - name: "prometheus"
      port: 9090
      protocol: "tcp"
      enabled: true
      description: "监控数据采集"
    - name: "grafana"
      port: 3000
      protocol: "tcp"
      enabled: true
      description: "监控数据可视化"
    - name: "node_exporter"
      port: 9100
      protocol: "tcp"
      enabled: true
      description: "节点指标采集"
  # Databases; these entries additionally carry a version string.
  database_services:
    - name: "mysql"
      port: 3306
      protocol: "tcp"
      enabled: true
      version: "8.0"
      description: "关系型数据库"
    - name: "redis"
      port: 6379
      protocol: "tcp"
      enabled: true
      version: "6.2"
      description: "缓存数据库"
    - name: "mongodb"
      port: 27017
      protocol: "tcp"
      enabled: true
      version: "5.0"
      description: "文档数据库"
# Node list
# NOTE(review): a second `nodes:` key appears later in this file (the node
# base data list). If both sit at the same level, that is a duplicate mapping
# key and most parsers silently keep only the last one — confirm which one the
# consumer reads and rename the other.
nodes:
# Compute nodes
compute_nodes:
- name: "compute-01"
# NOTE(review): node hostnames use example.local while network.domain is
# "sunhpc.local" — confirm which domain is intended.
hostname: "compute01.example.local"
role: "compute"
status: "active"
# Hardware and OS summary for this node.
basic_info:
timezone: "Asia/Shanghai"
cpu: "Intel Xeon Gold 6248R 3.0GHz (48核)"
memory: "512GB DDR4"
os: "CentOS 7.9"
kernel: "3.10.0-1160"
virtualization: "KVM"
# Network interfaces of this node.
network:
interfaces:
- name: "eth0"
ip_address: "192.168.1.11"
# NOTE(review): "xx" octets are placeholders, not a valid MAC address.
mac_address: "00:0c:29:xx:xx:01"
network_type: "management"
speed: "1Gbps"
# Disks with their mount point and file system.
disk:
- device: "/dev/sda"
size: "480GB"
type: "SSD"
mount_point: "/"
filesystem: "xfs"
usage: "系统盘"
- device: "/dev/sdb"
size: "3.6TB"
type: "NVMe"
mount_point: "/data/local"
filesystem: "xfs"
usage: "本地数据盘"
- device: "/dev/sdc"
size: "10TB"
type: "HDD"
mount_point: "/data/shared"
filesystem: "xfs"
usage: "共享存储挂载"
# Service names split into an enabled list and a disabled list.
services:
enabled:
- "sshd"
- "ntpd"
- "docker"
- "kubelet"
- "node_exporter"
disabled:
- "firewalld"
- "postfix"
# Per-node firewall rules.
# NOTE(review): firewall is enabled here while "firewalld" is in the disabled
# service list above — confirm which mechanism applies these rules.
firewall:
enabled: true
rules:
- port: 22
protocol: "tcp"
source: "192.168.1.0/24"
action: "accept"
- port: 10250
protocol: "tcp"
source: "10.10.0.0/16"
action: "accept"
# Vendor and asset details.
hardware:
manufacturer: "Dell"
model: "PowerEdge R740xd"
serial_number: "ABC123XYZ"
warranty_expiry: "2025-12-31"
# NOTE(review): with indentation lost, it is unclear whether location and
# power_consumption belong to hardware or to the node itself — verify.
location:
rack: "RACK-01"
position: "01U"
power_consumption: "500W"
# Second compute node; only the identity fields are filled in.
- name: "compute-02"
hostname: "compute02.example.local"
role: "compute"
status: "active"
# ... similar configuration, with IP addresses incremented
# Storage nodes
storage_nodes:
- name: "storage-01"
hostname: "storage01.example.local"
role: "storage"
status: "active"
# Hardware, OS, and storage software summary for this node.
basic_info:
timezone: "Asia/Shanghai"
cpu: "Intel Xeon Silver 4210 2.2GHz (20核)"
memory: "128GB DDR4"
os: "CentOS 7.9"
storage_software: "Ceph"
# Four interfaces, one per network type.
network:
interfaces:
- name: "eth0"
ip_address: "192.168.1.21"
network_type: "management"
speed: "1Gbps"
- name: "eth1"
ip_address: "172.16.1.21"
network_type: "storage_frontend"
speed: "10Gbps"
- name: "eth2"
ip_address: "172.16.2.21"
network_type: "storage_backend"
speed: "25Gbps"
- name: "eth3"
ip_address: "172.16.3.21"
network_type: "cluster"
speed: "10Gbps"
# System disk followed by the disks mounted under /var/lib/ceph/osd.
disk:
- device: "/dev/sda"
size: "240GB"
type: "SSD"
mount_point: "/"
filesystem: "xfs"
usage: "系统盘"
- device: "/dev/sdb"
size: "480GB"
type: "SSD"
mount_point: "/var/lib/ceph/osd/ceph-0"
filesystem: "xfs"
usage: "OSD (日志/WAL)"
- device: "/dev/sdc"
size: "8TB"
type: "HDD"
mount_point: "/var/lib/ceph/osd/ceph-1"
filesystem: "xfs"
usage: "OSD (数据)"
- device: "/dev/sdd"
size: "8TB"
type: "HDD"
mount_point: "/var/lib/ceph/osd/ceph-2"
filesystem: "xfs"
usage: "OSD (数据)"
# Enabled service names.
services:
enabled:
- "sshd"
- "ntpd"
- "ceph-mon"
- "ceph-mgr"
- "ceph-osd"
# Ceph cluster identity and monitor addresses.
# NOTE(review): with indentation lost, it is unclear whether ceph_config is a
# child of services or of the node — verify against the original file.
ceph_config:
cluster_name: "ceph-prod"
fsid: "12345678-1234-1234-1234-123456789012"
# NOTE(review): only 192.168.1.21 matches a node defined in this section;
# .22 and .23 presumably belong to storage nodes not spelled out here — confirm.
mon_hosts:
- "192.168.1.21"
- "192.168.1.22"
- "192.168.1.23"
# Second storage node; only the name is filled in.
- name: "storage-02"
# ... similar configuration
# Other nodes
other_nodes:
# Management node
- name: "management-01"
hostname: "mgmt01.example.local"
role: "management"
status: "active"
basic_info:
timezone: "Asia/Shanghai"
cpu: "Intel Xeon Bronze 3204 1.9GHz (6核)"
memory: "64GB DDR4"
os: "CentOS 7.9"
network:
interfaces:
- name: "eth0"
ip_address: "192.168.1.31"
network_type: "management"
speed: "1Gbps"
# Enabled service names.
services:
enabled:
- "sshd"
- "ntpd"
- "ansible"
- "salt-master"
- "jumpserver"
# Gateway node
- name: "gateway-01"
hostname: "gw01.example.local"
role: "gateway"
status: "active"
basic_info:
timezone: "Asia/Shanghai"
cpu: "Intel Xeon E-2234 3.6GHz (4核)"
memory: "32GB DDR4"
os: "pfSense 2.5.2"
# Three interfaces: external, internal, and DMZ.
network:
interfaces:
- name: "wan"
# NOTE(review): this is the same address as the eth0 entry under network.wan
# earlier in this file — two hosts on one public IP would conflict; confirm.
ip_address: "202.96.128.86"
network_type: "external"
speed: "1Gbps"
- name: "lan"
ip_address: "192.168.1.254"
network_type: "internal"
speed: "1Gbps"
- name: "dmz"
ip_address: "192.168.100.254"
network_type: "dmz"
speed: "1Gbps"
# Enabled service names.
services:
enabled:
- "ssh"
- "dnsmasq"
- "nginx"
- "haproxy"
- "keepalived"
# Monitoring node
# This entry has no network section, unlike the other nodes.
- name: "monitoring-01"
hostname: "mon01.example.local"
role: "monitoring"
status: "active"
basic_info:
timezone: "Asia/Shanghai"
cpu: "Intel Xeon Silver 4208 2.1GHz (8核)"
memory: "64GB DDR4"
os: "Ubuntu 20.04 LTS"
# Enabled service names.
services:
enabled:
- "prometheus"
- "grafana"
- "alertmanager"
- "elasticsearch"
- "kibana"
- "filebeat"
# Node base data: one record per node.
# NOTE(review): an earlier `nodes:` key (the compute/storage/other node
# inventory) also exists in this file. If both sit at the same level, that is
# a duplicate mapping key and most parsers keep only the last one — confirm
# which one the consumer reads and rename the other.
nodes:
  - name: frontend
    cpus: 4
    memory: 8192
    disk: 100
    # rack and rank are explicitly null for this node.
    rack: null
    rank: null
    arch: x86_64
    os: linux
    runaction: os
    installaction: os
    status: active
    description: "管理节点"
# Attribute base data: named attribute values attached to nodes.
attributes:
  # Country/region
  - node_name: frontend  # linked to a node record by its name
    attr: country
    value: CN
    shadow: ""
# Software base data: one record per package, with its version, vendor,
# install method, an is_installed flag (0 on every entry), and a description.
software:
  - name: openssl
    version: "1.1.1k"
    vendor: OpenSSL
    install_method: source
    is_installed: 0
    description: "加密库"
  - name: slurm
    version: "23.02"
    vendor: SchedMD
    install_method: source
    is_installed: 0
    description: "作业调度系统"
  - name: openmpi
    version: "4.1.5"
    vendor: OpenMPI
    install_method: source
    is_installed: 0
    description: "MPI 并行计算库"