Common Ceph Cluster Troubleshooting Commands (Part 2)

Force-delete a bucket (may leave residual data behind)

radosgw-admin bucket rm --bucket=bucket1 --purge-objects
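
After a forced delete, leftover object data is normally reclaimed by the RGW garbage collector. As a quick sanity check, the GC queue can be inspected afterwards:

# list all GC entries, including ones not yet due for processing
radosgw-admin gc list --include-all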

Temporarily modify a daemon parameter (reverts after restart)

# any config option can follow; osd can be replaced with mon or mgr
ceph tell osd.* injectargs '--osd_max_backfills=100'
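
injectargs only affects the running daemons. On Mimic and later, the centralized config database can be used to make the change persist across restarts, for example:

# persist the setting in the cluster configuration database
ceph config set osd osd_max_backfills 100
# verify the stored value
ceph config get osd osd_max_backfills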

View detailed pool information

ceph osd pool ls detail
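
To drill into a single pool, the following standard commands can be used (pool name is a placeholder):

# dump all tunables of one pool
ceph osd pool get <pool-name> all
# per-pool capacity and usage
ceph df detail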

Move PGs off an OSD (upmap)

# remap: move a pg's replica from one osd to another
ceph osd pg-upmap-items <pgid> <from-osd-id> <to-osd-id>
# revert: remove the upmap entry for a pg
ceph osd rm-pg-upmap-items <pgid>
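
A concrete usage sketch with hypothetical PG and OSD IDs (upmap requires clients to be at least Luminous):

# allow the upmap feature if not already enabled
ceph osd set-require-min-compat-client luminous
# remap pg 1.7 so the copy on osd.3 is placed on osd.10 instead
ceph osd pg-upmap-items 1.7 3 10
# list the upmap entries currently in the osdmap
ceph osd dump | grep upmap
# remove the mapping again
ceph osd rm-pg-upmap-items 1.7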

Query health problems

# the grep pattern can be changed to match other error keywords
ceph health detail | grep inconsistent 
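
For scrub errors specifically, the affected PG can be examined and repaired; the pool name and PG ID below are examples:

# list inconsistent pgs in a pool
rados list-inconsistent-pg <pool-name>
# show which objects inside a pg are inconsistent
rados list-inconsistent-obj 1.7 --format=json-pretty
# ask the osds to repair that pg
ceph pg repair 1.7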

List all PGs

ceph pg ls
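
The listing can also be narrowed by pool or by PG state (pool name is a placeholder):

# pgs belonging to one pool
ceph pg ls-by-pool <pool-name>
# only pgs currently in a given state
ceph pg ls degraded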

Disable pg_autoscale_mode

ceph osd pool set .rgw.control pg_autoscale_mode off
# disable it for every pool
rados lspools | xargs -I {} ceph osd pool set {} pg_autoscale_mode off
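
On Nautilus and later, the current autoscaler state per pool can be verified with:

ceph osd pool autoscale-status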

Check OSD utilization

# top 20 OSDs by utilization
ceph osd df | sort -nr -k 17 | head -20
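
Note that the position of the %USE column varies between Ceph versions, so the -k value may need adjusting. The same data can also be viewed grouped under the CRUSH tree:

# utilization per osd, arranged by host/rack
ceph osd df tree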

List the PGs on an OSD

ceph pg ls-by-osd <osd.id>
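
Usage sketch (OSD ID is an example):

# pgs that have a replica on osd.0
ceph pg ls-by-osd osd.0
# pgs whose primary is osd.0
ceph pg ls-by-primary osd.0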

Failure-domain CRUSH configuration

# Create the datacenter bucket: datacenter0
ceph osd crush add-bucket datacenter0 datacenter

# Create the room bucket: room0
ceph osd crush add-bucket room0 room

# Create the rack buckets: rack0, rack1, rack2
ceph osd crush add-bucket rack0 rack
ceph osd crush add-bucket rack1 rack
ceph osd crush add-bucket rack2 rack

# Move room0 under datacenter0
ceph osd crush move room0 datacenter=datacenter0

# Move rack0, rack1 and rack2 under room0
ceph osd crush move rack0 room=room0
ceph osd crush move rack1 room=room0
ceph osd crush move rack2 room=room0

# Move host osd01 under datacenter0/room0/rack0
ceph osd crush move osd01 datacenter=datacenter0 room=room0 rack=rack0

# Move host osd02 under datacenter0/room0/rack1
ceph osd crush move osd02 datacenter=datacenter0 room=room0 rack=rack1

# Move host osd03 under datacenter0/room0/rack2
ceph osd crush move osd03 datacenter=datacenter0 room=room0 rack=rack2
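
With the hierarchy in place, a replicated rule that uses rack as the failure domain can be created and assigned to a pool; the rule and pool names below are examples:

# arguments: rule-name  root-bucket  failure-domain-type
ceph osd crush rule create-replicated rack_rule datacenter0 rack
# point a pool at the new rule
ceph osd pool set <pool-name> crush_rule rack_rule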

Modify an RGW zone

radosgw-admin zone get > zone.txt
radosgw-admin zone set --rgw-zone=test-zone1 --infile=zone.txt
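
In a multisite setup, the zone change usually only takes effect after committing a new period:

radosgw-admin period update --commit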
