Ceph cleanup and manual deployment of the Nautilus (N) release

2021-10-22 10:44:30

Unmount the OSDs

df | grep ceph | awk '{print $NF}' | xargs -n1 umount

Remove Ceph data and logs

rm -rf /var/lib/ceph

rm -rf /var/log/ceph

rm -rf /etc/ceph/*

systemctl stop ceph-radosgw.target

systemctl stop ceph-crash.service

systemctl status ceph-crash.service

rm -rf /var/log/radosgw

Uninstall the Ceph RPM packages

rpm -qa | grep ceph | xargs -n1 yum -y remove

rm /etc/yum.repos.d/ceph*.repo -f

rpm -qa | grep rados | xargs -n1 yum remove -y

yum remove -y python-rados

Handling a single failed Ceph node

Log in to a Ceph monitor node and check the cluster health:

ceph health detail

Mark all OSDs on the failed node as out. This triggers data recovery; wait for the data migration to finish while checking that the VMs stay healthy:

ceph osd out osd_id

Remove the OSD from the CRUSH map. This triggers a data rebalance; wait for the migration to finish while checking that the VMs stay healthy:

ceph osd crush remove osd_name

Delete the OSD's authentication key: ceph auth del osd_name

Remove the OSD from the cluster: ceph osd rm osd_id (a worked example for osd.8 follows below).
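As a concrete sketch, removing osd.8 (the OSD ID that also appears later in these notes) would be:

ceph osd out 8
ceph osd crush remove osd.8
ceph auth del osd.8
ceph osd rm 8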

Clean up leftover LVM metadata; use this when an OSD has been removed and its disk will be reused.

lvdisplay | grep "LV Path" | awk '{print $NF}' | xargs -i lvremove -f {}

vgdisplay | grep "VG Name" | awk '{print $NF}' | xargs -i vgremove {}

pvremove /dev/sd[b-j]

ceph-volume lvm list

ls /dev/mapper/

systemctl stop ceph-osd@8.service

umount -l /var/lib/ceph/osd/ceph-8

Ceph yum repository

vim /etc/yum.repos.d/ceph.repo

[ceph]

name=ceph

baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/

gpgcheck=0

[ceph-noarch]

name=cephnoarch

baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch/

gpgcheck=0

scp /etc/yum.repos.d/ceph.repo node2:/etc/yum.repos.d/

scp /etc/yum.repos.d/ceph.repo node3:/etc/yum.repos.d/

scp /etc/yum.repos.d/ceph.repo node4:/etc/yum.repos.d/

scp /etc/yum.repos.d/ceph.repo node5:/etc/yum.repos.d/

scp /etc/yum.repos.d/ceph.repo node6:/etc/yum.repos.d/

yum clean all

yum makecache
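The package installation itself is not shown; presumably something like the following was run on every node after refreshing the repo:

yum install -y ceph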

Installation

Generate an fsid

uuidgen

Generate the keyrings

ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'

ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'

ceph-authtool --create-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring --gen-key -n client.bootstrap-osd --cap mon 'profile bootstrap-osd' --cap mgr 'allow r'

ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring

ceph-authtool /tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring

chown ceph:ceph /tmp/ceph.mon.keyring

monmaptool --create --add node1 10.33.132.50 --add node3 10.33.132.52 --add node4 10.33.132.53 --fsid a5303bea-c6ba-4690-9fb2-49ed046be391 /tmp/monmap

monmaptool --print /tmp/monmap

Create the default monitor data directory

mkdir /var/lib/ceph/mon/ceph-node1

chmod 777 -R /var/lib/ceph/mon/ceph-node1

Distribute the configuration file and start the mon daemons
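The distributed /etc/ceph/ceph.conf itself is not shown in these notes; a minimal sketch consistent with the fsid and monitor addresses used above would look something like this (public_network is an assumption and must match the real subnet):

[global]
fsid = a5303bea-c6ba-4690-9fb2-49ed046be391
mon_initial_members = node1, node3, node4
mon_host = 10.33.132.50,10.33.132.52,10.33.132.53
public_network = 10.33.132.0/24   # assumption: adjust to the actual network
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx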

scp /etc/ceph/ceph.conf node2:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node3:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node4:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node5:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node6:/etc/ceph/ceph.conf

scp /tmp/ceph.mon.keyring node3:/tmp/ceph.mon.keyring

scp /etc/ceph/ceph.client.admin.keyring node3:/etc/ceph/ceph.client.admin.keyring

scp /tmp/monmap node3:/tmp/monmap

scp /tmp/ceph.mon.keyring node4:/tmp/ceph.mon.keyring

scp /etc/ceph/ceph.client.admin.keyring node4:/etc/ceph/ceph.client.admin.keyring

scp /tmp/monmap node4:/tmp/monmap

mkdir /var/lib/ceph/mon/ceph-node3

chmod 777 -R /var/lib/ceph/mon/ceph-node3

mkdir /var/lib/ceph/mon/ceph-node4

chmod 777 -R /var/lib/ceph/mon/ceph-node4

ceph-mon --mkfs -i node3 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring

ceph-mon --mkfs -i node4 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring
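The same initialization is not shown for node1; presumably it was run there as well:

ceph-mon --mkfs -i node1 --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring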

chown -R ceph:ceph /var/lib/ceph/mon/ceph-node1

chown -R ceph:ceph /var/lib/ceph/mon/ceph-node3

chown -R ceph:ceph /var/lib/ceph/mon/ceph-node4

systemctl start ceph-mon@node1 && systemctl enable ceph-mon@node1

systemctl status ceph-mon@node1

systemctl start ceph-mon@node3 && systemctl enable ceph-mon@node3

systemctl status ceph-mon@node3

systemctl start ceph-mon@node4 && systemctl enable ceph-mon@node4

systemctl status ceph-mon@node4

Check the cluster status: ceph -s

If ceph -s warns "mon is allowing insecure global_id reclaim", disable the insecure reclaim mode:

ceph config set mon auth_allow_insecure_global_id_reclaim false

If it warns "3 monitors have not enabled msgr2", enable the msgr2 protocol:

ceph mon enable-msgr2

Create the MGR daemons

On every node that runs a ceph-mon daemon, a ceph-mgr daemon should also be set up.

Create the key directory

Run the following on every mgr node:

sudo -u ceph mkdir /var/lib/ceph/mgr/ceph-`hostname -s`

cd /var/lib/ceph/mgr/ceph-`hostname -s`

ceph auth get-or-create mgr.`hostname -s` mon 'allow profile mgr' osd 'allow *' mds 'allow *' > keyring

systemctl enable ceph-mgr@`hostname -s` && systemctl start ceph-mgr@`hostname -s`
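Because the redirect above runs as root, the keyring file can end up root-owned inside the ceph-owned directory; making the whole mgr data directory ceph-owned again (a precaution not in the original notes) avoids permission problems under a restrictive umask:

chown -R ceph:ceph /var/lib/ceph/mgr/ceph-`hostname -s`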

List the OSD fsids and check whether any already exist

ceph-volume lvm list

# Prepare the OSDs

ceph-volume lvm prepare --data /dev/sdb

ceph-volume lvm prepare --data /dev/sdc

ceph-volume lvm prepare --data /dev/sdd

ceph-volume lvm prepare --data /dev/sde

ceph-volume lvm prepare --data /dev/sdf

ceph-volume lvm prepare --data /dev/sdg

ceph-volume lvm prepare --data /dev/sdh

ceph-volume lvm prepare --data /dev/sdi

ceph-volume lvm prepare --data /dev/sdj

Resulting OSD layout (from ceph-volume lvm list, filtered to osd id / osd fsid / device):

osd id  osd fsid                              device
0       0d839504-effc-4dcd-9afc-28f4f4d31764  /dev/sdb
1       4b57cd50-5566-449d-bdb8-a7de11034fea  /dev/sdc
2       2f3722fe-eee8-4ad5-a3e8-48b6f5f344e8  /dev/sdd
3       1ff13481-4c5e-43f2-9d0a-50f7d5a78bb9  /dev/sde
4       d38af12c-4e0f-4810-a658-e4c062b25908  /dev/sdf
5       24d8938c-10a3-4ed6-9d70-366c55ae6152  /dev/sdg
6       02726643-954c-4744-ad7b-bdac475c0e9d  /dev/sdh
7       5ab11637-fd8b-4e99-b7e0-27288f74574f  /dev/sdi
8       2c57b012-24b9-485a-89fe-5b3418051349  /dev/sdj

Copy the bootstrap-osd keyring to the other hosts

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node2:/var/lib/ceph/bootstrap-osd/ceph.keyring

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node3:/var/lib/ceph/bootstrap-osd/ceph.keyring

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node4:/var/lib/ceph/bootstrap-osd/ceph.keyring

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node5:/var/lib/ceph/bootstrap-osd/ceph.keyring

scp /var/lib/ceph/bootstrap-osd/ceph.keyring node6:/var/lib/ceph/bootstrap-osd/ceph.keyring

# Activate the OSDs: review the generated commands, then copy and run them

ceph-volume lvm list | egrep "osd id|osd fsid" | sed '$!N;s/\n/\t/' | awk '{print $NF," ",$3}' | xargs -i echo ceph-volume lvm activate {}
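With the fsids listed above, the echoed commands should look like the following (first OSD shown); each one activates an OSD by id and fsid:

ceph-volume lvm activate 0 0d839504-effc-4dcd-9afc-28f4f4d31764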

For reference when sizing the pools below, pool settings as reported by ceph osd pool ls detail:

pool 3 '.rgw.root' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode on last_change 155 flags hashpspool,creating stripe_width 0

pool 1 '.rgw.root' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 120 '.rgw.root' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 121 'default.rgw.control' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 122 'default.rgw.meta' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 256 pgp_num 256 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 123 'default.rgw.log' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 124 'default.rgw.buckets.index' replicated size 3 min_size 1 crush_rule 1 object_hash rjenkins pg_num 512 pgp_num 512 last_change 30441 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 125 'default.rgw.buckets.data' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 10240 pgp_num 10240 last_change 9116 flags hashpspool stripe_width 0 application rgw

pool 126 'default.rgw.buckets.non-ec' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 28780 flags hashpspool stripe_width 0 expected_num_objects 1 application rgw

pool 1 '.rgw.root' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 233 flags hashpspool stripe_width 0 application rgw

pool 2 'default.rgw.control' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 234 flags hashpspool stripe_width 0 application rgw

pool 3 'default.rgw.meta' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 256 pgp_num 256 autoscale_mode warn last_change 235 flags hashpspool stripe_width 0 application rgw

pool 4 'default.rgw.log' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 236 flags hashpspool stripe_width 0 application rgw

pool 5 'default.rgw.buckets.index' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 512 pgp_num 512 autoscale_mode warn last_change 237 flags hashpspool stripe_width 0 application rgw

pool 6 'default.rgw.buckets.data' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 8192 pgp_num 8192 autoscale_mode warn last_change 238 lfor 0/0/222 flags hashpspool stripe_width 0 application rgw

pool 7 'default.rgw.buckets.non-ec' replicated size 3 min_size 2 crush_rule 1 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 239 flags hashpspool stripe_width 0 application rgw

# Reference

https://blog.51cto.com/yuweibing/2139084

Create the host buckets

for i in {1..6};do ceph osd crush add-bucket ssd-node$i host;done

for i in {1..6};do ceph osd crush add-bucket hdd-node$i host;done

Create the ssd root bucket

ceph osd crush add-bucket ssd root

Move the host buckets under the root buckets

for i in {1..6};do ceph osd crush move hdd-node$i root=default;done

for i in {1..6};do ceph osd crush move ssd-node$i root=ssd;done
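Before moving any OSDs, checking the resulting CRUSH layout (a sanity check not in the original notes) helps confirm the buckets landed where intended:

ceph osd tree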

Move the OSDs into the designated host buckets

Identify the SSD disks among the OSDs:

ceph-volume lvm list | egrep "sdb|==="

SSD OSDs: osd.0 osd.9 osd.18 osd.27 osd.36 osd.45

ceph osd crush set osd.0 0.894 host=ssd-node1

ceph osd crush set osd.9 0.894 host=ssd-node2

ceph osd crush set osd.18 0.894 host=ssd-node3

ceph osd crush set osd.27 0.894 host=ssd-node4

ceph osd crush set osd.36 0.894 host=ssd-node5

ceph osd crush set osd.45 0.894 host=ssd-node6

# Change the device class to ssd

ceph osd crush rm-device-class 0

ceph osd crush set-device-class ssd 0 9 18 27 36 45
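To confirm the class change, the CRUSH device classes and their member OSDs can be listed (a check not in the original notes):

ceph osd crush class ls
ceph osd crush class ls-osd ssd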

for((i=1;i<=8;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node1; done

for((i=10;i<=17;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node2; done

for((i=19;i<=26;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node3; done

for((i=28;i<=35;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node4; done

for((i=37;i<=44;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node5; done

for((i=46;i<=53;i++));do ceph osd crush set osd."$i" 7.3 host=hdd-node6; done

Install radosgw

yum install ceph-radosgw -y

# Create the pools

ceph osd pool create .rgw.root 64 64

ceph osd pool create default.rgw.control 64 64

ceph osd pool create default.rgw.meta 256 256

ceph osd pool create default.rgw.log 64 64

ceph osd pool create default.rgw.buckets.index 512 512

ceph osd pool create default.rgw.buckets.data 8192 8192

ceph osd pool create default.rgw.buckets.non-ec 64 64

Create the CRUSH rule

ceph osd crush rule create-replicated root-ssd ssd host

Assign the CRUSH rule to the pools: every pool except the data pool points at the ssd rule

ceph osd pool ls | grep -v data | xargs -i ceph osd pool set {} crush_rule root-ssd
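To verify which rule each pool ended up with (a check not in the original notes):

ceph osd pool get default.rgw.buckets.index crush_rule   # should report root-ssd
ceph osd pool get default.rgw.buckets.data crush_rule    # the data pool keeps the default rule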

# Set the pool application to rgw

ceph osd pool ls | xargs -i ceph osd pool application enable {} rgw

Check what has changed in the CRUSH map

ceph osd getcrushmap -o crushmap

crushtool -d crushmap -o crushmap

cat crushmap

# Create the RADOSGW users and keyring

ceph-authtool --create-keyring /etc/ceph/ceph.client.radosgw.keyring

chown ceph:ceph /etc/ceph/ceph.client.radosgw.keyring

Generate a user and key for each ceph-radosgw instance

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node1 --gen-key

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node2 --gen-key

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node3 --gen-key

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node4 --gen-key

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node5 --gen-key

ceph-authtool /etc/ceph/ceph.client.radosgw.keyring -n client.rgw.node6 --gen-key

Grant the users access capabilities

ceph-authtool -n client.rgw.node1 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

ceph-authtool -n client.rgw.node2 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

ceph-authtool -n client.rgw.node3 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

ceph-authtool -n client.rgw.node4 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

ceph-authtool -n client.rgw.node5 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

ceph-authtool -n client.rgw.node6 --cap osd 'allow rwx' --cap mon 'allow rwx' /etc/ceph/ceph.client.radosgw.keyring

Import the keyrings into the cluster

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node1 -i /etc/ceph/ceph.client.radosgw.keyring

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node2 -i /etc/ceph/ceph.client.radosgw.keyring

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node3 -i /etc/ceph/ceph.client.radosgw.keyring

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node4 -i /etc/ceph/ceph.client.radosgw.keyring

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node5 -i /etc/ceph/ceph.client.radosgw.keyring

ceph -k /etc/ceph/ceph.client.admin.keyring auth add client.rgw.node6 -i /etc/ceph/ceph.client.radosgw.keyring

Edit the configuration file

cat >> /etc/ceph/ceph.conf << EOF

[client.rgw.node1]

host=node1

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

[client.rgw.node2]

host=node2

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

[client.rgw.node3]

host=node3

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

[client.rgw.node4]

host=node4

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

[client.rgw.node5]

host=node5

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

[client.rgw.node6]

host=node6

keyring=/etc/ceph/ceph.client.radosgw.keyring

log file=/var/log/radosgw/client.radosgw.gateway.log

rgw_frontends = civetweb port=8080 num_threads=2048

EOF

Create the log directory

mkdir /var/log/radosgw

chown ceph:ceph /var/log/radosgw

Copy the keyring and ceph.conf to the other hosts

scp /etc/ceph/ceph.client.radosgw.keyring node2:/etc/ceph/ceph.client.radosgw.keyring

scp /etc/ceph/ceph.client.radosgw.keyring node3:/etc/ceph/ceph.client.radosgw.keyring

scp /etc/ceph/ceph.client.radosgw.keyring node4:/etc/ceph/ceph.client.radosgw.keyring

scp /etc/ceph/ceph.client.radosgw.keyring node5:/etc/ceph/ceph.client.radosgw.keyring

scp /etc/ceph/ceph.client.radosgw.keyring node6:/etc/ceph/ceph.client.radosgw.keyring

scp /etc/ceph/ceph.conf node2:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node3:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node4:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node5:/etc/ceph/ceph.conf

scp /etc/ceph/ceph.conf node6:/etc/ceph/ceph.conf

On the other hosts, fix the ownership

chown ceph:ceph /etc/ceph/ceph.client.radosgw.keyring

Create the log directory and set its ownership

mkdir /var/log/radosgw

chown ceph:ceph /var/log/radosgw

Start the rgw service

systemctl start ceph-radosgw@rgw.`hostname -s` && systemctl enable ceph-radosgw@rgw.`hostname -s`
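A quick reachability check against the port configured above; civetweb should answer with an S3 ListAllMyBucketsResult XML document for the anonymous user:

curl http://node1:8080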

Test rgw availability by creating a test user, presumably with radosgw-admin user create --uid=cltx_test --display-name="cltx_test for php user"; the output looks like:

{
    "user_id": "cltx_test",
    "display_name": "cltx_test for php user",
    "email": "",
    "suspended": 0,
    "max_buckets": 1000,
    "subusers": [],
    "keys": [
        {
            "user": "cltx_test",
            "access_key": "VOLTH06Z29O7N0YEAVBR",
            "secret_key": "WWljAH2UhwjFCfKc0vgfgbd11eif9lAjAvLwr8Je"
        }
    ],
    "swift_keys": [],
    "caps": [],
    "op_mask": "read, write, delete",
    "default_placement": "",
    "default_storage_class": "",
    "placement_tags": [],
    "bucket_quota": {
        "enabled": false,
        "check_on_raw": false,
        "max_size": -1,
        "max_size_kb": 0,
        "max_objects": -1
    },
    "user_quota": {
        "enabled": false,
        "check_on_raw": false,
        "max_size": -1,
        "max_size_kb": 0,
        "max_objects": -1
    },
    "temp_url_keys": [],
    "type": "rgw",
    "mfa_ids": []
}

Stress testing (boto3 / rados bench)

rados bench 30 -b 4K -t 32 write --no-cleanup -p test    # write stress test against the pool named test
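The matching read benchmarks and the cleanup of the objects left behind by --no-cleanup (not in the original notes) would be along these lines:

rados -p test bench 30 seq -t 32    # sequential read of the objects written above
rados -p test bench 30 rand -t 32   # random read
rados -p test cleanup               # remove the benchmark objects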

Dashboard installation

1. yum install ceph-mgr-dashboard

2. ceph mgr module enable dashboard

3. Create a self-signed certificate

ceph dashboard create-self-signed-cert

4. Create the login credentials

ceph dashboard set-login-credentials admin admin

5. However, this fails with: Error EINVAL: Please specify the file containing the password/secret with "-i" option.

Write the password into a file such as /etc/ceph/dashboard_passwd.txt (the filename is arbitrary) and pass it with -i; see the sketch below.
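A sketch of the corrected sequence (assuming the password is still admin):

echo admin > /etc/ceph/dashboard_passwd.txt
ceph dashboard set-login-credentials admin -i /etc/ceph/dashboard_passwd.txt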

6. Access the dashboard

ceph mgr services
