Multipath Mount Testing and Validation of RBD over tgt-iscsi


Some workloads cannot consume RBD directly and still need a traditional SCSI mount. This post validates forwarding RBD through tgt-iscsi.

Multipath Mount Testing of RBD via tgt-iscsi

Environment

  • The tgt nodes and the mount test node run Debian 12.4 (bookworm), kernel 6.1.0-17-amd64
  • The ceph cluster is a single node running Ubuntu 22.04.4 LTS, kernel 5.15.0-101-generic

  Hostname   IP               CPU/RAM   Data disks           Notes
  ceph1      192.168.230.160  4C8G      1 * 50G + 3 * 600G   ceph rbd node
  tgt1       192.168.230.161  4C8G      1 * 50G              iscsi target node
  tgt2       192.168.230.162  4C8G      1 * 50G              iscsi target node
  debian12   192.168.230.163  4C8G      1 * 50G              iscsi initiator node

Installation and Configuration

  • Deployment of the storage node is not repeated here; see the earlier ceph deployment posts
  • tgt node deployment steps
    1. Install the tgt packages via apt after adding the ceph package sources; run on both tgt nodes
    # Add the ceph apt sources and check that a tgt-rbd package is available
    wget -q -O- 'https://download.ceph.com/keys/release.asc' | apt-key add -
    echo deb https://download.ceph.com/debian-reef/ $(lsb_release -sc) main | tee /etc/apt/sources.list.d/ceph.list
    echo "deb http://ceph.com/packages/ceph-extras/debian $(lsb_release -sc) main" | tee /etc/apt/sources.list.d/ceph-extras.list
    apt update
    apt list | grep tgt-rbd
    
    # Install tgt and check that it supports rbd
    apt install tgt-rbd
    tgtadm --lld iscsi --op show --mode system
    
    # Expected output: rbd should be listed under Backing stores
    System:
        State: ready
        debug: off
    LLDs:
        iscsi: ready
        iser: error
    Backing stores:
        rbd (bsoflags sync:direct)
        sheepdog
        bsg
        sg
        null
        ssc
        smc (bsoflags sync:direct)
        mmc (bsoflags sync:direct)
        rdwr (bsoflags sync:direct)
        aio
    Device types:
        disk
        cd/dvd
        osd
        controller
        changer
        tape
        passthrough
    iSNS:
        iSNS=Off
        iSNSServerIP=
        iSNSServerPort=3205
        iSNSAccessControl=Off
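
    If rbd is missing from the Backing stores list above, it is worth checking that the module shipped by tgt-rbd is actually in place. A quick sanity-check sketch (the module directory is an assumption based on the usual Debian package layout):
    # List the files installed by tgt-rbd; a bs_rbd module should be present
    dpkg -L tgt-rbd | grep -i rbd
    ls -l /usr/lib/tgt/backing-store/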
    
    2. Create the RBD image
    # On the ceph node, create the client user. It is reused from the k8s integration, hence the name k8s-rbd; there is no special requirement on the name.
    ceph auth get-or-create client.k8s-rbd mon 'profile rbd' osd 'profile rbd pool=rbd' mgr 'profile rbd pool=rbd'
    
    # On the ceph node, create the RBD image.
    rbd create rbd/tgt1 --size 100G --image-format 2
    
    # Install ceph-common on the tgt nodes; run on both
    apt install ceph-common
    
    # Export the keyring file; run on the ceph node
    ceph auth export client.k8s-rbd -o ceph.client.k8s-rbd.keyring
    
    # Copy the keyring exported in the previous step, plus /etc/ceph/ceph.conf, to both tgt nodes
    root@tgt2:~# ls -l /etc/ceph/
    total 12
    -rw-r--r-- 1 root root  73 Apr 16 21:50 ceph.client.k8s-rbd.keyring
    -rw-r--r-- 1 root root 291 Apr 16 21:50 ceph.conf
    
    # Verify RBD is reachable from the tgt nodes; run on both
    root@tgt2:~# rbd info rbd/tgt1 --id k8s-rbd
    rbd image 'tgt1':
            size 100 GiB in 25600 objects
            order 22 (4 MiB objects)
            snapshot_count: 0
            id: 2aea9d77c32b8
            block_name_prefix: rbd_data.2aea9d77c32b8
            format: 2
            features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
            op_features:
            flags:
            create_timestamp: Tue Apr 16 21:52:13 2024
            access_timestamp: Tue Apr 23 15:44:09 2024
            modify_timestamp: Mon Apr 22 19:29:25 2024
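
    The copy step above can be scripted; a minimal sketch, assuming passwordless SSH from the ceph node to both tgt nodes (hostnames as in the table above):
    # Push ceph.conf and the keyring from the ceph node to both tgt nodes
    for host in tgt1 tgt2; do
        scp /etc/ceph/ceph.conf ceph.client.k8s-rbd.keyring root@${host}:/etc/ceph/
    done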
    
    3. Create the tgt configuration and start the service; run on both tgt nodes.
      • Important: tgt has a bug here. To attach RBD as a specific ceph user, the bsopts option must be added.
      • Because bsopts carries two values, the ceph.conf path and the client id, it must be quoted, with the values separated by a semicolon.
      • When the tgt service reads the config file at startup it does not tolerate bare quotes; they must be escaped, otherwise the service fails to start.
    # tgt1 node config file
    root@tgt1:~# cat /etc/tgt/conf.d/rbd1.conf
    <target iqn.2014-04.rbdstore.tgt1.com:iscsi>
        bs-type rbd
        backing-store rbd/tgt1
        initiator-address 192.168.3.0/24
        bsopts=\"conf=/etc/ceph/ceph.conf;id=k8s-rbd\"
    </target>
    
    # tgt2 node config file
    root@tgt2:~# cat /etc/tgt/conf.d/rbd1.conf
    <target iqn.2014-04.rbdstore.tgt2.com:iscsi>
        bs-type rbd
        backing-store rbd/tgt1
        initiator-address 192.168.3.0/24
        bsopts=\"conf=/etc/ceph/ceph.conf;id=k8s-rbd\"
    </target>
    
    # Restart the tgt service
    systemctl restart tgt
    
    # Check the tgt service
    root@tgt2:~# tgtadm --lld iscsi --mode target --op show
    Target 1: iqn.2014-04.rbdstore.tgt2.com:iscsi
        System information:
            Driver: iscsi
            State: ready
        I_T nexus information:
            I_T nexus: 1
                Initiator: iqn.1993-08.org.debian:01:5b15a99875b1 alias: debian12 # empty if no initiator has logged in yet
                Connection: 0
                    IP Address: 192.168.3.128
        LUN information:
            LUN: 0
                Type: controller
                SCSI ID: IET     00010000
                SCSI SN: beaf10
                Size: 0 MB, Block size: 1
                Online: Yes
                Removable media: No
                Prevent removal: No
                Readonly: No
                SWP: No
                Thin-provisioning: No
                Backing store type: null
                Backing store path: None
                Backing store flags:
            LUN: 1                                      # if startup succeeded, LUN 1 appears here, sized to match the RBD: 100G
                Type: disk
                SCSI ID: IET     00010001
                SCSI SN: beaf11
                Size: 107374 MB, Block size: 512
                Online: Yes
                Removable media: No
                Prevent removal: No
                Readonly: No
                SWP: No
                Thin-provisioning: No
                Backing store type: rbd
                Backing store path: rbd/tgt1
                Backing store flags:
        Account information:
        ACL information:
            192.168.3.0/24
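
    For comparison, the same target can be built at runtime with tgtadm, where bsopts is an ordinary command-line argument and needs none of the escaping described above (a sketch; the tid/lun numbers are assumptions, and changes made this way do not survive a tgt restart):
    # Create the target, attach the rbd-backed LUN, and open the ACL
    tgtadm --lld iscsi --mode target --op new --tid 1 \
        --targetname iqn.2014-04.rbdstore.tgt1.com:iscsi
    tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \
        --bstype rbd --bsopts "conf=/etc/ceph/ceph.conf;id=k8s-rbd" \
        --backing-store rbd/tgt1
    tgtadm --lld iscsi --mode target --op bind --tid 1 -I 192.168.3.0/24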
    
    4. Mount iSCSI on the client node
    # Install open-iscsi
    apt install open-iscsi
    systemctl start open-iscsi.service
    
    # Discover and log in to both LUN paths
    root@debian12:~# iscsiadm -m discovery -t st -p 192.168.3.161
    192.168.3.161:3260,1 iqn.2014-04.rbdstore.tgt1.com:iscsi
    
    root@debian12:~# iscsiadm -m node -T iqn.2014-04.rbdstore.tgt1.com:iscsi -p 192.168.3.161 --login
    Logging in to [iface: default, target: iqn.2014-04.rbdstore.tgt1.com:iscsi, portal: 192.168.3.161,3260]
    Login to [iface: default, target: iqn.2014-04.rbdstore.tgt1.com:iscsi, portal: 192.168.3.161,3260] successful.
    
    root@debian12:~# iscsiadm -m discovery -t st -p 192.168.3.162
    192.168.3.162:3260,1 iqn.2014-04.rbdstore.tgt2.com:iscsi
    
    root@debian12:~# iscsiadm -m node -T iqn.2014-04.rbdstore.tgt2.com:iscsi -p 192.168.3.162 --login
    Logging in to [iface: default, target: iqn.2014-04.rbdstore.tgt2.com:iscsi, portal: 192.168.3.162,3260]
    Login to [iface: default, target: iqn.2014-04.rbdstore.tgt2.com:iscsi, portal: 192.168.3.162,3260] successful.
    
    # Use lsblk to check that the LUN was detected
    root@debian12:~# lsblk
    NAME                    MAJ:MIN RM  SIZE RO TYPE  MOUNTPOINTS
    sda                       8:0    0   50G  0 disk
    |-sda1                    8:1    0  487M  0 part  /boot
    |-sda2                    8:2    0    1K  0 part
    `-sda5                    8:5    0 49.5G  0 part
      |-debian12--vg-root   254:0    0 16.8G  0 lvm   /
      |-debian12--vg-swap_1 254:1    0  976M  0 lvm   [SWAP]
      `-debian12--vg-home   254:2    0 31.8G  0 lvm   /home
    sdb                       8:16   0  100G  0 disk
    `-data                  254:3    0  100G  0 mpath  # ignore the mpath rows here; multipath was already configured on this host. Without it, sdb/sdc would appear as plain disks
    sdc                       8:32   0  100G  0 disk
    `-data                  254:3    0  100G  0 mpath  # ignore
    sr0                      11:0    1 1024M  0 rom
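
    Optionally, both sessions can be marked for automatic login so the multipath device reassembles after a client reboot (a sketch using standard open-iscsi node settings):
    # Log back in to both targets automatically at boot
    iscsiadm -m node -T iqn.2014-04.rbdstore.tgt1.com:iscsi -p 192.168.3.161 \
        --op update -n node.startup -v automatic
    iscsiadm -m node -T iqn.2014-04.rbdstore.tgt2.com:iscsi -p 192.168.3.162 \
        --op update -n node.startup -v automatic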
    
    5. Configure multipath
    # Install the multipath tools via apt
    apt install multipath-tools
    
    # List LUNs that currently have multiple paths
    root@debian12:~# multipath -d -v3 2>/dev/null
    ===== paths list =====
    uuid                              hcil     dev dev_t pri dm_st chk_st vend/prod/rev       dev_st
                                      0:0:0:0  sda 8:0   -1  undef undef  VMware,Virtual disk unknown
    360000000000000000e00000000010001 33:0:0:1 sdb 8:16  1   undef undef  IET,VIRTUAL-DISK    unknown
    360000000000000000e00000000010001 34:0:0:1 sdc 8:32  1   undef undef  IET,VIRTUAL-DISK    unknown
    
    # Configure the multipath policy: edit the config file and restart the service
    root@debian12:~# cat /etc/multipath/conf.d/multipath.conf
    multipaths {
        multipath {
            wwid 360000000000000000e00000000010001  # the uuid seen in the previous step
            path_grouping_policy multibus           # single active group across all paths
            path_selector "round-robin 0"           # round-robin IO policy
            failback manual
            rr_weight priorities
            no_path_retry 5
            alias data
        }
    }
    
    # Restart the service
    systemctl restart multipathd.service
    
    # Verify multipath took effect
    root@debian12:~# multipath -ll
    data (360000000000000000e00000000010001) dm-3 IET,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active # both paths now active-active
      |- 33:0:0:1 sdb 8:16 active ready running
      `- 34:0:0:1 sdc 8:32 active ready running
    root@debian12:~# lsblk
    NAME                    MAJ:MIN RM  SIZE RO TYPE  MOUNTPOINTS
    sda                       8:0    0   50G  0 disk
    |-sda1                    8:1    0  487M  0 part  /boot
    |-sda2                    8:2    0    1K  0 part
    `-sda5                    8:5    0 49.5G  0 part
      |-debian12--vg-root   254:0    0 16.8G  0 lvm   /
      |-debian12--vg-swap_1 254:1    0  976M  0 lvm   [SWAP]
      `-debian12--vg-home   254:2    0 31.8G  0 lvm   /home
    sdb                       8:16   0  100G  0 disk
    `-data                  254:3    0  100G  0 mpath # the LUN now appears under the data mpath device
    sdc                       8:32   0  100G  0 disk
    `-data                  254:3    0  100G  0 mpath # same here
    sr0                      11:0    1 1024M  0 rom
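
    As a cross-check, the wwid configured above can also be read straight from each path device with scsi_id; both paths should print the same value (the binary path below is the usual Debian location):
    # sdb and sdc should report the identical wwid
    /lib/udev/scsi_id -g -u -d /dev/sdb
    /lib/udev/scsi_id -g -u -d /dev/sdc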
    

Failover Testing

  1. Format the LUN and mount it. (Run on the iSCSI client node.)

    mkfs.xfs /dev/mapper/data

    mkdir -p /mnt/test
    mount /dev/mapper/data /mnt/test
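
    If the mount should persist across reboots, a sketch of the fstab entry: reference the multipath alias and use _netdev so mounting waits for the network/iSCSI stack, plus nofail so boot does not block if all paths are down:
    echo '/dev/mapper/data /mnt/test xfs _netdev,nofail 0 0' >> /etc/fstab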
    
  2. Prepare the failover test. (Run on the iSCSI client node.)

    # Simulate a sustained write with dd piped through pv, rate-limited to 10 MB/s.
    dd if=/dev/urandom bs=1M count=10240 | pv -L 10m | dd of=/mnt/test/10G.file bs=1M oflag=direct,nonblock
    
    # In a second terminal, watch the IO in real time
    iostat -x 1 -m /dev/sd[b-c] /dev/mapper/data
    
    # In a third terminal, watch the multipath state
    multipath -ll
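
    Alternatively, the path-state transitions can be captured to a file instead of watching by hand; a simple polling sketch (the log path is arbitrary):
    # Record the multipath state once per second for later review
    while true; do date; multipath -ll data; sleep 1; done >> /root/mpath-watch.log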
    
  3. Run the failover test. (Run on one of the tgt nodes.)

    # Stop the tgt service on one of the tgt nodes
    systemctl stop tgt
    
    # After some time, start the tgt service again
    systemctl start tgt
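
    A harsher variant of this failure (a sketch) is to blackhole the iSCSI traffic with iptables instead of stopping tgt cleanly, so the initiator faces a silent peer rather than a closed connection:
    # Drop the iSCSI port on the tgt node, observe, then restore
    iptables -A INPUT -p tcp --dport 3260 -j DROP
    iptables -D INPUT -p tcp --dport 3260 -j DROP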
    
  4. Observations

    # Before the link is cut, iostat shows sdb and sdc sharing the IO evenly, and multipath shows both paths running
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00   10.28    0.00    0.00   89.47
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await dareq-sz     f/s f_await  aqu-sz  %util
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  162.00     10.00     0.00   0.00    2.20    63.21    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.36  16.80
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   81.00      5.00     0.00   0.00    1.89    63.21    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.15  15.20
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   81.00      5.00     0.00   0.00    2.21    63.21    0.00      0.00     0.00   0.00    0.00     0.00    0.00    0.00    0.18  14.80
    
    root@debian12:~# multipath -ll
    data1 (360000000000000000e00000000010001) dm-3 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 33:0:0:1 sdb 8:16 active ready running
      `- 34:0:0:1 sdc 8:32 active ready running
    
    # After one iSCSI link is cut, IO hangs for roughly 5s; the multipath path state goes from pending to faulty, and finally all IO traffic moves to sdb while sdc stops serving requests
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.00    0.00   10.53    0.00    0.00   89.47
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  163.00     10.00     0.00   0.00    2.07    62.82    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   82.00      5.03     0.00   0.00    1.99    62.83    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   81.00      4.97     0.00   0.00    1.88    62.81    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.74    0.00    9.93    0.00    0.00   89.33
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  118.00      7.28     0.00   0.00    2.60    63.19    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   85.00      5.25     0.00   0.00    2.58    63.25    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   41.00      2.03     0.00   0.00    1.61    50.73    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00    5.51    8.27    0.00   85.96
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00   50.00      3.06     0.00   0.00    2.26    62.72    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   50.00      3.06     0.00   0.00    1.88    62.72    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00    0.25   24.75    0.00   74.75
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00    0.25   24.75    0.00   74.75
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.50    0.00    1.26   24.87    0.00   73.37
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               1.01    0.00   16.88   21.91    0.00   60.20
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3            47.00      1.02     0.00   0.00    6.30    22.21  270.00     16.66     0.00   0.00 1639.67    63.17    0.00      0.00     0.00   0.00    0.00
    sdb             48.00      1.02     0.00   0.00    6.17    21.75  270.00     16.66     0.00   0.00   13.08    63.17    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   95.00      5.84     0.00   0.00 4595.62    62.99    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               1.02    0.00   34.95    0.00    0.00   64.03
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             1.00      0.00     0.00   0.00    9.00     0.00  650.00     40.00     0.00   0.00    2.28    63.02    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00  650.00     40.00     0.00   0.00    2.12    63.02    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.00    0.00   14.14    0.00    0.00   85.86
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  212.00     13.00     0.00   0.00    2.94    62.79    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00  212.00     13.00     0.00   0.00    2.78    62.79    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.00    0.00   10.75    0.00    0.00   89.25
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  157.00      9.69     0.00   0.00    5.01    63.18    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00  157.00      9.69     0.00   0.00    4.90    63.18    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.51    0.00   11.36    0.00    0.00   88.13
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  168.00     10.31     0.00   0.00    4.01    62.86    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00  168.00     10.31     0.00   0.00    3.90    62.86    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    root@debian12:~# multipath -ll
    data1 (360000000000000000e00000000010001) dm-3 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 33:0:0:1 sdb 8:16 active ready       running
      `- 34:0:0:1 sdc 8:32 active i/o pending running
    root@debian12:~# multipath -ll
    data1 (360000000000000000e00000000010001) dm-3 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 33:0:0:1 sdb 8:16 active ready  running
      `- 34:0:0:1 sdc 8:32 failed faulty running
    
    # After the tgt service is started again, multipath recovers on its own; no IO stall was observed.
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               1.00    0.00   11.22    0.25    0.00   87.53
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  162.00     10.00     0.00   0.00    2.52    63.21    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00  162.00     10.00     0.00   0.00    2.36    63.21    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00     0.00    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.76    0.00   13.13    0.00    0.00   86.11
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  161.00      9.88     0.00   0.00   12.73    62.81    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   95.00      5.81     0.00   0.00    2.08    62.65    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   66.00      4.06     0.00   0.00   27.73    63.03    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00    9.05    0.00    0.00   90.70
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  164.00     10.12     0.00   0.00   17.92    63.22    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   76.00      4.69     0.00   0.00    1.75    63.16    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   88.00      5.44     0.00   0.00   31.64    63.27    0.00      0.00     0.00   0.00    0.00
    
    
    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
               0.25    0.00   10.50    0.00    0.00   89.25
    
    Device            r/s     rMB/s   rrqm/s  %rrqm r_await rareq-sz     w/s     wMB/s   wrqm/s  %wrqm w_await wareq-sz     d/s     dMB/s   drqm/s  %drqm d_await
    dm-3             0.00      0.00     0.00   0.00    0.00     0.00  163.00     10.00     0.00   0.00   16.69    62.82    0.00      0.00     0.00   0.00    0.00
    sdb              0.00      0.00     0.00   0.00    0.00     0.00   82.00      5.03     0.00   0.00    1.67    62.83    0.00      0.00     0.00   0.00    0.00
    sdc              0.00      0.00     0.00   0.00    0.00     0.00   81.00      4.97     0.00   0.00   31.57    62.81    0.00      0.00     0.00   0.00    0.00
    
    
    root@debian12:~# multipath -ll
    data1 (360000000000000000e00000000010001) dm-3 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 33:0:0:1 sdb 8:16 active ready running
      `- 34:0:0:1 sdc 8:32 active ready running
    
  5. Failover test summary

    • Before one of the tgt-iscsi services is stopped, both iSCSI links share the IO load evenly.
    • When one tgt-iscsi service is stopped, writes stall for a few seconds while the failing path sits in the i/o pending state (see the timeout tuning sketch below).
    • Once failover completes, all write traffic moves to the healthy path and the failed link is marked failed/faulty.
    • After the failed link recovers, load returns to being shared evenly across both links; no IO stall was observed during recovery.
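
    The length of that stall is bounded by the open-iscsi session timeouts together with the no_path_retry setting in multipath.conf above. A tuning sketch for /etc/iscsi/iscsid.conf (illustrative values, not the ones used in this test; they apply to sessions created after the change):
    # NOP-Out keepalives detect a dead session; replacement_timeout caps how
    # long IO is held back before the path is failed over to multipath
    node.conn[0].timeo.noop_out_interval = 5
    node.conn[0].timeo.noop_out_timeout = 5
    node.session.timeo.replacement_timeout = 15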

RBD Lock Performance Comparison

  1. Symptom: if the RBD is created with the default features and mounted this way, a large amount of lock acquire/release activity is visible and performance hits a bottleneck.

    # Running rbd watch shows the two gateways contending for the lock
    root@ceph1:~# rbd watch tgt1
    press enter to exit...
    tgt1 received notification: notify_id=1490353659242, cookie=94233003467984, notifier_id=254199, bl.length=26, notify_op=AcquiredLock
    tgt1 received notification: notify_id=1490353659243, cookie=94233003467984, notifier_id=254203, bl.length=27, notify_op=RequestLock
    tgt1 received notification: notify_id=1490353659244, cookie=94233003467984, notifier_id=254199, bl.length=26, notify_op=ReleasedLock
    tgt1 received notification: notify_id=1490353659245, cookie=94233003467984, notifier_id=254203, bl.length=26, notify_op=AcquiredLock
    tgt1 received notification: notify_id=1490353659246, cookie=94233003467984, notifier_id=254199, bl.length=27, notify_op=RequestLock
    tgt1 received notification: notify_id=1490353659247, cookie=94233003467984, notifier_id=254203, bl.length=26, notify_op=ReleasedLock
    tgt1 received notification: notify_id=1490353659248, cookie=94233003467984, notifier_id=254199, bl.length=26, notify_op=AcquiredLock
    tgt1 received notification: notify_id=1490353659249, cookie=94233003467984, notifier_id=254203, bl.length=27, notify_op=RequestLock
    
    # Record the baseline dd performance
    root@debian12:/mnt/test# dd if=/dev/urandom of=/mnt/test/512MB.file bs=1M count=512 oflag=direct,nonblock
    512+0 records in
    512+0 records out
    536870912 bytes (537 MB, 512 MiB) copied, 23.3029 s, 23.0 MB/s
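
    dd gives a single-stream number only; for a more repeatable comparison, a hedged fio sketch (fio is an extra install, not used in the original runs):
    # 1M sequential direct writes, comparable to the dd command above
    fio --name=seqwrite --filename=/mnt/test/fio.test --rw=write --bs=1M \
        --size=2G --direct=1 --ioengine=libaio --numjobs=1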
    
  2. Create an RBD without exclusive-lock and configure tgt-iscsi

    # Create an RBD with only the layering feature, i.e. without exclusive-lock
    rbd create rbd/tgt2 --size 100G --image-format 2 --image-feature layering
    
    # Edit /etc/tgt/conf.d/rbd1.conf to add a second backing-store line, then restart the service. Run on both tgt nodes.
    root@tgt1:/etc/tgt/conf.d# cat rbd1.conf
    <target iqn.2014-04.rbdstore.tgt1.com:iscsi>
        bs-type rbd
        backing-store rbd/tgt1
        backing-store rbd/tgt2
        initiator-address 192.168.3.0/24
        bsopts=\"conf=/etc/ceph/ceph.conf;id=k8s-rbd\"
        vendor_id wzn
    </target>
    
    root@tgt1:/etc/tgt/conf.d# systemctl restart tgt
    
    # Verify it took effect: two active LUNs, LUN 1 and LUN 2, should now exist.
    root@tgt1:/etc/tgt/conf.d# tgtadm --lld iscsi --mode target --op show
    Target 1: iqn.2014-04.rbdstore.tgt1.com:iscsi
        System information:
            Driver: iscsi
            State: ready
        I_T nexus information:
            I_T nexus: 1
                Initiator: iqn.1993-08.org.debian:01:5b15a99875b1 alias: debian12
                Connection: 0
                    IP Address: 192.168.3.128
        LUN information:
            LUN: 0
                Type: controller
                SCSI ID: IET     00010000
                SCSI SN: beaf10
                Size: 0 MB, Block size: 1
                Online: Yes
                Removable media: No
                Prevent removal: No
                Readonly: No
                SWP: No
                Thin-provisioning: No
                Backing store type: null
                Backing store path: None
                Backing store flags:
            LUN: 1
                Type: disk
                SCSI ID: IET     00010001
                SCSI SN: beaf11
                Size: 107374 MB, Block size: 512
                Online: Yes
                Removable media: No
                Prevent removal: No
                Readonly: No
                SWP: No
                Thin-provisioning: No
                Backing store type: rbd
                Backing store path: rbd/tgt1
                Backing store flags:
            LUN: 2
                Type: disk
                SCSI ID: IET     00010002
                SCSI SN: beaf12
                Size: 107374 MB, Block size: 512
                Online: Yes
                Removable media: No
                Prevent removal: No
                Readonly: No
                SWP: No
                Thin-provisioning: No
                Backing store type: rbd
                Backing store path: rbd/tgt2
                Backing store flags:
        Account information:
        ACL information:
            192.168.3.0/24
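
    As an alternative to creating a second image, exclusive-lock can also be stripped from the existing image; a sketch, run on the ceph node (the features that depend on exclusive-lock have to be disabled first):
    # fast-diff and object-map depend on exclusive-lock, so drop them first
    rbd feature disable rbd/tgt1 fast-diff object-map
    rbd feature disable rbd/tgt1 exclusive-lock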
    
  3. Mount on the iSCSI client and configure multipath

    # Trigger a SCSI bus rescan
    host_id=$(ls /sys/class/scsi_host/)
    for i in $host_id ; do echo "- - -" > /sys/class/scsi_host/$i/scan ; done
    
    # Look up the uuid of the newly added LUN
    root@debian12:/mnt/raid_test# multipath -d -v3 2>/dev/null
    ===== paths list =====
    uuid                              hcil     dev dev_t pri dm_st chk_st vend/prod/rev    dev_st
    360000000000000000e00000000010001 33:0:0:1 sdb 8:16  1   undef undef  wzn,VIRTUAL-DISK unknown
    360000000000000000e00000000010002 33:0:0:2 sdd 8:48  1   undef undef  wzn,VIRTUAL-DISK unknown
    360000000000000000e00000000010001 34:0:0:1 sdc 8:32  1   undef undef  wzn,VIRTUAL-DISK unknown
    360000000000000000e00000000010002 34:0:0:2 sde 8:64  1   undef undef  wzn,VIRTUAL-DISK unknown
    
    # Update the multipath config file with the new LUN
    root@debian12:/mnt/raid_test# cat /etc/multipath/conf.d/multipath.conf
    multipaths {
        multipath {
            wwid 360000000000000000e00000000010001
            path_grouping_policy multibus
            path_selector "round-robin 0"
            failback manual
            rr_weight priorities
            no_path_retry 5
            alias data1
        }
        multipath {
            wwid 360000000000000000e00000000010002
            path_grouping_policy multibus
            path_selector "round-robin 0"
            failback manual
            rr_weight priorities
            no_path_retry 5
            alias data2
        }
    }
    
    # Restart the multipathd service and add the new LUN.
    systemctl restart multipathd
    multipath -a 360000000000000000e00000000010002
    
    # Check the end result
    root@debian12:/mnt/raid_test# multipath -ll
    data1 (360000000000000000e00000000010001) dm-3 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 33:0:0:1 sdb 8:16 active ready running
      `- 34:0:0:1 sdc 8:32 active ready running
    data2 (360000000000000000e00000000010002) dm-4 wzn,VIRTUAL-DISK
    size=100G features='1 queue_if_no_path' hwhandler='0' wp=rw
    `-+- policy='round-robin 0' prio=1 status=active
      |- 34:0:0:2 sde 8:64 active ready running
      `- 33:0:0:2 sdd 8:48 active ready running
    root@debian12:/mnt/raid_test# lsblk
    NAME                    MAJ:MIN RM  SIZE RO TYPE  MOUNTPOINTS
    sda                       8:0    0   50G  0 disk
    |-sda1                    8:1    0  487M  0 part  /boot
    |-sda2                    8:2    0    1K  0 part
    `-sda5                    8:5    0 49.5G  0 part
      |-debian12--vg-root   254:0    0 16.8G  0 lvm   /
      |-debian12--vg-swap_1 254:1    0  976M  0 lvm   [SWAP]
      `-debian12--vg-home   254:2    0 31.8G  0 lvm   /home
    sdb                       8:16   0  100G  0 disk
    `-data1                 254:3    0  100G  0 mpath
    sdc                       8:32   0  100G  0 disk
    `-data1                 254:3    0  100G  0 mpath
    sdd                       8:48   0  100G  0 disk
    `-data2                 254:4    0  100G  0 mpath
    sde                       8:64   0  100G  0 disk
    `-data2                 254:4    0  100G  0 mpath
    sr0                      11:0    1 1024M  0 rom
    
  4. Run the comparison test

    # Format the newly added LUN, then run the same dd write test
    root@debian12:~# dd if=/dev/urandom of=/mnt/test/512MB.file bs=1M count=512 oflag=direct,nonblock
    512+0 records in
    512+0 records out
    536870912 bytes (537 MB, 512 MiB) copied, 12.2355 s, 43.9 MB/s
    
  5. Conclusion

    • With exclusive-lock removed, the LUN's write performance improved markedly: from 23.0 MB/s to 43.9 MB/s.
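
    A quick way to confirm the contention is gone (a sketch): run rbd watch against the new image while writes flow over both paths; unlike the tgt1 output above, no AcquiredLock/RequestLock/ReleasedLock ping-pong should appear.
    # On the ceph node; output should stay quiet apart from the exit prompt
    rbd watch rbd/tgt2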