tcler's blog --- actually, I'm a programmer
Show me your flowcharts and conceal your tables, and I shall continue to be mystified. Show me your tables, and I won’t usually need your flowcharts; they’ll be obvious.

Building and testing a High Availability NFS cluster in Fedora/RHEL KVM, step by step

prerequisite: prepare libvirt/qemu-kvm

Here I use the kiss-vm tool to simplify the installation and configuration process.

#ref: https://github.com/tcler/kiss-vm-ns
curl -Lks https://raw.githubusercontent.com/tcler/kiss-vm-ns/master/utils/kiss-update.sh|sudo bash && sudo vm prepare

create VMs for the iSCSI server, ha-node01, ha-node02 and ha-client

iscsiServ=ha-iscsi-server
node1=ha-node01
node2=ha-node02
haclnt=ha-client
nodepkgs=vim,tmux,iscsi-initiator-utils,pacemaker,fence-agents-scsi,watchdog,pcs,pcp-zeroconf,nfs-utils #,lvm2-lockd,dlm
servpkgs=vim,tmux,targetcli
clntpkgs=vim,tmux,nfs-utils
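#note: $distro and the trailing "$@" in the commands below are expected to be supplied by the caller
#(e.g. a wrapper script); set distro to a distro name supported by kiss-vm before running them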

trun -tmux=$haclnt-$$    vm create -f -n $haclnt $distro --nointeract -p $clntpkgs --net=kissaltnet $@
trun -tmux=$iscsiServ-$$ vm create -f -n $iscsiServ $distro --nointeract -p $servpkgs --net=kissaltnet --xdisk=40 $@
trun -tmux=$node1-$$     vm create -f -n $node1 $distro --nointeract -p $nodepkgs --net=kissaltnet $@
trun                     vm create -f -n $node2 $distro --nointeract -p $nodepkgs --net=kissaltnet $@
while tmux ls | grep -E "($iscsiServ|$node1|$haclnt)"-$$; do sleep 5; done
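
Before moving on, you can confirm that all four VMs are up and reachable through vm exec with a quick sanity check:

for vmn in $iscsiServ $node1 $node2 $haclnt; do vm exec $vmn -- hostname; done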

configure iscsi server and client

#stop and disable firewalld on the iscsi server and cluster nodes
for vmn in $iscsiServ $node1 $node2; do vm exec $vmn -- 'systemctl disable firewalld --now 2>/dev/null'; done

#start iscsi services: target on server-side, and iscsid on client-side
vm exec $iscsiServ -- systemctl enable target --now
vm exec $node1 -- systemctl enable iscsid --now
vm exec $node2 -- systemctl enable iscsid --now

## create block and iscsi
vm exec -v $iscsiServ -- targetcli /backstores/block/ create share_disk /dev/vdb
vm exec -v $iscsiServ -- targetcli /iscsi/ create
vm exec -v $iscsiServ -- targetcli /backstores/block/ ls
vm exec -v $iscsiServ -- targetcli /iscsi/ ls

## get the initiator IQNs of the cluster nodes (the iSCSI clients)
initIQN01=$(vm exec $node1 -- cat /etc/iscsi/initiatorname.iscsi|awk -F= '{print $2}')
initIQN02=$(vm exec $node2 -- cat /etc/iscsi/initiatorname.iscsi|awk -F= '{print $2}')

## add targetIQN/ACLs
targetIQN=$(vm exec $iscsiServ -- targetcli /iscsi/ ls|grep -o 'iqn\.[^ ]*')
vm exec -v $iscsiServ -- targetcli /iscsi/$targetIQN/tpg1/acls/ create $initIQN01
vm exec -v $iscsiServ -- targetcli /iscsi/$targetIQN/tpg1/acls/ create $initIQN02

## create targetIQN/LUN
vm exec -v $iscsiServ -- targetcli /iscsi/$targetIQN/tpg1/luns/ create /backstores/block/share_disk

## save config and show
vm exec -v $iscsiServ -- targetcli saveconfig
vm exec -v $iscsiServ -- targetcli ls

## discovery/login at nodes
read iscsiServIp _ < <(vm ifaddr $iscsiServ)
for node in $node1 $node2; do
	#setting system_id_source is necessary for `pcs resource create nfs-lvm ...` (vg_access_mode=system_id) below
	vm exec -v $node -- sed -i -r '/.*system_id_source.=.*/s//system_id_source = "uname"/' /etc/lvm/lvm.conf
	vm exec -v $node -- iscsiadm -m discovery -t st -p $iscsiServIp
	vm exec -v $node -- iscsiadm -m node -T $targetIQN -p $iscsiServIp -l
	vm exec -v $node -- iscsiadm -m node -T $targetIQN -p $iscsiServIp --op update -n node.startup -v automatic
	vm exec -v $node -- lsblk
done
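
After login, both nodes should see the same iSCSI LUN; listing the udev by-path links (which embed the target IQN for iSCSI disks) is a quick way to double-check:

for node in $node1 $node2; do
	vm exec -v $node -- 'ls -l /dev/disk/by-path/ | grep -i iscsi'
done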

Configure SCSI-3 Persistent Reservation support (required for fence_scsi)

vm exec -v $node1 -- "echo 'options scsi_mod dev_flags=$initIQN01:0x0' >/etc/modprobe.d/scsi-pr.conf"
vm exec -v $node1 -- dracut -f
vm exec -v $node2 -- "echo 'options scsi_mod dev_flags=$initIQN02:0x0' >/etc/modprobe.d/scsi-pr.conf"
vm exec -v $node2 -- dracut -f
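
Assuming dracut includes /etc/modprobe.d in the image (it normally does), you can verify that the new option actually landed in the rebuilt initramfs:

vm exec -v $node1 -- 'lsinitrd | grep scsi-pr.conf'
vm exec -v $node2 -- 'lsinitrd | grep scsi-pr.conf'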

Configure watchdog (required by Pacemaker)

for node in $node1 $node2; do
	vm exec -v $node -- "echo 'softdog' >>/etc/modules-load.d/watchdog.conf"
	vm exec -v $node -- modprobe softdog
	vm exec -v $node -- systemctl enable watchdog --now
	vm exec -v $node -- systemctl status watchdog --no-pager
done
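
Loading softdog should also have created the /dev/watchdog device node that the watchdog daemon talks to; worth a quick check:

for node in $node1 $node2; do
	vm exec -v $node -- ls -l /dev/watchdog
done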

Configure Pacemaker cluster

## Set up hostname resolution (simplified, using /etc/hosts) and enable pcsd
read node1Ip < <(vm ifaddr $node1)
read node2Ip < <(vm ifaddr $node2)
for node in $node1 $node2; do
	vm exec -v $node -- "echo '$iscsiServIp $iscsiServ' >>/etc/hosts"
	vm exec -v $node -- "echo '$node1Ip $node1' >>/etc/hosts"
	vm exec -v $node -- "echo '$node2Ip $node2' >>/etc/hosts"
	vm exec -v $node -- systemctl enable pcsd --now
	vm exec -v $node -- 'echo hacluster:redhat123 | chpasswd'
done

## Create cluster on node1
vm exec -v $node1 -- pcs host auth $node1 $node2 -u hacluster -p redhat123
vm exec -v $node1 -- pcs cluster setup nfs-cluster $node1 $node2
vm exec -v $node1 -- pcs cluster start --all
vm exec -v $node1 -- pcs cluster enable --all

## Check cluster status
echo "{info} Waiting for cluster to start..."
vm exec -v $node1 -- sleep 8
vm exec -v $node1 -- pcs status

Configure fence_scsi

## Get shared device path (assume it's /dev/sda)
RDEV="/dev/sda"
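
If you prefer not to hard-code /dev/sda, the path can also be derived from the iSCSI by-path symlink created by udev; a sketch, assuming the target IQN from above uniquely identifies the LUN:

#RDEV=$(vm exec $node1 -- "readlink -f /dev/disk/by-path/*iscsi*$targetIQN*lun*" | head -1)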

## Create fence_scsi device
#meta provides=unfencing tells the cluster that nodes must be unfenced (their SCSI keys re-registered) via this device before resources can start on them
vm exec -v $node1 -- pcs stonith create fence-scsi fence_scsi \
    devices=$RDEV \
    pcmk_host_list="$node1 $node2" \
    pcmk_monitor_action=metadata \
    op monitor interval=60s \
    meta provides=unfencing

## Verify fencing configuration
vm exec -v $node1 -- sleep 8
vm exec -v $node1 -- pcs stonith status
vm exec -v $node1 -- pcs status
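
Optionally (and disruptively) you can prove that fencing actually works by fencing one node by hand and watching the cluster status afterwards; commented out here on purpose:

#vm exec -v $node1 -- pcs stonith fence $node2
#vm exec -v $node1 -- pcs status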

Create LVM and FS on shared disk

vm exec -v $node1 -- pvcreate /dev/sda
vm exec -v $node1 -- vgcreate --setautoactivation n --locktype none nfs_vg /dev/sda
vm exec -v $node1 -- vgchange --locktype none nfs_vg
vm exec -v $node1 -- vgs -o+systemid nfs_vg
vm exec -v $node1 -- vgs -o+lock_type nfs_vg
vm exec -v $node1 -- lvcreate -L 30G -n nfs_lv nfs_vg
vm exec -v $node1 -- mkfs.xfs /dev/nfs_vg/nfs_lv

Set up NFS HA

## create resource group
VIP=10.172.192.63
netmasklen=24
netaddr=10.172.192.0/$netmasklen
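#note: the VIP and netaddr above are just examples; pick a free address in the same subnet as the
#cluster nodes on the kissaltnet network (see node1Ip/node2Ip obtained earlier)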
vm exec -v $node1 -- pcs resource create nfs-lvm ocf:heartbeat:LVM-activate vgname=nfs_vg vg_access_mode=system_id --group nfsgroup
#the mount point given by directory=... is created by the `pcs resource create nfs-fs ...` resource below
vm exec -v $node1 -- pcs resource create nfs-fs ocf:heartbeat:Filesystem device=/dev/nfs_vg/nfs_lv directory=/mnt/nfsshare fstype=xfs --group nfsgroup
vm exec -v $node1 -- pcs resource create nfs-vip ocf:heartbeat:IPaddr2 ip=$VIP cidr_netmask=$netmasklen --group nfsgroup
vm exec -v $node1 -- pcs resource create nfs-daemon ocf:heartbeat:nfsserver nfs_shared_infodir=/mnt/nfsshare/nfsinfo --group nfsgroup
vm exec -v $node1 -- 'ls -l /mnt/nfsshare; mkdir -m 755 -p /mnt/nfsshare/exports; ls -l /mnt/nfsshare'
vm exec -v $node2 -- mkdir -p /mnt/nfsshare/exports #only needed on rhel-8, where the default active node is node2
vm exec -v $node1 -- pcs resource create nfs-export exportfs clientspec="$netaddr" options=rw,sync,no_root_squash directory=/mnt/nfsshare/exports --group nfsgroup

vm exec -v $node1 -- sleep 8
vm exec -v $node1 -- pcs status
vm exec -v $node1 -- pcs resource status nfsgroup

basic client test

vm exec -v $haclnt -- showmount -e $VIP

activeNode=$node1; passiveNode=$node2
if ! grep -q $activeNode < <(vm exec -v $node1 -- pcs resource status nfsgroup); then
	activeNode=$node2; passiveNode=$node1
fi

#vm exec -v $activeNode -- pcs node standby $activeNode
vm exec -v $activeNode -- pcs resource move nfsgroup $passiveNode
vm exec -v $activeNode -- sleep 8
vm exec -v $passiveNode -- pcs status

read activeNode passiveNode <<<"$passiveNode $activeNode"
vm exec -vx $activeNode -- "pcs resource status nfsgroup | grep -q $activeNode"

vm exec -vx $haclnt -- showmount -e $VIP
vm exec -vx $haclnt -- mkdir -p /mnt/nfsmp
vm exec -vx $haclnt -- mount $VIP:/mnt/nfsshare/exports /mnt/nfsmp
vm exec -vx $haclnt -- mkdir -p /mnt/nfsmp/testdir
vm exec -vx $haclnt -- touch /mnt/nfsmp/testfile
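
As a final check, fail the group over once more while the client mount is active and confirm that the file written above is still visible through the VIP:

vm exec -v $activeNode -- pcs resource move nfsgroup $passiveNode
vm exec -v $activeNode -- sleep 8
vm exec -vx $haclnt -- ls -l /mnt/nfsmp/testfile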