w1ndy · December 4, 2020 19:53
diff --git a/k8s-iscsi-xfs-lvm-recover.sh b/k8s-iscsi-xfs-lvm-recover.sh
 # Manual recovery runbook: repair XFS filesystems that live on LVM logical
 # volumes served through an iSCSI target to Kubernetes nodes.
 # Run the steps in order. `sdxx`, `/dev/xxx/pvc-xxx` and `xxx` are placeholders
 # that must be replaced by hand with the real device, volume and pod names.
 # NOTE(review): the commands presumably need root privileges - confirm.

 # Shut down the Kubernetes cluster first (on every node)
 systemctl stop kubelet

 # Stop all docker containers (on every node)
 # `docker ps -aq` prints the IDs of all containers, running or not.
 docker stop $(docker ps -aq)

 # Unmount all iSCSI disks (on every node)
 # First list the mount entries that contain "iqn" to find the devices.
 mount | grep iqn
 # --all-targets unmounts every mountpoint of the given device, not just one.
 umount --all-targets /dev/sdxx  # replace sdxx with each disk

 # Stop the iSCSI server (on the storage node)
 systemctl stop iscsid iscsid.socket
 systemctl stop targetd

 # Temporarily clear target configurations (on the storage node)
 # NOTE(review): the restore step below reloads a previously saved
 # configuration - confirm an up-to-date saved config exists before clearing.
 targetctl clear
 targetcli ls                    # make sure everything is empty

 # List all logical volumes in lvm (on the storage node)
 lvs

 # Repair each logical volume /dev/xxx/pvc-xxx (on the storage node)
 # Repeat the three commands below for every volume reported by `lvs`.
 mount /dev/xxx/pvc-xxx /mnt     # mount first to recover metadata logs
 umount /mnt
 # xfs_repair needs the filesystem unmounted. -L zeroes the log and can lose
 # data, so only use it when the mount above fails to replay the log.
 xfs_repair /dev/xxx/pvc-xxx     # use -L if necessary (can be destructive!)

 # Restore target configurations (on the storage node)
 targetctl restore
 targetcli ls                    # make sure everything is back

 # Kick off the Kubernetes cluster (on every node)
 systemctl start kubelet

 # Monitor and restart pods if necessary
 kubectl get pods
 kubectl delete pod xxx          # replace xxx with the name of a stuck pod
	# Manual recovery runbook: repair XFS filesystems that live on LVM logical
	# volumes served through an iSCSI target to Kubernetes nodes.
	# Run the steps in order. `sdxx`, `/dev/xxx/pvc-xxx` and `xxx` are placeholders
	# that must be replaced by hand with the real device, volume and pod names.
	# NOTE(review): the commands presumably need root privileges - confirm.

	# Shut down the Kubernetes cluster first (on every node)
	systemctl stop kubelet

	# Stop all docker containers (on every node)
	# `docker ps -aq` prints the IDs of all containers, running or not.
	docker stop $(docker ps -aq)

	# Unmount all iSCSI disks (on every node)
	# First list the mount entries that contain "iqn" to find the devices.
	# (The pipe must be a real `|`; an escaped `\|` would be passed to mount
	# as a literal argument instead of piping its output to grep.)
	mount | grep iqn
	# --all-targets unmounts every mountpoint of the given device, not just one.
	umount --all-targets /dev/sdxx # replace sdxx with each disk

	# Stop the iSCSI server (on the storage node)
	systemctl stop iscsid iscsid.socket
	systemctl stop targetd

	# Temporarily clear target configurations (on the storage node)
	# NOTE(review): the restore step below reloads a previously saved
	# configuration - confirm an up-to-date saved config exists before clearing.
	targetctl clear
	targetcli ls # make sure everything is empty

	# List all logical volumes in lvm (on the storage node)
	lvs

	# Repair each logical volume /dev/xxx/pvc-xxx (on the storage node)
	# Repeat the three commands below for every volume reported by `lvs`.
	mount /dev/xxx/pvc-xxx /mnt # mount first to recover metadata logs
	umount /mnt
	# xfs_repair needs the filesystem unmounted. -L zeroes the log and can lose
	# data, so only use it when the mount above fails to replay the log.
	xfs_repair /dev/xxx/pvc-xxx # use -L if necessary (can be destructive!)

	# Restore target configurations (on the storage node)
	targetctl restore
	targetcli ls # make sure everything is back

	# Kick off the Kubernetes cluster (on every node)
	systemctl start kubelet

	# Monitor and restart pods if necessary
	kubectl get pods
	kubectl delete pod xxx # replace xxx with the name of a stuck pod