Created
September 18, 2023 11:35
-
-
Save Be-El/3e0fb17ace7a899e1fdb0f15d4ab5fed to your computer and use it in GitHub Desktop.
cinder ceph volume migration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use autodie qw(:all); | |
use Nice::Try; | |
use JSON; | |
# script to migrate cinder volumes from one backend to another backend
# CONFIG SECTION | |
# Every value below is a placeholder and must be replaced with the real
# setting of the deployment before the script is run. The placeholders are
# quoted strings on purpose: an unquoted <...> is parsed by Perl as a glob()
# call, which compiles silently and yields a list of words instead of a
# configuration error.
use constant OLD_CLUSTER        => 'CHANGE_ME';    # name of the old cluster, used to lookup ceph config files
use constant NEW_CLUSTER        => 'CHANGE_ME';    # name of the new cluster, used to lookup ceph config files
use constant NEW_CINDER_HOST    => 'CHANGE_ME';    # new cinder host, used to update cinder database entries
use constant NEW_SERVICE_UUID   => 'CHANGE_ME';    # UUID of cinder service for new cluster
use constant NEW_VOLUME_TYPE    => 'CHANGE_ME';    # UUID of cinder volume type for new clusters
use constant OLD_CEPH_MGMT_HOST => 'CHANGE_ME';    # host to run ceph management commands on for old cluster
use constant NEW_CEPH_MGMT_HOST => 'CHANGE_ME';    # host to run ceph management commands on for new cluster
use constant MYSQL_MGMT_HOST    => 'CHANGE_ME';    # host to run mysql commands on for updating cinder database
# NOTE(review): VOLUME_POOL is not referenced by the code visible in this
# file, which spells the pool name out as "os-volumes" - confirm whether the
# two are meant to agree.
use constant VOLUME_POOL        => 'CHANGE_ME';    # pool containing cinder volumes

# Refuse to run while any setting still carries its placeholder value.
{
    my %setting_for = (
        OLD_CLUSTER        => OLD_CLUSTER,
        NEW_CLUSTER        => NEW_CLUSTER,
        NEW_CINDER_HOST    => NEW_CINDER_HOST,
        NEW_SERVICE_UUID   => NEW_SERVICE_UUID,
        NEW_VOLUME_TYPE    => NEW_VOLUME_TYPE,
        OLD_CEPH_MGMT_HOST => OLD_CEPH_MGMT_HOST,
        NEW_CEPH_MGMT_HOST => NEW_CEPH_MGMT_HOST,
        MYSQL_MGMT_HOST    => MYSQL_MGMT_HOST,
        VOLUME_POOL        => VOLUME_POOL,
    );
    my @unset = grep { $setting_for{$_} eq 'CHANGE_ME' } sort keys %setting_for;
    die "Unconfigured settings in CONFIG SECTION: @unset\n" if @unset;
}
# Print the usage text to STDERR and terminate the script.
#
# Parameters:
#   $error - message describing what went wrong. When it is true, it is
#            printed after the usage text and the exit status is 1;
#            otherwise the script exits with status 0.
#
# Never returns.
sub usage ($) {
    my ($error) = @_;

    # The second command line argument is optional; the live migration code
    # reads it from $ARGV[1] and appends it to 'openstack server migrate'.
    print STDERR <<"EOF";
migrate_volume <instance uuid> [live migration options]
The script will check whether the instance is using a volume on the old cluster,
and attempts to migrate it to the new cluster.
For a running instance, the optional second argument is passed on to
'openstack server migrate --live-migration'.
EOF

    if ($error) {
        print STDERR "ERROR: $error\n";
        exit(1);
    }
    else {
        exit(0);
    }
}
# Return the ids of all volumes attached to an instance.
#
# Parameters:
#   $uuid - instance identifier handed to 'openstack server show'
#
# Returns:
#   list of volume ids (empty when none are reported or the command
#   produced no output)
sub get_volumes ($) {
    my ($uuid) = @_;

    # Only UUID-shaped tokens are collected. Blank lines or lines in an
    # unexpected layout therefore no longer end up in the result as bogus
    # volume ids.
    # NOTE(review): assumes the volumes_attached column contains no UUIDs
    # other than the volume ids - confirm against the client release in use.
    my $uuid_re = qr/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;

    my @ids;
    foreach my $line (`openstack server show -c volumes_attached -f value $uuid`) {
        push @ids, $line =~ /($uuid_re)/g;
    }
    return @ids;
}
# Look up the volume type of a cinder volume.
#
# Parameters:
#   $vol_id - id of the volume handed to 'openstack volume show'
#
# Returns:
#   the command output joined into one string, with one trailing newline
#   removed
sub get_volume_type($) {
    my $vol_id = shift;

    my @output_lines = `openstack volume show -c type -f value $vol_id`;
    my $type         = join('', @output_lines);
    chomp $type;

    return $type;
}
# Run an rbd command on a remote host via ssh (as root) and return its output.
#
# Parameters:
#   $host - host to connect to
#   $cmd  - arguments appended to the remote 'rbd' invocation
#
# Returns:
#   list of output lines (newlines included)
#
# A failing command is reported with a warning but is not fatal: callers use
# this function for polling and for best-effort cleanup, where a non-zero
# exit status can be expected.
sub remote_rbd ($$) {
    my ($host, $cmd) = @_;

    my @output = `ssh -l root $host rbd $cmd`;

    # Backticks are not covered by autodie, so without this check a failed
    # ssh or rbd invocation would go completely unnoticed.
    if ($? != 0) {
        warn "WARNING: 'rbd $cmd' on $host failed (wait status $?)\n";
    }

    return @output;
}
# Enable the journaling feature on a volume image unless 'rbd info' already
# mentions it.
#
# Parameters:
#   $exec_host - host the rbd commands are run on
#   $vol_id    - id of the volume; the image is os-volumes/volume-<id>
sub ensure_journaling ($$) {
    my ($exec_host, $vol_id) = @_;

    my $image = "os-volumes/volume-$vol_id";

    # Number of 'rbd info' output lines that mention journaling.
    my $hits = grep { /journaling/ } remote_rbd($exec_host, "info $image");

    if (!$hits) {
        print "enabling journaling on volume $vol_id at $exec_host\n";
        remote_rbd($exec_host, "feature enable $image journaling");
    }
}
# Enable or disable rbd mirroring for a volume image.
#
# Parameters:
#   $exec_host - host the rbd commands are run on
#   $vol_id    - id of the volume; the image is os-volumes/volume-<id>
#   $state     - true to enable mirroring (journaling is ensured first),
#                false to disable it
#   $force     - true to append --force to the rbd command
#
# Returns:
#   the output of the rbd command
sub set_image_mirroring ($$$$) {
    my ($exec_host, $vol_id, $state, $force) = @_;

    my $action;
    if ($state) {
        ensure_journaling($exec_host, $vol_id);
        print "enabling mirroring of $vol_id on $exec_host\n";
        $action = 'enable';
    }
    else {
        print "disabling mirroring of $vol_id on $exec_host\n";
        $action = 'disable';
    }

    my $suffix = $force ? ' --force' : '';
    return remote_rbd($exec_host, "mirror image $action os-volumes/volume-$vol_id$suffix");
}
# Query 'rbd mirror image status' for a volume image and parse the indented
# "key: value" lines of its output.
#
# Parameters:
#   $host - host the rbd command is run on
#   $id   - id of the volume; the image is os-volumes/volume-<id>
#
# Returns:
#   hash reference mapping each key to its value (empty when the command
#   produced no parsable output)
sub get_mirror_state($$) {
    my ($host, $id) = @_;
    my $state = {};

    foreach my $line (remote_rbd($host, "mirror image status os-volumes/volume-$id")) {
        # Only indented lines carry status fields.
        next unless ($line =~ /^ /);
        chomp $line;

        # Split at the first colon. Lines without any colon are skipped
        # instead of being turned into a garbage key/value pair.
        next unless ($line =~ /^\s+([^:]*):\s*(.*)$/);
        my ($key, $value) = ($1, $2);

        # Keep the first occurrence of a key.
        # NOTE(review): some rbd releases appear to print a nested peer_sites
        # section that repeats keys such as state/description - confirm. With
        # first-wins the values of the local image are preserved in that case.
        $state->{$key} = $value unless exists $state->{$key};
    }

    return $state;
}
# Block until the mirror state of a volume on the new cluster is
# 'up+replaying', polling every 5 seconds.
#
# Parameters:
#   $id - id of the volume to wait for
#
# There is no timeout; the function only returns once the state is reached.
sub wait_for_bootstrap($) {
    my ($id) = @_;

    print("waiting for initial bootstrap of volume $id\n");

    while (1) {
        my $state = get_mirror_state(NEW_CEPH_MGMT_HOST, $id);

        # The status may lack these fields (e.g. when the rbd command
        # failed); substitute printable defaults instead of comparing undef.
        my $current     = defined $state->{state}       ? $state->{state}       : 'n/a';
        my $description = defined $state->{description} ? $state->{description} : 'n/a';

        if ($current eq 'up+replaying') {
            print("initial bootstrap done, state now: $current\n");
            last;
        }

        print("current state: $current ($description), waiting 5 seconds\n");
        sleep(5);
    }
}
# Block until the mirror of a volume on the new cluster is in state
# 'up+replaying' and reports at most $allowed_pending entries behind the
# primary, polling every 5 seconds.
#
# Parameters:
#   $id              - id of the volume to wait for
#   $allowed_pending - maximum accepted value of entries_behind_primary
#
# Dies if the JSON part of the status description cannot be decoded.
# There is no timeout.
sub wait_for_sync($$) {
    my ($id, $allowed_pending) = @_;
    my $json = JSON->new();

    print("waiting for complete sync of volume $id\n");

    while (1) {
        my $state   = get_mirror_state(NEW_CEPH_MGMT_HOST, $id);
        my $current = defined $state->{state} ? $state->{state} : 'n/a';

        if ($current eq 'up+replaying') {
            my $line = defined $state->{description} ? $state->{description} : '';

            # The description is expected to carry a JSON object after some
            # leading text. Decode only when such an object is present, so a
            # description without one counts as "not synced yet" rather than
            # aborting the migration on non-JSON text.
            my $json_start = index($line, '{');
            if ($json_start >= 0) {
                my $desc   = $json->decode(substr($line, $json_start));
                my $behind = $desc->{entries_behind_primary};
                last if (defined($behind) && $behind <= $allowed_pending);
            }
        }

        print "not synched yet, current state $current, waiting 5 seconds\n";
        sleep(5);
    }

    # $id is a volume id, so the message names a volume.
    print "sync finished for volume $id\n";
}
# Fetch the status of an instance via 'openstack server show'.
#
# Parameters:
#   $id - instance identifier
#
# Returns:
#   the command output with one trailing newline removed
sub get_instance_state($) {
    my $id = shift;

    chomp(my $state = `openstack server show -c status -f value $id`);

    return $state;
}
# Poll the instance status every 5 seconds until it equals the wanted state.
#
# Parameters:
#   $id    - instance identifier
#   $state - status string to wait for
#
# There is no timeout.
sub wait_for_instance_state($$) {
    my ($id, $state) = @_;

    print "waiting for instance $id to reach state $state\n";

    my $current = get_instance_state($id);
    until ($current eq $state) {
        print "current state $current, waiting 5 seconds\n";
        sleep(5);
        $current = get_instance_state($id);
    }
}
# Switch the primary role of a volume image from the old to the new cluster:
# demote on the old cluster, wait until the new cluster reports the demotion,
# promote on the new cluster and wait until that is reported as well.
#
# Parameters:
#   $id - id of the volume; the image is os-volumes/volume-<id>
#
# Both waits poll every 5 seconds without a timeout.
sub handle_instance_demotion($) {
    my ($id) = @_;

    my $image = "os-volumes/volume-$id";

    # Poll the mirror status on the new cluster until both the state and the
    # description match the expected values.
    my $await_status = sub {
        my ($want_state, $want_description) = @_;
        while (1) {
            my $status = get_mirror_state(NEW_CEPH_MGMT_HOST, $id);
            return if $status->{state} eq $want_state
                   && $status->{description} eq $want_description;
            print "state is ".$status->{state}."/".$status->{description}.", waiting 5 seconds\n";
            sleep(5);
        }
    };

    # demote on old cluster
    print "demoting image $id on old cluster\n";
    remote_rbd(OLD_CEPH_MGMT_HOST, "mirror image demote $image");

    print "waiting for demotion to reach new cluster\n";
    $await_status->('up+unknown', 'remote image is non-primary');

    print "promoting on new cluster\n";
    remote_rbd(NEW_CEPH_MGMT_HOST, "mirror image promote $image");
    $await_status->('up+stopped', 'local image is primary');
}
# Enable mirroring on the old cluster for every volume, then wait until each
# image is listed on the new cluster and has completed its initial bootstrap.
sub _start_mirroring {
    my (@volumes) = @_;

    # enable mirroring of volume(s) on old host
    foreach my $id (@volumes) {
        set_image_mirroring(OLD_CEPH_MGMT_HOST, $id, 1, 0);
    }

    # wait for mirroring to kick in: poll the image list of the new cluster
    foreach my $id (@volumes) {
        while (!grep { index($_, $id) != -1 } remote_rbd(NEW_CEPH_MGMT_HOST, "-p os-volumes ls")) {
            sleep(5);
        }
        print "volume $id present on new cluster\n";
    }

    # wait for first initial bootstrap to complete
    foreach my $id (@volumes) {
        wait_for_bootstrap($id);
    }
}

# Point the cinder database entry of every volume to the new cluster (volume
# type, host and service uuid) by piping an UPDATE statement to mysql on the
# database management host. Dies via autodie if a command fails.
sub _update_cinder_db {
    my (@volumes) = @_;

    foreach my $id (@volumes) {
        system(sprintf("echo 'update volumes set volume_type_id=\"%s\",host=\"%s\",service_uuid=\"%s\" where id=\"%s\"' | ssh -l root %s mysql cinder",
                       NEW_VOLUME_TYPE, NEW_CINDER_HOST, NEW_SERVICE_UUID, $id, MYSQL_MGMT_HOST));
    }
}

# Force-disable mirroring of every volume, first on the new and then on the
# old cluster.
sub _disable_mirroring {
    my (@volumes) = @_;

    foreach my $id (@volumes) {
        set_image_mirroring(NEW_CEPH_MGMT_HOST, $id, 0, 1);
    }
    foreach my $id (@volumes) {
        set_image_mirroring(OLD_CEPH_MGMT_HOST, $id, 0, 1);
    }
}

# Move the image of every volume on the old cluster to the rbd trash.
sub _trash_old_images {
    my (@volumes) = @_;

    foreach my $id (@volumes) {
        print "moving old image $id to trash\n";
        remote_rbd(OLD_CEPH_MGMT_HOST, "trash mv os-volumes/volume-$id");
    }
}

# Migrate the volumes of a running instance to the new cluster.
#
# Steps: start mirroring, pre-sync, pause the instance, complete the sync,
# update the cinder database, swap the primary image role, live migrate the
# instance. The instance is unpaused and mirroring is disabled again in
# finally blocks; the old images are moved to the trash only when the live
# migration succeeded.
#
# Parameters:
#   $instance - instance identifier
#   @volumes  - ids of the volumes to migrate
#
# Reads $ARGV[1] as optional extra options for 'openstack server migrate'.
#
# Returns:
#   1 when the live migration command succeeded, 0 otherwise
sub live_volume_migration {
    my ($instance, @volumes) = @_;
    my $successful = 0;
    try {
        _start_mirroring(@volumes);
        try {
            # perform a first round of synchronization while the instance
            # is still running
            foreach my $id (@volumes) {
                wait_for_sync($id, 1000);
            }
            # pause the instance
            print("pausing instance $instance\n");
            system("openstack server pause $instance");
            wait_for_instance_state($instance, 'PAUSED');
            # wait for mirroring to complete
            foreach my $id (@volumes) {
                wait_for_sync($id, 0);
            }
            # modify cinder database entry to point to new cluster
            _update_cinder_db(@volumes);
            # demote rbd image on old cluster and promote it on new cluster
            foreach my $id (@volumes) {
                handle_instance_demotion($id);
            }
            try {
                # live migrate instance to renew domain block config
                my $mig_opts = $ARGV[1] || "";
                print "performing instance $instance live migration with options $mig_opts\n";
                system("openstack server migrate --live-migration --wait $mig_opts $instance");
                #wait_for_instance_state($instance, 'PAUSED');
                $successful = 1;
            }
            catch ($e) {
                # Report the failure instead of discarding it silently.
                print STDERR "ERROR: live migration of instance $instance failed: $e\n";
                # if the live migration fails, we need to promote images again on old cluster
                foreach my $id (@volumes) {
                    no autodie;
                    print "error during live migration, promoting volume $id on old cluster again\n";
                    remote_rbd(NEW_CEPH_MGMT_HOST, "mirror image demote os-volumes/volume-$id");
                    remote_rbd(OLD_CEPH_MGMT_HOST, "mirror image promote os-volumes/volume-$id");
                }
                # The database update above ran before this try block was
                # entered and is not undone by the rollback.
                print STDERR "WARNING: cinder database entries of volume(s) ".join(",", @volumes)
                    ." already point to the new cluster and have to be reverted manually\n";
            }
        }
        finally {
            # unpause instance
            print("resuming instance $instance\n");
            no autodie;
            system("openstack server unpause $instance");
        }
    }
    finally {
        # (force) disable mirroring of the volumes on both clusters
        no autodie;
        _disable_mirroring(@volumes);
    }
    if ($successful) {
        _trash_old_images(@volumes);
    }
    return $successful;
}

# Migrate the volumes of a shut-off instance to the new cluster.
#
# Steps: start mirroring, complete the sync, update the cinder database and
# swap the primary image role. Mirroring is disabled again in a finally
# block; the old images are moved to the trash only when all steps completed.
#
# Parameters:
#   $instance - instance identifier (not used by the steps themselves)
#   @volumes  - ids of the volumes to migrate
#
# Returns:
#   1 when all steps completed, 0 otherwise
sub cold_volume_migration {
    my ($instance, @volumes) = @_;
    my $successful = 0;
    try {
        _start_mirroring(@volumes);
        # wait for mirroring to complete
        foreach my $id (@volumes) {
            wait_for_sync($id, 0);
        }
        # modify cinder database entry to point to new cluster
        _update_cinder_db(@volumes);
        # demote rbd image on old cluster and promote it on new cluster
        foreach my $id (@volumes) {
            handle_instance_demotion($id);
        }
        $successful = 1;
    }
    finally {
        # (force) disable mirroring of the volumes on both clusters
        no autodie;
        _disable_mirroring(@volumes);
    }
    if ($successful) {
        _trash_old_images(@volumes);
    }
    return $successful;
}
# Entry point: validate the invocation, collect the volumes of the instance
# that still live on the old cluster and dispatch to the migration routine
# matching the instance state.
if (scalar(@ARGV) < 1) {
    usage("Not enough parameters.");
}
unless (defined($ENV{OS_PASSWORD})) {
    usage("No openstack configuration sourced.");
}
my $instance = $ARGV[0];

# check prerequisites
# - instance must have volumes
# - at least one volume must be present on the old cluster
my @volumes = get_volumes($instance);
if (scalar(@volumes) == 0) {
    usage("No attached volumes found for instance $instance.");
}

# A volume counts as "on the old cluster" when its volume type equals
# OLD_CLUSTER.
my @old_volumes = grep { get_volume_type($_) eq OLD_CLUSTER } @volumes;
if (scalar(@old_volumes) == 0) {
    usage("No volume on old cluster found for instance $instance.");
}
print "found volumes on old cluster: ".join(",", @old_volumes)."\n";

my $state = get_instance_state($instance);
if ($state eq 'ACTIVE') {
    live_volume_migration($instance, @old_volumes);
}
elsif ($state eq 'SHUTOFF') {
    cold_volume_migration($instance, @old_volumes);
}
else {
    # Both ACTIVE and SHUTOFF are handled above, so the message names both.
    usage("Only instances in state ACTIVE or SHUTOFF are supported at the moment, instance state is $state.");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment