git.proxmox.com Git - pve-storage.git/blob - PVE/ReplicationTools.pm
PVE::ReplicationTools::sync_guest - do not add snapname to disks_status
[pve-storage.git] / PVE / ReplicationTools.pm
1 package PVE::ReplicationTools;
2
3 use warnings;
4 use strict;
5 use Data::Dumper;
6 use JSON;
7
8 use PVE::INotify;
9 use PVE::Tools;
10 use PVE::Cluster;
11 use PVE::QemuConfig;
12 use PVE::QemuServer;
13 use PVE::LXC::Config;
14 use PVE::LXC;
15 use PVE::Storage;
16
# Directory and file in which the replication job state is stored
# (see write_state/read_state).
my $STATE_DIR = '/var/lib/pve-replica';
my $STATE_PATH = $STATE_DIR . '/pve-replica.state';

# Node name as reported by PVE::INotify::nodename().
my $local_node = PVE::INotify::nodename();
21
# Build the base ssh command line for running something as root on $ip.
# Returns a fresh array reference on every call, so callers may push
# further arguments onto it.
my $get_ssh_cmd = sub {
    my ($ip) = @_;

    my @argv = ('ssh', '-o', 'Batchmode=yes');
    push @argv, "root\@$ip";

    return \@argv;
};
27
# Load the configuration of guest $vmid.
#
# The guest type is looked up in the list returned by
# PVE::Cluster::get_vmlist(); depending on it the config is loaded through
# PVE::QemuConfig or PVE::LXC::Config.
#
# Returns the list ($guestconf, $type, $running), where $type is 'qemu' or
# 'lxc' and $running is the result of the matching check_running() call.
#
# Dies if the guest is not part of the vmlist or has an unsupported type.
my $get_guestconfig = sub {
    my ($vmid) = @_;

    die "no guest ID specified\n" if !defined($vmid);

    my $vms = PVE::Cluster::get_vmlist();

    # Use exists() so that asking for an unknown guest does not autovivify
    # an empty entry inside the returned list.
    my $type;
    if ($vms && $vms->{ids} && exists($vms->{ids}->{$vmid})) {
        $type = $vms->{ids}->{$vmid}->{type};
    }

    # Without this guard the string comparisons below would run on undef.
    die "guest '$vmid' not found in cluster guest list\n" if !defined($type);

    my $guestconf;
    my $running;

    if ($type eq 'qemu') {
        $guestconf = PVE::QemuConfig->load_config($vmid);
        $running = PVE::QemuServer::check_running($vmid);
    } elsif ($type eq 'lxc') {
        $guestconf = PVE::LXC::Config->load_config($vmid);
        $running = PVE::LXC::check_running($vmid);
    } else {
        die "internal error: unsupported guest type '$type' for guest '$vmid'";
    }

    return ($guestconf, $type, $running);
};
50
# Store $state, encoded as JSON, in the state file.
#
# The state directory is created first; the result of mkdir is not checked
# here (it fails harmlessly when the directory already exists).
sub write_state {
    my ($state) = @_;

    mkdir $STATE_DIR;

    my $json = encode_json($state);

    return PVE::Tools::file_set_contents($STATE_PATH, $json);
}
58
# Read the state file and return its decoded JSON content.
#
# Returns an empty hash reference when the file does not exist or is empty.
sub read_state {
    my $state = {};

    if (-e $STATE_PATH) {
        my $raw = PVE::Tools::file_get_contents($STATE_PATH);
        $state = decode_json($raw) if $raw ne '';
    }

    return $state;
}
69
# Determine the IP address to use for reaching node $nodename.
#
# Starts with the address from PVE::Cluster::remote_node_ip(). If
# datacenter.cfg sets 'storage_replication_network', the node is asked over
# ssh ('pvecm mtunnel --get_migration_ip') and an "ip: '<address>'" line in
# its output replaces the initial address.
sub get_node_ip {
    my ($nodename) = @_;

    my $remoteip = PVE::Cluster::remote_node_ip($nodename, 1);

    my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
    my $network = $dc_conf->{storage_replication_network};

    # no dedicated replication network configured
    return $remoteip if !$network;

    my $cmd = $get_ssh_cmd->($remoteip);
    push @$cmd, '--', 'pvecm', 'mtunnel', '--get_migration_ip', '--migration_network', $network;

    my $parse_line = sub {
        my ($line) = @_;

        $remoteip = $1 if $line =~ m/^ip: '($PVE::Tools::IPRE)'$/;
    };

    PVE::Tools::run_command($cmd, outfunc => $parse_line);

    return $remoteip;
}
92
# Collect the replication jobs of all guests located on the local node.
#
# Returns a hash reference mapping vmid to a copy of the job fields
# limit, interval, tnode, lastsync, state and fail taken from the state
# file. Guests without an entry in the state file are left out.
sub get_all_jobs {

    my $vms = PVE::Cluster::get_vmlist();

    my $state = read_state();

    my @fields = qw(limit interval tnode lastsync state fail);

    my $jobs = {};

    foreach my $vmid (keys %{$vms->{ids}}) {
        my $guest = $vms->{ids}->{$vmid};
        next if $guest->{node} ne $local_node;

        my $vm_state = $state->{$vmid};
        next if !defined($vm_state);

        # copy only the known fields, missing ones end up as undef
        my %job;
        @job{@fields} = @{$vm_state}{@fields};

        $jobs->{$vmid} = \%job;
    }

    return $jobs;
}
121
# Replicate all syncable volumes of guest $vmid to the target node stored in
# the guest's job state.
#
# Steps:
#   - mark the job as 'sync' in the state file
#   - make sure no involved storage is restricted away from the source or
#     target node
#   - under the state file lock: freeze the guest filesystem (only when the
#     qemu guest agent is running), snapshot every volume as
#     "replica_<timestamp>", thaw, send the snapshots to the target node,
#     remove the snapshot of the previous sync and store the new sync time
#
# Parameters:
#   $vmid  - ID of the guest
#   $param - hash reference; 'limit' overrides the guest's
#            'replica_rate_limit', 'verbose' is passed to
#            PVE::Storage::volume_send
#
# Returns the timestamp used in the new snapshot name. Dies on errors; the
# job state is set to 'error' when snapshotting fails or when sending failed
# more than three times.
sub sync_guest {
    my ($vmid, $param) = @_;

    my $jobs = read_state();
    $jobs->{$vmid}->{state} = 'sync';
    write_state($jobs);

    my ($guest_conf, $vm_type, $running) = $get_guestconfig->($vmid);
    my $qga = 0;

    my $job = $jobs->{$vmid};
    my $tnode = $job->{tnode};

    if ($vm_type eq 'qemu' && defined($guest_conf->{agent})) {
        $qga = PVE::QemuServer::qga_check_running($vmid) if $running;
    }

    # will not die if a disk is not syncable
    my $disks = get_syncable_guestdisks($guest_conf, $vm_type);

    # check if all nodes have the storage available
    my $storage_config = PVE::Storage::config();
    foreach my $volid (keys %$disks) {
        my ($storeid) = PVE::Storage::parse_volume_id($volid);

        my $store = $storage_config->{ids}->{$storeid};
        die "Storage $storeid not availible on node: $tnode\n"
            if $store->{nodes} && !$store->{nodes}->{$tnode};
        die "Storage $storeid not availible on node: $local_node\n"
            if $store->{nodes} && !$store->{nodes}->{$local_node};
    }

    my $limit = $param->{limit};
    $limit = $guest_conf->{replica_rate_limit} if !defined($limit);

    my $snap_time = time();

    # An undefined lastsync is treated like an empty string, so that it is
    # rejected below without "uninitialized value" warnings.
    my $lastsync_raw = defined($job->{lastsync}) ? $job->{lastsync} : '';

    die "Invalid synctime format: $lastsync_raw."
        if $lastsync_raw !~ m/^(\d+)$/;

    my $lastsync = $1;
    my $incremental_snap = $lastsync ? "replica_$lastsync" : undef;

    my $snapname = "replica_$snap_time";

    my $disks_status = {};

    # Thaw the guest filesystem again; a failing thaw is only reported.
    my $thaw_guest = sub {
        return if !$qga;

        print "Unfreeze guest filesystem\n";
        eval { PVE::QemuServer::vm_mon_cmd($vmid, "guest-fsfreeze-thaw"); };
        warn $@ if $@;
    };

    my $sync_job = sub {

        # Freeze the filesystem for data consistency. This is done only
        # after the lock has been acquired: when freezing happens before
        # and the lock attempt times out, nothing thaws the guest again.
        if ($qga) {
            print "Freeze guest filesystem\n";

            eval { PVE::QemuServer::vm_mon_cmd($vmid, "guest-fsfreeze-freeze"); };
            warn $@ if $@;
        }

        # make snapshot of all volumes
        foreach my $volid (keys %$disks) {

            eval {
                PVE::Storage::volume_snapshot($storage_config, $volid, $snapname);
            };

            if (my $err = $@) {
                $thaw_guest->();

                cleanup_snapshot($disks_status, $snapname, $storage_config, $running);
                $jobs->{$vmid}->{state} = 'error';
                write_state($jobs);

                die $err;
            }

            $disks_status->{$volid}->{snapshot} = 1;
        }

        $thaw_guest->();

        my $ip = get_node_ip($tnode);

        foreach my $volid (keys %$disks) {

            eval {
                PVE::Storage::volume_send($storage_config, $volid, $snapname,
                                          $ip, $incremental_snap,
                                          $param->{verbose}, $limit);
                $job->{fail} = 0;
            };

            if (my $err = $@) {
                cleanup_snapshot($disks_status, $snapname, $storage_config, $running, $ip);
                $job->{fail}++;
                $job->{state} = 'error' if $job->{fail} > 3;

                $jobs->{$vmid} = $job;
                write_state($jobs);
                die $err;
            }

            $disks_status->{$volid}->{synced} = 1;
        }

        # delete old snapshot if exists
        cleanup_snapshot($disks_status, $snapname, $storage_config, $running, $ip, $lastsync)
            if $lastsync != 0;

        $job->{lastsync} = $snap_time;
        $job->{state} = "ok";
        $jobs->{$vmid} = $job;
        write_state($jobs);
    };

    PVE::Tools::lock_file_full($STATE_PATH, 60, 0, $sync_job);
    die $@ if $@;

    return $snap_time;
}
253
# Forward to the send_image method of the plugin stored in $vol->{plugin},
# passing all arguments through unchanged and returning its result.
sub send_image {
    my ($vol, $param, $ip, $all_snaps_in_delta, $alter_path) = @_;

    my @args = ($vol, $param, $ip, $all_snaps_in_delta, $alter_path);

    return $vol->{plugin}->send_image(@args);
}
260
# Create or re-enable the replication job of guest $vmid and, unless
# $no_sync is defined, run a first sync right away.
#
# Parameters:
#   $vmid    - ID of the guest
#   $no_sync - when defined, only the job state is written, no sync is run
#   $target  - target node; falls back to the guest's 'replica_target'
#
# The interval is taken from the guest's 'replica_interval' (default 15).
# A missing 'lastsync' is initialized from get_lastsync(), or 0 if that
# returns nothing.
#
# Dies if no target is known, if the target is the local node, or if the
# sync fails (the job state is set to 'error' in that case).
sub job_enable {
    my ($vmid, $no_sync, $target) = @_;

    my $update_state = sub {

        my $jobs = read_state();
        my $job = $jobs->{$vmid};
        my ($config) = $get_guestconfig->($vmid);

        $job->{interval} = $config->{replica_interval} || 15;

        $job->{tnode} = $target || $config->{replica_target};
        die "Replication target must be set\n" if !defined($job->{tnode});

        die "Target and source node can't be the same\n"
            if $job->{tnode} eq $local_node;

        $job->{fail} = 0;
        if (!defined($job->{lastsync})) {
            $job->{lastsync} = get_lastsync($vmid) || 0;
        }

        $job->{state} = 'ok';
        $jobs->{$vmid} = $job;
        write_state($jobs);

        eval {
            sync_guest($vmid, { verbose => 1 }) if !defined($no_sync);
        };
        if (my $err = $@) {
            # sync_guest() reads and writes the state file on its own (for
            # example to count failed attempts). Writing back the copy read
            # at the top would discard those updates, so start from the
            # current file content and fall back to our copy only if it
            # cannot be read.
            my $current = eval { read_state() };
            $current = $jobs if !$current;

            $current->{$vmid}->{state} = 'error';
            write_state($current);
            die $err;
        }
    };

    PVE::Tools::lock_file_full($STATE_PATH, 5, 0, $update_state);
    die $@ if $@;
}
309
# Set the state of the replication job of guest $vmid to 'off'.
#
# Runs under the state file lock; prints a notice when the guest has no
# job entry. Dies with the error reported by the locked section.
sub job_disable {
    my ($vmid) = @_;

    my $switch_off = sub {

        my $jobs = read_state();

        if (!defined($jobs->{$vmid})) {
            print "No replica service for $vmid\n";
            return;
        }

        $jobs->{$vmid}->{state} = 'off';
        write_state($jobs);
    };

    PVE::Tools::lock_file_full($STATE_PATH, 5, 0 , $switch_off);
    die $@ if $@;
}
328
# Remove the replication job entry of guest $vmid from the state file.
#
# Runs under the state file lock; prints a notice when the guest has no
# job entry. Dies with the error reported by the locked section.
sub job_remove {
    my ($vmid) = @_;

    my $drop_job = sub {

        my $jobs = read_state();

        if (!defined($jobs->{$vmid})) {
            print "No replica service for $vmid\n";
            return;
        }

        delete $jobs->{$vmid};
        write_state($jobs);
    };

    PVE::Tools::lock_file_full($STATE_PATH, 5, 0 , $drop_job);
    die $@ if $@;
}
347
# Find the volumes of a guest config which can be replicated.
#
# Only volumes with a true 'replica' flag are looked at. A volume counts as
# syncable when PVE::Storage::volume_has_feature() reports the 'replicate'
# feature for it; otherwise a warning is emitted (unless $noerr is set) and
# the warning flag is raised.
#
# Parameters:
#   $config  - guest configuration
#   $vm_type - 'qemu' or 'lxc', anything else dies
#   $running - passed on to volume_has_feature()
#   $noerr   - suppress the per-volume warning
#
# Returns a hash reference { volume name => 1 } in scalar context and the
# list ($warnings, $hashref) in list context.
sub get_syncable_guestdisks {
    my ($config, $vm_type, $running, $noerr) = @_;

    my %syncable;

    my $cfg = PVE::Storage::config();

    my $warnings = 0;

    my $check_volume = sub {
        my ($id, $volume) = @_;

        return if !$volume->{replica};

        # qemu drives keep the volume name in 'file', other guests in 'volume'
        my $volname = $vm_type eq 'qemu' ? $volume->{file} : $volume->{volume};

        if (!PVE::Storage::volume_has_feature($cfg, 'replicate', $volname, undef, $running)) {
            warn "Can't sync Volume: $volname\n" if !$noerr;
            $warnings = 1;
        } else {
            $syncable{$volname} = 1;
        }
    };

    if ($vm_type eq 'qemu') {
        PVE::QemuServer::foreach_drive($config, $check_volume);
    } elsif ($vm_type eq 'lxc') {
        PVE::LXC::Config->foreach_mountpoint($config, $check_volume);
    } else {
        die "Unknown VM type: $vm_type";
    }

    return ($warnings, \%syncable) if wantarray;

    return \%syncable;
}
385
# Remove snapshots - or, without $regex, the volumes themselves - of all
# syncable volumes of guest $vmid.
#
# Parameters:
#   $vmid  - ID of the guest
#   $regex - filter passed to PVE::Storage::volume_snapshot_list(); every
#            listed snapshot is deleted. If undefined, each volume is
#            freed instead ('pvesm free' over ssh on the remote node,
#            PVE::Storage::vdisk_free() locally).
#   $node  - node to operate on; if undefined the local node is used
#
# WARNING: with both $regex and $node undefined the local volumes of the
# guest are freed.
#
# Dies if $node is given but no IP address can be determined for it.
sub destroy_all_snapshots {
    my ($vmid, $regex, $node) = @_;

    my $remote = defined($node);
    my $ip = $remote ? get_node_ip($node) : undef;

    # Never fall back to the local code path when a remote node was
    # requested: without $regex that path frees the local volumes.
    die "unable to get IP address of node '$node'\n" if $remote && !$ip;

    my ($guest_conf, $vm_type, $running) = $get_guestconfig->($vmid);

    my $disks = get_syncable_guestdisks($guest_conf, $vm_type);
    my $cfg = PVE::Storage::config();

    my $snapshots = {};
    foreach my $volid (keys %$disks) {
        $snapshots->{$volid} =
            PVE::Storage::volume_snapshot_list($cfg, $volid, $regex, $node, $ip);
    }

    foreach my $volid (keys %$snapshots) {

        if (defined($regex)) {
            foreach my $snap (@{$snapshots->{$volid}}) {
                if ($remote) {
                    PVE::Storage::volume_snapshot_delete_remote($cfg, $volid, $snap, $ip);
                } else {
                    PVE::Storage::volume_snapshot_delete($cfg, $volid, $snap, $running);
                }
            }
        } elsif ($remote) {
            my $cmd = $get_ssh_cmd->($ip);

            push @$cmd, '--', 'pvesm', 'free', $volid;

            PVE::Tools::run_command($cmd);
        } else {
            PVE::Storage::vdisk_free($cfg, $volid);
        }
    }

}
427
# Delete a replication snapshot from the volumes listed in $disks.
#
# Parameters:
#   $disks         - hash reference, volume ID => { snapshot => ..., synced => ... }
#   $snapname      - snapshot to delete, unless $lastsync_snap is true
#   $cfg           - storage configuration
#   $running       - passed on to PVE::Storage::volume_snapshot_delete()
#   $ip            - address of the target node; remote snapshots are only
#                    touched when this is defined
#   $lastsync_snap - if true, "replica_$lastsync_snap" is deleted instead of
#                    $snapname; if defined, the per-volume flags are ignored
#                    and the snapshot is deleted for every volume
#
# Without $lastsync_snap the remote snapshot is deleted for volumes flagged
# 'synced' and the local one for volumes flagged 'snapshot'.
sub cleanup_snapshot {
    my ($disks, $snapname, $cfg, $running, $ip, $lastsync_snap) = @_;

    $snapname = "replica_$lastsync_snap" if $lastsync_snap;

    my $have_remote = defined($ip);
    my $unconditional = defined($lastsync_snap);

    foreach my $volid (keys %$disks) {

        my $drop_remote = $have_remote && ($unconditional || $disks->{$volid}->{synced});
        PVE::Storage::volume_snapshot_delete_remote($cfg, $volid, $snapname, $ip)
            if $drop_remote;

        my $drop_local = $unconditional || $disks->{$volid}->{snapshot};
        PVE::Storage::volume_snapshot_delete($cfg, $volid, $snapname, $running)
            if $drop_local;
    }
}
446
# Remove everything belonging to the replication of guest $vmid: the local
# 'replica' snapshots, the replicated volumes on the target node, the job
# entry in the state file and the replication settings in the guest config.
#
# Does nothing if the guest has no job entry. Runs under the state file
# lock and dies with the error reported by the locked section.
sub destroy_replica {
    my ($vmid) = @_;

    my $code = sub {

        my $jobs = read_state();

        return if !defined($jobs->{$vmid});

        my ($guest_conf, $vm_type) = $get_guestconfig->($vmid);

        destroy_all_snapshots($vmid, 'replica');

        # The target normally comes from the guest config; fall back to the
        # node recorded in the job, which is where sync_guest() sent the data.
        my $target = $guest_conf->{replica_target};
        $target = $jobs->{$vmid}->{tnode} if !defined($target);

        # Without a node destroy_all_snapshots() works locally, and without
        # a regex it frees volumes - so calling it with an undefined target
        # would free the guest's own local volumes.
        if (defined($target)) {
            destroy_all_snapshots($vmid, undef, $target);
        } else {
            warn "no replication target known for guest $vmid, " .
                "not removing remote volumes\n";
        }

        delete($jobs->{$vmid});

        delete($guest_conf->{replica_rate_limit});
        # 'replica_interval' is the key read by job_enable() and
        # update_conf(); 'replica_rate_interval' is still removed as before.
        delete($guest_conf->{replica_interval});
        delete($guest_conf->{replica_rate_interval});
        delete($guest_conf->{replica_target});
        delete($guest_conf->{replica});

        if ($vm_type eq 'qemu') {
            PVE::QemuConfig->write_config($vmid, $guest_conf);
        } else {
            PVE::LXC::Config->write_config($vmid, $guest_conf);
        }
        write_state($jobs);
    };

    PVE::Tools::lock_file_full($STATE_PATH, 30, 0, $code);
    die $@ if $@;
}
479
# Determine the time of the last replication from the existing snapshots.
#
# For every syncable volume of guest $vmid the first entry returned by
# PVE::Storage::volume_snapshot_list() (filter 'replica', local node) is
# inspected and the timestamp is taken from its "replica_<timestamp>" name.
#
# Returns the timestamp, or undef if no such snapshot was found. Dies with
# "snapshots are not coherent" if volumes disagree about the timestamp.
sub get_lastsync {
    my ($vmid) = @_;

    my ($conf, $vm_type) = $get_guestconfig->($vmid);

    my $sync_vol = get_syncable_guestdisks($conf, $vm_type);
    my $cfg = PVE::Storage::config();

    my $time;
    foreach my $volid (keys %$sync_vol) {
        my $list =
            PVE::Storage::volume_snapshot_list($cfg, $volid, 'replica', $local_node);

        my $tmp_snap = shift @$list;
        next if !$tmp_snap;

        # Take the capture from the match result instead of reading $1
        # afterwards: $1 keeps the value of an earlier successful match when
        # this one fails.
        my ($snap_time) = $tmp_snap =~ m/^replica_(\d+)$/;
        if (!defined($snap_time)) {
            warn "ignoring snapshot '$tmp_snap' of volume '$volid': " .
                "unexpected name format\n";
            next;
        }

        die "snapshots are not coherent\n"
            if defined($time) && $time ne $snap_time;

        $time = $snap_time;
    }

    return $time;
}
503
# Return the first entry of the snapshot list which
# PVE::Storage::volume_snapshot_list() reports for $volid on the local node
# with the filter 'replica_'.
sub get_last_replica_snap {
    my ($volid) = @_;

    my $storage_cfg = PVE::Storage::config();

    my $snapshots = PVE::Storage::volume_snapshot_list(
        $storage_cfg, $volid, 'replica_', $local_node);

    my $first = shift @$snapshots;

    return $first;
}
512
# Check whether the replication-enabled volumes of a guest config can all
# be synced.
#
# Returns 1 if get_syncable_guestdisks() raised no warning and found at
# least one syncable volume, undef otherwise.
sub check_guest_volumes_syncable {
    my ($conf, $vm_type) = @_;

    # NOTE(review): the literal 1 lands in the $running parameter of
    # get_syncable_guestdisks(), not in $noerr - confirm that this is intended.
    my ($warnings, $disks) = get_syncable_guestdisks($conf, $vm_type, 1);

    my $syncable = !$warnings && %$disks;

    return $syncable ? 1 : undef;
}
522
# Apply a changed replication setting of guest $vmid to its job.
#
# Parameters:
#   $vmid  - ID of the guest
#   $key   - 'replica_target', 'replica_interval' or 'replica_rate_limit'
#   $value - new value
#
# A new target destroys the existing replica and enables the job again with
# the new target. The other keys update the job entry in the state file
# (interval defaults to 15, a false rate limit removes the limit); nothing
# happens if the guest has no job entry.
#
# Dies on an unknown $key and on errors raised in the locked section.
sub update_conf {
    my ($vmid, $key, $value) = @_;

    if ($key eq 'replica_target') {
        destroy_replica($vmid);
        job_enable($vmid, undef, $value);
        return;
    }

    my $update = sub {
        my $jobs = read_state();

        return if !defined($jobs->{$vmid});

        if ($key eq 'replica_interval') {
            $jobs->{$vmid}->{interval} = $value || 15;
        } elsif ($key eq 'replica_rate_limit') {
            # Either store the new limit or drop the key. Assigning the
            # result of delete() would re-create the key with the old value.
            if ($value) {
                $jobs->{$vmid}->{limit} = $value;
            } else {
                delete $jobs->{$vmid}->{limit};
            }
        } else {
            die "Config parameter $key not known";
        }

        write_state($jobs);
    };

    my $res = PVE::Tools::lock_file_full($STATE_PATH, 60, 0, $update);
    # propagate errors from the locked section, like the other job helpers
    die $@ if $@;

    return $res;
}
551
552 1;