fix #1694: make failure of snapshot removal non-fatal

author Wolfgang Link <w.link@proxmox.com>

Fri, 13 Apr 2018 10:24:39 +0000 (12:24 +0200)

committer Dietmar Maurer <dietmar@proxmox.com>

Mon, 16 Apr 2018 08:40:48 +0000 (10:40 +0200)
author Wolfgang Link <w.link@proxmox.com>
Fri, 13 Apr 2018 10:24:39 +0000 (12:24 +0200)
committer Dietmar Maurer <dietmar@proxmox.com>
Mon, 16 Apr 2018 08:40:48 +0000 (10:40 +0200)
diff --git a/PVE/Replication.pm b/PVE/Replication.pm

index 9bc4e61e4e991263e47e1ef40fe260f59e0fb46f..98ba1b6a5a66a685d1c72eca8c6862ae43fb192e 100644 (file)
--- a/PVE/Replication.pm
+++ b/PVE/Replication.pm
@@ -136,8 +136,21 @@ sub prepare {
                 $last_snapshots->{$volid}->{$snap} = 1;
             } elsif ($snap =~ m/^\Q$prefix\E/) {
                 $logfunc->("delete stale replication snapshot '$snap' on $volid");
                 $last_snapshots->{$volid}->{$snap} = 1;
             } elsif ($snap =~ m/^\Q$prefix\E/) {
                 $logfunc->("delete stale replication snapshot '$snap' on $volid");
-               PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap);
-               $cleaned_replicated_volumes->{$volid} = 1;
+
+               eval {
+                   PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap);
+                   $cleaned_replicated_volumes->{$volid} = 1;
+               };
+
+               # If deleting the snapshot fails, we cannot be sure whether it was due to an error or a timeout.
+               # On a timeout, it is likely that the delete has still succeeded.
+               # If it really failed, removal will be tried again on the next run.
+
+               # warn is for syslog/journal.
+               warn $@ if $@;
+
+               # logfunc writes to the replication log.
+               $logfunc->("delete stale replication snapshot error: $@") if $@;
             }
         }
      }
             }
         }
      }
@@ -296,9 +309,18 @@ sub replicate {
      # remove old snapshots because they are no longer needed
      $cleanup_local_snapshots->($last_snapshots, $last_sync_snapname);
  
      # remove old snapshots because they are no longer needed
      $cleanup_local_snapshots->($last_snapshots, $last_sync_snapname);
  
-    remote_finalize_local_job($ssh_info, $jobid, $vmid, $sorted_volids, $start_time, $logfunc);
+    eval {
+       remote_finalize_local_job($ssh_info, $jobid, $vmid, $sorted_volids, $start_time, $logfunc);
+    };
  
  
-    die $err if $err;
+    # old snapshots will be removed on the next run by prepare_local_job.
+    if ($err = $@) {
+       # warn is for syslog/journal.
+       warn $err;
+
+       # logfunc writes to the replication log.
+       $logfunc->("delete stale replication snapshot error: err");
+    }
  
      return $volumes;
  }
  
      return $volumes;
  }
author	Wolfgang Link <w.link@proxmox.com>
	Fri, 13 Apr 2018 10:24:39 +0000 (12:24 +0200)
committer	Dietmar Maurer <dietmar@proxmox.com>
	Mon, 16 Apr 2018 08:40:48 +0000 (10:40 +0200)