]> git.proxmox.com Git - pve-ha-manager.git/commitdiff
fix #1378: allow to specify a service shutdown policy
authorThomas Lamprecht <t.lamprecht@proxmox.com>
Thu, 20 Dec 2018 07:44:42 +0000 (08:44 +0100)
committerWolfgang Bumiller <w.bumiller@proxmox.com>
Mon, 7 Jan 2019 10:17:30 +0000 (11:17 +0100)
Allow an admin to set a datacenter wide HA policy which can change
the way we handle services on a node shutdown.

There are three policies:

* freeze: always freeze services, independent of the shutdown type
  (reboot, poweroff)
* failover: never freeze services, this means that a service will get
  recovered to another node if possible and if the current node does
  not come back up within the grace period of 1 minute.
* conditional (the default): this is the current behavior, freeze on
  reboot but do not freeze on poweroff

Add two tests: shutdown-policy1, which is based on the reboot1 test
but enforces no freeze with a failover policy, and shutdown-policy2,
which is based on the shutdown1 test but with an explicit freeze
policy. You can compare (diff) each test's log result to the test it's
based on to see what changes.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
13 files changed:
src/PVE/HA/LRM.pm
src/test/test-shutdown-policy1/cmdlist [new file with mode: 0644]
src/test/test-shutdown-policy1/datacenter.cfg [new file with mode: 0644]
src/test/test-shutdown-policy1/hardware_status [new file with mode: 0644]
src/test/test-shutdown-policy1/log.expect [new file with mode: 0644]
src/test/test-shutdown-policy1/manager_status [new file with mode: 0644]
src/test/test-shutdown-policy1/service_config [new file with mode: 0644]
src/test/test-shutdown-policy2/cmdlist [new file with mode: 0644]
src/test/test-shutdown-policy2/datacenter.cfg [new file with mode: 0644]
src/test/test-shutdown-policy2/hardware_status [new file with mode: 0644]
src/test/test-shutdown-policy2/log.expect [new file with mode: 0644]
src/test/test-shutdown-policy2/manager_status [new file with mode: 0644]
src/test/test-shutdown-policy2/service_config [new file with mode: 0644]

index dda82eb833b9949e76560bfbdcd7f67e7d2d9bc6..55e4b50e762322d6e5c75dcf08050faeabcfc89c 100644 (file)
@@ -53,6 +53,20 @@ sub shutdown_request {
 
     my ($shutdown, $reboot) = $haenv->is_node_shutdown();
 
+    my $dc_ha_cfg = $haenv->get_ha_settings();
+    my $shutdown_policy = $dc_ha_cfg->{shutdown_policy} // 'conditional';
+
+    my $freeze_all = $reboot;
+    if ($shutdown_policy eq 'conditional') {
+       $freeze_all = $reboot;
+    } elsif ($shutdown_policy eq 'freeze') {
+       $freeze_all = 1;
+    } elsif ($shutdown_policy eq 'failover') {
+       $freeze_all = 0;
+    } else {
+       $haenv->log('err', "unkown shutdown policy '$shutdown_policy', fall back to conditional");
+    }
+
     if ($shutdown) {
        # *always* queue stop jobs for all services if the node shuts down,
        # independent if it's a reboot or a poweroff, else we may corrupt
@@ -69,8 +83,12 @@ sub shutdown_request {
     }
 
     if ($shutdown) {
-       if ($reboot) {
-           $haenv->log('info', "reboot LRM, stop and freeze all services");
+       if ($freeze_all) {
+           if ($shutdown_policy eq 'conditional') {
+               $haenv->log('info', "reboot LRM, stop and freeze all services");
+           } else {
+               $haenv->log('info', "shutdown LRM, stop and freeze all services");
+           }
            $self->{mode} = 'restart';
        } else {
            $haenv->log('info', "shutdown LRM, stop all services");
diff --git a/src/test/test-shutdown-policy1/cmdlist b/src/test/test-shutdown-policy1/cmdlist
new file mode 100644 (file)
index 0000000..8558351
--- /dev/null
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "reboot node3" ]
+]
diff --git a/src/test/test-shutdown-policy1/datacenter.cfg b/src/test/test-shutdown-policy1/datacenter.cfg
new file mode 100644 (file)
index 0000000..6108ece
--- /dev/null
@@ -0,0 +1,5 @@
+{
+    "ha": {
+        "shutdown_policy": "failover"
+    }
+}
diff --git a/src/test/test-shutdown-policy1/hardware_status b/src/test/test-shutdown-policy1/hardware_status
new file mode 100644 (file)
index 0000000..451beb1
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-shutdown-policy1/log.expect b/src/test/test-shutdown-policy1/log.expect
new file mode 100644 (file)
index 0000000..385b07a
--- /dev/null
@@ -0,0 +1,40 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'vm:103' on node 'node3'
+info     22    node2/crm: status change wait_for_quorum => slave
+info     24    node3/crm: status change wait_for_quorum => slave
+info     25    node3/lrm: got lock 'ha_agent_node3_lock'
+info     25    node3/lrm: status change wait_for_agent_lock => active
+info     25    node3/lrm: starting service vm:103
+info     25    node3/lrm: service status vm:103 started
+info    120      cmdlist: execute reboot node3
+info    120    node3/lrm: shutdown LRM, stop all services
+info    125    node3/lrm: stopping service vm:103
+info    125    node3/lrm: service status vm:103 stopped
+info    126    node3/lrm: exit (loop end)
+info    126       reboot: execute crm node3 stop
+info    125    node3/crm: server received shutdown request
+info    145    node3/crm: exit (loop end)
+info    145       reboot: execute power node3 off
+info    145       reboot: execute power node3 on
+info    145    node3/crm: status change startup => wait_for_quorum
+info    140    node3/lrm: status change startup => wait_for_agent_lock
+info    145    node3/lrm: got lock 'ha_agent_node3_lock'
+info    145    node3/lrm: status change wait_for_agent_lock => active
+info    145    node3/lrm: starting service vm:103
+info    145    node3/lrm: service status vm:103 started
+info    164    node3/crm: status change wait_for_quorum => slave
+info    720     hardware: exit simulation - done
diff --git a/src/test/test-shutdown-policy1/manager_status b/src/test/test-shutdown-policy1/manager_status
new file mode 100644 (file)
index 0000000..0967ef4
--- /dev/null
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-shutdown-policy1/service_config b/src/test/test-shutdown-policy1/service_config
new file mode 100644 (file)
index 0000000..c6860e7
--- /dev/null
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
diff --git a/src/test/test-shutdown-policy2/cmdlist b/src/test/test-shutdown-policy2/cmdlist
new file mode 100644 (file)
index 0000000..a86b9e2
--- /dev/null
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "shutdown node3" ]
+]
diff --git a/src/test/test-shutdown-policy2/datacenter.cfg b/src/test/test-shutdown-policy2/datacenter.cfg
new file mode 100644 (file)
index 0000000..0d411c1
--- /dev/null
@@ -0,0 +1,5 @@
+{
+    "ha": {
+        "shutdown_policy": "freeze"
+    }
+}
diff --git a/src/test/test-shutdown-policy2/hardware_status b/src/test/test-shutdown-policy2/hardware_status
new file mode 100644 (file)
index 0000000..451beb1
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-shutdown-policy2/log.expect b/src/test/test-shutdown-policy2/log.expect
new file mode 100644 (file)
index 0000000..a36c628
--- /dev/null
@@ -0,0 +1,34 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'vm:103' on node 'node3'
+info     22    node2/crm: status change wait_for_quorum => slave
+info     24    node3/crm: status change wait_for_quorum => slave
+info     25    node3/lrm: got lock 'ha_agent_node3_lock'
+info     25    node3/lrm: status change wait_for_agent_lock => active
+info     25    node3/lrm: starting service vm:103
+info     25    node3/lrm: service status vm:103 started
+info    120      cmdlist: execute shutdown node3
+info    120    node3/lrm: shutdown LRM, stop and freeze all services
+info    120    node1/crm: service 'vm:103': state changed from 'started' to 'freeze'
+info    125    node3/lrm: stopping service vm:103
+info    125    node3/lrm: service status vm:103 stopped
+info    126    node3/lrm: exit (loop end)
+info    126     shutdown: execute crm node3 stop
+info    125    node3/crm: server received shutdown request
+info    145    node3/crm: exit (loop end)
+info    145     shutdown: execute power node3 off
+info    160    node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info    720     hardware: exit simulation - done
diff --git a/src/test/test-shutdown-policy2/manager_status b/src/test/test-shutdown-policy2/manager_status
new file mode 100644 (file)
index 0000000..0967ef4
--- /dev/null
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-shutdown-policy2/service_config b/src/test/test-shutdown-policy2/service_config
new file mode 100644 (file)
index 0000000..c6860e7
--- /dev/null
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}