3 # Resource Agent for managing PVE VMs (openvz and qemu-kvm)
5 # License: GNU Affero General Public License (AGPL3)
6 # Copyright (C) 2011 Proxmox Server Solutions GmbH
13 use PVE
::RPCEnvironment
;
15 use PVE
::API2
::OpenVZ
;
# OCF resource agent exit codes. The names and numeric values are exactly
# the ones this script already declared; they are grouped into a single
# hash-form declaration so the whole table can be read at a glance.
use constant {
    OCF_SUCCESS           => 0,
    OCF_ERR_GENERIC       => 1,
    OCF_ERR_ARGS          => 2,
    OCF_ERR_UNIMPLEMENTED => 3,
    OCF_ERR_PERM          => 4,
    OCF_ERR_INSTALLED     => 5,
    OCF_ERR_CONFIGURED    => 6,
    OCF_NOT_RUNNING       => 7,
    OCF_RUNNING_MASTER    => 8,
    OCF_FAILED_MASTER     => 9,
};
# Pin PATH to the system directories so the external commands this agent
# runs (e.g. 'clulog', 'ssh') are not resolved through the caller's PATH.
$ENV{PATH} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Tag passed to clulog ('-m') to identify messages from this agent.
# NOTE: the variable name keeps its historical spelling ("ressource")
# because other parts of the script refer to it.
my $ocf_ressource_type = 'pvevm';
42 die @_ if $^S; # skip if inside eval
# Tell the caller why we refuse to continue (message goes to STDERR).
print STDERR "Cannot control VMs as non-root user.\n";
# Set up the PVE inotify layer.
PVE::INotify::inotify_init();

# Create the RPC environment in 'ha' mode and prepare it for a request.
# The language is taken from the caller's LANG (may be undefined), and all
# subsequent API calls are made as 'root@pam'.
my $rpcenv = PVE::RPCEnvironment->init('ha');
$rpcenv->init_request();
$rpcenv->set_language($ENV{LANG});
$rpcenv->set_user('root@pam');

# Node name as reported by PVE::INotify; the command handlers compare it
# against a VM's recorded node to decide whether the VM is local.
my $nodename = PVE::INotify::nodename();

# ssh command prefix; BatchMode=yes makes ssh fail instead of prompting.
my @ssh_opts = ('-o', 'BatchMode=yes');
my @ssh_cmd  = ('ssh', @ssh_opts);
67 my ($level, $msg) = @_;
70 print "$level: $msg\n";
72 my $level_n = $prio_hash->{$level};
73 $level_n = $prio_hash->{note
} if !defined($level_n);
75 my $cmd = ['clulog', '-m', $ocf_ressource_type, '-s', $level_n, $msg];
77 eval { PVE
::Tools
::run_command
($cmd); }; # ignore errors
81 my $default_timeout = 60;
82 my $tout = $default_timeout;
84 if ($ENV{OCF_RESKEY_RGMANAGER_meta_timeout
}) {
85 $tout = $ENV{OCF_RESKEY_RGMANAGER_meta_timeout
};
86 } elsif ($ENV{OCF_RESKEY_CRM_meta_timeout
}) {
87 $tout = $ENV{OCF_RESKEY_CRM_meta_timeout
};
90 return $default_timeout if $tout <= 0;
96 my ($status, $verbose) = @_;
98 if ($status->{type
} eq 'qemu') {
99 $status->{running
} = PVE
::QemuServer
::check_running
($status->{vmid
}, 1);
100 } elsif ($status->{type
} eq 'openvz') {
101 $status->{running
} = PVE
::OpenVZ
::check_running
($status->{vmid
});
103 die "got strange VM type '$status->{type}'\n";
112 my $vmid = $ENV{OCF_RESKEY_vmid
};
113 die "no VMID specified\n" if !defined($vmid);
114 die "got invalid VMID '$vmid'\n" if $vmid !~ m/^[1-9]\d*$/;
116 my $vmlist = PVE
::Cluster
::get_vmlist
();
117 die "got empty cluster VM list\n" if !$vmlist || !$vmlist->{ids
};
118 my $data = $vmlist->{ids
}->{$vmid};
119 die "VM $vmid does not exist\n" if !$data;
121 $status->{vmid
} = $vmid;
122 $status->{type
} = $data->{type
};
123 $status->{node
} = $data->{node
};
125 if ($status->{type
} eq 'qemu') {
126 $status->{name
} = "VM $vmid";
128 $status->{name
} = "CT $vmid";
131 check_running
($status);
134 ocf_log
('err', $err);
144 my $task = PVE
::Tools
::upid_decode
($upid);
147 while (PVE
::ProcFSTools
::check_process_running
($task->{pid
}, $task->{pstart
})) {
148 ocf_log
('debug', "Task still active, waiting");
# Command-line parsing. The first argument is the action to perform; it
# defaults to the empty string so the later string comparisons never see
# undef. (Assumes this runs at file scope, where a bare shift reads @ARGV.)
my $cmd = shift || '';

# Only the 'migrate' action takes a second argument (the target).
# The variable is declared unconditionally and assigned separately:
# "my $x = ... if COND" has undefined behaviour according to perlsyn.
my $migratetarget;
$migratetarget = shift if $cmd eq 'migrate';

# Anything still left in @ARGV was not expected.
die "too many arguments\n" if scalar(@ARGV) != 0;
158 if ($cmd eq 'start') {
159 my $status = validate_all
();
160 if ($status->{running
}) {
161 ocf_log
('info', "$status->{name} is already running");
165 if ($status->{node
} ne $nodename) {
166 ocf_log
('info', "Move config for $status->{name} to local node");
167 my ($oldconfig, $newconfig);
168 if ($status->{type
} eq 'qemu') {
169 $oldconfig = PVE
::QemuServer
::config_file
($status->{vmid
}, $status->{node
});
170 $newconfig = PVE
::QemuServer
::config_file
($status->{vmid
}, $nodename);
172 $oldconfig = PVE
::OpenVZ
::config_file
($status->{vmid
}, $status->{node
});
173 $newconfig = PVE
::OpenVZ
::config_file
($status->{vmid
}, $nodename);
175 if (!rename($oldconfig, $newconfig)) {
176 ocf_log
('err', "unable to move config file from '$oldconfig' to '$newconfig' - $!");
177 exit(OCF_ERR_GENERIC
);
183 if ($status->{type
} eq 'qemu') {
184 $upid = PVE
::API2
::Qemu-
>vm_start({node
=> $nodename, vmid
=> $status->{vmid
}});
186 $upid = PVE
::API2
::OpenVZ-
>vm_start({node
=> $nodename, vmid
=> $status->{vmid
}});
191 check_running
($status);
193 exit(OCF_ERR_GENERIC
) if !$status->{running
};
195 if (my $testprog = $ENV{OCF_RESKEY_status_program
}) {
197 my $timeout = get_timeout
();
199 my $wait_func = sub {
200 while (system($testprog) != 0) { sleep(3); }
203 eval { PVE
::Tools
::run_with_timeout
($timeout, $wait_func); };
205 ocf_log
('err', "Start of $status->{name} has failed");
206 ocf_log
('err', "error while waiting for '$testprog' - $err");
207 exit(OCF_ERR_GENERIC
);
213 } elsif($cmd eq 'stop') {
214 my $status = validate_all
();
216 if (!$status->{running
}) {
217 ocf_log
('info', "$status->{name} is already stopped");
221 my $timeout = get_timeout
();
227 vmid
=> $status->{vmid
},
232 if ($status->{type
} eq 'qemu') {
233 $upid = PVE
::API2
::Qemu-
>vm_shutdown($param);
235 $upid = PVE
::API2
::OpenVZ-
>vm_shutdown($param);
240 check_running
($status);
242 exit($status->{running
} ? OCF_ERR_GENERIC
: OCF_SUCCESS
);
244 } elsif($cmd eq 'recover' || $cmd eq 'restart' || $cmd eq 'reload') {
248 } elsif($cmd eq 'status' || $cmd eq 'monitor') {
250 my $status = validate_all
();
252 if (!$status->{running
}) {
253 ocf_log
('debug', "$status->{name} is not running");
254 exit(OCF_NOT_RUNNING
);
257 ocf_log
('debug', "$status->{name} is running");
259 my $testprog = $ENV{OCF_RESKEY_status_program
};
260 my $checklevel = $ENV{OCF_CHECK_LEVEL
};
262 if ($testprog && $checklevel && $checklevel >= 10) {
263 if (system($testprog) != 0) {
264 exit(OCF_NOT_RUNNING
);
270 } elsif($cmd eq 'migrate') {
271 my $status = validate_all
();
272 if (!$status->{running
}) {
273 ocf_log
('err', "$status->{name} is not running");
274 exit(OCF_ERR_GENERIC
);
277 if (!$migratetarget) {
278 ocf_log
('err', "No target specified");
286 vmid
=> $status->{vmid
},
287 target
=> $migratetarget,
292 if ($status->{type
} eq 'qemu') {
293 $oldconfig = PVE
::QemuServer
::config_file
($status->{vmid
}, $status->{node
});
294 $upid = PVE
::API2
::Qemu-
>migrate_vm($params);
296 $oldconfig = PVE
::OpenVZ
::config_file
($status->{vmid
}, $status->{node
});
297 $upid = PVE
::API2
::OpenVZ-
>migrate_vm($params);
302 # something went wrong if old config file is still there
303 exit((-f
$oldconfig) ? OCF_ERR_GENERIC
: OCF_SUCCESS
);
305 } elsif($cmd eq 'stop') {
306 my $status = validate_all
();
308 if (!$status->{running
}) {
309 ocf_log
('info', "$status->{name} is already stopped");
315 if ($status->{type
} eq 'qemu') {
316 $upid = PVE
::API2
::Qemu-
>vm_stop({node
=> $nodename, vmid
=> $status->{vmid
}});
318 $upid = PVE
::API2
::OpenVZ-
>vm_stop({node
=> $nodename, vmid
=> $status->{vmid
}, fast
=> 1});
325 } elsif($cmd eq 'reconfig') {
326 # Reconfigure a running VM
327 my $status = validate_all
();
331 } elsif($cmd eq 'meta-data') {
335 } elsif($cmd eq 'validate-all') {
336 my $status = validate_all
();
# Unknown or missing action: list every action the dispatch chain handles.
die "usage: $0 {start|stop|restart|recover|reload|status|monitor|migrate <target>|reconfig|meta-data|validate-all}\n";
344 <?xml version
="1.0"?
>
345 <resource-agent version
="rgmanager 2.0" name
="pvevm">
346 <version
>1.0</version
>
349 Defines a PVE Virtual Machine
351 <shortdesc lang
="en">
352 Defines a PVE Virtual Machine
356 <parameter name
="vmid" primary
="1">
358 This
is the VMID of the virtual machine
.
360 <shortdesc lang
="en">
363 <content type
="string"/>
366 <parameter name
="domain" reconfig
="1">
368 Failover domains define lists of cluster members
369 to
try in the event that the host of the virtual machine
372 <shortdesc lang
="en">
373 Cluster failover Domain
375 <content type
="string"/>
378 <parameter name
="autostart" reconfig
="1">
380 If set to yes
, this resource group will automatically be started
381 after the cluster forms a quorum
. If set to
no, this virtual
382 machine will start
in the
'disabled' state after the cluster
385 <shortdesc lang
="en">
386 Automatic start after quorum formation
388 <content type
="boolean" default="1"/>
391 <parameter name
="exclusive" reconfig
="1">
393 If set
, this resource group will only relocate to
394 nodes which have
no other resource groups running
in the
395 event of a failure
. If
no empty nodes are available
,
396 this resource group will
not be restarted after a failure
.
397 Additionally
, resource groups will
not automatically
398 relocate to the node running this resource group
. This
399 option can be overridden by manual start
and/or relocate
402 <shortdesc lang
="en">
403 Exclusive resource group
405 <content type
="boolean" default="0"/>
408 <parameter name
="recovery" reconfig
="1">
410 This currently
has three possible options
: "restart" tries
411 to restart this virtual machine locally before
412 attempting to relocate
(default); "relocate" does not bother
413 trying to restart the VM locally
; "disable" disables
416 <shortdesc lang
="en">
417 Failure recovery policy
419 <content type
="string"/>
422 <parameter name
="migrate">
424 Migration type
(live
or pause
, default = live
).
426 <shortdesc lang
="en">
427 Migration type
(live
or pause
, default = live
).
429 <content type
="string" default="live"/>
432 <parameter name
="depend">
434 Service dependency
; will
not start without the specified
437 <shortdesc lang
="en">
438 Top-level service this depends on
, in service
:name format
.
440 <content type
="string"/>
443 <parameter name
="depend_mode">
445 Service dependency mode
.
446 hard
- This service
is stopped
/started
if its dependency
448 soft
- This service only depends on the other service
for
449 initial startup
. If the other service stops
, this
450 service
is not stopped
.
452 <shortdesc lang
="en">
453 Service dependency mode
(soft
or hard
).
455 <content type
="string" default="hard"/>
458 <parameter name
="max_restarts" reconfig
="1">
460 Maximum restarts
for this service
.
462 <shortdesc lang
="en">
463 Maximum restarts
for this service
.
465 <content type
="string" default="0"/>
468 <parameter name
="restart_expire_time" reconfig
="1">
470 Restart expiration
time. A restart
is forgotten
471 after this
time. When combined with the max_restarts
472 option
, this lets administrators specify a threshold
473 for when to fail over services
. If max_restarts
474 is exceeded
in this
given expiration
time, the service
475 is relocated instead of restarted again
.
477 <shortdesc lang
="en">
478 Restart expiration
time; amount of
time before a restart
481 <content type
="string" default="0"/>
484 <parameter name
="status_program" reconfig
="1">
486 Ordinarily
, only the presence
/health of a virtual machine
487 is checked
. If specified
, the status_program value
is
488 executed during a depth
10 check
. The intent of this
489 program
is to ascertain the status of critical services
490 within a virtual machine
.
492 <shortdesc lang
="en">
493 Additional status check program
495 <content type
="string" default=""/>
500 <action name
="start" timeout
="75"/>
501 <action name
="stop" timeout
="75"/>
503 <action name
="status" timeout
="10" interval
="30"/>
504 <action name
="monitor" timeout
="10" interval
="30"/>
506 <!-- depth
10 calls the status_program
-->
507 <action name
="status" depth
="10" timeout
="20" interval
="60"/>
508 <action name
="monitor" depth
="10" timeout
="20" interval
="60"/>
510 <!-- reconfigure
- reconfigure with new OCF parameters
.
511 NOT OCF COMPATIBLE AT ALL
-->
512 <action name
="reconfig" timeout
="10"/>
514 <action name
="migrate" timeout
="10m"/>
516 <action name
="meta-data" timeout
="5"/>
517 <action name
="validate-all" timeout
="5"/>
521 <special tag
="rgmanager">
522 <!-- Destroy_on_delete
/ init_on_add are currently only
523 supported
for migratory resources
(no children
524 and the
'migrate' action
; see above
. Do
not try this
525 with normal services
-->
526 <attributes maxinstances
="1" destroy_on_delete
="0" init_on_add
="0"/>