confdb_test_agent_LDFLAGS = -L../../lib
sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c
-sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc ../../exec/coropoll.o
+sam_test_agent_LDADD = -lsam -lquorum -lcoroipcc -lconfdb ../../exec/coropoll.o
sam_test_agent_LDFLAGS = -L../../lib
votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c
SAM_RECOVERY_POLICY_QUORUM = 0x08,
SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
+ SAM_RECOVERY_POLICY_CONFDB = 0x10,
} sam_recovery_policy_t;
/*
const void *data,
size_t size);
+/*
+ * Marks child as failed. This can be called only with SAM_RECOVERY_POLICY_CONFDB flag set and
+ * makes sense only for SAM_RECOVERY_POLICY_RESTART. This will kill child without sending warn
+ * signal. Confdb state key will be set to failed.
+ *
+ * - CS_OK in case no problem appeared
+ * - CS_ERR_BAD_HANDLE library was not initialized or was already finalized
+ * - CS_ERR_INVALID_PARAM recovery policy doesn't have SAM_RECOVERY_POLICY_CONFDB flag set
+ * - CS_ERR_LIBRARY if some internal error appeared (communication with parent
+ * process)
+ */
+cs_error_t sam_mark_failed (void);
#ifdef __cplusplus
}
libconfdb_a_SOURCES = confdb.c sa-confdb.c
libconfdb_a_LIBADD = ../lcr/lcr_ifact.o
CONFDB_LINKER_ADD = $(OS_DYFLAGS) $(OS_LDL)
-SAM_LINKER_ADD = -L. -lquorum
+SAM_LINKER_ADD = -L. -lquorum -lconfdb
libcoroipcc_a_SOURCES = coroipcc.c
libsam_a_SOURCES = sam.c
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <corosync/coroipc_types.h>
#include <corosync/coroipcc.h>
#include <corosync/corodefs.h>
+#include <corosync/confdb.h>
#include <corosync/hdb.h>
#include <corosync/quorum.h>
#include <sys/wait.h>
#include <signal.h>
+#define SAM_CONFDB_S_FAILED "failed"
+#define SAM_CONFDB_S_REGISTERED "registered"
+#define SAM_CONFDB_S_STARTED "started"
+#define SAM_CONFDB_S_Q_WAIT "waiting for quorum"
+
+/*
+ * Recovery policy bit helpers. Each macro evaluates to pol with the named
+ * flag bit(s) cleared and every other bit left untouched:
+ *   SAM_RP_MASK_Q - clears SAM_RECOVERY_POLICY_QUORUM
+ *   SAM_RP_MASK_C - clears SAM_RECOVERY_POLICY_CONFDB
+ *   SAM_RP_MASK   - clears both flags
+ * The argument is parenthesized so that expressions containing lower
+ * precedence operators expand correctly.
+ */
+#define SAM_RP_MASK_Q(pol) ((pol) & (~SAM_RECOVERY_POLICY_QUORUM))
+#define SAM_RP_MASK_C(pol) ((pol) & (~SAM_RECOVERY_POLICY_CONFDB))
+#define SAM_RP_MASK(pol) ((pol) & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CONFDB)))
+
enum sam_internal_status_t {
SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
SAM_INTERNAL_STATUS_INITIALIZED,
SAM_COMMAND_HB,
SAM_COMMAND_DATA_STORE,
SAM_COMMAND_WARN_SIGNAL_SET,
+ SAM_COMMAND_MARK_FAILED,
};
enum sam_reply_t {
SAM_PARENT_ACTION_CONTINUE
};
+enum sam_confdb_key_t {
+ SAM_CONFDB_KEY_RECOVERY,
+ SAM_CONFDB_KEY_HC_PERIOD,
+ SAM_CONFDB_KEY_LAST_HC,
+ SAM_CONFDB_KEY_STATE,
+};
+
static struct {
int time_interval;
sam_recovery_policy_t recovery_policy;
size_t user_data_size;
size_t user_data_allocated;
+ pthread_mutex_t lock;
+
quorum_handle_t quorum_handle;
uint32_t quorate;
int quorum_fd;
+
+ confdb_handle_t confdb_handle;
+ hdb_handle_t confdb_pid_handle;
} sam_internal_data;
+extern const char *__progname;
+
+/*
+ * Write one key below the per-process confdb object whose handle is kept in
+ * sam_internal_data.confdb_pid_handle.
+ *
+ * key   - selects which key is written:
+ *           SAM_CONFDB_KEY_RECOVERY  -> "recovery"  ("quit" or "restart")
+ *           SAM_CONFDB_KEY_HC_PERIOD -> "hc_period" (time_interval as uint64)
+ *           SAM_CONFDB_KEY_LAST_HC   -> "hc_last"   (current time in ms, 0 if
+ *                                                    gettimeofday fails)
+ *           SAM_CONFDB_KEY_STATE     -> "state"     (string passed in value)
+ * value - used only for SAM_CONFDB_KEY_STATE, ignored otherwise
+ *
+ * Returns the result of confdb_key_create_typed, or CS_OK without touching
+ * confdb when key is none of the values above.
+ */
+static cs_error_t sam_confdb_update_key (enum sam_confdb_key_t key, const char *value)
+{
+	const char *policy_names[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", [SAM_RECOVERY_POLICY_RESTART] = "restart" };
+	const char *key_name;
+	const void *key_data;
+	size_t key_data_len;
+	confdb_value_types_t key_type;
+	uint64_t number;
+	struct timeval now;
+
+	switch (key) {
+	case SAM_CONFDB_KEY_RECOVERY:
+		key_name = "recovery";
+		key_data = policy_names[SAM_RP_MASK (sam_internal_data.recovery_policy)];
+		key_data_len = strlen ((const char *)key_data);
+		key_type = CONFDB_VALUETYPE_STRING;
+		break;
+	case SAM_CONFDB_KEY_HC_PERIOD:
+		number = sam_internal_data.time_interval;
+
+		key_name = "hc_period";
+		key_data = &number;
+		key_data_len = sizeof (uint64_t);
+		key_type = CONFDB_VALUETYPE_UINT64;
+		break;
+	case SAM_CONFDB_KEY_LAST_HC:
+		/*
+		 * Milliseconds since the epoch, 0 when the clock can't be read
+		 */
+		number = 0;
+		if (gettimeofday (&now, NULL) != -1) {
+			number = ((uint64_t)now.tv_sec * 1000) + ((uint64_t)now.tv_usec / 1000);
+		}
+
+		key_name = "hc_last";
+		key_data = &number;
+		key_data_len = sizeof (uint64_t);
+		key_type = CONFDB_VALUETYPE_UINT64;
+		break;
+	case SAM_CONFDB_KEY_STATE:
+		key_name = "state";
+		key_data = value;
+		key_data_len = strlen ((const char *)value);
+		key_type = CONFDB_VALUETYPE_STRING;
+		break;
+	default:
+		/*
+		 * Unknown key, nothing to write
+		 */
+		return (CS_OK);
+	}
+
+	return (confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
+	    key_name, key_data, key_data_len, key_type));
+}
+
+/*
+ * Destroy the per-process confdb object referenced by
+ * sam_internal_data.confdb_pid_handle and hand back the confdb result.
+ */
+static cs_error_t sam_confdb_destroy_pid_obj (void)
+{
+	cs_error_t res;
+
+	res = confdb_object_destroy (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle);
+
+	return (res);
+}
+
+/*
+ * Register this process in confdb.
+ *
+ * Opens a new confdb connection and walks the object path
+ * "resources" -> "process" -> "<progname>:<pid>" starting at
+ * OBJECT_PARENT_HANDLE. An object whose lookup fails with CONFDB_ERR_ACCESS
+ * is created; any other lookup error aborts registration. The resulting
+ * connection and per-process object handle are stored in sam_internal_data
+ * and the "recovery" and "hc_period" keys are written.
+ *
+ * Returns CS_OK on success. On failure the confdb connection is finalized
+ * (and the per-process object destroyed if it was the key update that
+ * failed) and the failing call's error code is returned.
+ */
+static cs_error_t sam_confdb_register (void)
+{
+ const char *obj_name;
+ cs_error_t err;
+ confdb_handle_t confdb_handle;
+ hdb_handle_t resource_handle, process_handle, pid_handle, obj_handle;
+ hdb_handle_t *res_handle;
+ char tmp_obj[PATH_MAX];
+ int i;
+
+ if ((err = confdb_initialize (&confdb_handle, NULL)) != CS_OK) {
+ return (err);
+ }
+
+ /*
+ * One iteration per path level; each level's handle is the parent of the next
+ */
+ for (i = 0; i < 3; i++) {
+ switch (i) {
+ case 0:
+ obj_name = "resources";
+ obj_handle = OBJECT_PARENT_HANDLE;
+ res_handle = &resource_handle;
+ break;
+ case 1:
+ obj_name = "process";
+ obj_handle = resource_handle;
+ res_handle = &process_handle;
+ break;
+ case 2:
+ /*
+ * Fall back to the bare pid if "progname:pid" doesn't fit into tmp_obj
+ */
+ if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, getpid ()) >= sizeof (tmp_obj)) {
+ snprintf (tmp_obj, sizeof (tmp_obj), "%d", getpid ());
+ }
+
+ obj_name = tmp_obj;
+ obj_handle = process_handle;
+ res_handle = &pid_handle;
+ break;
+ }
+
+ if ((err = confdb_object_find_start (confdb_handle, obj_handle)) != CS_OK) {
+ goto finalize_error;
+ }
+
+ if ((err = confdb_object_find (confdb_handle, obj_handle, obj_name, strlen (obj_name),
+ res_handle)) != CS_OK) {
+ if (err == CONFDB_ERR_ACCESS) {
+ /*
+ * Try to create object
+ */
+ if ((err = confdb_object_create (confdb_handle, obj_handle, obj_name,
+ strlen (obj_name), res_handle)) != CS_OK) {
+ goto finalize_error;
+ }
+ } else {
+ goto finalize_error;
+ }
+ } else {
+ if ((err = confdb_object_find_destroy (confdb_handle, obj_handle)) != CS_OK) {
+ goto finalize_error;
+ }
+ }
+ }
+
+ /*
+ * Handles must be published before sam_confdb_update_key is used, it reads them
+ */
+ sam_internal_data.confdb_pid_handle = pid_handle;
+ sam_internal_data.confdb_handle = confdb_handle;
+
+ if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY, NULL)) != CS_OK) {
+ goto destroy_finalize_error;
+ }
+
+ if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD, NULL)) != CS_OK) {
+ goto destroy_finalize_error;
+ }
+
+ return (CS_OK);
+
+destroy_finalize_error:
+ sam_confdb_destroy_pid_obj ();
+finalize_error:
+ confdb_finalize (confdb_handle);
+ return (err);
+}
+
static void quorum_notification_fn (
quorum_handle_t handle,
uint32_t quorate,
return (CS_ERR_BAD_HANDLE);
}
- if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART &&
- recovery_policy != SAM_RECOVERY_POLICY_QUORUM_QUIT && recovery_policy != SAM_RECOVERY_POLICY_QUORUM_RESTART) {
+ if (SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_QUIT &&
+ SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_RESTART) {
return (CS_ERR_INVALID_PARAM);
}
sam_internal_data.user_data_size = 0;
sam_internal_data.user_data_allocated = 0;
+ pthread_mutex_init (&sam_internal_data.lock, NULL);
+
return (CS_OK);
exit_error_quorum:
return (CS_ERR_BAD_HANDLE);
}
+ pthread_mutex_lock (&sam_internal_data.lock);
+
*size = sam_internal_data.user_data_size;
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
return (CS_OK);
}
void *data,
size_t size)
{
+ cs_error_t err;
+
+ err = CS_OK;
+
if (data == NULL) {
return (CS_ERR_INVALID_PARAM);
}
return (CS_ERR_BAD_HANDLE);
}
+ pthread_mutex_lock (&sam_internal_data.lock);
+
if (sam_internal_data.user_data_size == 0) {
- return (CS_OK);
+ err = CS_OK;
+
+ goto error_unlock;
}
if (size < sam_internal_data.user_data_size) {
- return (CS_ERR_INVALID_PARAM);
+ err = CS_ERR_INVALID_PARAM;
+
+ goto error_unlock;
}
memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size);
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
return (CS_OK);
+
+error_unlock:
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
+ return (err);
}
cs_error_t sam_data_store (
size = 0;
}
+ pthread_mutex_lock (&sam_internal_data.lock);
+
if (sam_internal_data.am_i_child) {
/*
* We are child so we must send data to parent
*/
command = SAM_COMMAND_DATA_STORE;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
- return (CS_ERR_LIBRARY);
+ err = CS_ERR_LIBRARY;
+
+ goto error_unlock;
}
if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) != sizeof (size)) {
- return (CS_ERR_LIBRARY);
+ err = CS_ERR_LIBRARY;
+
+ goto error_unlock;
}
if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) {
- return (CS_ERR_LIBRARY);
+ err = CS_ERR_LIBRARY;
+
+ goto error_unlock;
}
/*
* And wait for reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
- return (err);
+ goto error_unlock;
}
}
} else {
if (sam_internal_data.user_data_allocated < size) {
if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) {
- return (CS_ERR_NO_MEMORY);
+ err = CS_ERR_NO_MEMORY;
+
+ goto error_unlock;
}
sam_internal_data.user_data_allocated = size;
memcpy (sam_internal_data.user_data, data, size);
}
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
return (CS_OK);
+
+error_unlock:
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
+ return (err);
}
cs_error_t sam_start (void)
{
char command;
cs_error_t err;
+ sam_recovery_policy_t recpol;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
return (CS_ERR_BAD_HANDLE);
}
+ recpol = sam_internal_data.recovery_policy;
+
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ pthread_mutex_lock (&sam_internal_data.lock);
+ }
+
command = SAM_COMMAND_START;
- if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
+ if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ pthread_mutex_unlock (&sam_internal_data.lock);
+ }
+
return (CS_ERR_LIBRARY);
+ }
- if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
/*
* Wait for parent reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
return (err);
}
+
+ pthread_mutex_unlock (&sam_internal_data.lock);
}
if (sam_internal_data.hc_callback)
cs_error_t sam_stop (void)
{
char command;
+ cs_error_t err;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
return (CS_ERR_BAD_HANDLE);
command = SAM_COMMAND_STOP;
- if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ pthread_mutex_lock (&sam_internal_data.lock);
+ }
+
+ if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ pthread_mutex_unlock (&sam_internal_data.lock);
+ }
+
return (CS_ERR_LIBRARY);
+ }
+
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ /*
+ * Wait for parent reply
+ */
+ if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
+ return (err);
+ }
+
+ pthread_mutex_unlock (&sam_internal_data.lock);
+ }
if (sam_internal_data.hc_callback)
if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) != sizeof (command))
return (CS_OK);
}
+/*
+ * Ask the parent (control) process to mark this child as failed by sending
+ * it SAM_COMMAND_MARK_FAILED.
+ *
+ * Returns:
+ *   CS_ERR_BAD_HANDLE    - library is neither in registered nor started state
+ *   CS_ERR_INVALID_PARAM - recovery policy lacks SAM_RECOVERY_POLICY_CONFDB
+ *   CS_ERR_LIBRARY       - command could not be written to the parent
+ *   CS_OK                - command was written
+ */
+cs_error_t sam_mark_failed (void)
+{
+	char cmd;
+	int status_ok;
+
+	status_ok = (sam_internal_data.internal_status == SAM_INTERNAL_STATUS_STARTED ||
+	    sam_internal_data.internal_status == SAM_INTERNAL_STATUS_REGISTERED);
+
+	if (!status_ok) {
+		return (CS_ERR_BAD_HANDLE);
+	}
+
+	if ((sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) == 0) {
+		return (CS_ERR_INVALID_PARAM);
+	}
+
+	cmd = SAM_COMMAND_MARK_FAILED;
+
+	if (sam_safe_write (sam_internal_data.child_fd_out, &cmd, sizeof (cmd)) == sizeof (cmd)) {
+		return (CS_OK);
+	}
+
+	return (CS_ERR_LIBRARY);
+}
cs_error_t sam_warn_signal_set (int warn_signal)
{
return (CS_ERR_BAD_HANDLE);
}
+ pthread_mutex_lock (&sam_internal_data.lock);
+
if (sam_internal_data.am_i_child) {
/*
* We are child so we must send data to parent
*/
command = SAM_COMMAND_WARN_SIGNAL_SET;
if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
- return (CS_ERR_LIBRARY);
+ err = CS_ERR_LIBRARY;
+
+ goto error_unlock;
}
if (sam_safe_write (sam_internal_data.child_fd_out, &warn_signal, sizeof (warn_signal)) !=
sizeof (warn_signal)) {
- return (CS_ERR_LIBRARY);
+ err = CS_ERR_LIBRARY;
+
+ goto error_unlock;
}
/*
* And wait for reply
*/
if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
- return (err);
+ goto error_unlock;
}
}
*/
sam_internal_data.warn_signal = warn_signal;
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
return (CS_OK);
+
+error_unlock:
+ pthread_mutex_unlock (&sam_internal_data.lock);
+
+ return (err);
}
-static cs_error_t sam_parent_warn_signal_set (
+/*
+ * Send the result of a parent-side command back over parent_fd_out.
+ *
+ * When err is CS_OK a single SAM_REPLY_OK byte is written. When err is not
+ * CS_OK, or writing SAM_REPLY_OK failed, SAM_REPLY_ERROR is written followed
+ * by the error code itself.
+ *
+ * Returns CS_OK after a successful SAM_REPLY_OK, CS_ERR_LIBRARY if writing
+ * the error reply failed, otherwise the error code that was sent.
+ * parent_fd_in is not used by this function.
+ */
+static cs_error_t sam_parent_reply_send (
+ cs_error_t err,
int parent_fd_in,
int parent_fd_out)
{
char reply;
+
+ if (err == CS_OK) {
+ reply = SAM_REPLY_OK;
+
+ if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
+ err = CS_ERR_LIBRARY;
+ goto error_reply;
+ }
+
+ return (CS_OK);
+ }
+
+error_reply:
+ reply = SAM_REPLY_ERROR;
+ if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
+ return (CS_ERR_LIBRARY);
+ }
+ if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
+ return (CS_ERR_LIBRARY);
+ }
+
+ return (err);
+}
+
+
+static cs_error_t sam_parent_warn_signal_set (
+ int parent_fd_in,
+ int parent_fd_out)
+{
char *user_data;
int warn_signal;
cs_error_t err;
goto error_reply;
}
- reply = SAM_REPLY_OK;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- err = CS_ERR_LIBRARY;
- goto error_reply;
- }
- return (CS_OK);
+ return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
error_reply:
- reply = SAM_REPLY_ERROR;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- return (CS_ERR_LIBRARY);
- }
- if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
- return (CS_ERR_LIBRARY);
- }
-
- return (err);
+ return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static cs_error_t sam_parent_wait_for_quorum (
int parent_fd_in,
int parent_fd_out)
{
- char reply;
cs_error_t err;
struct pollfd pfds[2];
int poll_err;
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_Q_WAIT)) != CS_OK) {
+ goto error_reply;
+ }
+ }
+
/*
* Update current quorum
*/
}
}
- reply = SAM_REPLY_OK;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- err = CS_ERR_LIBRARY;
- goto error_reply;
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_STARTED)) != CS_OK) {
+ goto error_reply;
+ }
}
- return (CS_OK);
+ return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
error_reply:
- reply = SAM_REPLY_ERROR;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- return (CS_ERR_LIBRARY);
+ if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+ sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED);
}
- if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
- return (CS_ERR_LIBRARY);
+
+ return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
+}
+
+/*
+ * Update the confdb "state" key and report the outcome through
+ * sam_parent_reply_send.
+ *
+ * state - 1 selects SAM_CONFDB_S_STARTED, any other value selects
+ *         SAM_CONFDB_S_REGISTERED
+ *
+ * Returns whatever sam_parent_reply_send returns for the result of the key
+ * update.
+ */
+static cs_error_t sam_parent_confdb_state_set (
+ int parent_fd_in,
+ int parent_fd_out,
+ int state)
+{
+ cs_error_t err;
+ const char *state_s;
+
+ if (state == 1) {
+ state_s = SAM_CONFDB_S_STARTED;
+ } else {
+ state_s = SAM_CONFDB_S_REGISTERED;
}
- return (err);
+ if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, state_s)) != CS_OK) {
+ goto error_reply;
+ }
+
+ return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
+
+error_reply:
+ return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static cs_error_t sam_parent_kill_child (
return (CS_OK);
}
+/*
+ * Handle SAM_COMMAND_MARK_FAILED in the parent: switch the recovery policy
+ * to QUIT and kill the child.
+ *
+ * The base policy (quit/restart) is replaced by SAM_RECOVERY_POLICY_QUIT,
+ * while the SAM_RECOVERY_POLICY_CONFDB and SAM_RECOVERY_POLICY_QUORUM flags
+ * are carried over exactly as they were set in the previous policy (flags
+ * that were not set are not turned on).
+ *
+ * action    - passed through to sam_parent_kill_child
+ * child_pid - pid of the child to kill
+ *
+ * Returns the result of sam_parent_kill_child.
+ */
+static cs_error_t sam_parent_mark_child_failed (
+	int *action,
+	pid_t child_pid)
+{
+	sam_recovery_policy_t recpol;
+
+	recpol = sam_internal_data.recovery_policy;
+
+	/*
+	 * NOTE(review): term_send is set up front, presumably so that
+	 * sam_parent_kill_child skips the warn signal - confirm there.
+	 */
+	sam_internal_data.term_send = 1;
+	sam_internal_data.recovery_policy = SAM_RECOVERY_POLICY_QUIT |
+	    (recpol & (SAM_RECOVERY_POLICY_CONFDB | SAM_RECOVERY_POLICY_QUORUM));
+
+	return (sam_parent_kill_child (action, child_pid));
+}
static cs_error_t sam_parent_data_store (
int parent_fd_in,
int parent_fd_out)
{
- char reply;
char *user_data;
ssize_t size;
cs_error_t err;
goto free_error_reply;
}
- reply = SAM_REPLY_OK;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- err = CS_ERR_LIBRARY;
- goto free_error_reply;
- }
-
free (user_data);
- return (CS_OK);
+ return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
free_error_reply:
free (user_data);
error_reply:
- reply = SAM_REPLY_ERROR;
- if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
- return (CS_ERR_LIBRARY);
- }
- if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
- return (CS_ERR_LIBRARY);
- }
-
- return (err);
+ return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
}
static enum sam_parent_action_t sam_parent_handler (
struct pollfd pfds[2];
nfds_t nfds;
cs_error_t err;
+ sam_recovery_policy_t recpol;
status = 0;
action = SAM_PARENT_ACTION_CONTINUE;
+ recpol = sam_internal_data.recovery_policy;
while (action == SAM_PARENT_ACTION_CONTINUE) {
pfds[0].fd = parent_fd_in;
time_interval = -1;
}
- if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
pfds[nfds].fd = sam_internal_data.quorum_fd;
pfds[nfds].events = POLLIN;
pfds[nfds].revents = 0;
goto action_exit;
}
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ sam_confdb_update_key (SAM_CONFDB_KEY_LAST_HC, NULL);
+ }
+
/*
* We have read command
*/
/*
* Not started yet
*/
- if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
if (sam_parent_wait_for_quorum (parent_fd_in,
parent_fd_out) != CS_OK) {
continue;
}
}
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ if (sam_parent_confdb_state_set (parent_fd_in,
+ parent_fd_out, 1) != CS_OK) {
+ continue;
+ }
+ }
+
status = 1;
}
break;
/*
* Started
*/
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ if (sam_parent_confdb_state_set (parent_fd_in,
+ parent_fd_out, 0) != CS_OK) {
+ continue;
+ }
+ }
+
status = 0;
}
break;
case SAM_COMMAND_WARN_SIGNAL_SET:
sam_parent_warn_signal_set (parent_fd_in, parent_fd_out);
break;
+ case SAM_COMMAND_MARK_FAILED:
+ status = 1;
+ sam_parent_mark_child_failed (&action, child_pid);
+ break;
}
} /* if (pfds[0].revents != 0) */
pid_t pid;
int pipe_error;
int pipe_fd_out[2], pipe_fd_in[2];
- enum sam_parent_action_t action;
+ enum sam_parent_action_t action, old_action;
int child_status;
+ sam_recovery_policy_t recpol;
if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) {
return (CS_ERR_BAD_HANDLE);
}
+ recpol = sam_internal_data.recovery_policy;
+
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ /*
+ * Register to objdb
+ */
+ if ((error = sam_confdb_register ()) != CS_OK) {
+ goto error_exit;
+ }
+ }
+
error = CS_OK;
while (1) {
goto error_exit;
}
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ if ((error = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED)) != CS_OK) {
+ goto error_exit;
+ }
+ }
+
sam_internal_data.instance_id++;
sam_internal_data.term_send = 0;
sam_internal_data.am_i_child = 1;
sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
+ pthread_mutex_init (&sam_internal_data.lock, NULL);
+
goto error_exit;
} else {
/*
while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
;
+ old_action = action;
+
if (action == SAM_PARENT_ACTION_RECOVERY) {
- if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT ||
- sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUORUM_QUIT)
+ if (SAM_RP_MASK (sam_internal_data.recovery_policy) == SAM_RECOVERY_POLICY_QUIT)
action = SAM_PARENT_ACTION_QUIT;
}
+
if (action == SAM_PARENT_ACTION_QUIT) {
- if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+ if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
quorum_finalize (sam_internal_data.quorum_handle);
}
+ if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+ if (old_action == SAM_PARENT_ACTION_RECOVERY) {
+ /*
+ * Mark as failed
+ */
+ sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_FAILED);
+ } else {
+ sam_confdb_destroy_pid_obj ();
+ }
+ }
+
exit (WEXITSTATUS (child_status));
}
+
}
}
sam_hc_callback_register.3 \
sam_hc_send.3 \
sam_initialize.3 \
+ sam_mark_failed.3 \
sam_overview.8 \
sam_register.3 \
sam_start.3 \
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
-.TH "SAM_INITIALIZE" 3 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+.TH "SAM_INITIALIZE" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
.P
SAM_RECOVERY_POLICY_QUORUM = 0x08,
SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
+ SAM_RECOVERY_POLICY_CONFDB = 0x10,
} sam_recovery_policy_t;
.fi
SAM_RECOVERY_POLICY_QUORUM_RESTART
same as \fISAM_RECOVERY_POLICY_RESTART\fR but \fBsam_start (3)\fR will block until corosync becomes
quorate and process will be restarted if quorum is lost.
+.TP
+SAM_RECOVERY_POLICY_CONFDB
+is not a policy. It is used only as a flag that enables confdb integration, and it can be combined with any of the previous policies.
.P
To perform event driven healthchecking, \fBsam_register(3)\fR and
--- /dev/null
+.\"/*
+.\" * Copyright (c) 2010 Red Hat, Inc.
+.\" *
+.\" * All rights reserved.
+.\" *
+.\" * Author: Jan Friesse (jfriesse@redhat.com)
+.\" *
+.\" * This software licensed under BSD license, the text of which follows:
+.\" *
+.\" * Redistribution and use in source and binary forms, with or without
+.\" * modification, are permitted provided that the following conditions are met:
+.\" *
+.\" * - Redistributions of source code must retain the above copyright notice,
+.\" * this list of conditions and the following disclaimer.
+.\" * - Redistributions in binary form must reproduce the above copyright notice,
+.\" * this list of conditions and the following disclaimer in the documentation
+.\" * and/or other materials provided with the distribution.
+.\" * - Neither the name of the Red Hat, Inc. nor the names of its
+.\" * contributors may be used to endorse or promote products derived from this
+.\" * software without specific prior written permission.
+.\" *
+.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+.\" * THE POSSIBILITY OF SUCH DAMAGE.
+.\" */
+.TH "SAM_MARK_FAILED" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+
+.SH NAME
+.P
+sam_mark_failed \- Mark process failed
+
+.SH SYNOPSIS
+.P
+\fB#include <corosync/sam.h>\fR
+
+.P
+\fBcs_error_t sam_mark_failed (void);\fR
+
+.SH DESCRIPTION
+.P
+The \fBsam_mark_failed\fR function is used with SAM_RECOVERY_POLICY_CONFDB, mostly
+together with SAM_RECOVERY_POLICY_RESTART, to mark the process as failed. A process marked
+as failed is killed without being sent the warn signal, and the control process exits
+as it would with the SAM_RECOVERY_POLICY_QUIT policy. The confdb key state is set to failed so
+the corosync watchdog can take the required action.
+
+.SH RETURN VALUE
+.P
+This call returns the CS_OK value if successful, otherwise an error is returned.
+
+.SH ERRORS
+.TP
+CS_ERR_BAD_HANDLE
+library was not initialized by calling \fBsam_initialize(3)\fR or was already finalized
+
+.TP
+CS_ERR_INVALID_PARAM
+recovery policy doesn't have SAM_RECOVERY_POLICY_CONFDB flag set
+
+.TP
+CS_ERR_LIBRARY
+some internal error appeared (communication with parent process)
+
+.SH "SEE ALSO"
+.BR sam_initialize (3)
.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
.\" * THE POSSIBILITY OF SUCH DAMAGE.
.\" */
-.TH "SAM_OVERVIEW" 8 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+.TH "SAM_OVERVIEW" 8 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
.SH NAME
.P
presented by \fBsam_data_store(3)\fR, \fBsam_data_restore(3)\fR and \fBsam_data_getsize(3)\fR
functions.
+.SH Confdb integration
+.P
+SAM has a policy flag used for confdb system integration (\fISAM_RECOVERY_POLICY_CONFDB\fR).
+If a process is registered with this flag, a new confdb object PROCESS_NAME:PID is created with the following
+keys:
+.RS
+.IP \(bu 3
+\fIrecovery\fR - will be quit or restart depending on policy
+.IP \(bu 3
+\fIhc_period\fR - period of health checking in milliseconds
+.IP \(bu 3
+\fIhc_last\fR - last known GMT time in milliseconds when health check was received
+.IP \(bu 3
+\fIstate\fR - state of process (can be one of registered, started, failed, waiting for quorum)
+.RE
+
+.P
+The object is automatically deleted if the process exits while health checking is stopped.
+
+.P
+Confdb integration with the corosync watchdog can be used in an implicit or an explicit way.
+
+.P
+The implicit way is achieved by setting the recovery policy to QUIT and letting the process exit while health checking is started.
+If this happens, the object is not deleted and the corosync watchdog will take the required action.
+
+.P
+The explicit way is useful for situations where the developer can deal with some non-fatal failure of the application.
+This mode is achieved by setting the policy to RESTART and using SAM the same way as without confdb integration.
+If a real failure is needed (for example after too many restarts in total or per second), it's possible to use \fBsam_mark_failed(3)\fR
+and let the corosync watchdog take the required action.
+
.SH BUGS
.SH "SEE ALSO"
.BR sam_initialize (3),
.BR sam_data_restore (3),
.BR sam_data_store (3),
.BR sam_finalize (3),
+.BR sam_mark_failed (3),
.BR sam_start (3),
.BR sam_stop (3),
.BR sam_register (3),
#include <config.h>
+#include <limits.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/wait.h>
+extern const char *__progname;
+
static int test2_sig_delivered = 0;
static int test5_hc_cb_count = 0;
static int test6_sig_delivered = 0;
return (2);
}
+/*
+ * Walk confdb to resources/process and look up the object of process pid.
+ *
+ * The object name ("PROGNAME:PID", or only "PID" when the long form does not
+ * fit into obj_name) is written to obj_name so the caller can use it in its
+ * own messages. The result of the final lookup is stored in *find_err, because
+ * callers differ in whether they expect the object to exist.
+ *
+ * Returns 0 if resources/process could be reached, -1 (after printing the
+ * reason) otherwise.
+ */
+static int test8_find_pid_object (confdb_handle_t cdb_handle, pid_t pid,
+	char *obj_name, size_t obj_name_size,
+	hdb_handle_t *pid_handle, cs_error_t *find_err)
+{
+	cs_error_t err;
+	hdb_handle_t res_handle, proc_handle;
+	int name_len;
+
+	err = confdb_object_find_start (cdb_handle, OBJECT_PARENT_HANDLE);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find (cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen ("resources"), &res_handle);
+	if (err != CS_OK) {
+		printf ("Could not object_find \"resources\": %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find_start (cdb_handle, res_handle);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find (cdb_handle, res_handle, "process", strlen ("process"), &proc_handle);
+	if (err != CS_OK) {
+		printf ("Could not object_find \"process\": %d.\n", err);
+		return (-1);
+	}
+
+	/*
+	 * Fall back to the bare pid when "PROGNAME:PID" would be truncated
+	 */
+	name_len = snprintf (obj_name, obj_name_size, "%s:%d", __progname, (int)pid);
+	if (name_len < 0 || (size_t)name_len >= obj_name_size) {
+		snprintf (obj_name, obj_name_size, "%d", (int)pid);
+	}
+
+	err = confdb_object_find_start (cdb_handle, proc_handle);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	*find_err = confdb_object_find (cdb_handle, proc_handle, obj_name, strlen (obj_name), pid_handle);
+
+	return (0);
+}
+
+/*
+ * Read the "state" key of pid_handle and compare it with expected.
+ *
+ * Returns 0 on match, -1 (after printing the reason) if the key can't be read
+ * or holds a different value.
+ */
+static int test8_expect_state (confdb_handle_t cdb_handle, hdb_handle_t pid_handle, const char *expected)
+{
+	cs_error_t err;
+	char key_value[256];
+	size_t value_len;
+
+	err = confdb_key_get (cdb_handle, pid_handle, "state", strlen ("state"), key_value, &value_len);
+	if (err != CS_OK) {
+		printf ("Could not get \"state\" key: %d.\n", err);
+		return (-1);
+	}
+
+	if (value_len != strlen (expected) || memcmp (key_value, expected, value_len) != 0) {
+		printf ("State key is not \"%s\".\n", expected);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Test confdb integration + quit policy
+ *
+ * pid is the process whose confdb object is inspected, old_pid is unused and
+ * test_n selects the step:
+ * 1 - register, start/stop health checking and check recovery, state and
+ *     hc_last keys. Returns 0 on success.
+ * 2 - check that the object doesn't exist yet, then run the same sequence but
+ *     stop sending health checks, so the process is expected to be killed.
+ * 3 - check that state of the object of pid is "failed". Returns 0 on success.
+ *
+ * Returns 1 if confdb can't be initialized (test skipped) and 2 on failure.
+ */
+static int test8 (pid_t pid, pid_t old_pid, int test_n) {
+	confdb_handle_t cdb_handle;
+	cs_error_t err;
+	hdb_handle_t pid_handle;
+	size_t value_len;
+	uint64_t tstamp1, tstamp2;
+	char key_value[256];
+	unsigned int instance_id;
+	char tmp_obj[PATH_MAX];
+	confdb_value_types_t cdbtype;
+
+	(void)old_pid;
+
+	err = confdb_initialize (&cdb_handle, NULL);
+	if (err != CS_OK) {
+		printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
+		return (1);
+	}
+
+	printf ("%s test %d\n", __FUNCTION__, test_n);
+
+	if (test_n == 2) {
+		/*
+		 * Object should not exist
+		 */
+		printf ("%s Testing if object exists (it shouldn't)\n", __FUNCTION__);
+
+		if (test8_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle, &err) != 0) {
+			return (2);
+		}
+
+		if (err == CS_OK) {
+			printf ("Could find object \"%s\": %d.\n", tmp_obj, err);
+			return (2);
+		}
+	}
+
+	if (test_n == 1 || test_n == 2) {
+		printf ("%s: initialize\n", __FUNCTION__);
+		err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB);
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
+			return 2;
+		}
+
+		printf ("%s: register\n", __FUNCTION__);
+		err = sam_register (&instance_id);
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't register. Error %d\n", err);
+			return 2;
+		}
+
+		if (test8_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle, &err) != 0) {
+			return (2);
+		}
+
+		if (err != CS_OK) {
+			printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+			return (2);
+		}
+
+		err = confdb_key_get (cdb_handle, pid_handle, "recovery", strlen ("recovery"), key_value, &value_len);
+		if (err != CS_OK) {
+			printf ("Could not get \"recovery\" key: %d.\n", err);
+			return (2);
+		}
+
+		if (value_len != strlen ("quit") || memcmp (key_value, "quit", value_len) != 0) {
+			/*
+			 * key_value is never NUL terminated here, so print exactly value_len bytes
+			 */
+			printf ("Recovery key \"%.*s\" is not \"quit\".\n", (int)value_len, key_value);
+			return (2);
+		}
+
+		if (test8_expect_state (cdb_handle, pid_handle, "registered") != 0) {
+			return (2);
+		}
+
+		printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
+		err = sam_start ();
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't start hc. Error %d\n", err);
+			return 2;
+		}
+
+		if (test8_expect_state (cdb_handle, pid_handle, "started") != 0) {
+			return (2);
+		}
+
+		printf ("%s iid %d: stop\n", __FUNCTION__, instance_id);
+		err = sam_stop ();
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't stop hc. Error %d\n", err);
+			return 2;
+		}
+
+		if (test8_expect_state (cdb_handle, pid_handle, "registered") != 0) {
+			return (2);
+		}
+
+		/*
+		 * Health checking is stopped, so state must still be registered
+		 * after sleeping longer than the 2000 passed to sam_initialize
+		 */
+		printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id);
+		sleep (5);
+
+		if (test8_expect_state (cdb_handle, pid_handle, "registered") != 0) {
+			return (2);
+		}
+
+		printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id);
+		err = sam_start ();
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't start hc. Error %d\n", err);
+			return 2;
+		}
+
+		if (test8_expect_state (cdb_handle, pid_handle, "started") != 0) {
+			return (2);
+		}
+
+		if (test_n == 2) {
+			/*
+			 * No health check is sent. Reaching the return means the
+			 * process was not killed, which is a failure
+			 */
+			printf ("%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id);
+			sleep (5);
+
+			return (2);
+		} else {
+			printf ("%s iid %d: Test HC\n", __FUNCTION__, instance_id);
+			err = sam_hc_send ();
+			if (err != CS_OK) {
+				fprintf (stderr, "Can't send hc. Error %d\n", err);
+				return 2;
+			}
+			err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp1, &value_len, &cdbtype);
+			if (err != CS_OK) {
+				printf ("Could not get \"hc_last\" key: %d.\n", err);
+				return (2);
+			}
+			printf ("%s iid %d: Sleep 1\n", __FUNCTION__, instance_id);
+			sleep (1);
+			err = sam_hc_send ();
+			if (err != CS_OK) {
+				fprintf (stderr, "Can't send hc. Error %d\n", err);
+				return 2;
+			}
+			sleep (1);
+			err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp2, &value_len, &cdbtype);
+			if (err != CS_OK) {
+				printf ("Could not get \"hc_last\" key: %d.\n", err);
+				return (2);
+			}
+			/*
+			 * Health checks were sent about one second apart
+			 */
+			if (tstamp2 - tstamp1 < 500 || tstamp2 - tstamp1 > 2000) {
+				printf ("Difference %d is not within <500, 2000> interval.\n", (int)(tstamp2 - tstamp1));
+				return (2);
+			}
+
+			printf ("%s iid %d: stop 2\n", __FUNCTION__, instance_id);
+			err = sam_stop ();
+			if (err != CS_OK) {
+				fprintf (stderr, "Can't stop hc. Error %d\n", err);
+				return 2;
+			}
+
+			if (test8_expect_state (cdb_handle, pid_handle, "registered") != 0) {
+				return (2);
+			}
+
+			printf ("%s iid %d: exiting\n", __FUNCTION__, instance_id);
+			return (0);
+		}
+	}
+
+	if (test_n == 3) {
+		printf ("%s Testing if status is failed\n", __FUNCTION__);
+
+		/*
+		 * Previous should be FAILED
+		 */
+		if (test8_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle, &err) != 0) {
+			return (2);
+		}
+
+		if (err != CS_OK) {
+			printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+			return (2);
+		}
+
+		if (test8_expect_state (cdb_handle, pid_handle, "failed") != 0) {
+			return (2);
+		}
+
+		return (0);
+	}
+
+	return (2);
+}
+
+/*
+ * Walk confdb to resources/process and look up the object of process pid.
+ *
+ * The object name ("PROGNAME:PID", or only "PID" when the long form does not
+ * fit into obj_name) is written to obj_name and the handle of the object to
+ * *pid_handle.
+ *
+ * Returns 0 if the object was found, -1 (after printing the reason) otherwise.
+ */
+static int test9_find_pid_object (confdb_handle_t cdb_handle, pid_t pid,
+	char *obj_name, size_t obj_name_size, hdb_handle_t *pid_handle)
+{
+	cs_error_t err;
+	hdb_handle_t res_handle, proc_handle;
+	int name_len;
+
+	err = confdb_object_find_start (cdb_handle, OBJECT_PARENT_HANDLE);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find (cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen ("resources"), &res_handle);
+	if (err != CS_OK) {
+		printf ("Could not object_find \"resources\": %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find_start (cdb_handle, res_handle);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find (cdb_handle, res_handle, "process", strlen ("process"), &proc_handle);
+	if (err != CS_OK) {
+		printf ("Could not object_find \"process\": %d.\n", err);
+		return (-1);
+	}
+
+	/*
+	 * Fall back to the bare pid when "PROGNAME:PID" would be truncated
+	 */
+	name_len = snprintf (obj_name, obj_name_size, "%s:%d", __progname, (int)pid);
+	if (name_len < 0 || (size_t)name_len >= obj_name_size) {
+		snprintf (obj_name, obj_name_size, "%d", (int)pid);
+	}
+
+	err = confdb_object_find_start (cdb_handle, proc_handle);
+	if (err != CS_OK) {
+		printf ("Could not start object_find %d.\n", err);
+		return (-1);
+	}
+
+	err = confdb_object_find (cdb_handle, proc_handle, obj_name, strlen (obj_name), pid_handle);
+	if (err != CS_OK) {
+		printf ("Could not object_find \"%s\": %d.\n", obj_name, err);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Read the "state" key of pid_handle and compare it with expected.
+ *
+ * Returns 0 on match, -1 (after printing the reason) if the key can't be read
+ * or holds a different value.
+ */
+static int test9_expect_state (confdb_handle_t cdb_handle, hdb_handle_t pid_handle, const char *expected)
+{
+	cs_error_t err;
+	char key_value[256];
+	size_t value_len;
+
+	err = confdb_key_get (cdb_handle, pid_handle, "state", strlen ("state"), key_value, &value_len);
+	if (err != CS_OK) {
+		printf ("Could not get \"state\" key: %d.\n", err);
+		return (-1);
+	}
+
+	if (value_len != strlen (expected) || memcmp (key_value, expected, value_len) != 0) {
+		printf ("State key is not \"%s\".\n", expected);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Test confdb integration + restart policy
+ *
+ * pid is the process whose confdb object is inspected, old_pid is unused and
+ * test_n selects the step:
+ * 1 - register with restart policy. Instances with id below 3 check recovery
+ *     and state keys, start health checking and wait to be killed. Instance 3
+ *     calls sam_mark_failed. This step never returns 0 itself.
+ * 2 - check that state of the object of pid is "failed". Returns 0 on success.
+ *
+ * Returns 1 if confdb can't be initialized (test skipped) and 2 on failure.
+ */
+static int test9 (pid_t pid, pid_t old_pid, int test_n) {
+	confdb_handle_t cdb_handle;
+	cs_error_t err;
+	hdb_handle_t pid_handle;
+	size_t value_len;
+	char key_value[256];
+	unsigned int instance_id;
+	char tmp_obj[PATH_MAX];
+
+	(void)old_pid;
+
+	err = confdb_initialize (&cdb_handle, NULL);
+	if (err != CS_OK) {
+		printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
+		return (1);
+	}
+
+	printf ("%s test %d\n", __FUNCTION__, test_n);
+
+	if (test_n == 1) {
+		printf ("%s: initialize\n", __FUNCTION__);
+		err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CONFDB);
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
+			return 2;
+		}
+
+		printf ("%s: register\n", __FUNCTION__);
+		err = sam_register (&instance_id);
+		if (err != CS_OK) {
+			fprintf (stderr, "Can't register. Error %d\n", err);
+			return 2;
+		}
+		printf ("%s: iid %d\n", __FUNCTION__, instance_id);
+
+		if (instance_id < 3) {
+			if (test9_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle) != 0) {
+				return (2);
+			}
+
+			err = confdb_key_get (cdb_handle, pid_handle, "recovery", strlen ("recovery"), key_value, &value_len);
+			if (err != CS_OK) {
+				printf ("Could not get \"recovery\" key: %d.\n", err);
+				return (2);
+			}
+
+			if (value_len != strlen ("restart") || memcmp (key_value, "restart", value_len) != 0) {
+				/*
+				 * key_value is never NUL terminated here, so print exactly value_len bytes
+				 */
+				printf ("Recovery key \"%.*s\" is not \"restart\".\n", (int)value_len, key_value);
+				return (2);
+			}
+
+			if (test9_expect_state (cdb_handle, pid_handle, "registered") != 0) {
+				return (2);
+			}
+
+			printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
+			err = sam_start ();
+			if (err != CS_OK) {
+				fprintf (stderr, "Can't start hc. Error %d\n", err);
+				return 2;
+			}
+
+			if (test9_expect_state (cdb_handle, pid_handle, "started") != 0) {
+				return (2);
+			}
+
+			/*
+			 * No health check is sent. Reaching the return means the
+			 * process was not killed, which is a failure
+			 */
+			printf ("%s iid %d: waiting for kill\n", __FUNCTION__, instance_id);
+			sleep (10);
+
+			return (2);
+		}
+
+		if (instance_id == 3) {
+			printf ("%s iid %d: mark failed\n", __FUNCTION__, instance_id);
+			err = sam_mark_failed ();
+			if (err != CS_OK) {
+				fprintf (stderr, "Can't mark failed. Error %d\n", err);
+				return 2;
+			}
+
+			/*
+			 * Reaching the return means the process survived
+			 * sam_mark_failed, which is a failure
+			 */
+			sleep (10);
+
+			return (2);
+		}
+
+		return (2);
+	}
+
+	if (test_n == 2) {
+		printf ("%s Testing if status is failed\n", __FUNCTION__);
+
+		/*
+		 * Previous should be FAILED
+		 */
+		if (test9_find_pid_object (cdb_handle, pid, tmp_obj, sizeof (tmp_obj), &pid_handle) != 0) {
+			return (2);
+		}
+
+		if (test9_expect_state (cdb_handle, pid_handle, "failed") != 0) {
+			return (2);
+		}
+
+		return (0);
+	}
+
+	return (2);
+}
+
int main(int argc, char *argv[])
{
- pid_t pid;
+ pid_t pid, old_pid;
int err;
int stat;
int all_passed = 1;
if (pid == -1) {
fprintf (stderr, "Can't fork\n");
- return 1;
+ return 2;
}
if (pid == 0) {
waitpid (pid, &stat, 0);
fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
+ if (WEXITSTATUS (stat) == 1)
+ no_skipped++;
+ if (WEXITSTATUS (stat) > 1)
+ all_passed = 0;
+
+ pid = fork ();
+
+ if (pid == -1) {
+ fprintf (stderr, "Can't fork\n");
+ return 2;
+ }
+
+ if (pid == 0) {
+ err = test8 (getpid (), 0, 1);
+ sam_finalize ();
+ return (err);
+ }
+
+ waitpid (pid, &stat, 0);
+ old_pid = pid;
+
+ if (WEXITSTATUS (stat) == 0) {
+ pid = fork ();
+
+ if (pid == -1) {
+ fprintf (stderr, "Can't fork\n");
+ return 2;
+ }
+
+ if (pid == 0) {
+ err = test8 (getpid (), old_pid, 2);
+ sam_finalize ();
+ return (err);
+ }
+
+ waitpid (pid, &stat, 0);
+ old_pid = pid;
+
+ if (WEXITSTATUS (stat) == 0) {
+ pid = fork ();
+
+ if (pid == -1) {
+ fprintf (stderr, "Can't fork\n");
+ return 2;
+ }
+
+ if (pid == 0) {
+ err = test8 (old_pid, 0, 3);
+ sam_finalize ();
+ return (err);
+ }
+
+ waitpid (pid, &stat, 0);
+ }
+ }
+
+ if (WEXITSTATUS (stat) == 1)
+ no_skipped++;
+ if (WEXITSTATUS (stat) > 1)
+ all_passed = 0;
+
+ pid = fork ();
+
+ if (pid == -1) {
+ fprintf (stderr, "Can't fork\n");
+ return 2;
+ }
+
+ if (pid == 0) {
+ err = test9 (getpid (), 0, 1);
+ sam_finalize ();
+ return (err);
+ }
+
+ waitpid (pid, &stat, 0);
+ old_pid = pid;
+
+ if (WEXITSTATUS (stat) == 0) {
+ pid = fork ();
+
+ if (pid == -1) {
+ fprintf (stderr, "Can't fork\n");
+ return 2;
+ }
+
+ if (pid == 0) {
+ err = test9 (old_pid, 0, 2);
+ sam_finalize ();
+ return (err);
+ }
+
+ waitpid (pid, &stat, 0);
+ }
+ fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
if (WEXITSTATUS (stat) == 1)
no_skipped++;