]> git.proxmox.com Git - mirror_corosync.git/commitdiff
SAM Confdb integration
author Jan Friesse <jfriesse@redhat.com>
Mon, 27 Sep 2010 07:34:21 +0000 (07:34 +0000)
committer Jan Friesse <jfriesse@redhat.com>
Mon, 27 Sep 2010 07:34:21 +0000 (07:34 +0000)
This patch adds support for Confdb integration with SAM. It is now possible to
use SAM_RECOVERY_POLICY_CONFDB as a flag in addition to the previous policies.

Also, a new function, sam_mark_failed, is added so that the RECOVERY
policy can be used together with confdb and give the expected results
(especially when integrated with the corosync watchdog).

This patch also makes SAM thread-safe.

git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@3050 fd59a12c-fef9-0310-b244-a6a79926bd2f

cts/agents/Makefile.am
include/corosync/sam.h
lib/Makefile.am
lib/libsam.verso
lib/sam.c
man/Makefile.am
man/sam_initialize.3
man/sam_mark_failed.3 [new file with mode: 0644]
man/sam_overview.8
test/testsam.c

index c1a7f859d09350c9e9e3204399eb9154587262b1..1d73a9bbb4a95815457916c287789adcc4572b8c 100644 (file)
@@ -66,7 +66,7 @@ confdb_test_agent_LDADD =  -lconfdb -lcoroipcc ../../exec/coropoll.o
 confdb_test_agent_LDFLAGS =  -L../../lib 
 
 sam_test_agent_SOURCES = sam_test_agent.c common_test_agent.c
-sam_test_agent_LDADD =  -lsam -lquorum -lcoroipcc ../../exec/coropoll.o
+sam_test_agent_LDADD =  -lsam -lquorum -lcoroipcc  -lconfdb ../../exec/coropoll.o
 sam_test_agent_LDFLAGS =  -L../../lib 
 
 votequorum_test_agent_SOURCES = votequorum_test_agent.c common_test_agent.c
index 41727c2f195ef32c68834f1bd3aa3611dc3b804b..30401a087c7089da4e3788f8abb32beb32301277 100644 (file)
@@ -46,6 +46,7 @@ typedef enum {
        SAM_RECOVERY_POLICY_QUORUM = 0x08,
        SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
        SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
+       SAM_RECOVERY_POLICY_CONFDB = 0x10,
 } sam_recovery_policy_t;
 
 /*
@@ -205,6 +206,18 @@ cs_error_t sam_data_store (
        const void *data,
        size_t size);
 
+/*
+ * Marks the child as failed. This can be called only when the SAM_RECOVERY_POLICY_CONFDB flag
+ * is set, and makes sense only for SAM_RECOVERY_POLICY_RESTART. The child is killed without
+ * being sent the warn signal, and the confdb state key is set to failed.
+ *
+ * - CS_OK if no problem occurred
+ * - CS_ERR_BAD_HANDLE if the library was not initialized or was already finalized
+ * - CS_ERR_INVALID_PARAM if the recovery policy does not have the SAM_RECOVERY_POLICY_CONFDB flag set
+ * - CS_ERR_LIBRARY if an internal error occurred (communication with the parent
+ *   process)
+ */
+cs_error_t sam_mark_failed (void);
 
 #ifdef __cplusplus
 }
index 7aca53b929aafc5bd66e7b559cd17eaa63507ad4..c4ee3c0342dfd280c6d2d4e7750a44ec2917bcb7 100644 (file)
@@ -62,7 +62,7 @@ libvotequorum_a_SOURCES       = votequorum.c
 libconfdb_a_SOURCES    = confdb.c sa-confdb.c
 libconfdb_a_LIBADD     = ../lcr/lcr_ifact.o
 CONFDB_LINKER_ADD      = $(OS_DYFLAGS) $(OS_LDL)
-SAM_LINKER_ADD         = -L. -lquorum
+SAM_LINKER_ADD         = -L. -lquorum -lconfdb
 libcoroipcc_a_SOURCES  = coroipcc.c
 libsam_a_SOURCES       = sam.c
 
index 80895903a15c8a6a7df7ca72da9a3ed78fe07eca..fdc6698807a92654177d5679fe2de81be0c17dd4 100644 (file)
@@ -1 +1 @@
-4.3.0
+4.4.0
index a3d1cd002bbeb28e0159289f38c433a2ef6a545d..53020ac0399d4065674eaf557c94461fbec9b60a 100644 (file)
--- a/lib/sam.c
+++ b/lib/sam.c
@@ -42,6 +42,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <errno.h>
@@ -50,6 +51,7 @@
 #include <corosync/coroipc_types.h>
 #include <corosync/coroipcc.h>
 #include <corosync/corodefs.h>
+#include <corosync/confdb.h>
 #include <corosync/hdb.h>
 #include <corosync/quorum.h>
 
 #include <sys/wait.h>
 #include <signal.h>
 
+#define SAM_CONFDB_S_FAILED            "failed"
+#define SAM_CONFDB_S_REGISTERED                "registered"
+#define SAM_CONFDB_S_STARTED           "started"
+#define SAM_CONFDB_S_Q_WAIT            "waiting for quorum"
+
+#define SAM_RP_MASK_Q(pol)     (pol & (~SAM_RECOVERY_POLICY_QUORUM))
+#define SAM_RP_MASK_C(pol)     (pol & (~SAM_RECOVERY_POLICY_CONFDB))
+#define SAM_RP_MASK(pol)       (pol & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CONFDB)))
+
 enum sam_internal_status_t {
        SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
        SAM_INTERNAL_STATUS_INITIALIZED,
@@ -75,6 +86,7 @@ enum sam_command_t {
        SAM_COMMAND_HB,
        SAM_COMMAND_DATA_STORE,
        SAM_COMMAND_WARN_SIGNAL_SET,
+       SAM_COMMAND_MARK_FAILED,
 };
 
 enum sam_reply_t {
@@ -89,6 +101,13 @@ enum sam_parent_action_t {
        SAM_PARENT_ACTION_CONTINUE
 };
 
+enum sam_confdb_key_t {
+       SAM_CONFDB_KEY_RECOVERY,
+       SAM_CONFDB_KEY_HC_PERIOD,
+       SAM_CONFDB_KEY_LAST_HC,
+       SAM_CONFDB_KEY_STATE,
+};
+
 static struct {
        int time_interval;
        sam_recovery_policy_t recovery_policy;
@@ -109,11 +128,156 @@ static struct {
        size_t user_data_size;
        size_t user_data_allocated;
 
+       pthread_mutex_t lock;
+
        quorum_handle_t quorum_handle;
        uint32_t quorate;
        int quorum_fd;
+
+       confdb_handle_t confdb_handle;
+       hdb_handle_t confdb_pid_handle;
 } sam_internal_data;
 
+extern const char *__progname;
+
+static cs_error_t sam_confdb_update_key (enum sam_confdb_key_t key, const char *value)
+{
+       cs_error_t err;
+       const char *svalue;
+       uint64_t hc_period, last_hc;
+       struct timeval tv;
+       const char *ssvalue[] = { [SAM_RECOVERY_POLICY_QUIT] = "quit", [SAM_RECOVERY_POLICY_RESTART] = "restart" };
+
+       switch (key) {
+       case SAM_CONFDB_KEY_RECOVERY:
+               svalue = ssvalue[SAM_RP_MASK (sam_internal_data.recovery_policy)];
+
+               if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
+                       "recovery", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) {
+                       goto exit_error;
+               }
+               break;
+       case SAM_CONFDB_KEY_HC_PERIOD:
+               hc_period = sam_internal_data.time_interval;
+
+               if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
+                       "hc_period", &hc_period, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) {
+                       goto exit_error;
+               }
+               break;
+       case SAM_CONFDB_KEY_LAST_HC:
+               if (gettimeofday (&tv, NULL) == -1) {
+                       last_hc = 0;
+               } else {
+                       last_hc = ((uint64_t)tv.tv_sec * 1000) + ((uint64_t)tv.tv_usec / 1000);
+               }
+
+               if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
+                       "hc_last", &last_hc, sizeof (uint64_t), CONFDB_VALUETYPE_UINT64)) != CS_OK) {
+                       goto exit_error;
+               }
+               break;
+       case SAM_CONFDB_KEY_STATE:
+               svalue = value;
+               if ((err = confdb_key_create_typed (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle,
+                       "state", svalue, strlen ((const char *)svalue), CONFDB_VALUETYPE_STRING)) != CS_OK) {
+                       goto exit_error;
+               }
+               break;
+       }
+
+       return (CS_OK);
+
+exit_error:
+       return (err);
+}
+
+static cs_error_t sam_confdb_destroy_pid_obj (void)
+{
+       return (confdb_object_destroy (sam_internal_data.confdb_handle, sam_internal_data.confdb_pid_handle));
+}
+
+static cs_error_t sam_confdb_register (void)
+{
+       const char *obj_name;
+       cs_error_t err;
+       confdb_handle_t confdb_handle;
+       hdb_handle_t resource_handle, process_handle, pid_handle, obj_handle;
+       hdb_handle_t *res_handle;
+       char tmp_obj[PATH_MAX];
+       int i;
+
+       if ((err = confdb_initialize (&confdb_handle, NULL)) != CS_OK) {
+               return (err);
+       }
+
+       for (i = 0; i < 3; i++) {
+               switch (i) {
+               case 0:
+                       obj_name = "resources";
+                       obj_handle = OBJECT_PARENT_HANDLE;
+                       res_handle = &resource_handle;
+                       break;
+               case 1:
+                       obj_name = "process";
+                       obj_handle = resource_handle;
+                       res_handle = &process_handle;
+                       break;
+               case 2:
+                       if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, getpid ()) >= sizeof (tmp_obj)) {
+                               snprintf (tmp_obj, sizeof (tmp_obj), "%d", getpid ());
+                       }
+
+                       obj_name = tmp_obj;
+                       obj_handle = process_handle;
+                       res_handle = &pid_handle;
+                       break;
+               }
+
+               if ((err = confdb_object_find_start (confdb_handle, obj_handle)) != CS_OK) {
+                       goto finalize_error;
+               }
+
+               if ((err = confdb_object_find (confdb_handle, obj_handle, obj_name, strlen (obj_name),
+                       res_handle)) != CS_OK) {
+                       if (err == CONFDB_ERR_ACCESS) {
+                               /*
+                                * Try to create object
+                                */
+                               if ((err = confdb_object_create (confdb_handle, obj_handle, obj_name,
+                                       strlen (obj_name), res_handle)) != CS_OK) {
+                                       goto finalize_error;
+                               }
+                       } else {
+                               goto finalize_error;
+                       }
+               } else  {
+                       if ((err = confdb_object_find_destroy (confdb_handle, obj_handle)) != CS_OK) {
+                               goto finalize_error;
+                       }
+               }
+       }
+
+       sam_internal_data.confdb_pid_handle = pid_handle;
+       sam_internal_data.confdb_handle = confdb_handle;
+
+       if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY, NULL)) != CS_OK) {
+               goto destroy_finalize_error;
+       }
+
+       if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD, NULL)) != CS_OK) {
+               goto destroy_finalize_error;
+       }
+
+       return (CS_OK);
+
+destroy_finalize_error:
+       sam_confdb_destroy_pid_obj ();
+finalize_error:
+       confdb_finalize (confdb_handle);
+       return (err);
+}
+
 static void quorum_notification_fn (
         quorum_handle_t handle,
         uint32_t quorate,
@@ -135,8 +299,8 @@ cs_error_t sam_initialize (
                return (CS_ERR_BAD_HANDLE);
        }
 
-       if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART &&
-           recovery_policy != SAM_RECOVERY_POLICY_QUORUM_QUIT && recovery_policy != SAM_RECOVERY_POLICY_QUORUM_RESTART) {
+       if (SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_QUIT &&
+           SAM_RP_MASK (recovery_policy) != SAM_RECOVERY_POLICY_RESTART) {
                return (CS_ERR_INVALID_PARAM);
        }
 
@@ -178,6 +342,8 @@ cs_error_t sam_initialize (
        sam_internal_data.user_data_size = 0;
        sam_internal_data.user_data_allocated = 0;
 
+       pthread_mutex_init (&sam_internal_data.lock, NULL);
+
        return (CS_OK);
 
 exit_error_quorum:
@@ -290,8 +456,12 @@ cs_error_t sam_data_getsize (size_t *size)
                return (CS_ERR_BAD_HANDLE);
        }
 
+       pthread_mutex_lock (&sam_internal_data.lock);
+
        *size = sam_internal_data.user_data_size;
 
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
        return (CS_OK);
 }
 
@@ -299,6 +469,10 @@ cs_error_t sam_data_restore (
        void *data,
        size_t size)
 {
+       cs_error_t err;
+
+       err = CS_OK;
+
        if (data == NULL) {
                return (CS_ERR_INVALID_PARAM);
        }
@@ -310,17 +484,30 @@ cs_error_t sam_data_restore (
                return (CS_ERR_BAD_HANDLE);
        }
 
+       pthread_mutex_lock (&sam_internal_data.lock);
+
        if (sam_internal_data.user_data_size == 0) {
-               return (CS_OK);
+               err = CS_OK;
+
+               goto error_unlock;
        }
 
        if (size < sam_internal_data.user_data_size) {
-               return (CS_ERR_INVALID_PARAM);
+               err = CS_ERR_INVALID_PARAM;
+
+               goto error_unlock;
        }
 
        memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size);
 
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
        return (CS_OK);
+
+error_unlock:
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
+       return (err);
 }
 
 cs_error_t sam_data_store (
@@ -343,28 +530,36 @@ cs_error_t sam_data_store (
                size = 0;
        }
 
+       pthread_mutex_lock (&sam_internal_data.lock);
+
        if (sam_internal_data.am_i_child) {
                /*
                 * We are child so we must send data to parent
                 */
                command = SAM_COMMAND_DATA_STORE;
                if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
-                       return (CS_ERR_LIBRARY);
+                       err = CS_ERR_LIBRARY;
+
+                       goto error_unlock;
                }
 
                if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) != sizeof (size)) {
-                       return (CS_ERR_LIBRARY);
+                       err = CS_ERR_LIBRARY;
+
+                       goto error_unlock;
                }
 
                if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) {
-                       return (CS_ERR_LIBRARY);
+                       err = CS_ERR_LIBRARY;
+
+                       goto error_unlock;
                }
 
                /*
                 * And wait for reply
                 */
                if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
-                       return (err);
+                       goto error_unlock;
                }
        }
 
@@ -379,7 +574,9 @@ cs_error_t sam_data_store (
        } else {
                if (sam_internal_data.user_data_allocated < size) {
                        if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) {
-                               return (CS_ERR_NO_MEMORY);
+                               err = CS_ERR_NO_MEMORY;
+
+                               goto error_unlock;
                        }
 
                        sam_internal_data.user_data_allocated = size;
@@ -392,30 +589,53 @@ cs_error_t sam_data_store (
                memcpy (sam_internal_data.user_data, data, size);
        }
 
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
        return (CS_OK);
+
+error_unlock:
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
+       return (err);
 }
 
 cs_error_t sam_start (void)
 {
        char command;
        cs_error_t err;
+       sam_recovery_policy_t recpol;
 
        if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
                return (CS_ERR_BAD_HANDLE);
        }
 
+       recpol = sam_internal_data.recovery_policy;
+
+       if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
+               pthread_mutex_lock (&sam_internal_data.lock);
+       }
+
        command = SAM_COMMAND_START;
 
-       if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
+       if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
+               if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                       pthread_mutex_unlock (&sam_internal_data.lock);
+               }
+
                return (CS_ERR_LIBRARY);
+       }
 
-       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+       if (recpol & SAM_RECOVERY_POLICY_QUORUM || recpol & SAM_RECOVERY_POLICY_CONFDB) {
                /*
                 * Wait for parent reply
                 */
                if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
+                       pthread_mutex_unlock (&sam_internal_data.lock);
+
                        return (err);
                }
+
+               pthread_mutex_unlock (&sam_internal_data.lock);
        }
 
        if (sam_internal_data.hc_callback)
@@ -430,6 +650,7 @@ cs_error_t sam_start (void)
 cs_error_t sam_stop (void)
 {
        char command;
+       cs_error_t err;
 
        if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
                return (CS_ERR_BAD_HANDLE);
@@ -437,8 +658,30 @@ cs_error_t sam_stop (void)
 
        command = SAM_COMMAND_STOP;
 
-       if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
+       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+               pthread_mutex_lock (&sam_internal_data.lock);
+       }
+
+       if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
+               if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+                       pthread_mutex_unlock (&sam_internal_data.lock);
+               }
+
                return (CS_ERR_LIBRARY);
+       }
+
+       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+               /*
+                * Wait for parent reply
+                */
+               if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
+                       pthread_mutex_unlock (&sam_internal_data.lock);
+
+                       return (err);
+               }
+
+               pthread_mutex_unlock (&sam_internal_data.lock);
+       }
 
        if (sam_internal_data.hc_callback)
                if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) != sizeof (command))
@@ -489,6 +732,26 @@ exit_error:
        return (CS_OK);
 }
 
+cs_error_t sam_mark_failed (void)
+{
+       char command;
+
+       if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED &&
+           sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
+               return (CS_ERR_BAD_HANDLE);
+       }
+
+       if (!(sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB)) {
+               return (CS_ERR_INVALID_PARAM);
+       }
+
+       command = SAM_COMMAND_MARK_FAILED;
+
+       if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command))
+               return (CS_ERR_LIBRARY);
+
+       return (CS_OK);
+}
 
 cs_error_t sam_warn_signal_set (int warn_signal)
 {
@@ -501,25 +764,31 @@ cs_error_t sam_warn_signal_set (int warn_signal)
                return (CS_ERR_BAD_HANDLE);
        }
 
+       pthread_mutex_lock (&sam_internal_data.lock);
+
        if (sam_internal_data.am_i_child) {
                /*
                 * We are child so we must send data to parent
                 */
                command = SAM_COMMAND_WARN_SIGNAL_SET;
                if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) != sizeof (command)) {
-                       return (CS_ERR_LIBRARY);
+                       err = CS_ERR_LIBRARY;
+
+                       goto error_unlock;
                }
 
                if (sam_safe_write (sam_internal_data.child_fd_out, &warn_signal, sizeof (warn_signal)) !=
                   sizeof (warn_signal)) {
-                       return (CS_ERR_LIBRARY);
+                       err = CS_ERR_LIBRARY;
+
+                       goto error_unlock;
                }
 
                /*
                 * And wait for reply
                 */
                if ((err = sam_read_reply (sam_internal_data.child_fd_in)) != CS_OK) {
-                       return (err);
+                       goto error_unlock;
                }
        }
 
@@ -528,14 +797,51 @@ cs_error_t sam_warn_signal_set (int warn_signal)
         */
        sam_internal_data.warn_signal = warn_signal;
 
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
        return (CS_OK);
+
+error_unlock:
+       pthread_mutex_unlock (&sam_internal_data.lock);
+
+       return (err);
 }
 
-static cs_error_t sam_parent_warn_signal_set (
+static cs_error_t sam_parent_reply_send (
+       cs_error_t err,
        int parent_fd_in,
        int parent_fd_out)
 {
        char reply;
+
+       if (err == CS_OK) {
+               reply = SAM_REPLY_OK;
+
+               if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
+                       err = CS_ERR_LIBRARY;
+                       goto error_reply;
+               }
+
+               return (CS_OK);
+       }
+
+error_reply:
+       reply = SAM_REPLY_ERROR;
+       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
+               return (CS_ERR_LIBRARY);
+       }
+       if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
+               return (CS_ERR_LIBRARY);
+       }
+
+       return (err);
+}
+
+
+static cs_error_t sam_parent_warn_signal_set (
+       int parent_fd_in,
+       int parent_fd_out)
+{
        char *user_data;
        int warn_signal;
        cs_error_t err;
@@ -553,35 +859,27 @@ static cs_error_t sam_parent_warn_signal_set (
                goto error_reply;
        }
 
-       reply = SAM_REPLY_OK;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               err = CS_ERR_LIBRARY;
-               goto error_reply;
-       }
 
-       return (CS_OK);
+       return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
 
 error_reply:
-       reply = SAM_REPLY_ERROR;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               return (CS_ERR_LIBRARY);
-       }
-       if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
-               return (CS_ERR_LIBRARY);
-       }
-
-       return (err);
+       return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
 }
 
 static cs_error_t sam_parent_wait_for_quorum (
        int parent_fd_in,
        int parent_fd_out)
 {
-       char reply;
        cs_error_t err;
        struct pollfd pfds[2];
        int poll_err;
 
+       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+               if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_Q_WAIT)) != CS_OK) {
+                       goto error_reply;
+               }
+       }
+
        /*
         * Update current quorum
         */
@@ -630,24 +928,44 @@ static cs_error_t sam_parent_wait_for_quorum (
                }
        }
 
-       reply = SAM_REPLY_OK;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               err = CS_ERR_LIBRARY;
-               goto error_reply;
+       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+               if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_STARTED)) != CS_OK) {
+                       goto error_reply;
+               }
        }
 
-       return (CS_OK);
+       return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
 
 error_reply:
-       reply = SAM_REPLY_ERROR;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               return (CS_ERR_LIBRARY);
+       if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_CONFDB) {
+               sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED);
        }
-       if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
-               return (CS_ERR_LIBRARY);
+
+       return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
+}
+
+static cs_error_t sam_parent_confdb_state_set (
+       int parent_fd_in,
+       int parent_fd_out,
+       int state)
+{
+       cs_error_t err;
+       const char *state_s;
+
+       if (state == 1) {
+               state_s = SAM_CONFDB_S_STARTED;
+       } else {
+               state_s = SAM_CONFDB_S_REGISTERED;
        }
 
-       return (err);
+       if ((err = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, state_s)) != CS_OK) {
+               goto error_reply;
+       }
+
+       return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
+
+error_reply:
+       return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
 }
 
 static cs_error_t sam_parent_kill_child (
@@ -675,12 +993,26 @@ static cs_error_t sam_parent_kill_child (
        return (CS_OK);
 }
 
+static cs_error_t sam_parent_mark_child_failed (
+       int *action,
+       pid_t child_pid)
+{
+       sam_recovery_policy_t recpol;
+
+       recpol = sam_internal_data.recovery_policy;
+
+       sam_internal_data.term_send = 1;
+       sam_internal_data.recovery_policy = SAM_RECOVERY_POLICY_QUIT |
+           (SAM_RP_MASK_C (recpol) ? SAM_RECOVERY_POLICY_CONFDB : 0) |
+           (SAM_RP_MASK_Q (recpol) ? SAM_RECOVERY_POLICY_QUORUM : 0);
+
+       return (sam_parent_kill_child (action, child_pid));
+}
 
 static cs_error_t sam_parent_data_store (
        int parent_fd_in,
        int parent_fd_out)
 {
-       char reply;
        char *user_data;
        ssize_t size;
        cs_error_t err;
@@ -711,28 +1043,14 @@ static cs_error_t sam_parent_data_store (
                goto free_error_reply;
        }
 
-       reply = SAM_REPLY_OK;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               err = CS_ERR_LIBRARY;
-               goto free_error_reply;
-       }
-
        free (user_data);
 
-       return (CS_OK);
+       return (sam_parent_reply_send (CS_OK, parent_fd_in, parent_fd_out));
 
 free_error_reply:
        free (user_data);
 error_reply:
-       reply = SAM_REPLY_ERROR;
-       if (sam_safe_write (parent_fd_out, &reply, sizeof (reply)) != sizeof (reply)) {
-               return (CS_ERR_LIBRARY);
-       }
-       if (sam_safe_write (parent_fd_out, &err, sizeof (err)) != sizeof (err)) {
-               return (CS_ERR_LIBRARY);
-       }
-
-       return (err);
+       return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
 }
 
 static enum sam_parent_action_t sam_parent_handler (
@@ -749,10 +1067,12 @@ static enum sam_parent_action_t sam_parent_handler (
        struct pollfd pfds[2];
        nfds_t nfds;
        cs_error_t err;
+       sam_recovery_policy_t recpol;
 
        status = 0;
 
        action = SAM_PARENT_ACTION_CONTINUE;
+       recpol = sam_internal_data.recovery_policy;
 
        while (action == SAM_PARENT_ACTION_CONTINUE) {
                pfds[0].fd = parent_fd_in;
@@ -766,7 +1086,7 @@ static enum sam_parent_action_t sam_parent_handler (
                        time_interval = -1;
                }
 
-               if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+               if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
                        pfds[nfds].fd = sam_internal_data.quorum_fd;
                        pfds[nfds].events = POLLIN;
                        pfds[nfds].revents = 0;
@@ -820,6 +1140,10 @@ static enum sam_parent_action_t sam_parent_handler (
                                        goto action_exit;
                                }
 
+                               if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                                       sam_confdb_update_key (SAM_CONFDB_KEY_LAST_HC, NULL);
+                               }
+
                                /*
                                 * We have read command
                                 */
@@ -829,13 +1153,20 @@ static enum sam_parent_action_t sam_parent_handler (
                                                /*
                                                 *  Not started yet
                                                 */
-                                               if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+                                               if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
                                                        if (sam_parent_wait_for_quorum (parent_fd_in,
                                                            parent_fd_out) != CS_OK) {
                                                                continue;
                                                        }
                                                }
 
+                                               if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                                                       if (sam_parent_confdb_state_set (parent_fd_in,
+                                                           parent_fd_out, 1) != CS_OK) {
+                                                               continue;
+                                                           }
+                                               }
+
                                                status = 1;
                                        }
                                        break;
@@ -844,6 +1175,13 @@ static enum sam_parent_action_t sam_parent_handler (
                                                /*
                                                 *  Started
                                                 */
+                                               if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                                                       if (sam_parent_confdb_state_set (parent_fd_in,
+                                                           parent_fd_out, 0) != CS_OK) {
+                                                               continue;
+                                                           }
+                                               }
+
                                                status = 0;
                                        }
                                        break;
@@ -853,6 +1191,10 @@ static enum sam_parent_action_t sam_parent_handler (
                                case SAM_COMMAND_WARN_SIGNAL_SET:
                                        sam_parent_warn_signal_set (parent_fd_in, parent_fd_out);
                                        break;
+                               case SAM_COMMAND_MARK_FAILED:
+                                       status = 1;
+                                       sam_parent_mark_child_failed (&action, child_pid);
+                                       break;
                                }
                        } /* if (pfds[0].revents != 0) */
 
@@ -882,13 +1224,25 @@ cs_error_t sam_register (
        pid_t pid;
        int pipe_error;
        int pipe_fd_out[2], pipe_fd_in[2];
-       enum sam_parent_action_t action;
+       enum sam_parent_action_t action, old_action;
        int child_status;
+       sam_recovery_policy_t recpol;
 
        if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) {
                return (CS_ERR_BAD_HANDLE);
        }
 
+       recpol = sam_internal_data.recovery_policy;
+
+       if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+               /*
+                * Register to objdb
+                */
+               if ((error = sam_confdb_register ()) != CS_OK) {
+                       goto error_exit;
+               }
+       }
+
        error = CS_OK;
 
        while (1) {
@@ -905,6 +1259,12 @@ cs_error_t sam_register (
                        goto error_exit;
                }
 
+               if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                       if ((error = sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_REGISTERED)) != CS_OK) {
+                               goto error_exit;
+                       }
+               }
+
                sam_internal_data.instance_id++;
 
                sam_internal_data.term_send = 0;
@@ -937,6 +1297,8 @@ cs_error_t sam_register (
                        sam_internal_data.am_i_child = 1;
                        sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
 
+                       pthread_mutex_init (&sam_internal_data.lock, NULL);
+
                        goto error_exit;
                } else {
                        /*
@@ -961,20 +1323,34 @@ cs_error_t sam_register (
                        while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
                                ;
 
+                       old_action = action;
+
                        if (action == SAM_PARENT_ACTION_RECOVERY) {
-                               if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT ||
-                                   sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUORUM_QUIT)
+                               if (SAM_RP_MASK (sam_internal_data.recovery_policy) == SAM_RECOVERY_POLICY_QUIT)
                                        action = SAM_PARENT_ACTION_QUIT;
                        }
 
+
                        if (action == SAM_PARENT_ACTION_QUIT) {
-                               if (sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) {
+                               if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
                                        quorum_finalize (sam_internal_data.quorum_handle);
                                }
 
+                               if (recpol & SAM_RECOVERY_POLICY_CONFDB) {
+                                       if (old_action == SAM_PARENT_ACTION_RECOVERY) {
+                                               /*
+                                                * Mark as failed
+                                                */
+                                               sam_confdb_update_key (SAM_CONFDB_KEY_STATE, SAM_CONFDB_S_FAILED);
+                                       } else {
+                                               sam_confdb_destroy_pid_obj ();
+                                       }
+                               }
+
                                exit (WEXITSTATUS (child_status));
                        }
 
+
                }
        }
 
index 27a12db3dcff97777032959a1957f2682f1844e7..58923f00313a7680132728ea310fbba1e3eff127 100644 (file)
@@ -116,6 +116,7 @@ dist_man_MANS = \
        sam_hc_callback_register.3 \
        sam_hc_send.3 \
        sam_initialize.3 \
+       sam_mark_failed.3 \
        sam_overview.8 \
        sam_register.3 \
        sam_start.3 \
index 1043954b753b1d35bd7b5b875d72a8a56f3ee387..5a3334f5d0d7c3749fcc95d219bd7b42d763acb2 100644 (file)
@@ -31,7 +31,7 @@
 .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" * THE POSSIBILITY OF SUCH DAMAGE.
 .\" */
-.TH "SAM_INITIALIZE" 3 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+.TH "SAM_INITIALIZE" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
 
 .SH NAME
 .P
@@ -71,6 +71,7 @@ The \fIrecovery_policy\fR is defined as type:
         SAM_RECOVERY_POLICY_QUORUM = 0x08,
         SAM_RECOVERY_POLICY_QUORUM_QUIT = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_QUIT,
         SAM_RECOVERY_POLICY_QUORUM_RESTART = SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_RESTART,
+        SAM_RECOVERY_POLICY_CONFDB = 0x10,
   } sam_recovery_policy_t;
 .fi
 
@@ -94,6 +95,9 @@ quorate and process will be terminated if quorum is lost.
 SAM_RECOVERY_POLICY_QUORUM_RESTART
 same as \fISAM_RECOVERY_POLICY_RESTART\fR but \fBsam_start (3)\fR will block until corosync becomes
 quorate and process will be restarted if quorum is lost.
+.TP
+SAM_RECOVERY_POLICY_CONFDB
+is not a policy by itself. It is a flag that enables confdb integration and can be combined with any of the previous policies.
 
 .P
 To perform event driven healthchecking, \fBsam_register(3)\fR and
diff --git a/man/sam_mark_failed.3 b/man/sam_mark_failed.3
new file mode 100644 (file)
index 0000000..dabe2da
--- /dev/null
@@ -0,0 +1,73 @@
+.\"/*
+.\" * Copyright (c) 2010 Red Hat, Inc.
+.\" *
+.\" * All rights reserved.
+.\" *
+.\" * Author: Jan Friesse (jfriesse@redhat.com)
+.\" *
+.\" * This software licensed under BSD license, the text of which follows:
+.\" *
+.\" * Redistribution and use in source and binary forms, with or without
+.\" * modification, are permitted provided that the following conditions are met:
+.\" *
+.\" * - Redistributions of source code must retain the above copyright notice,
+.\" *   this list of conditions and the following disclaimer.
+.\" * - Redistributions in binary form must reproduce the above copyright notice,
+.\" *   this list of conditions and the following disclaimer in the documentation
+.\" *   and/or other materials provided with the distribution.
+.\" * - Neither the name of the Red Hat, Inc. nor the names of its
+.\" *   contributors may be used to endorse or promote products derived from this
+.\" *   software without specific prior written permission.
+.\" *
+.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+.\" * THE POSSIBILITY OF SUCH DAMAGE.
+.\" */
+.TH "SAM_MARK_FAILED" 3 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+
+.SH NAME
+.P
+sam_mark_failed \- Mark process failed
+
+.SH SYNOPSIS
+.P
+\fB#include <corosync/sam.h>\fR
+
+.P
+\fBcs_error_t sam_mark_failed (void);\fR
+
+.SH DESCRIPTION
+.P
+The \fBsam_mark_failed\fR function is used with SAM_RECOVERY_POLICY_CONFDB, mostly
+together with SAM_RECOVERY_POLICY_RESTART, to mark the process as failed. A process marked
+failed is killed without being sent the warn signal, and the control process will exit
+as with the SAM_RECOVERY_POLICY_QUIT policy. The confdb key \fIstate\fR will be set to failed so
+the corosync watchdog can take the required action.
+
+.SH RETURN VALUE
+.P
+This call returns CS_OK if successful, otherwise an error is returned.
+
+.SH ERRORS
+.TP
+CS_ERR_BAD_HANDLE
+library was not initialized by calling \fBsam_initialize(3)\fR or was already finalized
+
+.TP
+CS_ERR_INVALID_PARAM
+recovery policy doesn't have the SAM_RECOVERY_POLICY_CONFDB flag set
+
+.TP
+CS_ERR_LIBRARY
+an internal error occurred (communication with parent process)
+
+.SH "SEE ALSO"
+.BR sam_initialize (3)
index d521a8a68b0bd0179d2ecbb1a0b908ec844b1564..a5807cffd51e78caf3d334e66e68583c57e8a800 100644 (file)
@@ -32,7 +32,7 @@
 .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" * THE POSSIBILITY OF SUCH DAMAGE.
 .\" */
-.TH "SAM_OVERVIEW" 8 "30/04/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+.TH "SAM_OVERVIEW" 8 "21/05/2010" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
 
 .SH NAME
 .P
@@ -133,6 +133,38 @@ One can in such case use files, databases, ... or much simpler in memory solutio
 presented by \fBsam_data_store(3)\fR, \fBsam_data_restore(3)\fR and \fBsam_data_getsize(3)\fR
 functions.
 
+.SH Confdb integration
+.P
+SAM has a policy flag used for confdb system integration (\fISAM_RECOVERY_POLICY_CONFDB\fR).
+If a process is registered with this flag, a new confdb object PROCESS_NAME:PID is created with the following
+keys:
+.RS
+.IP \(bu 3
+\fIrecovery\fR - will be quit or restart depending on policy
+.IP \(bu 3
+\fIhc_period\fR - period of health checking in milliseconds
+.IP \(bu 3
+\fIhc_last\fR - last known GMT time in milliseconds when health check was received
+.IP \(bu 3
+\fIstate\fR - state of process (can be one of registered, started, failed, waiting for quorum)
+.RE
+
+.P
+The object is automatically deleted if the process exits while health checking is stopped.
+
+.P
+Confdb integration with the corosync watchdog can be used in an implicit or an explicit way.
+
+.P
+The implicit way is achieved by setting the recovery policy to QUIT and letting the process exit while health checking is started.
+If this happens, the object is not deleted and the corosync watchdog will take the required action.
+
+.P
+The explicit way is useful in situations where the developer can deal with some non-fatal failure of the application.
+This mode is achieved by setting the policy to RESTART and using SAM the same way as without confdb integration.
+If a real failure is needed (like too many restarts overall, per second, ...), it's possible to use \fBsam_mark_failed(3)\fR
+and let the corosync watchdog take the required action.
+
 .SH BUGS
 .SH "SEE ALSO"
 .BR sam_initialize (3),
@@ -140,6 +172,7 @@ functions.
 .BR sam_data_restore (3),
 .BR sam_data_store (3),
 .BR sam_finalize (3),
+.BR sam_mark_failed (3),
 .BR sam_start (3),
 .BR sam_stop (3),
 .BR sam_register (3),
index 95d8e12952ccae2b434cc018b1e95abc12a14b42..1972d9ee760721f77129d2ceed18a44d5241decd 100644 (file)
@@ -38,6 +38,7 @@
 
 #include <config.h>
 
+#include <limits.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -50,6 +51,8 @@
 #include <string.h>
 #include <sys/wait.h>
 
+extern const char *__progname;
+
 static int test2_sig_delivered = 0;
 static int test5_hc_cb_count = 0;
 static int test6_sig_delivered = 0;
@@ -864,9 +867,551 @@ static int test7 (void) {
        return (2);
 }
 
+/*
+ * Test confdb integration + quit policy
+ */
+static int test8 (pid_t pid, pid_t old_pid, int test_n) {
+       confdb_handle_t cdb_handle;
+       cs_error_t err;
+       hdb_handle_t res_handle, proc_handle, pid_handle;
+       size_t value_len;
+       uint64_t tstamp1, tstamp2;
+       char key_value[256];
+       unsigned int instance_id;
+       char tmp_obj[PATH_MAX];
+       confdb_value_types_t cdbtype;
+
+       err = confdb_initialize (&cdb_handle, NULL);
+       if (err != CS_OK) {
+               printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
+               return (1);
+       }
+
+       printf ("%s test %d\n", __FUNCTION__, test_n);
+
+       if (test_n == 2) {
+               /*
+                * Object should not exist
+                */
+               printf ("%s Testing if object exists (it shouldn't)\n", __FUNCTION__);
+
+               err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"resources\": %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find_start(cdb_handle, res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"process\": %d.\n", err);
+                       return (2);
+               }
+
+               if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) {
+                       snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid);
+               }
+
+               err = confdb_object_find_start(cdb_handle, proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle);
+               if (err == CS_OK) {
+                       printf ("Could find object \"%s\": %d.\n", tmp_obj, err);
+                       return (2);
+               }
+       }
+
+       if (test_n == 1 || test_n == 2) {
+               printf ("%s: initialize\n", __FUNCTION__);
+               err = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT | SAM_RECOVERY_POLICY_CONFDB);
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
+                       return 2;
+               }
+
+               printf ("%s: register\n", __FUNCTION__);
+               err = sam_register (&instance_id);
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't register. Error %d\n", err);
+                       return 2;
+               }
+
+               err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"resources\": %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find_start(cdb_handle, res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"process\": %d.\n", err);
+                       return (2);
+               }
+
+               if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) {
+                       snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid);
+               }
+
+               err = confdb_object_find_start(cdb_handle, proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+                       return (2);
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"recovery\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("quit") || memcmp (key_value, "quit", value_len) != 0) {
+                       printf ("Recovery key \"%s\" is not \"watchdog\".\n", key_value);
+                       return (2);
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) {
+                       printf ("State key is not \"registered\".\n");
+                       return (2);
+               }
+
+               printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
+               err = sam_start ();
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't start hc. Error %d\n", err);
+                       return 2;
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) {
+                       printf ("State key is not \"started\".\n");
+                       return (2);
+               }
+
+               printf ("%s iid %d: stop\n", __FUNCTION__, instance_id);
+               err = sam_stop ();
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't stop hc. Error %d\n", err);
+                       return 2;
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) {
+                       printf ("State key is not \"registered\".\n");
+                       return (2);
+               }
+
+               printf ("%s iid %d: sleeping 5\n", __FUNCTION__, instance_id);
+               sleep (5);
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) {
+                       printf ("State key is not \"registered\".\n");
+                       return (2);
+               }
+
+               printf ("%s iid %d: start 2\n", __FUNCTION__, instance_id);
+               err = sam_start ();
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't start hc. Error %d\n", err);
+                       return 2;
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) {
+                       printf ("State key is not \"started\".\n");
+                       return (2);
+               }
+
+               if (test_n == 2) {
+                       printf ("%s iid %d: sleeping 5. Should be killed\n", __FUNCTION__, instance_id);
+                       sleep (5);
+
+                       return (2);
+               } else {
+                       printf ("%s iid %d: Test HC\n", __FUNCTION__, instance_id);
+                       err = sam_hc_send ();
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't send hc. Error %d\n", err);
+                               return 2;
+                       }
+                       err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp1, &value_len, &cdbtype);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"state\" key: %d.\n", err);
+                               return (2);
+                       }
+                       printf ("%s iid %d: Sleep 1\n", __FUNCTION__, instance_id);
+                       sleep (1);
+                       err = sam_hc_send ();
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't send hc. Error %d\n", err);
+                               return 2;
+                       }
+                       sleep (1);
+                       err = confdb_key_get_typed (cdb_handle, pid_handle, "hc_last", &tstamp2, &value_len, &cdbtype);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"state\" key: %d.\n", err);
+                               return (2);
+                       }
+                       if (tstamp2 - tstamp1 < 500 || tstamp2 - tstamp1 > 2000) {
+                               printf ("Difference %d is not within <500, 2000> interval.\n", (int)(tstamp2 - tstamp1));
+                               return (2);
+                       }
+
+                       printf ("%s iid %d: stop 2\n", __FUNCTION__, instance_id);
+                       err = sam_stop ();
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't stop hc. Error %d\n", err);
+                               return 2;
+                       }
+
+                       err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"state\" key: %d.\n", err);
+                               return (2);
+                       }
+
+                       if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) {
+                               printf ("State key is not \"registered\".\n");
+                               return (2);
+                       }
+
+                       printf ("%s iid %d: exiting\n", __FUNCTION__, instance_id);
+                       return (0);
+               }
+       }
+
+       if (test_n == 3) {
+               printf ("%s Testing if status is failed\n", __FUNCTION__);
+
+               /*
+                * Previous should be FAILED
+                */
+               err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"resources\": %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find_start(cdb_handle, res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"process\": %d.\n", err);
+                       return (2);
+               }
+
+               if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) {
+                       snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid);
+               }
+
+               err = confdb_object_find_start(cdb_handle, proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+                       return (2);
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) {
+                       printf ("State key is not \"failed\".\n");
+                       return (2);
+               }
+
+               return (0);
+       }
+
+       return (2);
+}
+
+/*
+ * Test confdb integration + restart policy
+ */
+static int test9 (pid_t pid, pid_t old_pid, int test_n) {
+       confdb_handle_t cdb_handle;
+       cs_error_t err;
+       hdb_handle_t res_handle, proc_handle, pid_handle;
+       size_t value_len;
+       char key_value[256];
+       unsigned int instance_id;
+       char tmp_obj[PATH_MAX];
+
+       err = confdb_initialize (&cdb_handle, NULL);
+       if (err != CS_OK) {
+               printf ("Could not initialize Cluster Configuration Database API instance error %d. Test skipped\n", err);
+               return (1);
+       }
+
+       printf ("%s test %d\n", __FUNCTION__, test_n);
+
+       if (test_n == 1) {
+               printf ("%s: initialize\n", __FUNCTION__);
+               err = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART | SAM_RECOVERY_POLICY_CONFDB);
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't initialize SAM API. Error %d\n", err);
+                       return 2;
+               }
+
+               printf ("%s: register\n", __FUNCTION__);
+               err = sam_register (&instance_id);
+               if (err != CS_OK) {
+                       fprintf (stderr, "Can't register. Error %d\n", err);
+                       return 2;
+               }
+               printf ("%s: iid %d\n", __FUNCTION__, instance_id);
+
+               if (instance_id < 3) {
+                       err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE);
+                       if (err != CS_OK) {
+                               printf ("Could not start object_find %d.\n", err);
+                               return (2);
+                       }
+
+                       err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"),
+                           &res_handle);
+                       if (err != CS_OK) {
+                               printf ("Could not object_find \"resources\": %d.\n", err);
+                               return (2);
+                       }
+
+                       err = confdb_object_find_start(cdb_handle, res_handle);
+                       if (err != CS_OK) {
+                               printf ("Could not start object_find %d.\n", err);
+                               return (2);
+                       }
+
+                       err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle);
+                       if (err != CS_OK) {
+                               printf ("Could not object_find \"process\": %d.\n", err);
+                               return (2);
+                       }
+
+                       if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) {
+                               snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid);
+                       }
+
+                       err = confdb_object_find_start(cdb_handle, proc_handle);
+                       if (err != CS_OK) {
+                               printf ("Could not start object_find %d.\n", err);
+                               return (2);
+                       }
+
+                       err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle);
+                       if (err != CS_OK) {
+                               printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+                               return (2);
+                       }
+
+                       err = confdb_key_get(cdb_handle, pid_handle, "recovery", strlen("recovery"), key_value, &value_len);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"recovery\" key: %d.\n", err);
+                               return (2);
+                       }
+
+                       if (value_len != strlen ("restart") || memcmp (key_value, "restart", value_len) != 0) {
+                               printf ("Recovery key \"%s\" is not \"restart\".\n", key_value);
+                               return (2);
+                       }
+
+                       err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"state\" key: %d.\n", err);
+                               return (2);
+                       }
+
+                       if (value_len != strlen ("registered") || memcmp (key_value, "registered", value_len) != 0) {
+                               printf ("State key is not \"registered\".\n");
+                               return (2);
+                       }
+
+                       printf ("%s iid %d: start\n", __FUNCTION__, instance_id);
+                       err = sam_start ();
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't start hc. Error %d\n", err);
+                               return 2;
+                       }
+
+                       err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+                       if (err != CS_OK) {
+                               printf ("Could not get \"state\" key: %d.\n", err);
+                               return (2);
+                       }
+
+                       if (value_len != strlen ("started") || memcmp (key_value, "started", value_len) != 0) {
+                               printf ("State key is not \"started\".\n");
+                               return (2);
+                       }
+
+                       printf ("%s iid %d: waiting for kill\n", __FUNCTION__, instance_id);
+                       sleep (10);
+
+                       return (2);
+               }
+
+               if (instance_id == 3) {
+                       printf ("%s iid %d: mark failed\n", __FUNCTION__, instance_id);
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't start hc. Error %d\n", err);
+                               return 2;
+                       }
+                       err = sam_mark_failed ();
+                       if (err != CS_OK) {
+                               fprintf (stderr, "Can't mark failed. Error %d\n", err);
+                               return 2;
+                       }
+
+                       sleep (10);
+
+                       return (2);
+               }
+
+               return (2);
+       }
+
+       if (test_n == 2) {
+               printf ("%s Testing if status is failed\n", __FUNCTION__);
+
+               /*
+                * Previous should be FAILED
+                */
+               err = confdb_object_find_start(cdb_handle, OBJECT_PARENT_HANDLE);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, OBJECT_PARENT_HANDLE, "resources", strlen("resources"), &res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"resources\": %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find_start(cdb_handle, res_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, res_handle, "process", strlen("process"), &proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"process\": %d.\n", err);
+                       return (2);
+               }
+
+               if (snprintf (tmp_obj, sizeof (tmp_obj), "%s:%d", __progname, pid) >= sizeof (tmp_obj)) {
+                       snprintf (tmp_obj, sizeof (tmp_obj), "%d", pid);
+               }
+
+               err = confdb_object_find_start(cdb_handle, proc_handle);
+               if (err != CS_OK) {
+                       printf ("Could not start object_find %d.\n", err);
+                       return (2);
+               }
+
+               err = confdb_object_find(cdb_handle, proc_handle, tmp_obj, strlen(tmp_obj), &pid_handle);
+               if (err != CS_OK) {
+                       printf ("Could not object_find \"%s\": %d.\n", tmp_obj, err);
+                       return (2);
+               }
+
+               err = confdb_key_get(cdb_handle, pid_handle, "state", strlen("state"), key_value, &value_len);
+               if (err != CS_OK) {
+                       printf ("Could not get \"state\" key: %d.\n", err);
+                       return (2);
+               }
+
+               if (value_len != strlen ("failed") || memcmp (key_value, "failed", value_len) != 0) {
+                       printf ("State key is not \"failed\".\n");
+                       return (2);
+               }
+
+               return (0);
+       }
+
+       return (2);
+}
+
 int main(int argc, char *argv[])
 {
-       pid_t pid;
+       pid_t pid, old_pid;
        int err;
        int stat;
        int all_passed = 1;
@@ -990,7 +1535,7 @@ int main(int argc, char *argv[])
 
        if (pid == -1) {
                fprintf (stderr, "Can't fork\n");
-               return 1;
+               return 2;
        }
 
        if (pid == 0) {
@@ -1001,6 +1546,100 @@ int main(int argc, char *argv[])
 
        waitpid (pid, &stat, 0);
        fprintf (stderr, "test7 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
+       if (WEXITSTATUS (stat) == 1)
+               no_skipped++;
+       if (WEXITSTATUS (stat) > 1)
+               all_passed = 0;
+
+       pid = fork ();
+
+       if (pid == -1) {
+               fprintf (stderr, "Can't fork\n");
+               return 2;
+       }
+
+       if (pid == 0) {
+               err = test8 (getpid (), 0, 1);
+               sam_finalize ();
+               return (err);
+       }
+
+       waitpid (pid, &stat, 0);
+       old_pid = pid;
+
+       if (WEXITSTATUS (stat) == 0) {
+               pid = fork ();
+
+               if (pid == -1) {
+                       fprintf (stderr, "Can't fork\n");
+                       return 2;
+               }
+
+               if (pid == 0) {
+                       err = test8 (getpid (), old_pid, 2);
+                       sam_finalize ();
+                       return (err);
+               }
+
+               waitpid (pid, &stat, 0);
+               old_pid = pid;
+
+               if (WEXITSTATUS (stat) == 0) {
+                       pid = fork ();
+
+                       if (pid == -1) {
+                               fprintf (stderr, "Can't fork\n");
+                               return 2;
+                       }
+
+                       if (pid == 0) {
+                               err = test8 (old_pid, 0, 3);
+                               sam_finalize ();
+                               return (err);
+                       }
+
+                       waitpid (pid, &stat, 0);
+               }
+       }
+
+       if (WEXITSTATUS (stat) == 1)
+               no_skipped++;
+       if (WEXITSTATUS (stat) > 1)
+               all_passed = 0;
+
+       pid = fork ();
+
+       if (pid == -1) {
+               fprintf (stderr, "Can't fork\n");
+               return 2;
+       }
+
+       if (pid == 0) {
+               err = test9 (getpid (), 0, 1);
+               sam_finalize ();
+               return (err);
+       }
+
+       waitpid (pid, &stat, 0);
+       old_pid = pid;
+
+       if (WEXITSTATUS (stat) == 0) {
+               pid = fork ();
+
+               if (pid == -1) {
+                       fprintf (stderr, "Can't fork\n");
+                       return 2;
+               }
+
+               if (pid == 0) {
+                       err = test9 (old_pid, 0, 2);
+                       sam_finalize ();
+                       return (err);
+               }
+
+               waitpid (pid, &stat, 0);
+       }
+       fprintf (stderr, "test9 %s\n", (WEXITSTATUS (stat) == 0 ? "passed" : (WEXITSTATUS (stat) == 1 ? "skipped" : "failed")));
        if (WEXITSTATUS (stat) == 1)
                no_skipped++;