diff options
Diffstat (limited to 'sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch')
-rw-r--r-- | sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch b/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch new file mode 100644 index 0000000..87f5d45 --- /dev/null +++ b/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch @@ -0,0 +1,110 @@ + +# HG changeset patch +# User Rasto Levrinc <rasto@linbit.com> +# Date 1206539836 -3600 +# Node ID 47f60bebe7b25abd88ea7b5488e66dfe187416ae +# Parent 17c0cf487322287d0689a036c32f21b900ce5a80 +dopd: fix basic failover; fix hb message corruption by fprintf(stderr) + +check_drbd_peer() used to return FALSE for "node name not in node list", +so drbd-peer-outdater returned "invalid nodename". +Then the semantic changed, and check_drbd_peer learned about "dead" peers +and returned FALSE for them as well. Which made basic failover impossible :( + +The return code was now changed to "peer unreachable" for a dead peer. +And even for nodes which really are not in the host list (and thus could be +classified as invalide), because, after all, thats what they are. +unreachable. + +Node name comparison needs to be case insensitive; fixed. + +During testing with 15 concurrent drbd resources several dopd crashes have been +observed, which after some debugging turned out to be simply a wrong assumption +about the global availability of stderr: some fprintf(stderr, "debug message") +had accidentally used the heartbeat communication channel file descriptor, +which seriously confused the comm layer. +All those fprintfs have now been changed to use cl_log. + +--- a/contrib/drbd-outdate-peer/dopd.c Mon Mar 24 16:14:12 2008 +0100 ++++ b/contrib/drbd-outdate-peer/dopd.c Wed Mar 26 14:57:16 2008 +0100 +@@ -202,14 +202,17 @@ msg_outdate_rc(struct ha_msg *msg_in, vo + } + + /* check_drbd_peer() +- * walk the nodes and return TRUE if peer is not this node and it exists. ++ * walk the nodes and return ++ * FALSE if peer is not found, not a "normal" node, or "dead" ++ * (no point in trying to reach those nodes). ++ * TRUE if peer is found to be alive and "normal". + */ + gboolean + check_drbd_peer(const char *drbd_peer) + { + const char *node; + gboolean found = FALSE; +- if (!strcmp(drbd_peer, node_name)) { ++ if (!strcasecmp(drbd_peer, node_name)) { + cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer); + return FALSE; + } +@@ -306,9 +309,9 @@ outdater_callback(IPC_Channel *client, g + } else + pthread_mutex_unlock(&conn_mutex); + } else { +- /* wrong peer was specified, +- send return code 20 to the client */ +- send_to_client(curr_client, "20"); ++ /* peer "dead" or not in node list. ++ * return "peer could not be reached" */ ++ send_to_client(curr_client, "5"); + } + + ha_msg_del(msg); +--- a/contrib/drbd-outdate-peer/drbd-peer-outdater.c Mon Mar 24 16:14:12 2008 +0100 ++++ b/contrib/drbd-outdate-peer/drbd-peer-outdater.c Wed Mar 26 14:57:16 2008 +0100 +@@ -76,7 +76,7 @@ outdate_callback(IPC_Channel * server, g + + msg = msgfromIPC_noauth(server); + if (!msg) { +- fprintf(stderr, "no message from server or other " ++ cl_log(LOG_WARNING, "no message from server or other " + "instance is running\n"); + if (client->mainloop != NULL && + g_main_is_running(client->mainloop)) +@@ -92,7 +92,7 @@ outdate_callback(IPC_Channel * server, g + errno = 0; + rc = strtol(rc_string, &ep, 10); + if (errno != 0 || *ep != EOS) { +- fprintf(stderr, "unknown message: %s from server", rc_string); ++ cl_log(LOG_WARNING, "unknown message: %s from server", rc_string); + client->rc = 20; /* "officially undefined", unspecified error */ + ha_msg_del(msg); + if (client->mainloop != NULL && +@@ -124,7 +124,7 @@ outdater_timeout_dispatch(gpointer user_ + outdater_timeout_dispatch(gpointer user_data) + { + dop_client_t *client = (dop_client_t *)user_data; +- fprintf(stderr, "error: could not connect to dopd after %i seconds" ++ cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds" + ": timeout reached\n", client->timeout); + if (client->mainloop != NULL && g_main_is_running(client->mainloop)) + g_main_quit(client->mainloop); +@@ -255,7 +255,7 @@ main(int argc, char ** argv) + (gpointer)new_client, &ipc_server); + + if (ipc_server == NULL) { +- fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n"); ++ cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n"); + dop_exit(new_client); /* unreachable */ + } + +@@ -267,7 +267,7 @@ main(int argc, char ** argv) + ha_msg_add(update, F_OUTDATER_RES, drbd_resource); + + if (msg2ipcchan(update, ipc_server) != HA_OK) { +- fprintf(stderr, "Could not send message\n"); ++ cl_log(LOG_WARNING, "Could not send message\n"); + dop_exit(new_client); + } + + |