summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch')
-rw-r--r--sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch110
1 files changed, 110 insertions, 0 deletions
diff --git a/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch b/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch
new file mode 100644
index 0000000..87f5d45
--- /dev/null
+++ b/sys-cluster/heartbeat/files/heartbeat-2.1.3-dopd-47f60bebe7b2.patch
@@ -0,0 +1,110 @@
+
+# HG changeset patch
+# User Rasto Levrinc <rasto@linbit.com>
+# Date 1206539836 -3600
+# Node ID 47f60bebe7b25abd88ea7b5488e66dfe187416ae
+# Parent 17c0cf487322287d0689a036c32f21b900ce5a80
+dopd: fix basic failover; fix hb message corruption by fprintf(stderr)
+
+check_drbd_peer() used to return FALSE for "node name not in node list",
+so drbd-peer-outdater returned "invalid nodename".
+Then the semantic changed, and check_drbd_peer learned about "dead" peers
+and returned FALSE for them as well. Which made basic failover impossible :(
+
+The return code was now changed to "peer unreachable" for a dead peer.
+And even for nodes which really are not in the host list (and thus could be
+classified as invalide), because, after all, thats what they are.
+unreachable.
+
+Node name comparison needs to be case insensitive; fixed.
+
+During testing with 15 concurrent drbd resources several dopd crashes have been
+observed, which after some debugging turned out to be simply a wrong assumption
+about the global availability of stderr: some fprintf(stderr, "debug message")
+had accidentally used the heartbeat communication channel file descriptor,
+which seriously confused the comm layer.
+All those fprintfs have now been changed to use cl_log.
+
+--- a/contrib/drbd-outdate-peer/dopd.c Mon Mar 24 16:14:12 2008 +0100
++++ b/contrib/drbd-outdate-peer/dopd.c Wed Mar 26 14:57:16 2008 +0100
+@@ -202,14 +202,17 @@ msg_outdate_rc(struct ha_msg *msg_in, vo
+ }
+
+ /* check_drbd_peer()
+- * walk the nodes and return TRUE if peer is not this node and it exists.
++ * walk the nodes and return
++ * FALSE if peer is not found, not a "normal" node, or "dead"
++ * (no point in trying to reach those nodes).
++ * TRUE if peer is found to be alive and "normal".
+ */
+ gboolean
+ check_drbd_peer(const char *drbd_peer)
+ {
+ const char *node;
+ gboolean found = FALSE;
+- if (!strcmp(drbd_peer, node_name)) {
++ if (!strcasecmp(drbd_peer, node_name)) {
+ cl_log(LOG_WARNING, "drbd peer node %s is me!\n", drbd_peer);
+ return FALSE;
+ }
+@@ -306,9 +309,9 @@ outdater_callback(IPC_Channel *client, g
+ } else
+ pthread_mutex_unlock(&conn_mutex);
+ } else {
+- /* wrong peer was specified,
+- send return code 20 to the client */
+- send_to_client(curr_client, "20");
++ /* peer "dead" or not in node list.
++ * return "peer could not be reached" */
++ send_to_client(curr_client, "5");
+ }
+
+ ha_msg_del(msg);
+--- a/contrib/drbd-outdate-peer/drbd-peer-outdater.c Mon Mar 24 16:14:12 2008 +0100
++++ b/contrib/drbd-outdate-peer/drbd-peer-outdater.c Wed Mar 26 14:57:16 2008 +0100
+@@ -76,7 +76,7 @@ outdate_callback(IPC_Channel * server, g
+
+ msg = msgfromIPC_noauth(server);
+ if (!msg) {
+- fprintf(stderr, "no message from server or other "
++ cl_log(LOG_WARNING, "no message from server or other "
+ "instance is running\n");
+ if (client->mainloop != NULL &&
+ g_main_is_running(client->mainloop))
+@@ -92,7 +92,7 @@ outdate_callback(IPC_Channel * server, g
+ errno = 0;
+ rc = strtol(rc_string, &ep, 10);
+ if (errno != 0 || *ep != EOS) {
+- fprintf(stderr, "unknown message: %s from server", rc_string);
++ cl_log(LOG_WARNING, "unknown message: %s from server", rc_string);
+ client->rc = 20; /* "officially undefined", unspecified error */
+ ha_msg_del(msg);
+ if (client->mainloop != NULL &&
+@@ -124,7 +124,7 @@ outdater_timeout_dispatch(gpointer user_
+ outdater_timeout_dispatch(gpointer user_data)
+ {
+ dop_client_t *client = (dop_client_t *)user_data;
+- fprintf(stderr, "error: could not connect to dopd after %i seconds"
++ cl_log(LOG_WARNING, "error: could not connect to dopd after %i seconds"
+ ": timeout reached\n", client->timeout);
+ if (client->mainloop != NULL && g_main_is_running(client->mainloop))
+ g_main_quit(client->mainloop);
+@@ -255,7 +255,7 @@ main(int argc, char ** argv)
+ (gpointer)new_client, &ipc_server);
+
+ if (ipc_server == NULL) {
+- fprintf(stderr, "Could not connect to "T_OUTDATER" channel\n");
++ cl_log(LOG_WARNING, "Could not connect to "T_OUTDATER" channel\n");
+ dop_exit(new_client); /* unreachable */
+ }
+
+@@ -267,7 +267,7 @@ main(int argc, char ** argv)
+ ha_msg_add(update, F_OUTDATER_RES, drbd_resource);
+
+ if (msg2ipcchan(update, ipc_server) != HA_OK) {
+- fprintf(stderr, "Could not send message\n");
++ cl_log(LOG_WARNING, "Could not send message\n");
+ dop_exit(new_client);
+ }
+
+