From 4a2e328fc08a768052afd5da296a676f93394aef Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 26 Aug 2016 16:29:47 +1000 Subject: [PATCH 1/6] ctdb-daemon: Schedule running of callback if there are no event scripts The callback should never be called before an immediate return. The callback might reply to a control and the caller of ctdb_event_script_callback_v() may not have assigned/stolen the pointer to control structure into the private data. Therefore, calling the callback can dereference an uninitialised pointer to the control structure when attempting to reply. ctdb_event_script_callback_v() must succeed when there are no event scripts. On success the caller will mark the call as asynchronous and expect the callback to be called. Given that it can't be called before return then it needs to be scheduled. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 9076c44f35bf309b9e183bae98829f7154b93f33) --- ctdb/server/eventscript.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index bd5bc0d..a5f2680 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -699,6 +699,62 @@ static int remove_callback(struct event_script_callback *callback) return 0; } +struct schedule_callback_state { + struct ctdb_context *ctdb; + void (*callback)(struct ctdb_context *, int, void *); + void *private_data; + int status; + struct tevent_immediate *im; +}; + +static void schedule_callback_handler(struct tevent_context *ctx, + struct tevent_immediate *im, + void *private_data) +{ + struct schedule_callback_state *state = + talloc_get_type_abort(private_data, + struct schedule_callback_state); + + if (state->callback != NULL) { + state->callback(state->ctdb, state->status, + state->private_data); + } + talloc_free(state); +} + +static int +schedule_callback_immediate(struct ctdb_context *ctdb, + void (*callback)(struct ctdb_context *, + int, void *), + void *private_data, + int status) +{ + struct schedule_callback_state *state; + struct tevent_immediate *im; + + state = talloc_zero(ctdb, struct schedule_callback_state); + if (state == NULL) { + DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); + return -1; + } + im = tevent_create_immediate(state); + if (im == NULL) { + DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); + talloc_free(state); + return -1; + } + + state->ctdb = ctdb; + state->callback = callback; + state->private_data = private_data; + state->status = status; + state->im = im; + + tevent_schedule_immediate(im, ctdb->ev, + schedule_callback_handler, state); + return 0; +} + /* run the event script in the background, calling the callback when finished @@ -825,8 +881,14 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, /* Nothing to do? */ if (state->scripts->num_scripts == 0) { - callback(ctdb, 0, private_data); + int ret = schedule_callback_immediate(ctdb, callback, + private_data, 0); talloc_free(state); + if (ret != 0) { + DEBUG(DEBUG_ERR, + ("Unable to schedule callback for 0 scripts\n")); + return 1; + } return 0; } -- 2.9.3 From 4f151ea417f3aa45279d36aed97b2b12ebfcbd18 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 26 Aug 2016 16:38:56 +1000 Subject: [PATCH 2/6] ctdb-daemon: Handle failure immediately, do housekeeping later The callback should never be called before an immediate return. The callback might reply to a control and the caller of ctdb_event_script_callback_v() may not have assigned/stolen the pointer to control structure into the private data. Therefore, calling the callback can dereference an uninitialised pointer to the control structure when attempting to reply. An event script isn't being run until the child has been forked. So update relevant state and set the destructor after this. If the child can't be forked then free the state and return with an error. The callback will not be called and the caller will process the error correctly. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 582518c7e89b279e34147bdb0b04b73056fac048) --- ctdb/server/eventscript.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index a5f2680..f555625 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -871,14 +871,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, state->current = 0; state->child = 0; - if (call == CTDB_EVENT_MONITOR) { - ctdb->current_monitor = state; - } - - talloc_set_destructor(state, event_script_destructor); - - ctdb->active_events++; - /* Nothing to do? */ if (state->scripts->num_scripts == 0) { int ret = schedule_callback_immediate(ctdb, callback, @@ -894,11 +886,18 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb, state->scripts->scripts[0].status = fork_child_for_script(ctdb, state); if (state->scripts->scripts[0].status != 0) { - /* Callback is called from destructor, with fail result. */ talloc_free(state); - return 0; + return -1; } + if (call == CTDB_EVENT_MONITOR) { + ctdb->current_monitor = state; + } + + ctdb->active_events++; + + talloc_set_destructor(state, event_script_destructor); + if (!timeval_is_zero(&state->timeout)) { tevent_add_timer(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, -- 2.9.3 From c04011c9b0f1e6d6e556588d35c44d2b457b83b9 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 31 Aug 2016 08:29:13 +1000 Subject: [PATCH 3/6] ctdb-daemon: Don't steal control structure before synchronous reply If *async_reply isn't set then the calling code will reply to the control and free the control structure. In some places the control structure pointer is stolen onto state before a synchronous exit due to an error condition. The error handling then frees state and returns an error. The calling code will access-after-free when trying to reply to the control. To make this easier to understand, the convention is that any (immediate) error results in a synchronous reply to the control via an error return code AND *async_reply not being set. In this case the control structure pointer should never be stolen onto state. State is never used for a synchronous reply, it is only ever used by a callback. Also initialise state->c to NULL so that any premature call to a callback (e.g. in an immediate error path) is more obvious. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 9d975b860d52030a702723c70791c6a2829107c0) --- ctdb/server/ctdb_takeover.c | 11 +++++++---- ctdb/server/eventscript.c | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index ede635e..d10ffef 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -522,7 +522,7 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, state = talloc(vnn, struct ctdb_do_takeip_state); CTDB_NO_MEMORY(ctdb, state); - state->c = talloc_steal(ctdb, c); + state->c = NULL; state->vnn = vnn; vnn->update_in_flight = true; @@ -551,6 +551,7 @@ static int32_t ctdb_do_takeip(struct ctdb_context *ctdb, return -1; } + state->c = talloc_steal(ctdb, c); return 0; } @@ -659,7 +660,7 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, state = talloc(vnn, struct ctdb_do_updateip_state); CTDB_NO_MEMORY(ctdb, state); - state->c = talloc_steal(ctdb, c); + state->c = NULL; state->old = old; state->vnn = vnn; @@ -691,6 +692,7 @@ static int32_t ctdb_do_updateip(struct ctdb_context *ctdb, return -1; } + state->c = talloc_steal(ctdb, c); return 0; } @@ -1003,8 +1005,8 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, return -1; } - state->c = talloc_steal(state, c); - state->addr = talloc(state, ctdb_sock_addr); + state->c = NULL; + state->addr = talloc(state, ctdb_sock_addr); if (state->addr == NULL) { ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); @@ -1037,6 +1039,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, /* tell the control that we will be reply asynchronously */ *async_reply = true; + state->c = talloc_steal(state, c); return 0; } diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index f555625..86d37d9 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -1076,7 +1076,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, state = talloc(ctdb->event_script_ctx, struct eventscript_callback_state); CTDB_NO_MEMORY(ctdb, state); - state->c = talloc_steal(state, c); + state->c = NULL; DEBUG(DEBUG_NOTICE,("Running eventscripts with arguments %s\n", indata.dptr)); @@ -1092,7 +1092,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, /* tell ctdb_control.c that we will be replying asynchronously */ *async_reply = true; - + state->c = talloc_steal(state, c); return 0; } -- 2.9.3 From eea882de3876ec82464043fac5273b2a29e52d3f Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 29 Aug 2016 16:05:33 +1000 Subject: [PATCH 4/6] ctdb-tests: Factor out function config_from_environment() BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a2bbf71ad67e5c3a6287cf62f54ff13389bf2143) --- ctdb/tests/simple/scripts/local_daemons.bash | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ctdb/tests/simple/scripts/local_daemons.bash b/ctdb/tests/simple/scripts/local_daemons.bash index ecb64f9..022bbf8 100644 --- a/ctdb/tests/simple/scripts/local_daemons.bash +++ b/ctdb/tests/simple/scripts/local_daemons.bash @@ -22,6 +22,15 @@ export CTDB_NODES="${TEST_VAR_DIR}/nodes.txt" ####################################### +config_from_environment () +{ + # Override from the environment. This would be easier if env was + # guaranteed to quote its output so it could be reused. + env | + grep '^CTDB_' | + sed -e 's@=\([^"]\)@="\1@' -e 's@[^"]$@&"@' -e 's@="$@&"@' +} + setup_ctdb () { mkdir -p "${TEST_VAR_DIR}/test.db/persistent" @@ -99,11 +108,9 @@ CTDB_SOCKET="${TEST_VAR_DIR}/sock.$pnn" CTDB_NOSETSCHED=yes EOF - # Override from the environment. This would be easier if env was - # guaranteed to quote its output so it could be reused. - env | - grep '^CTDB_' | - sed -e 's@=\([^"]\)@="\1@' -e 's@[^"]$@&"@' -e 's@="$@&"@' >>"$conf" + # Append any configuration variables set in environment to + # configuration file so they affect CTDB after each restart. + config_from_environment >>"$conf" done } -- 2.9.3 From a1bcc0a8a9ceadf13fd395416758f5eea06bd597 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 29 Aug 2016 16:49:07 +1000 Subject: [PATCH 5/6] ctdb-tests: Conditionally use temporary config file for local daemons If there's configuration in the environment then daemons_start() should use a temporary configuration file with that appended. This means that global overrides don't (harmlessly) build up in the configuration file during each test and individual tests can override configuration when calling daemons_start() directly. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 7885b9652fcb3b30361a8b2e0b4688c261b55065) --- ctdb/tests/simple/scripts/local_daemons.bash | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ctdb/tests/simple/scripts/local_daemons.bash b/ctdb/tests/simple/scripts/local_daemons.bash index 022bbf8..fb1e7e1 100644 --- a/ctdb/tests/simple/scripts/local_daemons.bash +++ b/ctdb/tests/simple/scripts/local_daemons.bash @@ -123,9 +123,25 @@ daemons_start () local pidfile="${TEST_VAR_DIR}/ctdbd.${pnn}.pid" local conf="${TEST_VAR_DIR}/ctdbd.${pnn}.conf" + # If there is any CTDB configuration in the environment then + # append it to the regular configuration in a temporary + # configuration file and use it just this once. + local tmp_conf="" + local env_conf=$(config_from_environment) + if [ -n "$env_conf" ] ; then + tmp_conf=$(mktemp --tmpdir="$TEST_VAR_DIR") + cat "$conf" >"$tmp_conf" + echo "$env_conf" >>"$tmp_conf" + conf="$tmp_conf" + fi + CTDBD="${VALGRIND} ctdbd --sloppy-start --nopublicipcheck" \ CTDBD_CONF="$conf" \ ctdbd_wrapper "$pidfile" start + + if [ -n "$tmp_conf" ] ; then + rm -f "$tmp_conf" + fi done } -- 2.9.3 From ea7d5eb1226f359e8e50e27932ab8fb4734ed192 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 29 Aug 2016 16:52:45 +1000 Subject: [PATCH 6/6] ctdb-tests: Add a test to ensure that CTDB works with no eventscripts This only tests something on local daemons, since the configuration can't be easily manipulated on a real cluster. BUG: https://bugzilla.samba.org/show_bug.cgi?id=12180 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Thu Sep 1 17:15:06 CEST 2016 on sn-devel-144 (cherry picked from commit 625f080f213d03fd4b08e1b6ff9f1415f77ee73b) --- ctdb/tests/simple/28_zero_eventscripts.sh | 45 +++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100755 ctdb/tests/simple/28_zero_eventscripts.sh diff --git a/ctdb/tests/simple/28_zero_eventscripts.sh b/ctdb/tests/simple/28_zero_eventscripts.sh new file mode 100755 index 0000000..7c03ae4 --- /dev/null +++ b/ctdb/tests/simple/28_zero_eventscripts.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +test_info() +{ + cat <' | xargs | sed -e 's| |,|g') -o args ww + +echo +echo "Good, that seems to work!" -- 2.9.3