From 76b02337d28bdccd8e409c7f2530cbd2fdfad94b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 29 Nov 2016 17:13:41 +1100 Subject: [PATCH 1/3] ctdb-locking: Remove support for locking multiple databases BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469 The code to lock multiple databases has been dropped from ctdb_lock.c. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 5b1076dc61f5e3f006c1b8cef98e7d2d3cc1bfba) --- ctdb/server/ctdb_lock_helper.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c index 8aa0870..10d6f01 100644 --- a/ctdb/server/ctdb_lock_helper.c +++ b/ctdb/server/ctdb_lock_helper.c @@ -72,7 +72,7 @@ static void usage(void) fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s RECORD \n", progname); - fprintf(stderr, " %s DB [ ...]\n", + fprintf(stderr, " %s DB \n", progname); } @@ -196,17 +196,14 @@ int main(int argc, char *argv[]) result = lock_record(argv[5], argv[6], argv[7]); } else if (strcmp(lock_type, "DB") == 0) { - int n; - - /* If there are no databases specified, no need for lock */ - if (argc > 5) { - for (n=5; n+1 Date: Tue, 29 Nov 2016 17:20:45 +1100 Subject: [PATCH 2/3] ctdb-locking: Explicitly unlock record/db in lock helper BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469 Instead of killing lock helper processes with SIGKILL, send SIGTERM so lock helper processes can explicitly unlock record/db. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 3a56a16b06cf6d1cce613ec29f5ea46630902072) --- ctdb/server/ctdb_lock.c | 6 +- ctdb/server/ctdb_lock_helper.c | 184 ++++++++++++++++++++++++++++++++++++----- ctdb/wscript | 3 +- 3 files changed, 167 insertions(+), 26 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 405a29e..5ecf454e 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -192,7 +192,7 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx) lock_ctx->request->lctx = NULL; } if (lock_ctx->child > 0) { - ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM); if (lock_ctx->type == LOCK_RECORD) { DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx); } else { @@ -672,7 +672,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) ctdb_lock_timeout_handler, (void *)lock_ctx); if (lock_ctx->ttimer == NULL) { - ctdb_kill(ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(ctdb, lock_ctx->child, SIGTERM); lock_ctx->child = -1; close(lock_ctx->fd[0]); return; @@ -687,7 +687,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) (void *)lock_ctx); if (lock_ctx->tfd == NULL) { TALLOC_FREE(lock_ctx->ttimer); - ctdb_kill(ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(ctdb, lock_ctx->child, SIGTERM); lock_ctx->child = -1; close(lock_ctx->fd[0]); return; diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c index 10d6f01..c4f628f 100644 --- a/ctdb/server/ctdb_lock_helper.c +++ b/ctdb/server/ctdb_lock_helper.c @@ -20,8 +20,12 @@ #include "replace.h" #include "system/filesys.h" #include "system/network.h" +#include "system/wait.h" #include +#include + +#include "lib/util/tevent_unix.h" #include "ctdb_private.h" @@ -30,6 +34,11 @@ static char *progname = NULL; static bool realtime = true; +struct lock_state { + struct tdb_context *tdb; + TDB_DATA key; +}; + static void set_priority(void) { const char *ptr; @@ -93,10 +102,9 @@ static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx, return buffer; } -static int lock_record(const char *dbpath, const char *dbflags, const char *dbkey) +static int lock_record(const char *dbpath, const char *dbflags, + const char *dbkey, struct lock_state *state) { - TDB_DATA key; - struct tdb_context *tdb; int tdb_flags; /* No error checking since CTDB always passes sane values */ @@ -104,23 +112,25 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke /* Convert hex key to key */ if (strcmp(dbkey, "NULL") == 0) { - key.dptr = NULL; - key.dsize = 0; + state->key.dptr = NULL; + state->key.dsize = 0; } else { - key.dptr = hex_decode_talloc(NULL, dbkey, &key.dsize); + state->key.dptr = hex_decode_talloc(NULL, dbkey, + &state->key.dsize); } - tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); - if (tdb == NULL) { - fprintf(stderr, "%s: Error opening database %s\n", progname, dbpath); + state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); + if (state->tdb == NULL) { + fprintf(stderr, "%s: Error opening database %s\n", + progname, dbpath); return 1; } set_priority(); - if (tdb_chainlock(tdb, key) < 0) { + if (tdb_chainlock(state->tdb, state->key) < 0) { fprintf(stderr, "%s: Error getting record lock (%s)\n", - progname, tdb_errorstr(tdb)); + progname, tdb_errorstr(state->tdb)); return 1; } @@ -130,26 +140,26 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke } - -static int lock_db(const char *dbpath, const char *dbflags) +static int lock_db(const char *dbpath, const char *dbflags, + struct lock_state *state) { - struct tdb_context *tdb; int tdb_flags; /* No error checking since CTDB always passes sane values */ tdb_flags = strtol(dbflags, NULL, 0); - tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); - if (tdb == NULL) { - fprintf(stderr, "%s: Error opening database %s\n", progname, dbpath); + state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); + if (state->tdb == NULL) { + fprintf(stderr, "%s: Error opening database %s\n", + progname, dbpath); return 1; } set_priority(); - if (tdb_lockall(tdb) < 0) { + if (tdb_lockall(state->tdb) < 0) { fprintf(stderr, "%s: Error getting db lock (%s)\n", - progname, tdb_errorstr(tdb)); + progname, tdb_errorstr(state->tdb)); return 1; } @@ -158,13 +168,114 @@ static int lock_db(const char *dbpath, const char *dbflags) return 0; } +struct wait_for_parent_state { + struct tevent_context *ev; + pid_t ppid; +}; + +static void wait_for_parent_check(struct tevent_req *subreq); + +static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + pid_t ppid) +{ + struct tevent_req *req, *subreq; + struct wait_for_parent_state *state; + + req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->ppid = ppid; + + if (ppid == 1) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + subreq = tevent_wakeup_send(state, ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); + + return req; +} + +static void wait_for_parent_check(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct wait_for_parent_state *state = tevent_req_data( + req, struct wait_for_parent_state); + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! status) { + /* Ignore error */ + fprintf(stderr, "locking: tevent_wakeup_recv() failed\n"); + } + + if (kill(state->ppid, 0) == -1 && errno == ESRCH) { + tevent_req_done(req); + return; + } + + subreq = tevent_wakeup_send(state, state->ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); +} + +static bool wait_for_parent_recv(struct tevent_req *req) +{ + if (tevent_req_is_unix_error(req, NULL)) { + return false; + } + + return true; +} + +static void cleanup(struct lock_state *state) +{ + if (state->tdb != NULL) { + if (state->key.dsize == 0) { + tdb_unlockall(state->tdb); + } else { + tdb_chainunlock(state->tdb, state->key); + } + tdb_close(state->tdb); + } +} + +static void signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data) +{ + struct lock_state *state = (struct lock_state *)private_data; + + cleanup(state); + exit(0); +} int main(int argc, char *argv[]) { + struct tevent_context *ev; + struct tevent_signal *se; + struct tevent_req *req; + struct lock_state state = { 0 }; int write_fd, log_fd; char result = 0; int ppid; const char *lock_type; + bool status; reset_scheduler(); @@ -186,6 +297,20 @@ int main(int argc, char *argv[]) write_fd = atoi(argv[3]); lock_type = argv[4]; + ev = tevent_context_init(NULL); + if (ev == NULL) { + fprintf(stderr, "locking: tevent_context_init() failed\n"); + exit(1); + } + + se = tevent_add_signal(ev, ev, SIGTERM, 0, + signal_handler, &state); + if (se == NULL) { + fprintf(stderr, "locking: tevent_add_signal() failed\n"); + talloc_free(ev); + exit(1); + } + if (strcmp(lock_type, "RECORD") == 0) { if (argc != 8) { fprintf(stderr, "%s: Invalid number of arguments (%d)\n", @@ -193,7 +318,7 @@ int main(int argc, char *argv[]) usage(); exit(1); } - result = lock_record(argv[5], argv[6], argv[7]); + result = lock_record(argv[5], argv[6], argv[7], &state); } else if (strcmp(lock_type, "DB") == 0) { if (argc != 7) { @@ -203,7 +328,7 @@ int main(int argc, char *argv[]) usage(); exit(1); } - result = lock_db(argv[5], argv[6]); + result = lock_db(argv[5], argv[6], &state); } else { fprintf(stderr, "%s: Invalid lock-type '%s'\n", progname, lock_type); @@ -213,6 +338,21 @@ int main(int argc, char *argv[]) send_result(write_fd, result); - ctdb_wait_for_process_to_exit(ppid); + req = wait_for_parent_send(ev, ev, ppid); + if (req == NULL) { + fprintf(stderr, "locking: wait_for_parent_send() failed\n"); + cleanup(&state); + exit(1); + } + + tevent_req_poll(req, ev); + + status = wait_for_parent_recv(req); + if (! status) { + fprintf(stderr, "locking: wait_for_parent_recv() failed\n"); + } + + talloc_free(ev); + cleanup(&state); return 0; } diff --git a/ctdb/wscript b/ctdb/wscript index f61d087..24681e3 100644 --- a/ctdb/wscript +++ b/ctdb/wscript @@ -444,7 +444,8 @@ def build(bld): bld.SAMBA_BINARY('ctdb_lock_helper', source='server/ctdb_lock_helper.c', - deps='samba-util ctdb-system talloc tdb', + deps='''samba-util sys_rw ctdb-system tevent-util + talloc tevent tdb''', includes='include', install_path='${CTDB_HELPER_BINDIR}') -- 2.9.3 From 91a4ea6c4dc4a7e4393e63a0bffa167d6b053f54 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 2 Dec 2016 15:11:20 +1100 Subject: [PATCH 3/3] ctdb-tests: Add robust mutex test BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469 This demonstrates robust mutex bug on linux/glibc system. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Thu Jan 12 07:59:34 CET 2017 on sn-devel-144 (cherry picked from commit 7794497bc909fa7b02da9d9ce1fc496a8fa2a9ae) --- ctdb/tests/src/test_mutex_raw.c | 261 ++++++++++++++++++++++++++++++++++++++++ ctdb/wscript | 5 + 2 files changed, 266 insertions(+) create mode 100644 ctdb/tests/src/test_mutex_raw.c diff --git a/ctdb/tests/src/test_mutex_raw.c b/ctdb/tests/src/test_mutex_raw.c new file mode 100644 index 0000000..8e3cae3 --- /dev/null +++ b/ctdb/tests/src/test_mutex_raw.c @@ -0,0 +1,261 @@ +/* + Robust mutex test + + Copyright (C) Amitay Isaacs 2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . +*/ + +/* + * Run this test as follows: + * + * 1. Running all processes at normal priority + * + * $ while true ; do ./bin/test_mutex_raw /tmp/foo 10 0 ; done + * + * 2. Running all processes at real-time priority + * + * # while true ; do ./bin/test_mutex_raw /tmp/foo 10 1 ; done + * + * The test will block after few iterations. At this time none of the + * child processes is holding the mutex. + * + * To check which process is holding a lock: + * + * $ ./bin/test_mutex_raw /tmp/foo debug + * + * If no pid is printed, then no process is holding the mutex. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int pthread_mutex_consistent_np(pthread_mutex_t *); + +static void set_realtime(void) +{ + struct sched_param p; + int ret; + + p.sched_priority = 1; + + ret = sched_setscheduler(0, SCHED_FIFO, &p); + if (ret == -1) { + fprintf(stderr, "Failed to set scheduler to SCHED_FIFO\n"); + } +} + +static void high_priority(void) +{ + int ret; + + ret = nice(-20); + if (ret == -1) { + fprintf(stderr, "Failed to set high priority\n"); + } +} + +static void run_child(const char *filename) +{ + pthread_mutex_t *mutex; + void *addr; + int ret, fd; + + fd = open(filename, O_RDWR, 0600); + if (fd == -1) { + exit(1); + } + + addr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FILE, fd, 0); + if (addr == NULL) { + exit(2); + } + + mutex = (pthread_mutex_t *)addr; + +again: + ret = pthread_mutex_lock(mutex); + if (ret == EOWNERDEAD) { + ret = pthread_mutex_consistent_np(mutex); + } else if (ret == EAGAIN) { + goto again; + } + if (ret != 0) { + fprintf(stderr, "pid %u lock failed, ret=%d\n", getpid(), ret); + exit(3); + } + + fprintf(stderr, "pid %u locked\n", getpid()); + kill(getpid(), SIGKILL); +} + +#define PRIO_NORMAL 0 +#define PRIO_REALTIME 1 +#define PRIO_NICE_20 2 + +int main(int argc, const char **argv) +{ + pthread_mutexattr_t ma; + pthread_mutex_t *mutex; + int fd, ret, i; + pid_t pid; + void *addr; + int num_children; + int priority = PRIO_NORMAL; + + if (argc < 3 || argc > 4) { + fprintf(stderr, "Usage: %s [0|1|2]\n", argv[0]); + fprintf(stderr, " %s debug\n", argv[0]); + exit(1); + } + + if (argc == 4) { + priority = atoi(argv[3]); + } + + if (priority == PRIO_REALTIME) { + set_realtime(); + } else if (priority == PRIO_NICE_20) { + high_priority(); + } + + fd = open(argv[1], O_CREAT|O_RDWR, 0600); + if (fd == -1) { + fprintf(stderr, "open failed\n"); + exit(1); + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret != 0) { + fprintf(stderr, "lseek failed\n"); + exit(1); + } + + ret = ftruncate(fd, sizeof(pthread_mutex_t)); + if (ret != 0) { + fprintf(stderr, "ftruncate failed\n"); + exit(1); + } + + addr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FILE, fd, 0); + if (addr == NULL) { + fprintf(stderr, "mmap failed\n"); + exit(1); + } + + mutex = (pthread_mutex_t *)addr; + + if (strcmp(argv[2], "debug") == 0) { + ret = pthread_mutex_trylock(mutex); + if (ret == EOWNERDEAD) { + ret = pthread_mutex_consistent_np(mutex); + if (ret == 0) { + pthread_mutex_unlock(mutex); + } + } else if (ret == EBUSY) { + printf("pid=%u\n", mutex->__data.__owner); + } else if (ret == 0) { + pthread_mutex_unlock(mutex); + } + exit(0); + } + + ret = pthread_mutexattr_init(&ma); + if (ret != 0) { + fprintf(stderr, "pthread_mutexattr_init failed\n"); + exit(1); + } + + ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (ret != 0) { + fprintf(stderr, "pthread_mutexattr_settype failed\n"); + exit(1); + } + + ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED); + if (ret != 0) { + fprintf(stderr, "pthread_mutexattr_setpshared failed\n"); + exit(1); + } + + ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST); + if (ret != 0) { + fprintf(stderr, "pthread_mutexattr_setrobust failed\n"); + exit(1); + } + + ret = pthread_mutex_init(mutex, &ma); + if (ret != 0) { + fprintf(stderr, "pthread_mutex_init failed\n"); + exit(1); + } + + ret = pthread_mutex_lock(mutex); + if (ret != 0) { + fprintf(stderr, "pthread_mutex_lock failed\n"); + exit(1); + } + + setpgid(0, 0); + + fprintf(stderr, "Creating children\n"); + num_children = atoi(argv[2]); + + for (i=0; i