From d25f27a53d8382ee6b3fe4a009a755f5c4050777 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Mon, 28 Aug 2017 16:38:19 +0200 Subject: [PATCH 1/2] pthreadpool: Fix fork behaviour glibc's pthread_cond_wait(&c, &m) increments m.__data.__nusers, making pthread_mutex_destroy return EBUSY. Thus we can't allow any thread waiting for a job across a fork. Also, the state of the condvar itself is unclear across a fork. Right now to me it looks like an initialized but unused condvar can be used in the child. Busy worker threads don't cause any trouble here, they don't hold mutexes or condvars. Also, they can't reach the condvar because _prepare holds all mutexes. Bug: https://bugzilla.samba.org/show_bug.cgi?id=13006 Signed-off-by: Volker Lendecke Reviewed-by: Jeremy Allison Reviewed-by: Stefan Metzmacher (cherry picked from commit ff98e3fb666b57b56a1427aa1196948ceebdec66) --- source3/lib/pthreadpool/pthreadpool.c | 67 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/source3/lib/pthreadpool/pthreadpool.c b/source3/lib/pthreadpool/pthreadpool.c index f97cdcc6c15..23885aa6d11 100644 --- a/source3/lib/pthreadpool/pthreadpool.c +++ b/source3/lib/pthreadpool/pthreadpool.c @@ -89,6 +89,13 @@ struct pthreadpool { * Number of idle threads */ int num_idle; + + /* + * Condition variable indicating that we should quickly go + * away making way for fork() without anybody waiting on + * pool->condvar. + */ + pthread_cond_t *prefork_cond; }; static pthread_mutex_t pthreadpools_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -148,6 +155,7 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult, pool->num_threads = 0; pool->max_threads = max_threads; pool->num_idle = 0; + pool->prefork_cond = NULL; ret = pthread_mutex_lock(&pthreadpools_mutex); if (ret != 0) { @@ -169,6 +177,47 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult, return 0; } +static void pthreadpool_prepare_pool(struct pthreadpool *pool) +{ + pthread_cond_t prefork_cond = PTHREAD_COND_INITIALIZER; + int ret; + + ret = pthread_mutex_lock(&pool->mutex); + assert(ret == 0); + + while (pool->num_idle != 0) { + /* + * Exit all idle threads, which are all blocked in + * pool->condvar. In the child we can destroy the + * pool, which would result in undefined behaviour in + * the pthread_cond_destroy(pool->condvar). glibc just + * blocks here. + */ + pool->prefork_cond = &prefork_cond; + + ret = pthread_cond_signal(&pool->condvar); + assert(ret == 0); + + ret = pthread_cond_wait(&prefork_cond, &pool->mutex); + assert(ret == 0); + + pool->prefork_cond = NULL; + } + + ret = pthread_cond_destroy(&prefork_cond); + assert(ret == 0); + + /* + * Probably it's well-defined somewhere: What happens to + * condvars after a fork? The rationale of pthread_atfork only + * writes about mutexes. So better be safe than sorry and + * destroy/reinit pool->condvar across a fork. + */ + + ret = pthread_cond_destroy(&pool->condvar); + assert(ret == 0); +} + static void pthreadpool_prepare(void) { int ret; @@ -180,8 +229,7 @@ static void pthreadpool_prepare(void) pool = pthreadpools; while (pool != NULL) { - ret = pthread_mutex_lock(&pool->mutex); - assert(ret == 0); + pthreadpool_prepare_pool(pool); pool = pool->next; } } @@ -194,6 +242,8 @@ static void pthreadpool_parent(void) for (pool = DLIST_TAIL(pthreadpools); pool != NULL; pool = DLIST_PREV(pool)) { + ret = pthread_cond_init(&pool->condvar, NULL); + assert(ret == 0); ret = pthread_mutex_unlock(&pool->mutex); assert(ret == 0); } @@ -216,6 +266,8 @@ static void pthreadpool_child(void) pool->head = 0; pool->num_jobs = 0; + ret = pthread_cond_init(&pool->condvar, NULL); + assert(ret == 0); ret = pthread_mutex_unlock(&pool->mutex); assert(ret == 0); } @@ -406,6 +458,17 @@ static void *pthreadpool_server(void *arg) &pool->condvar, &pool->mutex, &ts); pool->num_idle -= 1; + if (pool->prefork_cond != NULL) { + /* + * Me must allow fork() to continue + * without anybody waiting on + * &pool->condvar. + */ + pthread_cond_signal(pool->prefork_cond); + pthreadpool_server_exit(pool); + return NULL; + } + if (res == ETIMEDOUT) { if (pool->num_jobs == 0) { -- 2.14.1.581.gf28d330327-goog From e325fceee51ab007fc73c61f9a3236db21cb9f16 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Fri, 1 Sep 2017 15:55:00 -0700 Subject: [PATCH 2/2] pthreadpool: Test fork with an active thread Bug: https://bugzilla.samba.org/show_bug.cgi?id=13006 Signed-off-by: Volker Lendecke Reviewed-by: Jeremy Allison Reviewed-by: Stefan Metzmacher Autobuild-User(master): Volker Lendecke Autobuild-Date(master): Thu Aug 31 21:34:57 CEST 2017 on sn-devel-144 (cherry picked from commit 981e674a7472017274c9b169c776d5c5e8bd1469) --- source3/lib/pthreadpool/tests.c | 114 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/source3/lib/pthreadpool/tests.c b/source3/lib/pthreadpool/tests.c index c4d2e6a1382..999118286eb 100644 --- a/source3/lib/pthreadpool/tests.c +++ b/source3/lib/pthreadpool/tests.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "pthreadpool_pipe.h" #include "pthreadpool_tevent.h" @@ -192,6 +193,113 @@ static int test_fork(void) return 0; } +static void busyfork_job(void *private_data) +{ + return; +} + +static int test_busyfork(void) +{ + struct pthreadpool_pipe *p; + int fds[2]; + struct pollfd pfd; + pid_t child, waitret; + int ret, jobnum, wstatus; + + ret = pipe(fds); + if (ret == -1) { + perror("pipe failed"); + return -1; + } + + ret = pthreadpool_pipe_init(1, &p); + if (ret != 0) { + fprintf(stderr, "pthreadpool_pipe_init failed: %s\n", + strerror(ret)); + return -1; + } + + ret = pthreadpool_pipe_add_job(p, 1, busyfork_job, NULL); + if (ret != 0) { + fprintf(stderr, "pthreadpool_add_job failed: %s\n", + strerror(ret)); + return -1; + } + + ret = pthreadpool_pipe_finished_jobs(p, &jobnum, 1); + if (ret != 1) { + fprintf(stderr, "pthreadpool_pipe_finished_jobs failed\n"); + return -1; + } + + poll(NULL, 0, 200); + + child = fork(); + if (child < 0) { + perror("fork failed"); + return -1; + } + + if (child == 0) { + ret = pthreadpool_pipe_destroy(p); + if (ret != 0) { + fprintf(stderr, "pthreadpool_pipe_destroy failed: " + "%s\n", strerror(ret)); + exit(1); + } + exit(0); + } + + ret = close(fds[1]); + if (ret == -1) { + perror("close failed"); + return -1; + } + + pfd = (struct pollfd) { .fd = fds[0], .events = POLLIN }; + + ret = poll(&pfd, 1, 5000); + if (ret == -1) { + perror("poll failed"); + return -1; + } + if (ret == 0) { + fprintf(stderr, "Child did not exit for 5 seconds\n"); + /* + * The child might hang forever in + * pthread_cond_destroy for example. Be kind to the + * system and kill it. + */ + kill(child, SIGTERM); + return -1; + } + if (ret != 1) { + fprintf(stderr, "poll returned %d -- huh??\n", ret); + return -1; + } + + poll(NULL, 0, 200); + + waitret = waitpid(child, &wstatus, WNOHANG); + if (waitret != child) { + fprintf(stderr, "waitpid returned %d\n", (int)waitret); + return -1; + } + + if (!WIFEXITED(wstatus)) { + fprintf(stderr, "child did not properly exit\n"); + return -1; + } + + ret = WEXITSTATUS(wstatus); + if (ret != 0) { + fprintf(stderr, "child returned %d\n", ret); + return -1; + } + + return 0; +} + static void test_tevent_wait(void *private_data) { int *timeout = private_data; @@ -301,6 +409,12 @@ int main(void) return 1; } + ret = test_busyfork(); + if (ret != 0) { + fprintf(stderr, "test_busyfork failed\n"); + return 1; + } + printf("success\n"); return 0; } -- 2.14.1.581.gf28d330327-goog