From a4d7ed0e9d5f33a248aa6c067938680d397e58e4 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 11 Feb 2020 21:26:18 +0100 Subject: [PATCH 1/4] lib: Simplify register_msg_pool_usage() We can do as much as we want in the filter. This gives us automatic retry, we don't have to do the messaging_filtered_read_send() over and over again Bug: https://bugzilla.samba.org/show_bug.cgi?id=14281 Signed-off-by: Volker Lendecke Reviewed-by: Martin Schwenke Reviewed-by: Stefan Metzmacher (cherry picked from commit 8a23031b7bfea4cdaa71d6815bca24dcc3685b22) --- source3/lib/tallocmsg.c | 57 ++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/source3/lib/tallocmsg.c b/source3/lib/tallocmsg.c index 1e243e77781..9cef2a8e10b 100644 --- a/source3/lib/tallocmsg.c +++ b/source3/lib/tallocmsg.c @@ -25,6 +25,9 @@ static bool pool_usage_filter(struct messaging_rec *rec, void *private_data) { + FILE *f = NULL; + int fd; + if (rec->msg_type != MSG_REQ_POOL_USAGE) { return false; } @@ -36,52 +39,31 @@ static bool pool_usage_filter(struct messaging_rec *rec, void *private_data) return false; } - return true; -} - - -static void msg_pool_usage_do(struct tevent_req *req) -{ - struct messaging_context *msg_ctx = tevent_req_callback_data( - req, struct messaging_context); - struct messaging_rec *rec = NULL; - FILE *f = NULL; - int ret; - - ret = messaging_filtered_read_recv(req, talloc_tos(), &rec); - TALLOC_FREE(req); - if (ret != 0) { - DBG_DEBUG("messaging_filtered_read_recv returned %s\n", - strerror(ret)); - return; + fd = dup(rec->fds[0]); + if (fd == -1) { + DBG_DEBUG("dup(%"PRIi64") failed: %s\n", + rec->fds[0], + strerror(errno)); + return false; } - f = fdopen(rec->fds[0], "w"); + f = fdopen(fd, "w"); if (f == NULL) { - close(rec->fds[0]); - TALLOC_FREE(rec); DBG_DEBUG("fdopen failed: %s\n", strerror(errno)); - return; + close(fd); + return false; } - TALLOC_FREE(rec); - talloc_full_report_printf(NULL, f); fclose(f); - f = NULL; - - req = messaging_filtered_read_send( - msg_ctx, - messaging_tevent_context(msg_ctx), - msg_ctx, - pool_usage_filter, - NULL); - if (req == NULL) { - DBG_WARNING("messaging_filtered_read_send failed\n"); - return; - } - tevent_req_set_callback(req, msg_pool_usage_do, msg_ctx); + /* + * Returning false, means messaging_dispatch_waiters() + * won't call messaging_filtered_read_done() and + * our messaging_filtered_read_send() stays alive + * and will get messages. + */ + return false; } /** @@ -101,6 +83,5 @@ void register_msg_pool_usage(struct messaging_context *msg_ctx) DBG_WARNING("messaging_filtered_read_send failed\n"); return; } - tevent_req_set_callback(req, msg_pool_usage_do, msg_ctx); DEBUG(2, ("Registered MSG_REQ_POOL_USAGE\n")); } -- 2.17.1 From e2c5803a5e7a2896ba3998033da33b2641603bfd Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 11 Feb 2020 21:47:39 +0100 Subject: [PATCH 2/4] lib: Add a TALLOC_CTX to base register_msg_pool_usage() on Add a simple way to deactivate the registration Bug: https://bugzilla.samba.org/show_bug.cgi?id=14281 Signed-off-by: Volker Lendecke Reviewed-by: Martin Schwenke Reviewed-by: Stefan Metzmacher (cherry picked from commit dab982d88e9132cbff52db22f441c08ee59bb159) --- source3/include/proto.h | 3 ++- source3/lib/messages.c | 2 +- source3/lib/tallocmsg.c | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/source3/include/proto.h b/source3/include/proto.h index 72ac69e72f2..6818361d78e 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -267,7 +267,8 @@ bool getgroups_unix_user(TALLOC_CTX *mem_ctx, const char *user, /* The following definitions come from lib/tallocmsg.c */ -void register_msg_pool_usage(struct messaging_context *msg_ctx); +void register_msg_pool_usage(TALLOC_CTX *mem_ctx, + struct messaging_context *msg_ctx); /* The following definitions come from lib/time.c */ diff --git a/source3/lib/messages.c b/source3/lib/messages.c index a6bf99578b6..065ccd3a262 100644 --- a/source3/lib/messages.c +++ b/source3/lib/messages.c @@ -590,7 +590,7 @@ static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx, /* Register some debugging related messages */ - register_msg_pool_usage(ctx); + register_msg_pool_usage(ctx, ctx); register_dmalloc_msgs(ctx); debug_register_msgs(ctx); diff --git a/source3/lib/tallocmsg.c b/source3/lib/tallocmsg.c index 9cef2a8e10b..bc0fa132e32 100644 --- a/source3/lib/tallocmsg.c +++ b/source3/lib/tallocmsg.c @@ -69,12 +69,13 @@ static bool pool_usage_filter(struct messaging_rec *rec, void *private_data) /** * Register handler for MSG_REQ_POOL_USAGE **/ -void register_msg_pool_usage(struct messaging_context *msg_ctx) +void register_msg_pool_usage( + TALLOC_CTX *mem_ctx, struct messaging_context *msg_ctx) { struct tevent_req *req = NULL; req = messaging_filtered_read_send( - msg_ctx, + mem_ctx, messaging_tevent_context(msg_ctx), msg_ctx, pool_usage_filter, -- 2.17.1 From ee3916f1f932ad7a5e33909a4d616c2e9a96c310 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 11 Feb 2020 21:57:42 +0100 Subject: [PATCH 3/4] lib: Introduce messaging_context->per_process_talloc_ctx Consolidate "msg_dgm_ref" and "msg_ctdb_ref": The only purpose of those pointers was to TALLOC_FREE() them in messaging_reinit(). We'll have a third entity to talloc_free() in the next commit, make that simpler. Bug: https://bugzilla.samba.org/show_bug.cgi?id=14281 Signed-off-by: Volker Lendecke Reviewed-by: Martin Schwenke Reviewed-by: Stefan Metzmacher (cherry picked from commit 7209357f9ba5525a207d301b299931d6bdee9c2f) --- source3/lib/messages.c | 84 +++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/source3/lib/messages.c b/source3/lib/messages.c index 065ccd3a262..b29df0a44f9 100644 --- a/source3/lib/messages.c +++ b/source3/lib/messages.c @@ -97,10 +97,9 @@ struct messaging_context { struct tevent_req **waiters; size_t num_waiters; - void *msg_dgm_ref; - void *msg_ctdb_ref; - struct server_id_db *names_db; + + TALLOC_CTX *per_process_talloc_ctx; }; static struct messaging_rec *messaging_rec_dup(TALLOC_CTX *mem_ctx, @@ -484,6 +483,7 @@ static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx, int ret; const char *lck_path; const char *priv_path; + void *ref; bool ok; /* @@ -537,21 +537,28 @@ static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx, ctx->event_ctx = ev; + ctx->per_process_talloc_ctx = talloc_new(ctx); + if (ctx->per_process_talloc_ctx == NULL) { + status = NT_STATUS_NO_MEMORY; + goto done; + } + ok = messaging_register_event_context(ctx, ev); if (!ok) { status = NT_STATUS_NO_MEMORY; goto done; } - ctx->msg_dgm_ref = messaging_dgm_ref(ctx, - ctx->event_ctx, - &ctx->id.unique_id, - priv_path, - lck_path, - messaging_recv_cb, - ctx, - &ret); - if (ctx->msg_dgm_ref == NULL) { + ref = messaging_dgm_ref( + ctx->per_process_talloc_ctx, + ctx->event_ctx, + &ctx->id.unique_id, + priv_path, + lck_path, + messaging_recv_cb, + ctx, + &ret); + if (ref == NULL) { DEBUG(2, ("messaging_dgm_ref failed: %s\n", strerror(ret))); status = map_nt_error_from_unix(ret); goto done; @@ -560,11 +567,16 @@ static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx, #ifdef CLUSTER_SUPPORT if (lp_clustering()) { - ctx->msg_ctdb_ref = messaging_ctdb_ref( - ctx, ctx->event_ctx, - lp_ctdbd_socket(), lp_ctdb_timeout(), - ctx->id.unique_id, messaging_recv_cb, ctx, &ret); - if (ctx->msg_ctdb_ref == NULL) { + ref = messaging_ctdb_ref( + ctx->per_process_talloc_ctx, + ctx->event_ctx, + lp_ctdbd_socket(), + lp_ctdb_timeout(), + ctx->id.unique_id, + messaging_recv_cb, + ctx, + &ret); + if (ref == NULL) { DBG_NOTICE("messaging_ctdb_ref failed: %s\n", strerror(ret)); status = map_nt_error_from_unix(ret); @@ -636,9 +648,14 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx) { int ret; char *lck_path; + void *ref; + + TALLOC_FREE(msg_ctx->per_process_talloc_ctx); - TALLOC_FREE(msg_ctx->msg_dgm_ref); - TALLOC_FREE(msg_ctx->msg_ctdb_ref); + msg_ctx->per_process_talloc_ctx = talloc_new(msg_ctx); + if (msg_ctx->per_process_talloc_ctx == NULL) { + return NT_STATUS_NO_MEMORY; + } msg_ctx->id = (struct server_id) { .pid = getpid(), .vnn = msg_ctx->id.vnn @@ -649,23 +666,32 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx) return NT_STATUS_NO_MEMORY; } - msg_ctx->msg_dgm_ref = messaging_dgm_ref( - msg_ctx, msg_ctx->event_ctx, &msg_ctx->id.unique_id, - private_path("msg.sock"), lck_path, - messaging_recv_cb, msg_ctx, &ret); + ref = messaging_dgm_ref( + msg_ctx->per_process_talloc_ctx, + msg_ctx->event_ctx, + &msg_ctx->id.unique_id, + private_path("msg.sock"), + lck_path, + messaging_recv_cb, + msg_ctx, + &ret); - if (msg_ctx->msg_dgm_ref == NULL) { + if (ref == NULL) { DEBUG(2, ("messaging_dgm_ref failed: %s\n", strerror(ret))); return map_nt_error_from_unix(ret); } if (lp_clustering()) { - msg_ctx->msg_ctdb_ref = messaging_ctdb_ref( - msg_ctx, msg_ctx->event_ctx, - lp_ctdbd_socket(), lp_ctdb_timeout(), - msg_ctx->id.unique_id, messaging_recv_cb, msg_ctx, + ref = messaging_ctdb_ref( + msg_ctx->per_process_talloc_ctx, + msg_ctx->event_ctx, + lp_ctdbd_socket(), + lp_ctdb_timeout(), + msg_ctx->id.unique_id, + messaging_recv_cb, + msg_ctx, &ret); - if (msg_ctx->msg_ctdb_ref == NULL) { + if (ref == NULL) { DBG_NOTICE("messaging_ctdb_ref failed: %s\n", strerror(ret)); return map_nt_error_from_unix(ret); -- 2.17.1 From 8a7342134e78edf3ea5a4596ccc54308d3cb052c Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 11 Feb 2020 22:10:32 +0100 Subject: [PATCH 4/4] lib: Fix a shutdown crash with "clustering = yes" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a bit confusing now, sorry for that: register_msg_pool_usage() in the ctdb case uses messaging_ctdb_register_tevent_context(), which talloc_reference()s the central struct messaging_ctdb_fde_ev of the messaging_ctdb_context. In messaging_reinit(), we talloc_free only one of those references and allocate a new messaging_ctdb_fde_ev. The remaining messaging_ctdb_fde_ev should have been deleted as well, but due to the second reference this does not happen. When doing the shutdown messaging_ctdb_fde_ev_destructor() is called twice, once on the properly reinitialized fde_ev, and once much later on the leftover one which references invalid data structures. By the way, this is not a problem with talloc_reference(), this would have happened with explicit refcounting too. Bug: https://bugzilla.samba.org/show_bug.cgi?id=14281 Signed-off-by: Volker Lendecke Reviewed-by: Martin Schwenke Reviewed-by: Stefan Metzmacher Autobuild-User(master): Björn Baumbach Autobuild-Date(master): Tue Feb 18 13:05:53 UTC 2020 on sn-devel-184 (cherry picked from commit f1577c2bc13c91ea912ae461870e470065f250c1) --- source3/lib/messages.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source3/lib/messages.c b/source3/lib/messages.c index b29df0a44f9..63d6362e0c9 100644 --- a/source3/lib/messages.c +++ b/source3/lib/messages.c @@ -602,7 +602,7 @@ static NTSTATUS messaging_init_internal(TALLOC_CTX *mem_ctx, /* Register some debugging related messages */ - register_msg_pool_usage(ctx, ctx); + register_msg_pool_usage(ctx->per_process_talloc_ctx, ctx); register_dmalloc_msgs(ctx); debug_register_msgs(ctx); @@ -699,6 +699,7 @@ NTSTATUS messaging_reinit(struct messaging_context *msg_ctx) } server_id_db_reinit(msg_ctx->names_db, msg_ctx->id); + register_msg_pool_usage(msg_ctx->per_process_talloc_ctx, msg_ctx); return NT_STATUS_OK; } -- 2.17.1