From 40a6ebc42487f30ec9da095eddfc543b92f15c23 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 7 Feb 2019 15:57:06 +0100 Subject: [PATCH 1/3] messages_dgm: Use saved errno value In this case this is just a cleanup, the value has just been set by messaging_dgm_sendmsg. But as that already saves errno into a local variable, use that. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13786 Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher Reviewed-by: Andrew Bartlett (cherry picked from commit c27afc098398274abaed6dc9bef2019091c1b635) --- source3/lib/messages_dgm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source3/lib/messages_dgm.c b/source3/lib/messages_dgm.c index 37eefeb0a4a7..cb0c17d6c247 100644 --- a/source3/lib/messages_dgm.c +++ b/source3/lib/messages_dgm.c @@ -553,7 +553,7 @@ static void messaging_dgm_out_threaded_job(void *private_data) if (state->sent != -1) { return; } - if (errno != ENOBUFS) { + if (state->err != ENOBUFS) { return; } -- 2.17.1 From c78fa028060cbb0c835f2d2a98665f9a3fb67da8 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 7 Feb 2019 17:48:34 +0100 Subject: [PATCH 2/3] torture3: Extend read3 for the "messaging target re-inits" failure Do ping_pong a hundred times, re-initializing the msg_ctx every time. https://bugzilla.samba.org/show_bug.cgi?id=13786 Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher Reviewed-by: Andrew Bartlett (cherry picked from commit 8d8f62c4b9dea381ce9f5833bc794553ae358173) --- selftest/knownfail.d/local-messaging | 1 + source3/torture/test_messaging_read.c | 44 +++++++++++++++------------ 2 files changed, 25 insertions(+), 20 deletions(-) create mode 100644 selftest/knownfail.d/local-messaging diff --git a/selftest/knownfail.d/local-messaging b/selftest/knownfail.d/local-messaging new file mode 100644 index 000000000000..46cf30c73167 --- /dev/null +++ b/selftest/knownfail.d/local-messaging @@ -0,0 +1 @@ +^samba3.smbtorture_s3.LOCAL-MESSAGING-READ3 diff --git a/source3/torture/test_messaging_read.c b/source3/torture/test_messaging_read.c index d3e4079074b4..555f084c040b 100644 --- a/source3/torture/test_messaging_read.c +++ b/source3/torture/test_messaging_read.c @@ -250,14 +250,13 @@ fail: } struct msg_pingpong_state { - uint8_t dummy; + struct messaging_context *msg_ctx; }; static void msg_pingpong_done(struct tevent_req *subreq); static struct tevent_req *msg_pingpong_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, - struct messaging_context *msg_ctx, struct server_id dst) { struct tevent_req *req, *subreq; @@ -269,13 +268,23 @@ static struct tevent_req *msg_pingpong_send(TALLOC_CTX *mem_ctx, return NULL; } - status = messaging_send_buf(msg_ctx, dst, MSG_PING, NULL, 0); + if (!tevent_req_set_endtime(req, ev, timeval_current_ofs(10, 0))) { + return tevent_req_post(req, ev); + } + + state->msg_ctx = messaging_init(state, ev); + if (tevent_req_nomem(state->msg_ctx, req)) { + return tevent_req_post(req, ev); + } + + status = messaging_send_buf(state->msg_ctx, dst, MSG_PING, NULL, 0); if (!NT_STATUS_IS_OK(status)) { + DBG_DEBUG("messaging_send_buf failed: %s\n", nt_errstr(status)); tevent_req_error(req, map_errno_from_nt_status(status)); return tevent_req_post(req, ev); } - subreq = messaging_read_send(state, ev, msg_ctx, MSG_PONG); + subreq = messaging_read_send(state, ev, state->msg_ctx, MSG_PONG); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -308,18 +317,17 @@ static int msg_pingpong_recv(struct tevent_req *req) return 0; } -static int msg_pingpong(struct messaging_context *msg_ctx, - struct server_id dst) +static int msg_pingpong(struct server_id dst) { struct tevent_context *ev; struct tevent_req *req; int ret = ENOMEM; - ev = tevent_context_init(msg_ctx); + ev = tevent_context_init(talloc_tos()); if (ev == NULL) { goto fail; } - req = msg_pingpong_send(ev, ev, msg_ctx, dst); + req = msg_pingpong_send(ev, ev, dst); if (req == NULL) { goto fail; } @@ -398,7 +406,7 @@ bool run_messaging_read3(int dummy) pid_t child; int ready_pipe[2]; int exit_pipe[2]; - int ret; + int i, ret; char c; struct server_id dst; ssize_t written; @@ -433,19 +441,15 @@ bool run_messaging_read3(int dummy) fprintf(stderr, "tevent_context_init failed\n"); goto fail; } - msg_ctx = messaging_init(ev, ev); - if (msg_ctx == NULL) { - fprintf(stderr, "messaging_init failed\n"); - goto fail; - } - dst = messaging_server_id(msg_ctx); - dst.pid = child; + dst = (struct server_id){ .pid = child, .vnn = NONCLUSTER_VNN, }; - ret = msg_pingpong(msg_ctx, dst); - if (ret != 0){ - fprintf(stderr, "msg_pingpong failed\n"); - goto fail; + for (i=0; i<100; i++) { + ret = msg_pingpong(dst); + if (ret != 0){ + fprintf(stderr, "msg_pingpong failed\n"); + goto fail; + } } printf("Parent: telling child to exit\n"); -- 2.17.1 From 0a27183da235cc0d37a1dc293d76c7ba85b6c109 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 7 Feb 2019 16:15:46 +0100 Subject: [PATCH 3/3] messages_dgm: Properly handle receiver re-initialization This only properly covers the small-message nonblocking case. Covering the large-message and the blocking case is a much larger effort assuming we want to re-send the failed message if parts of the message has gone through properly. Don't do that for now. This was found by sanba_dnsupdate constantly recreating its irpc handle to winbindd in the RODC case. The messaging_dgm code cached connected datagram sockets based on the destination pid for 1 second. Which means the IRPC responses from winbindd are never delivered to samba_dnsupdate, which will then hit a timeout. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13786 Signed-off-by: Volker Lendecke Reviewed-by: Stefan Metzmacher Reviewed-by: Andrew Bartlett (cherry picked from commit 2543bba0364d8054e9ad316f5611621841bc061d) --- selftest/knownfail.d/local-messaging | 1 - source3/lib/messages_dgm.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) delete mode 100644 selftest/knownfail.d/local-messaging diff --git a/selftest/knownfail.d/local-messaging b/selftest/knownfail.d/local-messaging deleted file mode 100644 index 46cf30c73167..000000000000 --- a/selftest/knownfail.d/local-messaging +++ /dev/null @@ -1 +0,0 @@ -^samba3.smbtorture_s3.LOCAL-MESSAGING-READ3 diff --git a/source3/lib/messages_dgm.c b/source3/lib/messages_dgm.c index cb0c17d6c247..aaafcc103078 100644 --- a/source3/lib/messages_dgm.c +++ b/source3/lib/messages_dgm.c @@ -1419,6 +1419,7 @@ int messaging_dgm_send(pid_t pid, struct messaging_dgm_context *ctx = global_dgm_context; struct messaging_dgm_out *out; int ret; + unsigned retries = 0; if (ctx == NULL) { return ENOTCONN; @@ -1426,6 +1427,7 @@ int messaging_dgm_send(pid_t pid, messaging_dgm_validate(ctx); +again: ret = messaging_dgm_out_get(ctx, pid, &out); if (ret != 0) { return ret; @@ -1435,6 +1437,20 @@ int messaging_dgm_send(pid_t pid, ret = messaging_dgm_out_send_fragmented(ctx->ev, out, iov, iovlen, fds, num_fds); + if (ret == ECONNREFUSED) { + /* + * We cache outgoing sockets. If the receiver has + * closed and re-opened the socket since our last + * message, we get connection refused. Retry. + */ + + TALLOC_FREE(out); + + if (retries < 5) { + retries += 1; + goto again; + } + } return ret; } -- 2.17.1