The Samba-Bugzilla – Attachment 15874 Details for Bug 14294
CTDB recovery corner cases can cause record resurrection and node banning
Description: Patch for 4.11
Filename: BZ14294-v4-11+vacuuming.patch
MIME Type: text/plain
Creator: Martin Schwenke
Created: 2020-03-30 01:31:15 UTC
Size: 185.70 KB
Flags: patch, obsolete
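
The raw patch series follows. As a reading aid, the short sketch below shows how a caller might build a request for the new DB_VACUUM control using struct ctdb_db_vacuum (introduced in PATCH 14/38) and ctdb_req_control_db_vacuum() (PATCH 16/38). The wrapper function, its name and the surrounding client setup are illustrative assumptions and are not part of the attachment.

#include <stdbool.h>
#include <stdint.h>

#include "protocol/protocol.h"      /* struct ctdb_db_vacuum (PATCH 14/38) */
#include "protocol/protocol_api.h"  /* ctdb_req_control_db_vacuum() (PATCH 16/38) */

/* Illustrative helper (not part of the attachment): fill in a DB_VACUUM
 * control request.  Sending the request and handling the reply are assumed
 * to happen elsewhere via the normal client control path. */
static void build_db_vacuum_request(struct ctdb_req_control *request,
                                    struct ctdb_db_vacuum *db_vacuum,
                                    uint32_t db_id,
                                    bool full_vacuum_run)
{
	/* Payload: which database to vacuum and whether this is a full run */
	db_vacuum->db_id = db_id;
	db_vacuum->full_vacuum_run = full_vacuum_run;

	/* Sets request->opcode to CTDB_CONTROL_DB_VACUUM and attaches the
	 * payload as request->rdata.data.db_vacuum */
	ctdb_req_control_db_vacuum(request, db_vacuum);
}
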
>From 201777eaa2bc78229909f0f301768e79e0ba29a5 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Wed, 4 Sep 2019 14:14:22 +1000 >Subject: [PATCH 01/38] ctdb-client: Fix some typos in debug messages > > tdb_sore -> tdb_store > SCHDULE_FOR_DELETION -> SCHEDULE_FOR_DELETION > >Switch to modern debug macros while touching the lines. > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> > >Autobuild-User(master): Amitay Isaacs <amitay@samba.org> >Autobuild-Date(master): Tue Sep 17 05:52:15 UTC 2019 on sn-devel-184 > >(cherry picked from commit 84f544b55f235e2f08596bf4b7854460af008f88) >--- > ctdb/client/client_db.c | 12 ++++++------ > 1 file changed, 6 insertions(+), 6 deletions(-) > >diff --git a/ctdb/client/client_db.c b/ctdb/client/client_db.c >index dfa8d970de5..a008f2ad63d 100644 >--- a/ctdb/client/client_db.c >+++ b/ctdb/client/client_db.c >@@ -1540,9 +1540,9 @@ struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx, > > ret = tdb_store(h->db->ltdb->tdb, h->key, rec, TDB_REPLACE); > if (ret != 0) { >- DEBUG(DEBUG_ERR, >- ("fetch_lock delete: %s tdb_sore failed, %s\n", >- h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); >+ D_ERR("fetch_lock delete: %s tdb_store failed, %s\n", >+ h->db->db_name, >+ tdb_errorstr(h->db->ltdb->tdb)); > tevent_req_error(req, EIO); > return tevent_req_post(req, ev); > } >@@ -1576,9 +1576,9 @@ static void ctdb_delete_record_done(struct tevent_req *subreq) > status = ctdb_client_control_recv(subreq, &ret, NULL, NULL); > TALLOC_FREE(subreq); > if (! status) { >- DEBUG(DEBUG_ERR, >- ("delete_record: %s SCHDULE_FOR_DELETION failed, " >- "ret=%d\n", state->h->db->db_name, ret)); >+ D_ERR("delete_record: %s SCHEDULE_FOR_DELETION failed, ret=%d\n", >+ state->h->db->db_name, >+ ret); > tevent_req_error(req, ret); > return; > } >-- >2.25.1 > > >From f1ede2508254d1419a3d7530d6a577635b1be248 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Thu, 15 Feb 2018 12:13:53 +1100 >Subject: [PATCH 02/38] ctdb-protocol: Drop code related to obsolete controls > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 688567f080156892270cbfb2907cd712cb77cb7a) >--- > ctdb/protocol/protocol_client.c | 9 ----- > ctdb/protocol/protocol_control.c | 60 -------------------------------- > 2 files changed, 69 deletions(-) > >diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c >index 9aa32a9bba7..a39dd730e23 100644 >--- a/ctdb/protocol/protocol_client.c >+++ b/ctdb/protocol/protocol_client.c >@@ -424,8 +424,6 @@ int ctdb_reply_control_db_attach(struct ctdb_reply_control *reply, > return reply->status; > } > >-/* CTDB_CONTROL_SET_CALL */ >- > /* CTDB_CONTROL_TRAVERSE_START */ > > void ctdb_req_control_traverse_start(struct ctdb_req_control *request, >@@ -718,8 +716,6 @@ int ctdb_reply_control_shutdown(struct ctdb_reply_control *reply) > return ctdb_reply_control_generic(reply, CTDB_CONTROL_SHUTDOWN); > } > >-/* CTDB_CONTROL_GET_MONMODE */ >- > /* CTDB_CONTROL_TCP_CLIENT */ > > void ctdb_req_control_tcp_client(struct ctdb_req_control *request, >@@ -1170,9 +1166,6 @@ int ctdb_reply_control_try_delete_records(struct ctdb_reply_control *reply, > return reply->status; > } > >-/* CTDB_CONTROL_ENABLE_MONITOR */ >-/* CTDB_CONTROL_DISABLE_MONITOR */ >- > /* CTDB_CONTROL_ADD_PUBLIC_IP */ > > void ctdb_req_control_add_public_ip(struct ctdb_req_control *request, >@@ -1855,8 +1848,6 @@ int 
ctdb_reply_control_set_db_readonly(struct ctdb_reply_control *reply) > return ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_DB_READONLY); > } > >-/* CTDB_CONTROL_CHECK_SRVIDS */ >- > /* CTDB_CONTROL_TRAVERSE_START_EXT */ > > void ctdb_req_control_traverse_start_ext(struct ctdb_req_control *request, >diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c >index 0b88b5c8b5a..b2d17611108 100644 >--- a/ctdb/protocol/protocol_control.c >+++ b/ctdb/protocol/protocol_control.c >@@ -90,9 +90,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) > len = ctdb_string_len(&cd->data.db_name); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > len = ctdb_traverse_start_len(cd->data.traverse_start); > break; >@@ -145,9 +142,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > len = ctdb_connection_len(cd->data.conn); > break; >@@ -221,12 +215,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) > len = ctdb_rec_buffer_len(cd->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > len = ctdb_addr_info_len(cd->data.addr_info); > break; >@@ -338,9 +326,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) > len = ctdb_uint32_len(&cd->data.db_id); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > len = ctdb_traverse_start_ext_len(cd->data.traverse_start_ext); > break; >@@ -466,9 +451,6 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, > ctdb_string_push(&cd->data.db_name, buf, &np); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > ctdb_traverse_start_push(cd->data.traverse_start, buf, &np); > break; >@@ -635,9 +617,6 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, > ctdb_uint32_push(&cd->data.db_id, buf, &np); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > ctdb_traverse_start_ext_push(cd->data.traverse_start_ext, buf, > &np); >@@ -757,9 +736,6 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, > &cd->data.db_name, &np); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > ret = ctdb_traverse_start_pull(buf, buflen, mem_ctx, > &cd->data.traverse_start, &np); >@@ -957,9 +933,6 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, > ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > ret = ctdb_traverse_start_ext_pull(buf, buflen, mem_ctx, > &cd->data.traverse_start_ext, >@@ -1104,9 +1077,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > len = ctdb_uint32_len(&cd->data.db_id); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > break; > >@@ -1154,9 +1124,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > break; > >@@ -1224,12 +1191,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data 
*cd) > len = ctdb_rec_buffer_len(cd->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > break; > >@@ -1286,12 +1247,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > len = ctdb_ban_state_len(cd->data.ban_state); > break; > >- case CTDB_CONTROL_SET_DB_PRIORITY: >- break; >- >- case CTDB_CONTROL_GET_DB_PRIORITY: >- break; >- > case CTDB_CONTROL_REGISTER_NOTIFY: > break; > >@@ -1336,9 +1291,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > case CTDB_CONTROL_SET_DB_READONLY: > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > break; > >@@ -1515,9 +1467,6 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, > ctdb_ban_state_push(cd->data.ban_state, buf, &np); > break; > >- case CTDB_CONTROL_GET_DB_PRIORITY: >- break; >- > case CTDB_CONTROL_GET_DB_SEQNUM: > ctdb_uint64_push(&cd->data.seqnum, buf, &np); > break; >@@ -1538,9 +1487,6 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, > ctdb_statistics_list_push(cd->data.stats_list, buf, &np); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_GET_DB_STATISTICS: > ctdb_db_statistics_push(cd->data.dbstats, buf, &np); > break; >@@ -1697,9 +1643,6 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, > &cd->data.ban_state, &np); > break; > >- case CTDB_CONTROL_GET_DB_PRIORITY: >- break; >- > case CTDB_CONTROL_GET_DB_SEQNUM: > ret = ctdb_uint64_pull(buf, buflen, &cd->data.seqnum, &np); > break; >@@ -1724,9 +1667,6 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, > &cd->data.stats_list, &np); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_GET_DB_STATISTICS: > ret = ctdb_db_statistics_pull(buf, buflen, mem_ctx, > &cd->data.dbstats, &np); >-- >2.25.1 > > >From b2314e47fb2dd840bda33b7f62d6c6fd1cc2583c Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Thu, 15 Feb 2018 12:28:11 +1100 >Subject: [PATCH 03/38] ctdb-tests: Drop code releated to obsolete controls > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 913bd331f65e9fe3d7cb16e041cd37b01987841f) >--- > ctdb/tests/src/protocol_common_ctdb.c | 78 --------------------------- > 1 file changed, 78 deletions(-) > >diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c >index 6a6573486a1..4aa4cfc5bde 100644 >--- a/ctdb/tests/src/protocol_common_ctdb.c >+++ b/ctdb/tests/src/protocol_common_ctdb.c >@@ -203,9 +203,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > assert(cd->data.db_name != NULL); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > cd->data.traverse_start = talloc(mem_ctx, struct ctdb_traverse_start); > assert(cd->data.traverse_start != NULL); >@@ -264,9 +261,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > cd->data.conn = talloc(mem_ctx, struct ctdb_connection); > assert(cd->data.conn != NULL); >@@ -364,12 +358,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case 
CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > cd->data.addr_info = talloc(mem_ctx, struct ctdb_addr_info); > assert(cd->data.addr_info != NULL); >@@ -385,12 +373,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_GET_CAPABILITIES: > break; > >- case CTDB_CONTROL_START_PERSISTENT_UPDATE: >- break; >- >- case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: >- break; >- > case CTDB_CONTROL_RECD_PING: > break; > >@@ -510,9 +492,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > cd->data.db_id = rand32(); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > cd->data.traverse_start_ext = talloc(mem_ctx, struct ctdb_traverse_start_ext); > assert(cd->data.traverse_start_ext != NULL); >@@ -671,9 +650,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > verify_ctdb_string(&cd->data.db_name, &cd2->data.db_name); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > verify_ctdb_traverse_start(cd->data.traverse_start, > cd2->data.traverse_start); >@@ -728,9 +704,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > verify_ctdb_connection(cd->data.conn, cd2->data.conn); > break; >@@ -805,12 +778,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > verify_ctdb_addr_info(cd->data.addr_info, cd2->data.addr_info); > break; >@@ -822,12 +789,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > case CTDB_CONTROL_GET_CAPABILITIES: > break; > >- case CTDB_CONTROL_START_PERSISTENT_UPDATE: >- break; >- >- case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: >- break; >- > case CTDB_CONTROL_RECD_PING: > break; > >@@ -928,9 +889,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > assert(cd->data.db_id == cd2->data.db_id); > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > verify_ctdb_traverse_start_ext(cd->data.traverse_start_ext, > cd2->data.traverse_start_ext); >@@ -1111,9 +1069,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > cd->data.db_id = rand32(); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > break; > >@@ -1163,9 +1118,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > break; > >@@ -1243,12 +1195,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > break; > >@@ -1259,12 +1205,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > cd->data.caps = rand32(); > break; > >- case CTDB_CONTROL_START_PERSISTENT_UPDATE: >- break; >- >- case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: >- break; >- > case CTDB_CONTROL_RECD_PING: > break; > >@@ -1369,9 +1309,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_SET_DB_READONLY: > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > 
case CTDB_CONTROL_TRAVERSE_START_EXT: > break; > >@@ -1492,9 +1429,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > assert(cd->data.db_id == cd2->data.db_id); > break; > >- case CTDB_CONTROL_SET_CALL: >- break; >- > case CTDB_CONTROL_TRAVERSE_START: > break; > >@@ -1542,9 +1476,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > case CTDB_CONTROL_SHUTDOWN: > break; > >- case CTDB_CONTROL_GET_MONMODE: >- break; >- > case CTDB_CONTROL_TCP_CLIENT: > break; > >@@ -1613,12 +1544,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); > break; > >- case CTDB_CONTROL_ENABLE_MONITOR: >- break; >- >- case CTDB_CONTROL_DISABLE_MONITOR: >- break; >- > case CTDB_CONTROL_ADD_PUBLIC_IP: > break; > >@@ -1723,9 +1648,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > case CTDB_CONTROL_SET_DB_READONLY: > break; > >- case CTDB_CONTROL_CHECK_SRVIDS: >- break; >- > case CTDB_CONTROL_TRAVERSE_START_EXT: > break; > >-- >2.25.1 > > >From 3e6c89a451a787b97131977faf26e15b00b40829 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Thu, 15 Feb 2018 11:57:24 +1100 >Subject: [PATCH 04/38] ctdb-protocol: Add new control VACUUM_FETCH > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 0872c52ef0497f96f53318cf7e4d31be0854adde) >--- > ctdb/protocol/protocol.h | 1 + > 1 file changed, 1 insertion(+) > >diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h >index b868553f6e8..e47daeadba1 100644 >--- a/ctdb/protocol/protocol.h >+++ b/ctdb/protocol/protocol.h >@@ -373,6 +373,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, > CTDB_CONTROL_CHECK_PID_SRVID = 151, > CTDB_CONTROL_TUNNEL_REGISTER = 152, > CTDB_CONTROL_TUNNEL_DEREGISTER = 153, >+ CTDB_CONTROL_VACUUM_FETCH = 154, > }; > > #define MAX_COUNT_BUCKETS 16 >-- >2.25.1 > > >From 2c60b2158239c267544439294337f154bfd9f6c6 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Thu, 15 Feb 2018 11:57:44 +1100 >Subject: [PATCH 05/38] ctdb-protocol: Add marshalling for new control > VACUUM_FETCH > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit b71d8cd80f84169bacf2dd1e753e468a305c50ce) >--- > ctdb/protocol/protocol_api.h | 4 ++++ > ctdb/protocol/protocol_client.c | 24 ++++++++++++++++++++++++ > ctdb/protocol/protocol_control.c | 22 ++++++++++++++++++++++ > ctdb/protocol/protocol_debug.c | 1 + > 4 files changed, 51 insertions(+) > >diff --git a/ctdb/protocol/protocol_api.h b/ctdb/protocol/protocol_api.h >index 6104c10e7b5..cf4c4635dd4 100644 >--- a/ctdb/protocol/protocol_api.h >+++ b/ctdb/protocol/protocol_api.h >@@ -603,6 +603,10 @@ void ctdb_req_control_tunnel_deregister(struct ctdb_req_control *request, > uint64_t tunnel_id); > int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply); > >+void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, >+ struct ctdb_rec_buffer *recbuf); >+int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply); >+ > /* From protocol/protocol_debug.c */ > > void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp); >diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c >index a39dd730e23..d5f6a222fe4 100644 >--- a/ctdb/protocol/protocol_client.c >+++ b/ctdb/protocol/protocol_client.c >@@ -2333,3 +2333,27 @@ 
int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply) > > return reply->status; > } >+ >+/* CTDB_CONTROL_VACUUM_FETCH */ >+ >+void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, >+ struct ctdb_rec_buffer *recbuf) >+{ >+ request->opcode = CTDB_CONTROL_VACUUM_FETCH; >+ request->pad = 0; >+ request->srvid = 0; >+ request->client_id = 0; >+ request->flags = 0; >+ >+ request->rdata.opcode = CTDB_CONTROL_VACUUM_FETCH; >+ request->rdata.data.recbuf = recbuf; >+} >+ >+int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply) >+{ >+ if (reply->rdata.opcode != CTDB_CONTROL_VACUUM_FETCH) { >+ return EPROTO; >+ } >+ >+ return reply->status; >+} >diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c >index b2d17611108..1cc985a71a7 100644 >--- a/ctdb/protocol/protocol_control.c >+++ b/ctdb/protocol/protocol_control.c >@@ -407,6 +407,10 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) > > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ len = ctdb_rec_buffer_len(cd->data.recbuf); >+ break; > } > > return len; >@@ -682,6 +686,10 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, > case CTDB_CONTROL_CHECK_PID_SRVID: > ctdb_pid_srvid_push(cd->data.pid_srvid, buf, &np); > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); >+ break; > } > > *npush = np; >@@ -1006,6 +1014,11 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, > ret = ctdb_pid_srvid_pull(buf, buflen, mem_ctx, > &cd->data.pid_srvid, &np); > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, >+ &cd->data.recbuf, &np); >+ break; > } > > if (ret != 0) { >@@ -1363,6 +1376,9 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ break; > } > > return len; >@@ -1517,6 +1533,9 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, > > case CTDB_CONTROL_CHECK_PID_SRVID: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ break; > } > > *npush = np; >@@ -1701,6 +1720,9 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, > > case CTDB_CONTROL_CHECK_PID_SRVID: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ break; > } > > if (ret != 0) { >diff --git a/ctdb/protocol/protocol_debug.c b/ctdb/protocol/protocol_debug.c >index a34f5a86947..97903ea98f4 100644 >--- a/ctdb/protocol/protocol_debug.c >+++ b/ctdb/protocol/protocol_debug.c >@@ -242,6 +242,7 @@ static void ctdb_opcode_print(uint32_t opcode, FILE *fp) > { CTDB_CONTROL_CHECK_PID_SRVID, "CHECK_PID_SRVID" }, > { CTDB_CONTROL_TUNNEL_REGISTER, "TUNNEL_REGISTER" }, > { CTDB_CONTROL_TUNNEL_DEREGISTER, "TUNNEL_DEREGISTER" }, >+ { CTDB_CONTROL_VACUUM_FETCH, "VACUUM_FETCH" }, > { MAP_END, "" }, > }; > >-- >2.25.1 > > >From 9fe916b4651d7accddad397bba55ea8973fdbd42 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 17:28:49 +1100 >Subject: [PATCH 06/38] ctdb-tests: Add marshalling tests for new control > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 36f9b4953a8def40681a6f02f6576795a1ba5fbe) >--- > ctdb/tests/cunit/protocol_test_101.sh | 2 +- > ctdb/tests/src/protocol_common_ctdb.c | 15 +++++++++++++++ > ctdb/tests/src/protocol_ctdb_test.c | 2 +- > 3 files changed, 17 
insertions(+), 2 deletions(-) > >diff --git a/ctdb/tests/cunit/protocol_test_101.sh b/ctdb/tests/cunit/protocol_test_101.sh >index 36751d4fbe7..a0bf9d08754 100755 >--- a/ctdb/tests/cunit/protocol_test_101.sh >+++ b/ctdb/tests/cunit/protocol_test_101.sh >@@ -2,7 +2,7 @@ > > . "${TEST_SCRIPTS_DIR}/unit.sh" > >-last_control=153 >+last_control=154 > > generate_control_output () > { >diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c >index 4aa4cfc5bde..b02976b5d67 100644 >--- a/ctdb/tests/src/protocol_common_ctdb.c >+++ b/ctdb/tests/src/protocol_common_ctdb.c >@@ -588,6 +588,12 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer); >+ assert(cd->data.recbuf != NULL); >+ fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); >+ break; > } > } > >@@ -974,6 +980,10 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; >+ >+ case CTDB_CONTROL_VACUUM_FETCH: >+ verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); >+ break; > } > } > >@@ -1368,6 +1378,9 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; > >+ case CTDB_CONTROL_VACUUM_FETCH: >+ break; >+ > } > } > >@@ -1703,6 +1716,8 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > case CTDB_CONTROL_TUNNEL_DEREGISTER: > break; > >+ case CTDB_CONTROL_VACUUM_FETCH: >+ break; > } > } > >diff --git a/ctdb/tests/src/protocol_ctdb_test.c b/ctdb/tests/src/protocol_ctdb_test.c >index 9eb25d96186..3ebf15dff6c 100644 >--- a/ctdb/tests/src/protocol_ctdb_test.c >+++ b/ctdb/tests/src/protocol_ctdb_test.c >@@ -284,7 +284,7 @@ PROTOCOL_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster, > PROTOCOL_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster, > CTDB_REPLY_DMASTER); > >-#define NUM_CONTROLS 154 >+#define NUM_CONTROLS 155 > > PROTOCOL_CTDB2_TEST(struct ctdb_req_control_data, ctdb_req_control_data); > PROTOCOL_CTDB2_TEST(struct ctdb_reply_control_data, ctdb_reply_control_data); >-- >2.25.1 > > >From 7ae70cc7424d781578b7540d37689049288e3140 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 15:30:13 +1100 >Subject: [PATCH 07/38] ctdb-daemon: Add implementation of VACUUM_FETCH control > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit da617f90d90151f955ee354c57bdc4bc6f6498f2) >--- > ctdb/include/ctdb_private.h | 3 ++ > ctdb/server/ctdb_control.c | 3 ++ > ctdb/server/ctdb_freeze.c | 9 ++++- > ctdb/server/ctdb_ltdb_server.c | 6 ++++ > ctdb/server/ctdb_vacuum.c | 66 ++++++++++++++++++++++++++++++++++ > 5 files changed, 86 insertions(+), 1 deletion(-) > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index 7f160c0c9db..d7b568d6c0c 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -359,6 +359,7 @@ struct ctdb_db_context { > struct revokechild_handle *revokechild_active; > struct ctdb_persistent_state *persistent_state; > struct trbt_tree *delete_queue; >+ struct trbt_tree *fetch_queue; > struct trbt_tree *sticky_records; > int (*ctdb_ltdb_store_fn)(struct ctdb_db_context *ctdb_db, > TDB_DATA key, >@@ -998,6 +999,8 @@ void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db, > const struct ctdb_ltdb_header *hdr, > const TDB_DATA key); > >+int32_t 
ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata); >+ > /* from eventscript.c */ > > int ctdb_start_eventd(struct ctdb_context *ctdb); >diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c >index 6c91e211660..0174f303f14 100644 >--- a/ctdb/server/ctdb_control.c >+++ b/ctdb/server/ctdb_control.c >@@ -729,6 +729,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, > case CTDB_CONTROL_TUNNEL_DEREGISTER: > return ctdb_control_tunnel_deregister(ctdb, client_id, srvid); > >+ case CTDB_CONTROL_VACUUM_FETCH: >+ return ctdb_control_vacuum_fetch(ctdb, indata); >+ > default: > DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); > return -1; >diff --git a/ctdb/server/ctdb_freeze.c b/ctdb/server/ctdb_freeze.c >index b4b99a0e5c9..06aeacfd939 100644 >--- a/ctdb/server/ctdb_freeze.c >+++ b/ctdb/server/ctdb_freeze.c >@@ -869,10 +869,17 @@ int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata) > > if (ctdb_db_volatile(ctdb_db)) { > talloc_free(ctdb_db->delete_queue); >+ talloc_free(ctdb_db->fetch_queue); > ctdb_db->delete_queue = trbt_create(ctdb_db, 0); > if (ctdb_db->delete_queue == NULL) { > DEBUG(DEBUG_ERR, (__location__ " Failed to re-create " >- "the vacuum tree.\n")); >+ "the delete queue.\n")); >+ return -1; >+ } >+ ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); >+ if (ctdb_db->fetch_queue == NULL) { >+ DEBUG(DEBUG_ERR, (__location__ " Failed to re-create " >+ "the fetch queue.\n")); > return -1; > } > } >diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c >index 022baf62d92..1ccf60832e1 100644 >--- a/ctdb/server/ctdb_ltdb_server.c >+++ b/ctdb/server/ctdb_ltdb_server.c >@@ -770,6 +770,11 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, > CTDB_NO_MEMORY(ctdb, ctdb_db->delete_queue); > } > >+ ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); >+ if (ctdb_db->fetch_queue == NULL) { >+ CTDB_NO_MEMORY(ctdb, ctdb_db->fetch_queue); >+ } >+ > ctdb_db->ctdb_ltdb_store_fn = ctdb_ltdb_store_server; > } > >@@ -1272,6 +1277,7 @@ int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata, > /* Disable vacuuming and drop all vacuuming data */ > talloc_free(ctdb_db->vacuum_handle); > talloc_free(ctdb_db->delete_queue); >+ talloc_free(ctdb_db->fetch_queue); > > /* Terminate any deferred fetch */ > talloc_free(ctdb_db->deferred_fetch); >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 4fd11e3738c..6f28fa89cc9 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -41,6 +41,8 @@ > #include "common/common.h" > #include "common/logging.h" > >+#include "protocol/protocol_api.h" >+ > #define TIMELIMIT() timeval_current_ofs(10, 0) > > enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT}; >@@ -117,6 +119,11 @@ struct delete_records_list { > struct vacuum_data *vdata; > }; > >+struct fetch_record_data { >+ TDB_DATA key; >+ uint8_t keydata[1]; >+}; >+ > static int insert_record_into_delete_queue(struct ctdb_db_context *ctdb_db, > const struct ctdb_ltdb_header *hdr, > TDB_DATA key); >@@ -1573,3 +1580,62 @@ void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db, > > return; > } >+ >+static int vacuum_fetch_parser(uint32_t reqid, >+ struct ctdb_ltdb_header *header, >+ TDB_DATA key, TDB_DATA data, >+ void *private_data) >+{ >+ struct ctdb_db_context *ctdb_db = talloc_get_type_abort( >+ private_data, struct ctdb_db_context); >+ struct fetch_record_data *rd; >+ size_t 
len; >+ uint32_t hash; >+ >+ len = offsetof(struct fetch_record_data, keydata) + key.dsize; >+ >+ rd = (struct fetch_record_data *)talloc_size(ctdb_db->fetch_queue, >+ len); >+ if (rd == NULL) { >+ DEBUG(DEBUG_ERR, (__location__ " Memory error\n")); >+ return -1; >+ } >+ talloc_set_name_const(rd, "struct fetch_record_data"); >+ >+ rd->key.dsize = key.dsize; >+ rd->key.dptr = rd->keydata; >+ memcpy(rd->keydata, key.dptr, key.dsize); >+ >+ hash = ctdb_hash(&key); >+ >+ trbt_insert32(ctdb_db->fetch_queue, hash, rd); >+ >+ return 0; >+} >+ >+int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata) >+{ >+ struct ctdb_rec_buffer *recbuf; >+ struct ctdb_db_context *ctdb_db; >+ size_t npull; >+ int ret; >+ >+ ret = ctdb_rec_buffer_pull(indata.dptr, indata.dsize, ctdb, &recbuf, >+ &npull); >+ if (ret != 0) { >+ DEBUG(DEBUG_ERR, ("Invalid data in vacuum_fetch\n")); >+ return -1; >+ } >+ >+ ctdb_db = find_ctdb_db(ctdb, recbuf->db_id); >+ if (ctdb_db == NULL) { >+ talloc_free(recbuf); >+ DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x\n", >+ recbuf->db_id)); >+ return -1; >+ } >+ >+ ret = ctdb_rec_buffer_traverse(recbuf, vacuum_fetch_parser, ctdb_db); >+ talloc_free(recbuf); >+ return ret; >+} >-- >2.25.1 > > >From 708223d1d28719374c6f8dbb5bbbbd3cc6be9954 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 17:00:40 +1100 >Subject: [PATCH 08/38] ctdb-vacuum: Add processing of fetch queue > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 86521837b684df3b7c5a0a1e3b7e606c8b91f63e) >--- > ctdb/server/ctdb_vacuum.c | 192 +++++++++++++++++++++++++++++++++++++- > 1 file changed, 189 insertions(+), 3 deletions(-) > >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 6f28fa89cc9..410ef8bf722 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -317,6 +317,181 @@ static int delete_marshall_traverse(void *param, void *data) > return 0; > } > >+struct fetch_queue_state { >+ struct ctdb_db_context *ctdb_db; >+ int count; >+}; >+ >+struct fetch_record_migrate_state { >+ struct fetch_queue_state *fetch_queue; >+ TDB_DATA key; >+}; >+ >+static void fetch_record_migrate_callback(struct ctdb_client_call_state *state) >+{ >+ struct fetch_record_migrate_state *fetch = talloc_get_type_abort( >+ state->async.private_data, struct fetch_record_migrate_state); >+ struct fetch_queue_state *fetch_queue = fetch->fetch_queue; >+ struct ctdb_ltdb_header hdr; >+ struct ctdb_call call = { 0 }; >+ int ret; >+ >+ ret = ctdb_call_recv(state, &call); >+ fetch_queue->count--; >+ if (ret != 0) { >+ D_ERR("Failed to migrate record for vacuuming\n"); >+ goto done; >+ } >+ >+ ret = tdb_chainlock_nonblock(fetch_queue->ctdb_db->ltdb->tdb, >+ fetch->key); >+ if (ret != 0) { >+ goto done; >+ } >+ >+ ret = tdb_parse_record(fetch_queue->ctdb_db->ltdb->tdb, >+ fetch->key, >+ vacuum_record_parser, >+ &hdr); >+ >+ tdb_chainunlock(fetch_queue->ctdb_db->ltdb->tdb, fetch->key); >+ >+ if (ret != 0) { >+ goto done; >+ } >+ >+ D_INFO("Vacuum Fetch record, key=%.*s\n", >+ (int)fetch->key.dsize, >+ fetch->key.dptr); >+ >+ (void) ctdb_local_schedule_for_deletion(fetch_queue->ctdb_db, >+ &hdr, >+ fetch->key); >+ >+done: >+ talloc_free(fetch); >+} >+ >+static int fetch_record_parser(TDB_DATA key, TDB_DATA data, void *private_data) >+{ >+ struct ctdb_ltdb_header *header = >+ (struct ctdb_ltdb_header *)private_data; >+ >+ if (data.dsize < sizeof(struct ctdb_ltdb_header)) { >+ 
return -1; >+ } >+ >+ memcpy(header, data.dptr, sizeof(*header)); >+ return 0; >+} >+ >+/** >+ * traverse function for the traversal of the fetch_queue. >+ * >+ * Send a record migration request. >+ */ >+static int fetch_queue_traverse(void *param, void *data) >+{ >+ struct fetch_record_data *rd = talloc_get_type_abort( >+ data, struct fetch_record_data); >+ struct fetch_queue_state *fetch_queue = >+ (struct fetch_queue_state *)param; >+ struct ctdb_db_context *ctdb_db = fetch_queue->ctdb_db; >+ struct ctdb_client_call_state *state; >+ struct fetch_record_migrate_state *fetch; >+ struct ctdb_call call = { 0 }; >+ struct ctdb_ltdb_header header; >+ int ret; >+ >+ ret = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, rd->key); >+ if (ret != 0) { >+ return 0; >+ } >+ >+ ret = tdb_parse_record(ctdb_db->ltdb->tdb, >+ rd->key, >+ fetch_record_parser, >+ &header); >+ >+ tdb_chainunlock(ctdb_db->ltdb->tdb, rd->key); >+ >+ if (ret != 0) { >+ goto skipped; >+ } >+ >+ if (header.dmaster == ctdb_db->ctdb->pnn) { >+ /* If the record is already migrated, skip */ >+ goto skipped; >+ } >+ >+ fetch = talloc_zero(ctdb_db, struct fetch_record_migrate_state); >+ if (fetch == NULL) { >+ D_ERR("Failed to setup fetch record migrate state\n"); >+ return 0; >+ } >+ >+ fetch->fetch_queue = fetch_queue; >+ >+ fetch->key.dsize = rd->key.dsize; >+ fetch->key.dptr = talloc_memdup(fetch, rd->key.dptr, rd->key.dsize); >+ if (fetch->key.dptr == NULL) { >+ D_ERR("Memory error in fetch_queue_traverse\n"); >+ talloc_free(fetch); >+ return 0; >+ } >+ >+ call.call_id = CTDB_NULL_FUNC; >+ call.flags = CTDB_IMMEDIATE_MIGRATION | >+ CTDB_CALL_FLAG_VACUUM_MIGRATION; >+ call.key = fetch->key; >+ >+ state = ctdb_call_send(ctdb_db, &call); >+ if (state == NULL) { >+ DEBUG(DEBUG_ERR, ("Failed to setup vacuum fetch call\n")); >+ talloc_free(fetch); >+ return 0; >+ } >+ >+ state->async.fn = fetch_record_migrate_callback; >+ state->async.private_data = fetch; >+ >+ fetch_queue->count++; >+ >+ return 0; >+ >+skipped: >+ D_INFO("Skipped Fetch record, key=%.*s\n", >+ (int)rd->key.dsize, >+ rd->key.dptr); >+ return 0; >+} >+ >+/** >+ * Traverse the fetch. >+ * Records are migrated to the local node and >+ * added to delete queue for further processing. >+ */ >+static void ctdb_process_fetch_queue(struct ctdb_db_context *ctdb_db) >+{ >+ struct fetch_queue_state state; >+ int ret; >+ >+ state.ctdb_db = ctdb_db; >+ state.count = 0; >+ >+ ret = trbt_traversearray32(ctdb_db->fetch_queue, 1, >+ fetch_queue_traverse, &state); >+ if (ret != 0) { >+ DEBUG(DEBUG_ERR, (__location__ " Error traversing " >+ "the fetch queue.\n")); >+ } >+ >+ /* Wait for all migrations to complete */ >+ while (state.count > 0) { >+ tevent_loop_once(ctdb_db->ctdb->ev); >+ } >+} >+ > /** > * traverse function for the traversal of the delete_queue, > * the fast-path vacuuming list. >@@ -998,8 +1173,10 @@ fail: > /** > * Vacuum a DB: > * - Always do the fast vacuuming run, which traverses >- * the in-memory delete queue: these records have been >- * scheduled for deletion. >+ * - the in-memory fetch queue: these records have been >+ * scheduled for migration >+ * - the in-memory delete queue: these records have been >+ * scheduled for deletion. > * - Only if explicitly requested, the database is traversed > * in order to use the traditional heuristics on empty records > * to trigger deletion. 
>@@ -1070,6 +1247,8 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, > ctdb_vacuum_traverse_db(ctdb_db, vdata); > } > >+ ctdb_process_fetch_queue(ctdb_db); >+ > ctdb_process_delete_queue(ctdb_db, vdata); > > ctdb_process_vacuum_fetch_lists(ctdb_db, vdata); >@@ -1309,10 +1488,17 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > ctdb_db->delete_queue = trbt_create(ctdb_db, 0); > if (ctdb_db->delete_queue == NULL) { > /* fatal here? ... */ >- ctdb_fatal(ctdb, "Out of memory when re-creating vacuum tree " >+ ctdb_fatal(ctdb, "Out of memory when re-creating delete queue " > "in parent context. Shutting down\n"); > } > >+ talloc_free(ctdb_db->fetch_queue); >+ ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); >+ if (ctdb_db->fetch_queue == NULL) { >+ ctdb_fatal(ctdb, "Out of memory when re-create fetch queue " >+ " in parent context. Shutting down\n"); >+ } >+ > tevent_add_timer(ctdb->ev, child_ctx, > timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, 0), > vacuum_child_timeout, child_ctx); >-- >2.25.1 > > >From ecf1598f1d9e8bf770e82c991b02ca461cdb2e07 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 17:01:21 +1100 >Subject: [PATCH 09/38] ctdb-vacuum: Replace VACUUM_FETCH message with control > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 498932c0e8e8614bd52f3270c4d63e2b5f9e26a4) >--- > ctdb/server/ctdb_vacuum.c | 18 +++++++++--------- > 1 file changed, 9 insertions(+), 9 deletions(-) > >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 410ef8bf722..ddaef863045 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -839,6 +839,7 @@ static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db, > { > unsigned int i; > struct ctdb_context *ctdb = ctdb_db->ctdb; >+ int ret, res; > > for (i = 0; i < ctdb->num_nodes; i++) { > TDB_DATA data; >@@ -857,17 +858,16 @@ static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db, > ctdb_db->db_name)); > > data = ctdb_marshall_finish(vfl); >- if (ctdb_client_send_message(ctdb, ctdb->nodes[i]->pnn, >- CTDB_SRVID_VACUUM_FETCH, >- data) != 0) >- { >- DEBUG(DEBUG_ERR, (__location__ " Failed to send vacuum " >- "fetch message to %u\n", >+ >+ ret = ctdb_control(ctdb, ctdb->nodes[i]->pnn, 0, >+ CTDB_CONTROL_VACUUM_FETCH, 0, >+ data, NULL, NULL, &res, NULL, NULL); >+ if (ret != 0 || res != 0) { >+ DEBUG(DEBUG_ERR, ("Failed to send vacuum " >+ "fetch control to node %u\n", > ctdb->nodes[i]->pnn)); > } > } >- >- return; > } > > /** >@@ -1197,7 +1197,7 @@ fail: > * - The vacuum_fetch lists > * (one for each other lmaster node): > * The records in this list are sent for deletion to >- * their lmaster in a bulk VACUUM_FETCH message. >+ * their lmaster in a bulk VACUUM_FETCH control. > * > * The lmaster then migrates all these records to itelf > * so that they can be vacuumed there. >-- >2.25.1 > > >From ea40b47835702b02df87253278e5d37a62aed82d Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 17:13:35 +1100 >Subject: [PATCH 10/38] ctdb-recoverd: Drop VACUUM_FETCH message handling > >This is now implemented in the ctdb daemon using VACUMM_FETCH control. 
> >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit fc81729dd2d8eddea1e60e22b183894d6541c7dc) >--- > ctdb/server/ctdb_recoverd.c | 149 ------------------------------------ > 1 file changed, 149 deletions(-) > >diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c >index 3d5b727715a..bbaf1270558 100644 >--- a/ctdb/server/ctdb_recoverd.c >+++ b/ctdb/server/ctdb_recoverd.c >@@ -574,152 +574,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node > return 0; > } > >-/* >- called when a vacuum fetch has completed - just free it and do the next one >- */ >-static void vacuum_fetch_callback(struct ctdb_client_call_state *state) >-{ >- talloc_free(state); >-} >- >- >-/** >- * Process one elements of the vacuum fetch list: >- * Migrate it over to us with the special flag >- * CTDB_CALL_FLAG_VACUUM_MIGRATION. >- */ >-static bool vacuum_fetch_process_one(struct ctdb_db_context *ctdb_db, >- uint32_t pnn, >- struct ctdb_rec_data_old *r) >-{ >- struct ctdb_client_call_state *state; >- TDB_DATA data; >- struct ctdb_ltdb_header *hdr; >- struct ctdb_call call; >- >- ZERO_STRUCT(call); >- call.call_id = CTDB_NULL_FUNC; >- call.flags = CTDB_IMMEDIATE_MIGRATION; >- call.flags |= CTDB_CALL_FLAG_VACUUM_MIGRATION; >- >- call.key.dptr = &r->data[0]; >- call.key.dsize = r->keylen; >- >- /* ensure we don't block this daemon - just skip a record if we can't get >- the chainlock */ >- if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, call.key) != 0) { >- return true; >- } >- >- data = tdb_fetch(ctdb_db->ltdb->tdb, call.key); >- if (data.dptr == NULL) { >- tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); >- return true; >- } >- >- if (data.dsize < sizeof(struct ctdb_ltdb_header)) { >- free(data.dptr); >- tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); >- return true; >- } >- >- hdr = (struct ctdb_ltdb_header *)data.dptr; >- if (hdr->dmaster == pnn) { >- /* its already local */ >- free(data.dptr); >- tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); >- return true; >- } >- >- free(data.dptr); >- >- state = ctdb_call_send(ctdb_db, &call); >- tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); >- if (state == NULL) { >- DEBUG(DEBUG_ERR,(__location__ " Failed to setup vacuum fetch call\n")); >- return false; >- } >- state->async.fn = vacuum_fetch_callback; >- state->async.private_data = NULL; >- >- return true; >-} >- >- >-/* >- handler for vacuum fetch >-*/ >-static void vacuum_fetch_handler(uint64_t srvid, TDB_DATA data, >- void *private_data) >-{ >- struct ctdb_recoverd *rec = talloc_get_type( >- private_data, struct ctdb_recoverd); >- struct ctdb_context *ctdb = rec->ctdb; >- struct ctdb_marshall_buffer *recs; >- unsigned int i; >- int ret; >- TALLOC_CTX *tmp_ctx = talloc_new(ctdb); >- const char *name; >- struct ctdb_dbid_map_old *dbmap=NULL; >- uint8_t db_flags = 0; >- struct ctdb_db_context *ctdb_db; >- struct ctdb_rec_data_old *r; >- >- recs = (struct ctdb_marshall_buffer *)data.dptr; >- >- if (recs->count == 0) { >- goto done; >- } >- >- /* work out if the database is persistent */ >- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &dbmap); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from local node\n")); >- goto done; >- } >- >- for (i=0;i<dbmap->num;i++) { >- if (dbmap->dbs[i].db_id == recs->db_id) { >- db_flags = dbmap->dbs[i].flags; >- break; >- } >- } >- if (i == dbmap->num) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to find db_id 0x%x on 
local node\n", recs->db_id)); >- goto done; >- } >- >- /* find the name of this database */ >- if (ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, recs->db_id, tmp_ctx, &name) != 0) { >- DEBUG(DEBUG_ERR,(__location__ " Failed to get name of db 0x%x\n", recs->db_id)); >- goto done; >- } >- >- /* attach to it */ >- ctdb_db = ctdb_attach(ctdb, CONTROL_TIMEOUT(), name, db_flags); >- if (ctdb_db == NULL) { >- DEBUG(DEBUG_ERR,(__location__ " Failed to attach to database '%s'\n", name)); >- goto done; >- } >- >- r = (struct ctdb_rec_data_old *)&recs->data[0]; >- while (recs->count) { >- bool ok; >- >- ok = vacuum_fetch_process_one(ctdb_db, rec->ctdb->pnn, r); >- if (!ok) { >- break; >- } >- >- r = (struct ctdb_rec_data_old *)(r->length + (uint8_t *)r); >- recs->count--; >- } >- >-done: >- talloc_free(tmp_ctx); >-} >- >- > /* > * handler for database detach > */ >@@ -3147,9 +3001,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) > /* when we are asked to puch out a flag change */ > ctdb_client_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec); > >- /* register a message port for vacuum fetch */ >- ctdb_client_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec); >- > /* register a message port for reloadnodes */ > ctdb_client_set_message_handler(ctdb, CTDB_SRVID_RELOAD_NODES, reload_nodes_handler, rec); > >-- >2.25.1 > > >From def9c781591aaf33b37ab7a0fe3153d2f7c3fe82 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Fri, 16 Feb 2018 17:17:38 +1100 >Subject: [PATCH 11/38] ctdb-recoverd: No need for database detach handler > >The only reason for recoverd attaching to databases was to migrate >records to the local node as part of vacuuming. Recovery daemon does >not take part in database vacuuming any more. > >The actual database recovery is handled via the recovery_helper and >recovery daemon should not need to attach to the databases any more. 
> >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit c6427dddf5425b267d8c09e8df18653a48679646) >--- > ctdb/server/ctdb_ltdb_server.c | 8 -------- > ctdb/server/ctdb_recoverd.c | 35 ---------------------------------- > 2 files changed, 43 deletions(-) > >diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c >index 1ccf60832e1..970eb54b00b 100644 >--- a/ctdb/server/ctdb_ltdb_server.c >+++ b/ctdb/server/ctdb_ltdb_server.c >@@ -1266,14 +1266,6 @@ int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata, > return -1; > } > >- /* Detach database from recoverd */ >- if (ctdb_daemon_send_message(ctdb, ctdb->pnn, >- CTDB_SRVID_DETACH_DATABASE, >- indata) != 0) { >- DEBUG(DEBUG_ERR, ("Unable to detach DB from recoverd\n")); >- return -1; >- } >- > /* Disable vacuuming and drop all vacuuming data */ > talloc_free(ctdb_db->vacuum_handle); > talloc_free(ctdb_db->delete_queue); >diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c >index bbaf1270558..857736e30c8 100644 >--- a/ctdb/server/ctdb_recoverd.c >+++ b/ctdb/server/ctdb_recoverd.c >@@ -574,36 +574,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node > return 0; > } > >-/* >- * handler for database detach >- */ >-static void detach_database_handler(uint64_t srvid, TDB_DATA data, >- void *private_data) >-{ >- struct ctdb_recoverd *rec = talloc_get_type( >- private_data, struct ctdb_recoverd); >- struct ctdb_context *ctdb = rec->ctdb; >- uint32_t db_id; >- struct ctdb_db_context *ctdb_db; >- >- if (data.dsize != sizeof(db_id)) { >- return; >- } >- db_id = *(uint32_t *)data.dptr; >- >- ctdb_db = find_ctdb_db(ctdb, db_id); >- if (ctdb_db == NULL) { >- /* database is not attached */ >- return; >- } >- >- DLIST_REMOVE(ctdb->db_list, ctdb_db); >- >- DEBUG(DEBUG_NOTICE, ("Detached from database '%s'\n", >- ctdb_db->db_name)); >- talloc_free(ctdb_db); >-} >- > /* > called when ctdb_wait_timeout should finish > */ >@@ -3024,11 +2994,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) > CTDB_SRVID_DISABLE_RECOVERIES, > disable_recoveries_handler, rec); > >- /* register a message port for detaching database */ >- ctdb_client_set_message_handler(ctdb, >- CTDB_SRVID_DETACH_DATABASE, >- detach_database_handler, rec); >- > for (;;) { > TALLOC_CTX *mem_ctx = talloc_new(ctdb); > struct timeval start; >-- >2.25.1 > > >From 8bea2828efde8d4bc2761fd0b5c674927967f6c4 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Wed, 6 Jun 2018 15:47:13 +0200 >Subject: [PATCH 12/38] ctdb-daemon: Avoid memory leak when packet is deferred > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 680df07630a94b3e76edefe98ee0986e7e5e1f12) >--- > ctdb/server/ctdb_server.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > >diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c >index 4b4c2e9896f..1470b00dba5 100644 >--- a/ctdb/server/ctdb_server.c >+++ b/ctdb/server/ctdb_server.c >@@ -374,8 +374,9 @@ static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header > return; > } > q->ctdb = ctdb; >- q->hdr = talloc_memdup(ctdb, hdr, hdr->length); >+ q->hdr = talloc_memdup(q, hdr, hdr->length); > if (q->hdr == NULL) { >+ talloc_free(q); > DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n")); > return; > } >-- >2.25.1 > > >From c19bbeb3cdb2e23a8cddeb93027d2fa6e97bdae1 Mon Sep 17 
00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Tue, 1 Oct 2019 15:05:10 +1000 >Subject: [PATCH 13/38] ctdb-vacuum: Avoid processing any more packets > >All the vacuum operations if required have an event loop to ensure >completion of pending operations. Once all the steps are complete, >there is no reason to process any more packets. > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit d0cc9edc05b6218a8e20a0a8009cbb9918ff4d02) >--- > ctdb/server/ctdb_vacuum.c | 3 --- > 1 file changed, 3 deletions(-) > >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index ddaef863045..79dced38bf5 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -1257,9 +1257,6 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, > > talloc_free(tmp_ctx); > >- /* this ensures we run our event queue */ >- ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE); >- > return 0; > } > >-- >2.25.1 > > >From e9d2b18a1b2d79ba9b297604e06cff2ee1b2b7bb Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 30 Jul 2019 10:34:03 +1000 >Subject: [PATCH 14/38] ctdb-protocol: Add new control CTDB_CONTROL_DB_VACUUM > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit b314835341e4028f0770fa7f9a37d2d21448ddfd) >--- > ctdb/protocol/protocol.h | 8 ++++++++ > 1 file changed, 8 insertions(+) > >diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h >index e47daeadba1..43175ae3a95 100644 >--- a/ctdb/protocol/protocol.h >+++ b/ctdb/protocol/protocol.h >@@ -374,6 +374,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, > CTDB_CONTROL_TUNNEL_REGISTER = 152, > CTDB_CONTROL_TUNNEL_DEREGISTER = 153, > CTDB_CONTROL_VACUUM_FETCH = 154, >+ CTDB_CONTROL_DB_VACUUM = 155, > }; > > #define MAX_COUNT_BUCKETS 16 >@@ -852,6 +853,12 @@ struct ctdb_pid_srvid { > uint64_t srvid; > }; > >+struct ctdb_db_vacuum { >+ uint32_t db_id; >+ bool full_vacuum_run; >+ >+}; >+ > struct ctdb_req_control_data { > uint32_t opcode; > union { >@@ -889,6 +896,7 @@ struct ctdb_req_control_data { > struct ctdb_traverse_start_ext *traverse_start_ext; > struct ctdb_traverse_all_ext *traverse_all_ext; > struct ctdb_pid_srvid *pid_srvid; >+ struct ctdb_db_vacuum *db_vacuum; > } data; > }; > >-- >2.25.1 > > >From 67cef10770048fe9f096659d34cf50ea22f2e140 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 30 Jul 2019 16:59:37 +1000 >Subject: [PATCH 15/38] ctdb-protocol: Add marshalling for struct > ctdb_db_vacuum > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit a896486b62bbcf9915727ba7bfc768fb5383f0c7) >--- > ctdb/protocol/protocol_private.h | 10 +++++ > ctdb/protocol/protocol_types.c | 63 ++++++++++++++++++++++++++++ > ctdb/tests/src/protocol_common.c | 13 ++++++ > ctdb/tests/src/protocol_common.h | 4 ++ > ctdb/tests/src/protocol_types_test.c | 2 + > 5 files changed, 92 insertions(+) > >diff --git a/ctdb/protocol/protocol_private.h b/ctdb/protocol/protocol_private.h >index 1c3e56fcb7a..b151e64ef09 100644 >--- a/ctdb/protocol/protocol_private.h >+++ b/ctdb/protocol/protocol_private.h >@@ -83,6 +83,16 @@ void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *in, uint8_t *buf, > int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx, > struct ctdb_pulldb_ext **out, size_t *npull); > >+size_t 
ctdb_db_vacuum_len(struct ctdb_db_vacuum *in); >+void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in, >+ uint8_t *buf, >+ size_t *npush); >+int ctdb_db_vacuum_pull(uint8_t *buf, >+ size_t buflen, >+ TALLOC_CTX *mem_ctx, >+ struct ctdb_db_vacuum **out, >+ size_t *npull); >+ > size_t ctdb_traverse_start_len(struct ctdb_traverse_start *in); > void ctdb_traverse_start_push(struct ctdb_traverse_start *in, uint8_t *buf, > size_t *npush); >diff --git a/ctdb/protocol/protocol_types.c b/ctdb/protocol/protocol_types.c >index d9388b74b1e..fb288635234 100644 >--- a/ctdb/protocol/protocol_types.c >+++ b/ctdb/protocol/protocol_types.c >@@ -1240,6 +1240,69 @@ fail: > return ret; > } > >+size_t ctdb_db_vacuum_len(struct ctdb_db_vacuum *in) >+{ >+ return ctdb_uint32_len(&in->db_id) + >+ ctdb_bool_len(&in->full_vacuum_run); >+} >+ >+void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in, >+ uint8_t *buf, >+ size_t *npush) >+{ >+ size_t offset = 0, np; >+ >+ ctdb_uint32_push(&in->db_id, buf+offset, &np); >+ offset += np; >+ >+ ctdb_bool_push(&in->full_vacuum_run, buf+offset, &np); >+ offset += np; >+ >+ *npush = offset; >+} >+ >+int ctdb_db_vacuum_pull(uint8_t *buf, >+ size_t buflen, >+ TALLOC_CTX *mem_ctx, >+ struct ctdb_db_vacuum **out, >+ size_t *npull) >+{ >+ struct ctdb_db_vacuum *val; >+ size_t offset = 0, np; >+ int ret; >+ >+ val = talloc(mem_ctx, struct ctdb_db_vacuum); >+ if (val == NULL) { >+ return ENOMEM; >+ } >+ >+ ret = ctdb_uint32_pull(buf+offset, >+ buflen-offset, >+ &val->db_id, >+ &np); >+ if (ret != 0) { >+ goto fail;; >+ } >+ offset += np; >+ >+ ret = ctdb_bool_pull(buf+offset, >+ buflen-offset, >+ &val->full_vacuum_run, >+ &np); >+ if (ret != 0) { >+ goto fail; >+ } >+ offset += np; >+ >+ *out = val; >+ *npull = offset; >+ return 0; >+ >+fail: >+ talloc_free(val); >+ return ret; >+} >+ > size_t ctdb_ltdb_header_len(struct ctdb_ltdb_header *in) > { > return ctdb_uint64_len(&in->rsn) + >diff --git a/ctdb/tests/src/protocol_common.c b/ctdb/tests/src/protocol_common.c >index 73e9a8c5d0d..2030b4bb5e5 100644 >--- a/ctdb/tests/src/protocol_common.c >+++ b/ctdb/tests/src/protocol_common.c >@@ -310,6 +310,19 @@ void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1, > assert(p1->srvid == p2->srvid); > } > >+void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p) >+{ >+ fill_ctdb_uint32(&p->db_id); >+ fill_ctdb_bool(&p->full_vacuum_run); >+} >+ >+void verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1, >+ struct ctdb_db_vacuum *p2) >+{ >+ verify_ctdb_uint32(&p1->db_id, &p2->db_id); >+ verify_ctdb_bool(&p1->full_vacuum_run, &p2->full_vacuum_run); >+} >+ > void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p) > { > p->rsn = rand64(); >diff --git a/ctdb/tests/src/protocol_common.h b/ctdb/tests/src/protocol_common.h >index ec00cf97b63..2b4fb6a07a9 100644 >--- a/ctdb/tests/src/protocol_common.h >+++ b/ctdb/tests/src/protocol_common.h >@@ -64,6 +64,10 @@ void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx, struct ctdb_pulldb_ext *p); > void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1, > struct ctdb_pulldb_ext *p2); > >+void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p); >+void verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1, >+ struct ctdb_db_vacuum *p2); >+ > void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p); > void verify_ctdb_ltdb_header(struct ctdb_ltdb_header *p1, > struct ctdb_ltdb_header *p2); >diff --git a/ctdb/tests/src/protocol_types_test.c b/ctdb/tests/src/protocol_types_test.c >index e607d06b820..e9cf4debe89 100644 >--- 
a/ctdb/tests/src/protocol_types_test.c >+++ b/ctdb/tests/src/protocol_types_test.c >@@ -38,6 +38,7 @@ PROTOCOL_TYPE3_TEST(struct ctdb_dbid, ctdb_dbid); > PROTOCOL_TYPE3_TEST(struct ctdb_dbid_map, ctdb_dbid_map); > PROTOCOL_TYPE3_TEST(struct ctdb_pulldb, ctdb_pulldb); > PROTOCOL_TYPE3_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext); >+PROTOCOL_TYPE3_TEST(struct ctdb_db_vacuum, ctdb_db_vacuum); > PROTOCOL_TYPE1_TEST(struct ctdb_ltdb_header, ctdb_ltdb_header); > PROTOCOL_TYPE3_TEST(struct ctdb_rec_data, ctdb_rec_data); > PROTOCOL_TYPE3_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer); >@@ -143,6 +144,7 @@ int main(int argc, char *argv[]) > TEST_FUNC(ctdb_dbid_map)(); > TEST_FUNC(ctdb_pulldb)(); > TEST_FUNC(ctdb_pulldb_ext)(); >+ TEST_FUNC(ctdb_db_vacuum)(); > TEST_FUNC(ctdb_ltdb_header)(); > TEST_FUNC(ctdb_rec_data)(); > TEST_FUNC(ctdb_rec_buffer)(); >-- >2.25.1 > > >From d653d75e3af6d77bba9ea9afbb939574717e3680 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 30 Jul 2019 10:52:05 +1000 >Subject: [PATCH 16/38] ctdb-protocol: Add marshalling for control DB_VACUUM > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 496204feb0e2b6eb2f3d9a74e45596a3e74ad9b1) >--- > ctdb/protocol/protocol_api.h | 4 ++++ > ctdb/protocol/protocol_client.c | 24 ++++++++++++++++++++++++ > ctdb/protocol/protocol_control.c | 25 +++++++++++++++++++++++++ > ctdb/protocol/protocol_debug.c | 1 + > ctdb/tests/cunit/protocol_test_101.sh | 2 +- > ctdb/tests/src/protocol_common_ctdb.c | 15 +++++++++++++++ > ctdb/tests/src/protocol_ctdb_test.c | 2 +- > 7 files changed, 71 insertions(+), 2 deletions(-) > >diff --git a/ctdb/protocol/protocol_api.h b/ctdb/protocol/protocol_api.h >index cf4c4635dd4..c2cd4a76289 100644 >--- a/ctdb/protocol/protocol_api.h >+++ b/ctdb/protocol/protocol_api.h >@@ -607,6 +607,10 @@ void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, > struct ctdb_rec_buffer *recbuf); > int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply); > >+void ctdb_req_control_db_vacuum(struct ctdb_req_control *request, >+ struct ctdb_db_vacuum *db_vacuum); >+int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply); >+ > /* From protocol/protocol_debug.c */ > > void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp); >diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c >index d5f6a222fe4..84dc55a34a3 100644 >--- a/ctdb/protocol/protocol_client.c >+++ b/ctdb/protocol/protocol_client.c >@@ -2357,3 +2357,27 @@ int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply) > > return reply->status; > } >+ >+/* CTDB_CONTROL_DB_VACUUM */ >+ >+void ctdb_req_control_db_vacuum(struct ctdb_req_control *request, >+ struct ctdb_db_vacuum *db_vacuum) >+{ >+ request->opcode = CTDB_CONTROL_DB_VACUUM; >+ request->pad = 0; >+ request->srvid = 0; >+ request->client_id = 0; >+ request->flags = 0; >+ >+ request->rdata.opcode = CTDB_CONTROL_DB_VACUUM; >+ request->rdata.data.db_vacuum = db_vacuum; >+} >+ >+int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply) >+{ >+ if (reply->rdata.opcode != CTDB_CONTROL_DB_VACUUM) { >+ return EPROTO; >+ } >+ >+ return reply->status; >+} >diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c >index 1cc985a71a7..a25c9b1cfe0 100644 >--- a/ctdb/protocol/protocol_control.c >+++ b/ctdb/protocol/protocol_control.c >@@ -411,6 +411,10 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data 
*cd) > case CTDB_CONTROL_VACUUM_FETCH: > len = ctdb_rec_buffer_len(cd->data.recbuf); > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ len = ctdb_db_vacuum_len(cd->data.db_vacuum); >+ break; > } > > return len; >@@ -690,6 +694,10 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, > case CTDB_CONTROL_VACUUM_FETCH: > ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ ctdb_db_vacuum_push(cd->data.db_vacuum, buf, &np); >+ break; > } > > *npush = np; >@@ -1019,6 +1027,14 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, > ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, > &cd->data.recbuf, &np); > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ ret = ctdb_db_vacuum_pull(buf, >+ buflen, >+ mem_ctx, >+ &cd->data.db_vacuum, >+ &np); >+ break; > } > > if (ret != 0) { >@@ -1379,6 +1395,9 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) > > case CTDB_CONTROL_VACUUM_FETCH: > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ break; > } > > return len; >@@ -1536,6 +1555,9 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, > > case CTDB_CONTROL_VACUUM_FETCH: > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ break; > } > > *npush = np; >@@ -1723,6 +1745,9 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, > > case CTDB_CONTROL_VACUUM_FETCH: > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ break; > } > > if (ret != 0) { >diff --git a/ctdb/protocol/protocol_debug.c b/ctdb/protocol/protocol_debug.c >index 97903ea98f4..3fe78b13162 100644 >--- a/ctdb/protocol/protocol_debug.c >+++ b/ctdb/protocol/protocol_debug.c >@@ -243,6 +243,7 @@ static void ctdb_opcode_print(uint32_t opcode, FILE *fp) > { CTDB_CONTROL_TUNNEL_REGISTER, "TUNNEL_REGISTER" }, > { CTDB_CONTROL_TUNNEL_DEREGISTER, "TUNNEL_DEREGISTER" }, > { CTDB_CONTROL_VACUUM_FETCH, "VACUUM_FETCH" }, >+ { CTDB_CONTROL_DB_VACUUM, "DB_VACUUM" }, > { MAP_END, "" }, > }; > >diff --git a/ctdb/tests/cunit/protocol_test_101.sh b/ctdb/tests/cunit/protocol_test_101.sh >index a0bf9d08754..6b07dc017ba 100755 >--- a/ctdb/tests/cunit/protocol_test_101.sh >+++ b/ctdb/tests/cunit/protocol_test_101.sh >@@ -2,7 +2,7 @@ > > . 
"${TEST_SCRIPTS_DIR}/unit.sh" > >-last_control=154 >+last_control=155 > > generate_control_output () > { >diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c >index b02976b5d67..6989010cd6f 100644 >--- a/ctdb/tests/src/protocol_common_ctdb.c >+++ b/ctdb/tests/src/protocol_common_ctdb.c >@@ -594,6 +594,12 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, > assert(cd->data.recbuf != NULL); > fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ cd->data.db_vacuum = talloc(mem_ctx, struct ctdb_db_vacuum); >+ assert(cd->data.db_vacuum != NULL); >+ fill_ctdb_db_vacuum(mem_ctx, cd->data.db_vacuum); >+ break; > } > } > >@@ -984,6 +990,10 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, > case CTDB_CONTROL_VACUUM_FETCH: > verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ verify_ctdb_db_vacuum(cd->data.db_vacuum, cd2->data.db_vacuum); >+ break; > } > } > >@@ -1381,6 +1391,8 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, > case CTDB_CONTROL_VACUUM_FETCH: > break; > >+ case CTDB_CONTROL_DB_VACUUM: >+ break; > } > } > >@@ -1718,6 +1730,9 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, > > case CTDB_CONTROL_VACUUM_FETCH: > break; >+ >+ case CTDB_CONTROL_DB_VACUUM: >+ break; > } > } > >diff --git a/ctdb/tests/src/protocol_ctdb_test.c b/ctdb/tests/src/protocol_ctdb_test.c >index 3ebf15dff6c..b13cd5491d1 100644 >--- a/ctdb/tests/src/protocol_ctdb_test.c >+++ b/ctdb/tests/src/protocol_ctdb_test.c >@@ -284,7 +284,7 @@ PROTOCOL_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster, > PROTOCOL_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster, > CTDB_REPLY_DMASTER); > >-#define NUM_CONTROLS 155 >+#define NUM_CONTROLS 156 > > PROTOCOL_CTDB2_TEST(struct ctdb_req_control_data, ctdb_req_control_data); > PROTOCOL_CTDB2_TEST(struct ctdb_reply_control_data, ctdb_reply_control_data); >-- >2.25.1 > > >From 06cf521a1cbdd1f1c682cc8a789d9e60e8cd16dd Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 4 Oct 2019 12:06:21 +1000 >Subject: [PATCH 17/38] ctdb-vacuum: Simplify recording of in-progress > vacuuming child > >There can only be one, so simplify the logic. 
> >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 5539edfdbe69d1d5f084d06753cce8ed6e524999) >--- > ctdb/include/ctdb_private.h | 2 +- > ctdb/server/ctdb_vacuum.c | 20 ++++++++------------ > 2 files changed, 9 insertions(+), 13 deletions(-) > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index d7b568d6c0c..19b8bb98d8f 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -318,7 +318,7 @@ struct ctdb_context { > > TALLOC_CTX *banning_ctx; > >- struct ctdb_vacuum_child_context *vacuumers; >+ struct ctdb_vacuum_child_context *vacuumer; > > /* mapping from pid to ctdb_client * */ > struct ctdb_client_pid_list *client_pids; >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 79dced38bf5..255898c84d9 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -48,7 +48,6 @@ > enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT}; > > struct ctdb_vacuum_child_context { >- struct ctdb_vacuum_child_context *next, *prev; > struct ctdb_vacuum_handle *vacuum_handle; > /* fd child writes status to */ > int fd[2]; >@@ -59,7 +58,6 @@ struct ctdb_vacuum_child_context { > > struct ctdb_vacuum_handle { > struct ctdb_db_context *ctdb_db; >- struct ctdb_vacuum_child_context *child_ctx; > uint32_t fast_path_count; > }; > >@@ -1325,7 +1323,7 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx) > child_ctx->vacuum_handle->fast_path_count++; > } > >- DLIST_REMOVE(ctdb->vacuumers, child_ctx); >+ ctdb->vacuumer = NULL; > > tevent_add_timer(ctdb->ev, child_ctx->vacuum_handle, > timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >@@ -1407,7 +1405,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > * same time. If there is vacuuming child process active, delay > * new vacuuming event to stagger vacuuming events. > */ >- if (ctdb->vacuumers != NULL) { >+ if (ctdb->vacuumer != NULL) { > tevent_add_timer(ctdb->ev, vacuum_handle, > timeval_current_ofs(0, 500*1000), > ctdb_vacuum_event, vacuum_handle); >@@ -1475,7 +1473,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > child_ctx->status = VACUUM_RUNNING; > child_ctx->start_time = timeval_current(); > >- DLIST_ADD(ctdb->vacuumers, child_ctx); >+ ctdb->vacuumer = child_ctx; > talloc_set_destructor(child_ctx, vacuum_child_destructor); > > /* >@@ -1506,19 +1504,17 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > TEVENT_FD_READ, vacuum_child_handler, child_ctx); > tevent_fd_set_auto_close(fde); > >- vacuum_handle->child_ctx = child_ctx; > child_ctx->vacuum_handle = vacuum_handle; > } > > void ctdb_stop_vacuuming(struct ctdb_context *ctdb) > { >- /* Simply free them all. 
*/ >- while (ctdb->vacuumers) { >- DEBUG(DEBUG_INFO, ("Aborting vacuuming for %s (%i)\n", >- ctdb->vacuumers->vacuum_handle->ctdb_db->db_name, >- (int)ctdb->vacuumers->child_pid)); >+ if (ctdb->vacuumer != NULL) { >+ D_INFO("Aborting vacuuming for %s (%i)\n", >+ ctdb->vacuumer->vacuum_handle->ctdb_db->db_name, >+ (int)ctdb->vacuumer->child_pid); > /* vacuum_child_destructor kills it, removes from list */ >- talloc_free(ctdb->vacuumers); >+ talloc_free(ctdb->vacuumer); > } > } > >-- >2.25.1 > > >From 657252c84a9d54470079fc2410deece682d0cb73 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 30 Jul 2019 14:16:13 +1000 >Subject: [PATCH 18/38] ctdb-daemon: Factor out code to create vacuuming child > >This changes the behaviour for some failures from exiting to simply >attempting to schedule the next run. > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 13cedaf0195c6bda3a3820aedb1ee65f36dfc23e) >--- > ctdb/server/ctdb_vacuum.c | 134 ++++++++++++++++++++++++-------------- > 1 file changed, 86 insertions(+), 48 deletions(-) > >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 255898c84d9..7d13c0a4222 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -1377,28 +1377,23 @@ static void vacuum_child_handler(struct tevent_context *ev, > /* > * this event is called every time we need to start a new vacuum process > */ >-static void ctdb_vacuum_event(struct tevent_context *ev, >- struct tevent_timer *te, >- struct timeval t, void *private_data) >+static int vacuum_db_child(TALLOC_CTX *mem_ctx, >+ struct ctdb_db_context *ctdb_db, >+ bool full_vacuum_run, >+ struct ctdb_vacuum_child_context **out) > { >- struct ctdb_vacuum_handle *vacuum_handle = talloc_get_type(private_data, struct ctdb_vacuum_handle); >- struct ctdb_db_context *ctdb_db = vacuum_handle->ctdb_db; > struct ctdb_context *ctdb = ctdb_db->ctdb; > struct ctdb_vacuum_child_context *child_ctx; > struct tevent_fd *fde; >- bool full_vacuum_run = false; > int ret; > > /* we don't vacuum if we are in recovery mode, or db frozen */ > if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || > ctdb_db_frozen(ctdb_db)) { >- DEBUG(DEBUG_INFO, ("Not vacuuming %s (%s)\n", ctdb_db->db_name, >- ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ? >- "in recovery" : "frozen")); >- tevent_add_timer(ctdb->ev, vacuum_handle, >- timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >- ctdb_vacuum_event, vacuum_handle); >- return; >+ D_INFO("Not vacuuming %s (%s)\n", ctdb_db->db_name, >+ ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ? >+ "in recovery" : "frozen"); >+ return EAGAIN; > } > > /* Do not allow multiple vacuuming child processes to be active at the >@@ -1406,35 +1401,22 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > * new vacuuming event to stagger vacuuming events. > */ > if (ctdb->vacuumer != NULL) { >- tevent_add_timer(ctdb->ev, vacuum_handle, >- timeval_current_ofs(0, 500*1000), >- ctdb_vacuum_event, vacuum_handle); >- return; >+ return EBUSY; > } > >- child_ctx = talloc(vacuum_handle, struct ctdb_vacuum_child_context); >+ child_ctx = talloc_zero(mem_ctx, struct ctdb_vacuum_child_context); > if (child_ctx == NULL) { >- DEBUG(DEBUG_CRIT, (__location__ " Failed to allocate child context for vacuuming of %s\n", ctdb_db->db_name)); >- ctdb_fatal(ctdb, "Out of memory when crating vacuum child context. 
Shutting down\n"); >+ DBG_ERR("Failed to allocate child context for vacuuming of %s\n", >+ ctdb_db->db_name); >+ return ENOMEM; > } > > > ret = pipe(child_ctx->fd); > if (ret != 0) { > talloc_free(child_ctx); >- DEBUG(DEBUG_ERR, ("Failed to create pipe for vacuum child process.\n")); >- tevent_add_timer(ctdb->ev, vacuum_handle, >- timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >- ctdb_vacuum_event, vacuum_handle); >- return; >- } >- >- if (vacuum_handle->fast_path_count >= >- ctdb->tunable.vacuum_fast_path_count) { >- if (ctdb->tunable.vacuum_fast_path_count > 0) { >- full_vacuum_run = true; >- } >- vacuum_handle->fast_path_count = 0; >+ D_ERR("Failed to create pipe for vacuum child process.\n"); >+ return EAGAIN; > } > > child_ctx->child_pid = ctdb_fork(ctdb); >@@ -1442,11 +1424,8 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > close(child_ctx->fd[0]); > close(child_ctx->fd[1]); > talloc_free(child_ctx); >- DEBUG(DEBUG_ERR, ("Failed to fork vacuum child process.\n")); >- tevent_add_timer(ctdb->ev, vacuum_handle, >- timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >- ctdb_vacuum_event, vacuum_handle); >- return; >+ D_ERR("Failed to fork vacuum child process.\n"); >+ return EAGAIN; > } > > >@@ -1454,11 +1433,15 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > char cc = 0; > close(child_ctx->fd[0]); > >- DEBUG(DEBUG_INFO,("Vacuuming child process %d for db %s started\n", getpid(), ctdb_db->db_name)); >+ D_INFO("Vacuuming child process %d for db %s started\n", >+ getpid(), >+ ctdb_db->db_name); > prctl_set_comment("ctdb_vacuum"); >- if (switch_from_server_to_client(ctdb) != 0) { >- DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch vacuum daemon into client mode. Shutting down.\n")); >- _exit(1); >+ ret = switch_from_server_to_client(ctdb); >+ if (ret != 0) { >+ DBG_ERR("ERROR: failed to switch vacuum daemon " >+ "into client mode.\n"); >+ return EIO; > } > > cc = ctdb_vacuum_and_repack_db(ctdb_db, full_vacuum_run); >@@ -1482,9 +1465,8 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > talloc_free(ctdb_db->delete_queue); > ctdb_db->delete_queue = trbt_create(ctdb_db, 0); > if (ctdb_db->delete_queue == NULL) { >- /* fatal here? ... */ >- ctdb_fatal(ctdb, "Out of memory when re-creating delete queue " >- "in parent context. 
Shutting down\n"); >+ DBG_ERR("Out of memory when re-creating vacuum tree\n"); >+ return ENOMEM; > } > > talloc_free(ctdb_db->fetch_queue); >@@ -1495,16 +1477,72 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > } > > tevent_add_timer(ctdb->ev, child_ctx, >- timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, 0), >+ timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, >+ 0), > vacuum_child_timeout, child_ctx); > >- DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child vacuum process\n", child_ctx->fd[0])); >+ DBG_DEBUG(" Created PIPE FD:%d to child vacuum process\n", >+ child_ctx->fd[0]); > > fde = tevent_add_fd(ctdb->ev, child_ctx, child_ctx->fd[0], > TEVENT_FD_READ, vacuum_child_handler, child_ctx); > tevent_fd_set_auto_close(fde); > >- child_ctx->vacuum_handle = vacuum_handle; >+ child_ctx->vacuum_handle = ctdb_db->vacuum_handle; >+ >+ *out = child_ctx; >+ return 0; >+} >+ >+static void ctdb_vacuum_event(struct tevent_context *ev, >+ struct tevent_timer *te, >+ struct timeval t, void *private_data) >+{ >+ struct ctdb_vacuum_handle *vacuum_handle = talloc_get_type( >+ private_data, struct ctdb_vacuum_handle); >+ struct ctdb_db_context *ctdb_db = vacuum_handle->ctdb_db; >+ struct ctdb_context *ctdb = ctdb_db->ctdb; >+ struct ctdb_vacuum_child_context *child_ctx = NULL; >+ uint32_t fast_path_max = ctdb->tunable.vacuum_fast_path_count; >+ bool full_vacuum_run = false; >+ int ret; >+ >+ if (vacuum_handle->fast_path_count >= fast_path_max) { >+ if (fast_path_max > 0) { >+ full_vacuum_run = true; >+ } >+ vacuum_handle->fast_path_count = 0; >+ } >+ >+ ret = vacuum_db_child(vacuum_handle, >+ ctdb_db, >+ full_vacuum_run, >+ &child_ctx); >+ >+ if (ret == 0) { >+ return; >+ } >+ >+ switch (ret) { >+ case EBUSY: >+ /* Stagger */ >+ tevent_add_timer(ctdb->ev, >+ vacuum_handle, >+ timeval_current_ofs(0, 500*1000), >+ ctdb_vacuum_event, >+ vacuum_handle); >+ break; >+ >+ default: >+ /* Temporary failure, schedule next attempt */ >+ tevent_add_timer(ctdb->ev, >+ vacuum_handle, >+ timeval_current_ofs( >+ get_vacuum_interval(ctdb_db), 0), >+ ctdb_vacuum_event, >+ vacuum_handle); >+ } >+ > } > > void ctdb_stop_vacuuming(struct ctdb_context *ctdb) >-- >2.25.1 > > >From 1f34c5e5c8f796791aae40bdc9ae9afc6bca8a1e Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 15 Oct 2019 16:36:44 +1100 >Subject: [PATCH 19/38] ctdb-vacuum: Only schedule next vacuum event if > vacuuuming is scheduled > >At the moment vacuuming is always scheduled. 
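>
>For illustration, a minimal sketch in plain C of what the new flag
>controls (not ctdb code: arm_next_vacuum_event(), the struct below and
>the database name are invented for the example).  When the vacuuming
>child finishes, only a run that was started by the periodic scheduler
>re-arms the next vacuum event; a one-off run, such as the on-demand
>path added later in this series, leaves the timer alone.
>
>    #include <stdbool.h>
>    #include <stdio.h>
>
>    struct vacuum_child {
>            bool scheduled;          /* true only for periodic runs */
>    };
>
>    /* stand-in for re-arming the vacuum event timer */
>    static void arm_next_vacuum_event(const char *db_name)
>    {
>            printf("%s: next periodic vacuum event armed\n", db_name);
>    }
>
>    /* stand-in for the child destructor */
>    static void vacuum_child_finished(struct vacuum_child *child,
>                                      const char *db_name)
>    {
>            if (child->scheduled) {
>                    arm_next_vacuum_event(db_name);
>            } else {
>                    printf("%s: one-off run, nothing re-armed\n", db_name);
>            }
>    }
>
>    int main(void)
>    {
>            struct vacuum_child periodic = { .scheduled = true };
>            struct vacuum_child one_off  = { .scheduled = false };
>
>            vacuum_child_finished(&periodic, "example.tdb");
>            vacuum_child_finished(&one_off, "example.tdb");
>            return 0;
>    }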
> >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit d462d64cdf001fd5d1cbf2a109df62e087ad0c49) >--- > ctdb/server/ctdb_vacuum.c | 15 ++++++++++++--- > 1 file changed, 12 insertions(+), 3 deletions(-) > >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 7d13c0a4222..910751d59e0 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -54,6 +54,7 @@ struct ctdb_vacuum_child_context { > pid_t child_pid; > enum vacuum_child_status status; > struct timeval start_time; >+ bool scheduled; > }; > > struct ctdb_vacuum_handle { >@@ -1325,9 +1326,14 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx) > > ctdb->vacuumer = NULL; > >- tevent_add_timer(ctdb->ev, child_ctx->vacuum_handle, >- timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >- ctdb_vacuum_event, child_ctx->vacuum_handle); >+ if (child_ctx->scheduled) { >+ tevent_add_timer( >+ ctdb->ev, >+ child_ctx->vacuum_handle, >+ timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), >+ ctdb_vacuum_event, >+ child_ctx->vacuum_handle); >+ } > > return 0; > } >@@ -1379,6 +1385,7 @@ static void vacuum_child_handler(struct tevent_context *ev, > */ > static int vacuum_db_child(TALLOC_CTX *mem_ctx, > struct ctdb_db_context *ctdb_db, >+ bool scheduled, > bool full_vacuum_run, > struct ctdb_vacuum_child_context **out) > { >@@ -1454,6 +1461,7 @@ static int vacuum_db_child(TALLOC_CTX *mem_ctx, > close(child_ctx->fd[1]); > > child_ctx->status = VACUUM_RUNNING; >+ child_ctx->scheduled = scheduled; > child_ctx->start_time = timeval_current(); > > ctdb->vacuumer = child_ctx; >@@ -1516,6 +1524,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > > ret = vacuum_db_child(vacuum_handle, > ctdb_db, >+ true, > full_vacuum_run, > &child_ctx); > >-- >2.25.1 > > >From 9f74111b4b279b846d2beb6ce3ff8aa7f681f926 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 30 Jul 2019 14:17:11 +1000 >Subject: [PATCH 20/38] ctdb-daemon: Implement DB_VACUUM control > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 41a41d5f3e2b8e16e25221e14939dc5962997ac7) >--- > ctdb/include/ctdb_private.h | 5 +++ > ctdb/server/ctdb_control.c | 9 ++++ > ctdb/server/ctdb_vacuum.c | 89 +++++++++++++++++++++++++++++++++++++ > 3 files changed, 103 insertions(+) > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index 19b8bb98d8f..d3e70b5e2fa 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -986,6 +986,11 @@ int32_t ctdb_control_uptime(struct ctdb_context *ctdb, TDB_DATA *outdata); > > /* from ctdb_vacuum.c */ > >+int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb, >+ struct ctdb_req_control_old *c, >+ TDB_DATA indata, >+ bool *async_reply); >+ > void ctdb_stop_vacuuming(struct ctdb_context *ctdb); > int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db); > >diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c >index 0174f303f14..d162268a178 100644 >--- a/ctdb/server/ctdb_control.c >+++ b/ctdb/server/ctdb_control.c >@@ -33,6 +33,8 @@ > #include "ctdb_private.h" > #include "ctdb_client.h" > >+#include "protocol/protocol_private.h" >+ > #include "common/reqid.h" > #include "common/common.h" > #include "common/logging.h" >@@ -732,6 +734,13 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, > case CTDB_CONTROL_VACUUM_FETCH: > return 
ctdb_control_vacuum_fetch(ctdb, indata); > >+ case CTDB_CONTROL_DB_VACUUM: { >+ struct ctdb_db_vacuum db_vacuum; >+ >+ CHECK_CONTROL_DATA_SIZE(ctdb_db_vacuum_len(&db_vacuum)); >+ return ctdb_control_db_vacuum(ctdb, c, indata, async_reply); >+ } >+ > default: > DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); > return -1; >diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c >index 910751d59e0..2cc6aa53ecb 100644 >--- a/ctdb/server/ctdb_vacuum.c >+++ b/ctdb/server/ctdb_vacuum.c >@@ -37,6 +37,8 @@ > #include "ctdb_private.h" > #include "ctdb_client.h" > >+#include "protocol/protocol_private.h" >+ > #include "common/rb_tree.h" > #include "common/common.h" > #include "common/logging.h" >@@ -1554,6 +1556,93 @@ static void ctdb_vacuum_event(struct tevent_context *ev, > > } > >+struct vacuum_control_state { >+ struct ctdb_vacuum_child_context *child_ctx; >+ struct ctdb_req_control_old *c; >+ struct ctdb_context *ctdb; >+}; >+ >+static int vacuum_control_state_destructor(struct vacuum_control_state *state) >+{ >+ struct ctdb_vacuum_child_context *child_ctx = state->child_ctx; >+ int32_t status; >+ >+ status = (child_ctx->status == VACUUM_OK ? 0 : -1); >+ ctdb_request_control_reply(state->ctdb, state->c, NULL, status, NULL); >+ >+ return 0; >+} >+ >+int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb, >+ struct ctdb_req_control_old *c, >+ TDB_DATA indata, >+ bool *async_reply) >+{ >+ struct ctdb_db_context *ctdb_db; >+ struct ctdb_vacuum_child_context *child_ctx = NULL; >+ struct ctdb_db_vacuum *db_vacuum; >+ struct vacuum_control_state *state; >+ size_t np; >+ int ret; >+ >+ ret = ctdb_db_vacuum_pull(indata.dptr, >+ indata.dsize, >+ ctdb, >+ &db_vacuum, >+ &np); >+ if (ret != 0) { >+ DBG_ERR("Invalid data\n"); >+ return -1; >+ } >+ >+ ctdb_db = find_ctdb_db(ctdb, db_vacuum->db_id); >+ if (ctdb_db == NULL) { >+ DBG_ERR("Unknown db id 0x%08x\n", db_vacuum->db_id); >+ talloc_free(db_vacuum); >+ return -1; >+ } >+ >+ state = talloc(ctdb, struct vacuum_control_state); >+ if (state == NULL) { >+ DBG_ERR("Memory allocation error\n"); >+ return -1; >+ } >+ >+ ret = vacuum_db_child(ctdb_db, >+ ctdb_db, >+ false, >+ db_vacuum->full_vacuum_run, >+ &child_ctx); >+ >+ talloc_free(db_vacuum); >+ >+ if (ret == 0) { >+ (void) talloc_steal(child_ctx, state); >+ >+ state->child_ctx = child_ctx; >+ state->c = talloc_steal(state, c); >+ state->ctdb = ctdb; >+ >+ talloc_set_destructor(state, vacuum_control_state_destructor); >+ >+ *async_reply = true; >+ return 0; >+ } >+ >+ talloc_free(state); >+ >+ switch (ret) { >+ case EBUSY: >+ DBG_WARNING("Vacuuming collision\n"); >+ break; >+ >+ default: >+ DBG_ERR("Temporary vacuuming failure, ret=%d\n", ret); >+ } >+ >+ return -1; >+} >+ > void ctdb_stop_vacuuming(struct ctdb_context *ctdb) > { > if (ctdb->vacuumer != NULL) { >-- >2.25.1 > > >From 73767a2520823e57f128707f20578c7cafc76f30 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 27 Sep 2019 16:49:01 +1000 >Subject: [PATCH 21/38] ctdb-client: Factor out function client_db_tdb() > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 439ef65d290357e513103530183091a9a6fed197) >--- > ctdb/client/client_db.c | 37 ++++++++++++++++++++---------------- > ctdb/client/client_private.h | 4 ++++ > 2 files changed, 25 insertions(+), 16 deletions(-) > >diff --git a/ctdb/client/client_db.c b/ctdb/client/client_db.c >index a008f2ad63d..0b06d6e5e52 100644 >--- a/ctdb/client/client_db.c 
>+++ b/ctdb/client/client_db.c >@@ -37,6 +37,11 @@ > #include "client/client_private.h" > #include "client/client.h" > >+struct tdb_context *client_db_tdb(struct ctdb_db_context *db) >+{ >+ return db->ltdb->tdb; >+} >+ > static struct ctdb_db_context *client_db_handle( > struct ctdb_client_context *client, > const char *db_name) >@@ -835,11 +840,11 @@ int ctdb_db_traverse_local(struct ctdb_db_context *db, bool readonly, > state.error = 0; > > if (readonly) { >- ret = tdb_traverse_read(db->ltdb->tdb, >+ ret = tdb_traverse_read(client_db_tdb(db), > ctdb_db_traverse_local_handler, > &state); > } else { >- ret = tdb_traverse(db->ltdb->tdb, >+ ret = tdb_traverse(client_db_tdb(db), > ctdb_db_traverse_local_handler, &state); > } > >@@ -1105,14 +1110,14 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *db, TDB_DATA key, > size_t np; > int ret; > >- rec = tdb_fetch(db->ltdb->tdb, key); >+ rec = tdb_fetch(client_db_tdb(db), key); > if (rec.dsize < sizeof(struct ctdb_ltdb_header)) { > /* No record present */ > if (rec.dptr != NULL) { > free(rec.dptr); > } > >- if (tdb_error(db->ltdb->tdb) != TDB_ERR_NOEXIST) { >+ if (tdb_error(client_db_tdb(db)) != TDB_ERR_NOEXIST) { > return EIO; > } > >@@ -1235,18 +1240,18 @@ static int ctdb_fetch_lock_check(struct tevent_req *req) > int ret, err = 0; > bool do_migrate = false; > >- ret = tdb_chainlock(h->db->ltdb->tdb, h->key); >+ ret = tdb_chainlock(client_db_tdb(h->db), h->key); > if (ret != 0) { > DEBUG(DEBUG_ERR, > ("fetch_lock: %s tdb_chainlock failed, %s\n", >- h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); >+ h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); > err = EIO; > goto failed; > } > >- data = tdb_fetch(h->db->ltdb->tdb, h->key); >+ data = tdb_fetch(client_db_tdb(h->db), h->key); > if (data.dptr == NULL) { >- if (tdb_error(h->db->ltdb->tdb) == TDB_ERR_NOEXIST) { >+ if (tdb_error(client_db_tdb(h->db)) == TDB_ERR_NOEXIST) { > goto migrate; > } else { > err = EIO; >@@ -1297,11 +1302,11 @@ failed: > if (data.dptr != NULL) { > free(data.dptr); > } >- ret = tdb_chainunlock(h->db->ltdb->tdb, h->key); >+ ret = tdb_chainunlock(client_db_tdb(h->db), h->key); > if (ret != 0) { > DEBUG(DEBUG_ERR, > ("fetch_lock: %s tdb_chainunlock failed, %s\n", >- h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); >+ h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); > return EIO; > } > >@@ -1377,11 +1382,11 @@ static int ctdb_record_handle_destructor(struct ctdb_record_handle *h) > { > int ret; > >- ret = tdb_chainunlock(h->db->ltdb->tdb, h->key); >+ ret = tdb_chainunlock(client_db_tdb(h->db), h->key); > if (ret != 0) { > DEBUG(DEBUG_ERR, > ("fetch_lock: %s tdb_chainunlock failed, %s\n", >- h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); >+ h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); > } > free(h->data.dptr); > return 0; >@@ -1487,11 +1492,11 @@ int ctdb_store_record(struct ctdb_record_handle *h, TDB_DATA data) > rec[1].dsize = data.dsize; > rec[1].dptr = data.dptr; > >- ret = tdb_storev(h->db->ltdb->tdb, h->key, rec, 2, TDB_REPLACE); >+ ret = tdb_storev(client_db_tdb(h->db), h->key, rec, 2, TDB_REPLACE); > if (ret != 0) { > DEBUG(DEBUG_ERR, > ("store_record: %s tdb_storev failed, %s\n", >- h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); >+ h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); > return EIO; > } > >@@ -1538,11 +1543,11 @@ struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx, > rec.dsize = np; > rec.dptr = header; > >- ret = tdb_store(h->db->ltdb->tdb, h->key, rec, TDB_REPLACE); >+ ret = tdb_store(client_db_tdb(h->db), h->key, rec, 
TDB_REPLACE); > if (ret != 0) { > D_ERR("fetch_lock delete: %s tdb_store failed, %s\n", > h->db->db_name, >- tdb_errorstr(h->db->ltdb->tdb)); >+ tdb_errorstr(client_db_tdb(h->db))); > tevent_req_error(req, EIO); > return tevent_req_post(req, ev); > } >diff --git a/ctdb/client/client_private.h b/ctdb/client/client_private.h >index bb1705534e6..0bb2ad590ea 100644 >--- a/ctdb/client/client_private.h >+++ b/ctdb/client/client_private.h >@@ -77,6 +77,10 @@ struct ctdb_tunnel_context { > void ctdb_client_reply_call(struct ctdb_client_context *client, > uint8_t *buf, size_t buflen, uint32_t reqid); > >+/* From client_db.c */ >+ >+struct tdb_context *client_db_tdb(struct ctdb_db_context *db); >+ > /* From client_message.c */ > > void ctdb_client_req_message(struct ctdb_client_context *client, >-- >2.25.1 > > >From 6744b82657cfbd7e32bb4d0bb630fc94ccc3006b Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Mon, 2 Mar 2020 13:59:42 +1100 >Subject: [PATCH 22/38] ctdb-recovery: Fetched vnnmap is never used, so don't > fetch it > >New vnnmap is constructed using the information from all the connected >nodes. So there is no need to fetch the vnnmap from recovery master. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 072ff4d12b8f34766120ddae888d772e97bca491) >--- > ctdb/server/ctdb_recovery_helper.c | 40 ------------------------------ > 1 file changed, 40 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 0597c507ba6..7356ebdb062 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -2048,7 +2048,6 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) > * > * - Get tunables > * - Get nodemap >- * - Get vnnmap > * - Get capabilities from all nodes > * - Get dbmap > * - Set RECOVERY_ACTIVE >@@ -2076,7 +2075,6 @@ struct recovery_state { > > static void recovery_tunables_done(struct tevent_req *subreq); > static void recovery_nodemap_done(struct tevent_req *subreq); >-static void recovery_vnnmap_done(struct tevent_req *subreq); > static void recovery_capabilities_done(struct tevent_req *subreq); > static void recovery_dbmap_done(struct tevent_req *subreq); > static void recovery_active_done(struct tevent_req *subreq); >@@ -2199,43 +2197,6 @@ static void recovery_nodemap_done(struct tevent_req *subreq) > return; > } > >- ctdb_req_control_getvnnmap(&request); >- subreq = ctdb_client_control_send(state, state->ev, state->client, >- state->destnode, TIMEOUT(), >- &request); >- if (tevent_req_nomem(subreq, req)) { >- return; >- } >- tevent_req_set_callback(subreq, recovery_vnnmap_done, req); >-} >- >-static void recovery_vnnmap_done(struct tevent_req *subreq) >-{ >- struct tevent_req *req = tevent_req_callback_data( >- subreq, struct tevent_req); >- struct recovery_state *state = tevent_req_data( >- req, struct recovery_state); >- struct ctdb_reply_control *reply; >- struct ctdb_req_control request; >- bool status; >- int ret; >- >- status = ctdb_client_control_recv(subreq, &ret, state, &reply); >- TALLOC_FREE(subreq); >- if (! 
status) { >- D_ERR("control GETVNNMAP failed to node %u, ret=%d\n", >- state->destnode, ret); >- tevent_req_error(req, ret); >- return; >- } >- >- ret = ctdb_reply_control_getvnnmap(reply, state, &state->vnnmap); >- if (ret != 0) { >- D_ERR("control GETVNNMAP failed, ret=%d\n", ret); >- tevent_req_error(req, ret); >- return; >- } >- > ctdb_req_control_get_capabilities(&request); > subreq = ctdb_client_control_multi_send(state, state->ev, > state->client, >@@ -2435,7 +2396,6 @@ static void recovery_active_done(struct tevent_req *subreq) > > vnnmap->generation = state->generation; > >- talloc_free(state->vnnmap); > state->vnnmap = vnnmap; > > ctdb_req_control_start_recovery(&request); >-- >2.25.1 > > >From f31abdc66d5cb66eb9cf0815e71c16017014163f Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Mon, 2 Mar 2020 15:07:21 +1100 >Subject: [PATCH 23/38] ctdb-recovery: Consolidate node state > >This avoids passing multiple arguments to async computation. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 6e2f8756f1bce4dfc7fdc435e082f400116e29ec) >--- > ctdb/server/ctdb_recovery_helper.c | 585 +++++++++++++++++------------ > 1 file changed, 346 insertions(+), 239 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 7356ebdb062..893cb15d9d6 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -70,6 +70,105 @@ static uint64_t srvid_next(void) > return rec_srvid; > } > >+/* >+ * Node related functions >+ */ >+ >+struct node_list { >+ uint32_t *pnn_list; >+ uint32_t *caps; >+ uint32_t *ban_credits; >+ unsigned int size; >+ unsigned int count; >+}; >+ >+static struct node_list *node_list_init(TALLOC_CTX *mem_ctx, unsigned int size) >+{ >+ struct node_list *nlist; >+ unsigned int i; >+ >+ nlist = talloc_zero(mem_ctx, struct node_list); >+ if (nlist == NULL) { >+ return NULL; >+ } >+ >+ nlist->pnn_list = talloc_array(nlist, uint32_t, size); >+ nlist->caps = talloc_zero_array(nlist, uint32_t, size); >+ nlist->ban_credits = talloc_zero_array(nlist, uint32_t, size); >+ >+ if (nlist->pnn_list == NULL || >+ nlist->caps == NULL || >+ nlist->ban_credits == NULL) { >+ talloc_free(nlist); >+ return NULL; >+ } >+ nlist->size = size; >+ >+ for (i=0; i<nlist->size; i++) { >+ nlist->pnn_list[i] = CTDB_UNKNOWN_PNN; >+ } >+ >+ return nlist; >+} >+ >+static bool node_list_add(struct node_list *nlist, uint32_t pnn) >+{ >+ unsigned int i; >+ >+ if (nlist->count == nlist->size) { >+ return false; >+ } >+ >+ for (i=0; i<nlist->count; i++) { >+ if (nlist->pnn_list[i] == pnn) { >+ return false; >+ } >+ } >+ >+ nlist->pnn_list[nlist->count] = pnn; >+ nlist->count += 1; >+ >+ return true; >+} >+ >+static uint32_t *node_list_lmaster(struct node_list *nlist, >+ TALLOC_CTX *mem_ctx, >+ unsigned int *pnn_count) >+{ >+ uint32_t *pnn_list; >+ unsigned int count, i; >+ >+ pnn_list = talloc_zero_array(mem_ctx, uint32_t, nlist->count); >+ if (pnn_list == NULL) { >+ return NULL; >+ } >+ >+ count = 0; >+ for (i=0; i<nlist->count; i++) { >+ if (!(nlist->caps[i] & CTDB_CAP_LMASTER)) { >+ continue; >+ } >+ >+ pnn_list[count] = nlist->pnn_list[i]; >+ count += 1; >+ } >+ >+ *pnn_count = count; >+ return pnn_list; >+} >+ >+static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) >+{ >+ unsigned int i; >+ >+ for (i=0; i<nlist->count; i++) { >+ if (nlist->pnn_list[i] == pnn) { >+ 
nlist->ban_credits[i] += 1; >+ break; >+ } >+ } >+} >+ > /* > * Recovery database functions > */ >@@ -665,9 +764,9 @@ struct push_database_old_state { > struct ctdb_client_context *client; > struct recdb_context *recdb; > uint32_t *pnn_list; >- int count; >+ unsigned int count; > struct ctdb_rec_buffer *recbuf; >- int index; >+ unsigned int index; > }; > > static void push_database_old_push_done(struct tevent_req *subreq); >@@ -676,7 +775,8 @@ static struct tevent_req *push_database_old_send( > TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, >- uint32_t *pnn_list, int count, >+ uint32_t *pnn_list, >+ unsigned int count, > struct recdb_context *recdb) > { > struct tevent_req *req, *subreq; >@@ -769,7 +869,7 @@ struct push_database_new_state { > struct ctdb_client_context *client; > struct recdb_context *recdb; > uint32_t *pnn_list; >- int count; >+ unsigned int count; > uint64_t srvid; > uint32_t dmaster; > int fd; >@@ -787,7 +887,8 @@ static struct tevent_req *push_database_new_send( > TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, >- uint32_t *pnn_list, int count, >+ uint32_t *pnn_list, >+ unsigned int count, > struct recdb_context *recdb, > int max_size) > { >@@ -991,7 +1092,8 @@ static void push_database_new_confirmed(struct tevent_req *subreq) > struct ctdb_reply_control **reply; > int *err_list; > bool status; >- int ret, i; >+ unsigned int i; >+ int ret; > uint32_t num_records; > > status = ctdb_client_control_multi_recv(subreq, &ret, state, >@@ -1062,7 +1164,7 @@ static struct tevent_req *push_database_send( > TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, >- uint32_t *pnn_list, int count, uint32_t *caps, >+ struct node_list *nlist, > struct ctdb_tunable_list *tun_list, > struct recdb_context *recdb) > { >@@ -1070,7 +1172,7 @@ static struct tevent_req *push_database_send( > struct push_database_state *state; > uint32_t *old_list, *new_list; > unsigned int old_count, new_count; >- int i; >+ unsigned int i; > > req = tevent_req_create(mem_ctx, &state, struct push_database_state); > if (req == NULL) { >@@ -1082,21 +1184,19 @@ static struct tevent_req *push_database_send( > > old_count = 0; > new_count = 0; >- old_list = talloc_array(state, uint32_t, count); >- new_list = talloc_array(state, uint32_t, count); >+ old_list = talloc_array(state, uint32_t, nlist->count); >+ new_list = talloc_array(state, uint32_t, nlist->count); > if (tevent_req_nomem(old_list, req) || > tevent_req_nomem(new_list,req)) { > return tevent_req_post(req, ev); > } > >- for (i=0; i<count; i++) { >- uint32_t pnn = pnn_list[i]; >- >- if (caps[pnn] & CTDB_CAP_FRAGMENTED_CONTROLS) { >- new_list[new_count] = pnn; >+ for (i=0; i<nlist->count; i++) { >+ if (nlist->caps[i] & CTDB_CAP_FRAGMENTED_CONTROLS) { >+ new_list[new_count] = nlist->pnn_list[i]; > new_count += 1; > } else { >- old_list[old_count] = pnn; >+ old_list[old_count] = nlist->pnn_list[i]; > old_count += 1; > } > } >@@ -1183,12 +1283,10 @@ static bool push_database_recv(struct tevent_req *req, int *perr) > struct collect_highseqnum_db_state { > struct tevent_context *ev; > struct ctdb_client_context *client; >- uint32_t *pnn_list; >- int count; >- uint32_t *caps; >- uint32_t *ban_credits; >+ struct node_list *nlist; > uint32_t db_id; > struct recdb_context *recdb; >+ > uint32_t max_pnn; > }; > >@@ -1199,8 +1297,8 @@ static struct tevent_req *collect_highseqnum_db_send( > TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct 
ctdb_client_context *client, >- uint32_t *pnn_list, int count, uint32_t *caps, >- uint32_t *ban_credits, uint32_t db_id, >+ struct node_list *nlist, >+ uint32_t db_id, > struct recdb_context *recdb) > { > struct tevent_req *req, *subreq; >@@ -1215,17 +1313,18 @@ static struct tevent_req *collect_highseqnum_db_send( > > state->ev = ev; > state->client = client; >- state->pnn_list = pnn_list; >- state->count = count; >- state->caps = caps; >- state->ban_credits = ban_credits; >+ state->nlist = nlist; > state->db_id = db_id; > state->recdb = recdb; > > ctdb_req_control_get_db_seqnum(&request, db_id); >- subreq = ctdb_client_control_multi_send(mem_ctx, ev, client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ subreq = ctdb_client_control_multi_send(mem_ctx, >+ ev, >+ client, >+ nlist->pnn_list, >+ nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return tevent_req_post(req, ev); > } >@@ -1244,8 +1343,10 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) > struct ctdb_reply_control **reply; > int *err_list; > bool status; >- int ret, i; >+ unsigned int i; >+ int ret; > uint64_t seqnum, max_seqnum; >+ uint32_t max_caps; > > status = ctdb_client_control_multi_recv(subreq, &ret, state, > &err_list, &reply); >@@ -1254,8 +1355,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, err_list, >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, > &pnn); > if (ret2 != 0) { > D_ERR("control GET_DB_SEQNUM failed for db %s" >@@ -1271,8 +1373,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) > } > > max_seqnum = 0; >- state->max_pnn = state->pnn_list[0]; >- for (i=0; i<state->count; i++) { >+ state->max_pnn = state->nlist->pnn_list[0]; >+ max_caps = state->nlist->caps[0]; >+ for (i=0; i<state->nlist->count; i++) { > ret = ctdb_reply_control_get_db_seqnum(reply[i], &seqnum); > if (ret != 0) { > tevent_req_error(req, EPROTO); >@@ -1281,7 +1384,8 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) > > if (max_seqnum < seqnum) { > max_seqnum = seqnum; >- state->max_pnn = state->pnn_list[i]; >+ state->max_pnn = state->nlist->pnn_list[i]; >+ max_caps = state->nlist->caps[i]; > } > } > >@@ -1290,9 +1394,11 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) > D_INFO("Pull persistent db %s from node %d with seqnum 0x%"PRIx64"\n", > recdb_name(state->recdb), state->max_pnn, max_seqnum); > >- subreq = pull_database_send(state, state->ev, state->client, >+ subreq = pull_database_send(state, >+ state->ev, >+ state->client, > state->max_pnn, >- state->caps[state->max_pnn], >+ max_caps, > state->recdb); > if (tevent_req_nomem(subreq, req)) { > return; >@@ -1313,7 +1419,7 @@ static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq) > status = pull_database_recv(subreq, &ret); > TALLOC_FREE(subreq); > if (! 
status) { >- state->ban_credits[state->max_pnn] += 1; >+ node_list_ban_credits(state->nlist, state->max_pnn); > tevent_req_error(req, ret); > return; > } >@@ -1333,14 +1439,12 @@ static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr) > struct collect_all_db_state { > struct tevent_context *ev; > struct ctdb_client_context *client; >- uint32_t *pnn_list; >- int count; >- uint32_t *caps; >- uint32_t *ban_credits; >+ struct node_list *nlist; > uint32_t db_id; > struct recdb_context *recdb; >+ > struct ctdb_pulldb pulldb; >- int index; >+ unsigned int index; > }; > > static void collect_all_db_pulldb_done(struct tevent_req *subreq); >@@ -1349,13 +1453,12 @@ static struct tevent_req *collect_all_db_send( > TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, >- uint32_t *pnn_list, int count, uint32_t *caps, >- uint32_t *ban_credits, uint32_t db_id, >+ struct node_list *nlist, >+ uint32_t db_id, > struct recdb_context *recdb) > { > struct tevent_req *req, *subreq; > struct collect_all_db_state *state; >- uint32_t pnn; > > req = tevent_req_create(mem_ctx, &state, > struct collect_all_db_state); >@@ -1365,17 +1468,17 @@ static struct tevent_req *collect_all_db_send( > > state->ev = ev; > state->client = client; >- state->pnn_list = pnn_list; >- state->count = count; >- state->caps = caps; >- state->ban_credits = ban_credits; >+ state->nlist = nlist; > state->db_id = db_id; > state->recdb = recdb; > state->index = 0; > >- pnn = state->pnn_list[state->index]; >- >- subreq = pull_database_send(state, ev, client, pnn, caps[pnn], recdb); >+ subreq = pull_database_send(state, >+ ev, >+ client, >+ nlist->pnn_list[state->index], >+ nlist->caps[state->index], >+ recdb); > if (tevent_req_nomem(subreq, req)) { > return tevent_req_post(req, ev); > } >@@ -1390,28 +1493,30 @@ static void collect_all_db_pulldb_done(struct tevent_req *subreq) > subreq, struct tevent_req); > struct collect_all_db_state *state = tevent_req_data( > req, struct collect_all_db_state); >- uint32_t pnn; > int ret; > bool status; > > status = pull_database_recv(subreq, &ret); > TALLOC_FREE(subreq); > if (! 
status) { >- pnn = state->pnn_list[state->index]; >- state->ban_credits[pnn] += 1; >+ node_list_ban_credits(state->nlist, >+ state->nlist->pnn_list[state->index]); > tevent_req_error(req, ret); > return; > } > > state->index += 1; >- if (state->index == state->count) { >+ if (state->index == state->nlist->count) { > tevent_req_done(req); > return; > } > >- pnn = state->pnn_list[state->index]; >- subreq = pull_database_send(state, state->ev, state->client, >- pnn, state->caps[pnn], state->recdb); >+ subreq = pull_database_send(state, >+ state->ev, >+ state->client, >+ state->nlist->pnn_list[state->index], >+ state->nlist->caps[state->index], >+ state->recdb); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1441,10 +1546,7 @@ struct recover_db_state { > struct tevent_context *ev; > struct ctdb_client_context *client; > struct ctdb_tunable_list *tun_list; >- uint32_t *pnn_list; >- int count; >- uint32_t *caps; >- uint32_t *ban_credits; >+ struct node_list *nlist; > uint32_t db_id; > uint8_t db_flags; > >@@ -1469,11 +1571,10 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, > struct ctdb_tunable_list *tun_list, >- uint32_t *pnn_list, int count, >- uint32_t *caps, >- uint32_t *ban_credits, >+ struct node_list *nlist, > uint32_t generation, >- uint32_t db_id, uint8_t db_flags) >+ uint32_t db_id, >+ uint8_t db_flags) > { > struct tevent_req *req, *subreq; > struct recover_db_state *state; >@@ -1487,10 +1588,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, > state->ev = ev; > state->client = client; > state->tun_list = tun_list; >- state->pnn_list = pnn_list; >- state->count = count; >- state->caps = caps; >- state->ban_credits = ban_credits; >+ state->nlist = nlist; > state->db_id = db_id; > state->db_flags = db_flags; > >@@ -1580,10 +1678,13 @@ static void recover_db_path_done(struct tevent_req *subreq) > talloc_free(reply); > > ctdb_req_control_db_freeze(&request, state->db_id); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1608,14 +1709,16 @@ static void recover_db_freeze_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, err_list, >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, > &pnn); > if (ret2 != 0) { > D_ERR("control FREEZE_DB failed for db %s" > " on node %u, ret=%d\n", > state->db_name, pnn, ret2); >- state->ban_credits[pnn] += 1; >+ >+ node_list_ban_credits(state->nlist, pnn); > } else { > D_ERR("control FREEZE_DB failed for db %s, ret=%d\n", > state->db_name, ret); >@@ -1625,10 +1728,13 @@ static void recover_db_freeze_done(struct tevent_req *subreq) > } > > ctdb_req_control_db_transaction_start(&request, &state->transdb); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1652,9 +1758,10 @@ static void recover_db_transaction_started(struct tevent_req *subreq) > int 
ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("control TRANSACTION_DB failed for db=%s" > " on node %u, ret=%d\n", >@@ -1677,17 +1784,19 @@ static void recover_db_transaction_started(struct tevent_req *subreq) > > if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || > (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { >- subreq = collect_highseqnum_db_send( >- state, state->ev, state->client, >- state->pnn_list, state->count, state->caps, >- state->ban_credits, state->db_id, >- state->recdb); >+ subreq = collect_highseqnum_db_send(state, >+ state->ev, >+ state->client, >+ state->nlist, >+ state->db_id, >+ state->recdb); > } else { >- subreq = collect_all_db_send( >- state, state->ev, state->client, >- state->pnn_list, state->count, state->caps, >- state->ban_credits, state->db_id, >- state->recdb); >+ subreq = collect_all_db_send(state, >+ state->ev, >+ state->client, >+ state->nlist, >+ state->db_id, >+ state->recdb); > } > if (tevent_req_nomem(subreq, req)) { > return; >@@ -1718,10 +1827,13 @@ static void recover_db_collect_done(struct tevent_req *subreq) > } > > ctdb_req_control_wipe_database(&request, &state->transdb); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1745,9 +1857,10 @@ static void recover_db_wipedb_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("control WIPEDB failed for db %s on node %u," > " ret=%d\n", state->db_name, pnn, ret2); >@@ -1759,9 +1872,11 @@ static void recover_db_wipedb_done(struct tevent_req *subreq) > return; > } > >- subreq = push_database_send(state, state->ev, state->client, >- state->pnn_list, state->count, >- state->caps, state->tun_list, >+ subreq = push_database_send(state, >+ state->ev, >+ state->client, >+ state->nlist, >+ state->tun_list, > state->recdb); > if (tevent_req_nomem(subreq, req)) { > return; >@@ -1789,10 +1904,13 @@ static void recover_db_pushdb_done(struct tevent_req *subreq) > TALLOC_FREE(state->recdb); > > ctdb_req_control_db_transaction_commit(&request, &state->transdb); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1817,9 +1935,10 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("control DB_TRANSACTION_COMMIT failed for db %s" > " on node %u, ret=%d\n", >@@ -1833,10 +1952,13 @@ static void 
recover_db_transaction_committed(struct tevent_req *subreq) > } > > ctdb_req_control_db_thaw(&request, state->db_id); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -1860,9 +1982,10 @@ static void recover_db_thaw_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("control DB_THAW failed for db %s on node %u," > " ret=%d\n", state->db_name, pnn, ret2); >@@ -1901,10 +2024,7 @@ struct db_recovery_one_state { > struct ctdb_client_context *client; > struct ctdb_dbid_map *dbmap; > struct ctdb_tunable_list *tun_list; >- uint32_t *pnn_list; >- int count; >- uint32_t *caps; >- uint32_t *ban_credits; >+ struct node_list *nlist; > uint32_t generation; > uint32_t db_id; > uint8_t db_flags; >@@ -1918,9 +2038,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > struct ctdb_client_context *client, > struct ctdb_dbid_map *dbmap, > struct ctdb_tunable_list *tun_list, >- uint32_t *pnn_list, int count, >- uint32_t *caps, >- uint32_t *ban_credits, >+ struct node_list *nlist, > uint32_t generation) > { > struct tevent_req *req, *subreq; >@@ -1954,17 +2072,18 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > substate->client = client; > substate->dbmap = dbmap; > substate->tun_list = tun_list; >- substate->pnn_list = pnn_list; >- substate->count = count; >- substate->caps = caps; >- substate->ban_credits = ban_credits; >+ substate->nlist = nlist; > substate->generation = generation; > substate->db_id = dbmap->dbs[i].db_id; > substate->db_flags = dbmap->dbs[i].flags; > >- subreq = recover_db_send(state, ev, client, tun_list, >- pnn_list, count, caps, ban_credits, >- generation, substate->db_id, >+ subreq = recover_db_send(state, >+ ev, >+ client, >+ tun_list, >+ nlist, >+ generation, >+ substate->db_id, > substate->db_flags); > if (tevent_req_nomem(subreq, req)) { > return tevent_req_post(req, ev); >@@ -1996,11 +2115,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) > > substate->num_fails += 1; > if (substate->num_fails < NUM_RETRIES) { >- subreq = recover_db_send(state, state->ev, substate->client, >+ subreq = recover_db_send(state, >+ state->ev, >+ substate->client, > substate->tun_list, >- substate->pnn_list, substate->count, >- substate->caps, substate->ban_credits, >- substate->generation, substate->db_id, >+ substate->nlist, >+ substate->generation, >+ substate->db_id, > substate->db_flags); > if (tevent_req_nomem(subreq, req)) { > goto failed; >@@ -2062,12 +2183,8 @@ struct recovery_state { > struct tevent_context *ev; > struct ctdb_client_context *client; > uint32_t generation; >- uint32_t *pnn_list; >- unsigned int count; > uint32_t destnode; >- struct ctdb_node_map *nodemap; >- uint32_t *caps; >- uint32_t *ban_credits; >+ struct node_list *nlist; > struct ctdb_tunable_list *tun_list; > struct ctdb_vnn_map *vnnmap; > struct ctdb_dbid_map *dbmap; >@@ -2165,6 +2282,8 @@ static void recovery_nodemap_done(struct tevent_req *subreq) > req, struct recovery_state); > struct ctdb_reply_control *reply; > struct ctdb_req_control 
request; >+ struct ctdb_node_map *nodemap; >+ unsigned int i; > bool status; > int ret; > >@@ -2177,31 +2296,34 @@ static void recovery_nodemap_done(struct tevent_req *subreq) > return; > } > >- ret = ctdb_reply_control_get_nodemap(reply, state, &state->nodemap); >+ ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); > if (ret != 0) { > D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); > tevent_req_error(req, ret); > return; > } > >- state->count = list_of_active_nodes(state->nodemap, CTDB_UNKNOWN_PNN, >- state, &state->pnn_list); >- if (state->count <= 0) { >- tevent_req_error(req, ENOMEM); >+ state->nlist = node_list_init(state, nodemap->num); >+ if (tevent_req_nomem(state->nlist, req)) { > return; > } > >- state->ban_credits = talloc_zero_array(state, uint32_t, >- state->nodemap->num); >- if (tevent_req_nomem(state->ban_credits, req)) { >- return; >+ for (i=0; i<nodemap->num; i++) { >+ if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { >+ continue; >+ } >+ >+ node_list_add(state->nlist, nodemap->node[i].pnn); > } > > ctdb_req_control_get_capabilities(&request); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2228,9 +2350,10 @@ static void recovery_capabilities_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("control GET_CAPABILITIES failed on node %u," > " ret=%d\n", pnn, ret2); >@@ -2242,25 +2365,18 @@ static void recovery_capabilities_done(struct tevent_req *subreq) > return; > } > >- /* Make the array size same as nodemap */ >- state->caps = talloc_zero_array(state, uint32_t, >- state->nodemap->num); >- if (tevent_req_nomem(state->caps, req)) { >- return; >- } >- >- for (i=0; i<state->count; i++) { >- uint32_t pnn; >+ for (i=0; i<state->nlist->count; i++) { >+ uint32_t caps; > >- pnn = state->pnn_list[i]; >- ret = ctdb_reply_control_get_capabilities(reply[i], >- &state->caps[pnn]); >+ ret = ctdb_reply_control_get_capabilities(reply[i], &caps); > if (ret != 0) { > D_ERR("control GET_CAPABILITIES failed on node %u\n", >- pnn); >+ state->nlist->pnn_list[i]); > tevent_req_error(req, EPROTO); > return; > } >+ >+ state->nlist->caps[i] = caps; > } > > talloc_free(reply); >@@ -2303,10 +2419,13 @@ static void recovery_dbmap_done(struct tevent_req *subreq) > } > > ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2323,7 +2442,6 @@ static void recovery_active_done(struct tevent_req *subreq) > struct ctdb_vnn_map *vnnmap; > int *err_list; > int ret; >- unsigned int count, i; > bool status; > > status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list, >@@ -2333,9 +2451,10 @@ static void recovery_active_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = 
ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("failed to set recovery mode ACTIVE on node %u," > " ret=%d\n", pnn, ret2); >@@ -2350,48 +2469,20 @@ static void recovery_active_done(struct tevent_req *subreq) > D_ERR("Set recovery mode to ACTIVE\n"); > > /* Calculate new VNNMAP */ >- count = 0; >- for (i=0; i<state->nodemap->num; i++) { >- if (state->nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { >- continue; >- } >- if (!(state->caps[i] & CTDB_CAP_LMASTER)) { >- continue; >- } >- count += 1; >- } >- >- if (count == 0) { >- D_WARNING("No active lmasters found. Adding recmaster anyway\n"); >- } >- > vnnmap = talloc_zero(state, struct ctdb_vnn_map); > if (tevent_req_nomem(vnnmap, req)) { > return; > } > >- vnnmap->size = (count == 0 ? 1 : count); >- vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size); >+ vnnmap->map = node_list_lmaster(state->nlist, vnnmap, &vnnmap->size); > if (tevent_req_nomem(vnnmap->map, req)) { > return; > } > >- if (count == 0) { >+ if (vnnmap->size == 0) { >+ D_WARNING("No active lmasters found. Adding recmaster anyway\n"); > vnnmap->map[0] = state->destnode; >- } else { >- count = 0; >- for (i=0; i<state->nodemap->num; i++) { >- if (state->nodemap->node[i].flags & >- NODE_FLAGS_INACTIVE) { >- continue; >- } >- if (!(state->caps[i] & CTDB_CAP_LMASTER)) { >- continue; >- } >- >- vnnmap->map[count] = state->nodemap->node[i].pnn; >- count += 1; >- } >+ vnnmap->size = 1; > } > > vnnmap->generation = state->generation; >@@ -2399,10 +2490,13 @@ static void recovery_active_done(struct tevent_req *subreq) > state->vnnmap = vnnmap; > > ctdb_req_control_start_recovery(&request); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2427,9 +2521,10 @@ static void recovery_start_recovery_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("failed to run start_recovery event on node %u," > " ret=%d\n", pnn, ret2); >@@ -2444,10 +2539,13 @@ static void recovery_start_recovery_done(struct tevent_req *subreq) > D_ERR("start_recovery event finished\n"); > > ctdb_req_control_setvnnmap(&request, state->vnnmap); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2471,9 +2569,10 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("failed to update VNNMAP on node %u, ret=%d\n", > pnn, ret2); >@@ -2486,10 +2585,12 @@ static void 
recovery_vnnmap_update_done(struct tevent_req *subreq) > > D_NOTICE("updated VNNMAP\n"); > >- subreq = db_recovery_send(state, state->ev, state->client, >- state->dbmap, state->tun_list, >- state->pnn_list, state->count, >- state->caps, state->ban_credits, >+ subreq = db_recovery_send(state, >+ state->ev, >+ state->client, >+ state->dbmap, >+ state->tun_list, >+ state->nlist, > state->vnnmap->generation); > if (tevent_req_nomem(subreq, req)) { > return; >@@ -2522,12 +2623,10 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) > return; > } > >- for (i=0; i<state->count; i++) { >- uint32_t pnn; >- pnn = state->pnn_list[i]; >- if (state->ban_credits[pnn] > max_credits) { >- max_pnn = pnn; >- max_credits = state->ban_credits[pnn]; >+ for (i=0; i<state->nlist->count; i++) { >+ if (state->nlist->ban_credits[i] > max_credits) { >+ max_pnn = state->nlist->pnn_list[i]; >+ max_credits = state->nlist->ban_credits[i]; > } > } > >@@ -2563,10 +2662,13 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) > } > > ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_NORMAL); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2617,9 +2719,10 @@ static void recovery_normal_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("failed to set recovery mode NORMAL on node %u," > " ret=%d\n", pnn, ret2); >@@ -2634,10 +2737,13 @@ static void recovery_normal_done(struct tevent_req *subreq) > D_ERR("Set recovery mode to NORMAL\n"); > > ctdb_req_control_end_recovery(&request); >- subreq = ctdb_client_control_multi_send(state, state->ev, >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, > state->client, >- state->pnn_list, state->count, >- TIMEOUT(), &request); >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2661,9 +2767,10 @@ static void recovery_end_recovery_done(struct tevent_req *subreq) > int ret2; > uint32_t pnn; > >- ret2 = ctdb_client_control_multi_error(state->pnn_list, >- state->count, >- err_list, &pnn); >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); > if (ret2 != 0) { > D_ERR("failed to run recovered event on node %u," > " ret=%d\n", pnn, ret2); >-- >2.25.1 > > >From 0ac2da85acec21ae454ae6981bb1d4e983a3badf Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Tue, 18 Feb 2020 16:17:00 +1100 >Subject: [PATCH 24/38] ctdb-recovery: Don't trust nodemap obtained from local > node > >It's possible to have a node stopped, but recovery master not yet >updated flags on the local ctdb daemon when recovery is started. So do >not trust the list of active nodes obtained from the local node. Query >the connected nodes to calculate the list of active nodes. 
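The verification step this commit adds boils down to a two-stage filter: the nodemap fetched from the local daemon is trusted only for connectivity, and each connected node is then asked for its own view of its flags before it is counted as active. Below is a minimal, self-contained sketch of that filter; the struct, the flag bits and the fetch_own_flags() callback are illustrative stand-ins rather than the CTDB types, and the patch itself performs the second stage asynchronously with GET_NODEMAP controls.

#include <stddef.h>
#include <stdint.h>

/* Sketch-only flag bits, not the real NODE_FLAGS_* values */
#define SKETCH_FLAG_DISCONNECTED 0x01
#define SKETCH_FLAG_INACTIVE     0x02

struct sketch_node {
        uint32_t pnn;
        uint32_t local_flags;   /* flags as seen by the local daemon */
};

/* Asks node 'pnn' for its own, possibly newer, view of its flags */
typedef uint32_t (*fetch_own_flags_fn)(uint32_t pnn);

/* active_pnns must have room for num_nodes entries */
static size_t sketch_active_nodes(const struct sketch_node *nodes,
                                  size_t num_nodes,
                                  fetch_own_flags_fn fetch_own_flags,
                                  uint32_t *active_pnns)
{
        size_t count = 0;
        size_t i;

        for (i = 0; i < num_nodes; i++) {
                uint32_t own_flags;

                /* Stage 1: local view is trusted only for connectivity */
                if (nodes[i].local_flags & SKETCH_FLAG_DISCONNECTED) {
                        continue;
                }

                /* Stage 2: the node's own flags decide whether it counts
                 * as active, since they may be newer than the local copy */
                own_flags = fetch_own_flags(nodes[i].pnn);
                if (own_flags & SKETCH_FLAG_INACTIVE) {
                        continue;
                }

                active_pnns[count] = nodes[i].pnn;
                count++;
        }

        return count;
}
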
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit c6a0ff1bed0265e44fd6135d16bfc41919fe5bf5) >--- > ctdb/server/ctdb_recovery_helper.c | 116 ++++++++++++++++++++++++++++- > 1 file changed, 113 insertions(+), 3 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 893cb15d9d6..5f38d55e50e 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -2168,7 +2168,7 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) > * Run the parallel database recovery > * > * - Get tunables >- * - Get nodemap >+ * - Get nodemap from all nodes > * - Get capabilities from all nodes > * - Get dbmap > * - Set RECOVERY_ACTIVE >@@ -2192,6 +2192,7 @@ struct recovery_state { > > static void recovery_tunables_done(struct tevent_req *subreq); > static void recovery_nodemap_done(struct tevent_req *subreq); >+static void recovery_nodemap_verify(struct tevent_req *subreq); > static void recovery_capabilities_done(struct tevent_req *subreq); > static void recovery_dbmap_done(struct tevent_req *subreq); > static void recovery_active_done(struct tevent_req *subreq); >@@ -2309,13 +2310,122 @@ static void recovery_nodemap_done(struct tevent_req *subreq) > } > > for (i=0; i<nodemap->num; i++) { >- if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { >+ bool ok; >+ >+ if (nodemap->node[i].flags & NODE_FLAGS_DISCONNECTED) { > continue; > } > >- node_list_add(state->nlist, nodemap->node[i].pnn); >+ ok = node_list_add(state->nlist, nodemap->node[i].pnn); >+ if (!ok) { >+ tevent_req_error(req, EINVAL); >+ return; >+ } > } > >+ talloc_free(nodemap); >+ talloc_free(reply); >+ >+ /* Verify flags by getting local node information from each node */ >+ ctdb_req_control_get_nodemap(&request); >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, >+ state->client, >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); >+ if (tevent_req_nomem(subreq, req)) { >+ return; >+ } >+ tevent_req_set_callback(subreq, recovery_nodemap_verify, req); >+} >+ >+static void recovery_nodemap_verify(struct tevent_req *subreq) >+{ >+ struct tevent_req *req = tevent_req_callback_data( >+ subreq, struct tevent_req); >+ struct recovery_state *state = tevent_req_data( >+ req, struct recovery_state); >+ struct ctdb_req_control request; >+ struct ctdb_reply_control **reply; >+ struct node_list *nlist; >+ unsigned int i; >+ int *err_list; >+ int ret; >+ bool status; >+ >+ status = ctdb_client_control_multi_recv(subreq, >+ &ret, >+ state, >+ &err_list, >+ &reply); >+ TALLOC_FREE(subreq); >+ if (! 
status) { >+ int ret2; >+ uint32_t pnn; >+ >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); >+ if (ret2 != 0) { >+ D_ERR("control GET_NODEMAP failed on node %u," >+ " ret=%d\n", pnn, ret2); >+ } else { >+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); >+ } >+ tevent_req_error(req, ret); >+ return; >+ } >+ >+ nlist = node_list_init(state, state->nlist->size); >+ if (tevent_req_nomem(nlist, req)) { >+ return; >+ } >+ >+ for (i=0; i<state->nlist->count; i++) { >+ struct ctdb_node_map *nodemap = NULL; >+ uint32_t pnn, flags; >+ unsigned int j; >+ bool ok; >+ >+ pnn = state->nlist->pnn_list[i]; >+ ret = ctdb_reply_control_get_nodemap(reply[i], >+ state, >+ &nodemap); >+ if (ret != 0) { >+ D_ERR("control GET_NODEMAP failed on node %u\n", pnn); >+ tevent_req_error(req, EPROTO); >+ return; >+ } >+ >+ flags = NODE_FLAGS_DISCONNECTED; >+ for (j=0; j<nodemap->num; j++) { >+ if (nodemap->node[j].pnn == pnn) { >+ flags = nodemap->node[j].flags; >+ break; >+ } >+ } >+ >+ TALLOC_FREE(nodemap); >+ >+ if (flags & NODE_FLAGS_INACTIVE) { >+ continue; >+ } >+ >+ ok = node_list_add(nlist, pnn); >+ if (!ok) { >+ tevent_req_error(req, EINVAL); >+ return; >+ } >+ } >+ >+ talloc_free(reply); >+ >+ talloc_free(state->nlist); >+ state->nlist = nlist; >+ > ctdb_req_control_get_capabilities(&request); > subreq = ctdb_client_control_multi_send(state, > state->ev, >-- >2.25.1 > > >From 981a9d56be37d75128b66ef1d69f033ce5b9c322 Mon Sep 17 00:00:00 2001 >From: Amitay Isaacs <amitay@gmail.com> >Date: Mon, 2 Mar 2020 16:16:26 +1100 >Subject: [PATCH 25/38] ctdb-recovery: Refactor banning a node into separate > computation > >If a node is marked for banning, confirm that it's not become inactive >during the recovery. If yes, then don't ban the node. 
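The selection logic being split out here can be summarised as: take the node with the most ban credits, re-check its flags first, and if it has gone inactive while the recovery ran, clear its credits and consider the next-worst node instead. A compact, self-contained sketch of that loop follows; the threshold constant, the is_inactive() callback and the array layout are illustrative assumptions, while the real code does the re-check with a GET_NODEMAP control and then issues SET_BAN_STATE.

#include <stdbool.h>
#include <stdint.h>

#define SKETCH_UNKNOWN_PNN 0xFFFFFFFFU
#define SKETCH_BAN_THRESHOLD 5  /* stand-in for the real retry limit */

/* Reports whether node 'pnn' is currently inactive (stopped or banned) */
typedef bool (*is_inactive_fn)(uint32_t pnn);

/* Returns the PNN to ban, or SKETCH_UNKNOWN_PNN if nobody qualifies */
static uint32_t sketch_pick_ban_candidate(const uint32_t *pnn_list,
                                          unsigned int *ban_credits,
                                          unsigned int count,
                                          is_inactive_fn is_inactive)
{
        for (;;) {
                uint32_t max_pnn = SKETCH_UNKNOWN_PNN;
                unsigned int max_credits = 0;
                unsigned int max_idx = 0;
                unsigned int i;

                for (i = 0; i < count; i++) {
                        if (ban_credits[i] > max_credits) {
                                max_pnn = pnn_list[i];
                                max_credits = ban_credits[i];
                                max_idx = i;
                        }
                }

                /* Not enough accumulated failures: ban nobody */
                if (max_credits < SKETCH_BAN_THRESHOLD) {
                        return SKETCH_UNKNOWN_PNN;
                }

                /* The candidate went inactive during recovery: reset its
                 * credits and look for the next candidate instead of
                 * banning a node that is already out of the cluster. */
                if (is_inactive(max_pnn)) {
                        ban_credits[max_idx] = 0;
                        continue;
                }

                return max_pnn;
        }
}
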
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Amitay Isaacs <amitay@gmail.com> >Reviewed-by: Martin Schwenke <martin@meltin.net> >(cherry picked from commit 1c56d6413f86cc15ebac232f39ef1e2a53ae4297) >--- > ctdb/server/ctdb_recovery_helper.c | 261 +++++++++++++++++++++++------ > 1 file changed, 208 insertions(+), 53 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 5f38d55e50e..1f3b58312c4 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -2163,6 +2163,206 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) > return true; > } > >+struct ban_node_state { >+ struct tevent_context *ev; >+ struct ctdb_client_context *client; >+ struct ctdb_tunable_list *tun_list; >+ struct node_list *nlist; >+ uint32_t destnode; >+ >+ uint32_t max_pnn; >+}; >+ >+static bool ban_node_check(struct tevent_req *req); >+static void ban_node_check_done(struct tevent_req *subreq); >+static void ban_node_done(struct tevent_req *subreq); >+ >+static struct tevent_req *ban_node_send(TALLOC_CTX *mem_ctx, >+ struct tevent_context *ev, >+ struct ctdb_client_context *client, >+ struct ctdb_tunable_list *tun_list, >+ struct node_list *nlist) >+{ >+ struct tevent_req *req; >+ struct ban_node_state *state; >+ bool ok; >+ >+ req = tevent_req_create(mem_ctx, &state, struct ban_node_state); >+ if (req == NULL) { >+ return NULL; >+ } >+ >+ state->ev = ev; >+ state->client = client; >+ state->tun_list = tun_list; >+ state->nlist = nlist; >+ state->destnode = ctdb_client_pnn(client); >+ >+ /* Bans are not enabled */ >+ if (state->tun_list->enable_bans == 0) { >+ D_ERR("Bans are not enabled\n"); >+ tevent_req_done(req); >+ return tevent_req_post(req, ev); >+ } >+ >+ ok = ban_node_check(req); >+ if (!ok) { >+ return tevent_req_post(req, ev); >+ } >+ >+ return req; >+} >+ >+static bool ban_node_check(struct tevent_req *req) >+{ >+ struct tevent_req *subreq; >+ struct ban_node_state *state = tevent_req_data( >+ req, struct ban_node_state); >+ struct ctdb_req_control request; >+ unsigned max_credits = 0, i; >+ >+ for (i=0; i<state->nlist->count; i++) { >+ if (state->nlist->ban_credits[i] > max_credits) { >+ state->max_pnn = state->nlist->pnn_list[i]; >+ max_credits = state->nlist->ban_credits[i]; >+ } >+ } >+ >+ if (max_credits < NUM_RETRIES) { >+ tevent_req_done(req); >+ return false; >+ } >+ >+ ctdb_req_control_get_nodemap(&request); >+ subreq = ctdb_client_control_send(state, >+ state->ev, >+ state->client, >+ state->max_pnn, >+ TIMEOUT(), >+ &request); >+ if (tevent_req_nomem(subreq, req)) { >+ return false; >+ } >+ tevent_req_set_callback(subreq, ban_node_check_done, req); >+ >+ return true; >+} >+ >+static void ban_node_check_done(struct tevent_req *subreq) >+{ >+ struct tevent_req *req = tevent_req_callback_data( >+ subreq, struct tevent_req); >+ struct ban_node_state *state = tevent_req_data( >+ req, struct ban_node_state); >+ struct ctdb_reply_control *reply; >+ struct ctdb_node_map *nodemap; >+ struct ctdb_req_control request; >+ struct ctdb_ban_state ban; >+ unsigned int i; >+ int ret; >+ bool ok; >+ >+ ok = ctdb_client_control_recv(subreq, &ret, state, &reply); >+ TALLOC_FREE(subreq); >+ if (!ok) { >+ D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n", >+ state->max_pnn, ret); >+ tevent_req_error(req, ret); >+ return; >+ } >+ >+ ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); >+ if (ret != 0) { >+ D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); >+ 
tevent_req_error(req, ret); >+ return; >+ } >+ >+ for (i=0; i<nodemap->num; i++) { >+ if (nodemap->node[i].pnn != state->max_pnn) { >+ continue; >+ } >+ >+ /* If the node became inactive, reset ban_credits */ >+ if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { >+ unsigned int j; >+ >+ for (j=0; j<state->nlist->count; j++) { >+ if (state->nlist->pnn_list[j] == >+ state->max_pnn) { >+ state->nlist->ban_credits[j] = 0; >+ break; >+ } >+ } >+ state->max_pnn = CTDB_UNKNOWN_PNN; >+ } >+ } >+ >+ talloc_free(nodemap); >+ talloc_free(reply); >+ >+ /* If node becames inactive during recovery, pick next */ >+ if (state->max_pnn == CTDB_UNKNOWN_PNN) { >+ (void) ban_node_check(req); >+ return; >+ } >+ >+ ban = (struct ctdb_ban_state) { >+ .pnn = state->max_pnn, >+ .time = state->tun_list->recovery_ban_period, >+ }; >+ >+ D_ERR("Banning node %u for %u seconds\n", ban.pnn, ban.time); >+ >+ ctdb_req_control_set_ban_state(&request, &ban); >+ subreq = ctdb_client_control_send(state, >+ state->ev, >+ state->client, >+ ban.pnn, >+ TIMEOUT(), >+ &request); >+ if (tevent_req_nomem(subreq, req)) { >+ return; >+ } >+ tevent_req_set_callback(subreq, ban_node_done, req); >+} >+ >+static void ban_node_done(struct tevent_req *subreq) >+{ >+ struct tevent_req *req = tevent_req_callback_data( >+ subreq, struct tevent_req); >+ struct node_ban_state *state = tevent_req_data( >+ req, struct node_ban_state); >+ struct ctdb_reply_control *reply; >+ int ret; >+ bool status; >+ >+ status = ctdb_client_control_recv(subreq, &ret, state, &reply); >+ TALLOC_FREE(subreq); >+ if (! status) { >+ tevent_req_error(req, ret); >+ return; >+ } >+ >+ ret = ctdb_reply_control_set_ban_state(reply); >+ if (ret != 0) { >+ D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); >+ tevent_req_error(req, ret); >+ return; >+ } >+ >+ talloc_free(reply); >+ tevent_req_done(req); >+} >+ >+static bool ban_node_recv(struct tevent_req *req, int *perr) >+{ >+ if (tevent_req_is_unix_error(req, perr)) { >+ return false; >+ } >+ >+ return true; >+} > > /* > * Run the parallel database recovery >@@ -2724,50 +2924,15 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) > D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); > > if (! 
status) { >- uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0; >- unsigned int i; >- >- /* Bans are not enabled */ >- if (state->tun_list->enable_bans == 0) { >- tevent_req_error(req, EIO); >+ subreq = ban_node_send(state, >+ state->ev, >+ state->client, >+ state->tun_list, >+ state->nlist); >+ if (tevent_req_nomem(subreq, req)) { > return; > } >- >- for (i=0; i<state->nlist->count; i++) { >- if (state->nlist->ban_credits[i] > max_credits) { >- max_pnn = state->nlist->pnn_list[i]; >- max_credits = state->nlist->ban_credits[i]; >- } >- } >- >- /* If pulling database fails multiple times */ >- if (max_credits >= NUM_RETRIES) { >- struct ctdb_ban_state ban_state = { >- .pnn = max_pnn, >- .time = state->tun_list->recovery_ban_period, >- }; >- >- D_ERR("Banning node %u for %u seconds\n", >- ban_state.pnn, >- ban_state.time); >- >- ctdb_req_control_set_ban_state(&request, >- &ban_state); >- subreq = ctdb_client_control_send(state, >- state->ev, >- state->client, >- ban_state.pnn, >- TIMEOUT(), >- &request); >- if (tevent_req_nomem(subreq, req)) { >- return; >- } >- tevent_req_set_callback(subreq, >- recovery_failed_done, >- req); >- } else { >- tevent_req_error(req, EIO); >- } >+ tevent_req_set_callback(subreq, recovery_failed_done, req); > return; > } > >@@ -2789,25 +2954,15 @@ static void recovery_failed_done(struct tevent_req *subreq) > { > struct tevent_req *req = tevent_req_callback_data( > subreq, struct tevent_req); >- struct recovery_state *state = tevent_req_data( >- req, struct recovery_state); >- struct ctdb_reply_control *reply; > int ret; > bool status; > >- status = ctdb_client_control_recv(subreq, &ret, state, &reply); >+ status = ban_node_recv(subreq, &ret); > TALLOC_FREE(subreq); > if (! status) { > D_ERR("failed to ban node, ret=%d\n", ret); >- goto done; > } > >- ret = ctdb_reply_control_set_ban_state(reply); >- if (ret != 0) { >- D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); >- } >- >-done: > tevent_req_error(req, EIO); > } > >-- >2.25.1 > > >From fb977f726aeca56126a504e922ded88645a788ee Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Thu, 20 Feb 2020 13:48:13 +1100 >Subject: [PATCH 26/38] ctdb-daemon: Fix database attach deferral logic > >Commit 3cc230b5eeca749ab68d19cfda969f72c269f1f6 says: > > Dont allow clients to connect to databases untile we are well past > and through the initial recovery phase > >It is unclear what this commit was attempting to do. The commit >message implies that more attaches should be deferred but the code >change adds a conjunction that causes less attaches to be deferred. >In particular, no attaches will be deferred after startup is complete. >This seems wrong. > >To implement what seems to be stated in the commit message an "or" >needs to be used so that non-recovery daemon attaches are deferred >either when in recovery or before startup is complete. Making this >change highlights that attaches need to be allowed during the >"startup" event because this is when smbd is started. 
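Reduced to booleans, the change is from a conjunction that stops deferring attaches once the daemon is running, to a disjunction that defers any non-recovery attach while a recovery is active or before the "startup" run state is reached. The sketch below spells out both predicates; the struct and field names are illustrative, while the real check compares the client PID with the recovery daemon's PID and the run state enum.

#include <stdbool.h>

struct sketch_attach_ctx {
        bool from_recoverd;     /* request comes from the recovery daemon */
        bool in_recovery;       /* recovery mode is ACTIVE */
        bool before_startup;    /* run state below "startup" */
        bool before_running;    /* run state below "running" */
};

/* Old predicate: once the daemon reaches "running", nothing is
 * deferred, even in the middle of a recovery. */
static bool sketch_defer_attach_old(const struct sketch_attach_ctx *c)
{
        return c->in_recovery && !c->from_recoverd && c->before_running;
}

/* New predicate: defer non-recovery attaches during recovery or before
 * the "startup" event; attaches during "startup" itself stay allowed,
 * which matters because that is when smbd is started. */
static bool sketch_defer_attach_new(const struct sketch_attach_ctx *c)
{
        return !c->from_recoverd &&
               (c->in_recovery || c->before_startup);
}
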
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit c6c89495fbe9b6f238d10a538eccc92b937a69de) >--- > ctdb/server/ctdb_ltdb_server.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > >diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c >index 970eb54b00b..a6709ff72de 100644 >--- a/ctdb/server/ctdb_ltdb_server.c >+++ b/ctdb/server/ctdb_ltdb_server.c >@@ -1135,9 +1135,9 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, > return -1; > } > >- if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE && >- client->pid != ctdb->recoverd_pid && >- ctdb->runstate < CTDB_RUNSTATE_RUNNING) { >+ if (client->pid != ctdb->recoverd_pid && >+ (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || >+ ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { > struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); > > if (da_ctx == NULL) { >-- >2.25.1 > > >From fa14e812efc0ae395819d943f5b22a227f47d56a Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Wed, 26 Feb 2020 17:03:49 +1100 >Subject: [PATCH 27/38] ctdb-daemon: Remove unused old client database > functions > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit fc23cd1b9cdd1d70067491614b16e616291e8ff2) >--- > ctdb/include/ctdb_client.h | 22 ------ > ctdb/include/ctdb_private.h | 2 + > ctdb/server/ctdb_client.c | 146 ------------------------------------ > 3 files changed, 2 insertions(+), 168 deletions(-) > >diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h >index ef4950ab533..d1dce1e68d8 100644 >--- a/ctdb/include/ctdb_client.h >+++ b/ctdb/include/ctdb_client.h >@@ -176,9 +176,6 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, > int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode, uint32_t *runstate); > >-int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, >- uint32_t destnode, uint32_t dbid, >- TALLOC_CTX *mem_ctx, const char **path); > int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode, uint32_t dbid, > TALLOC_CTX *mem_ctx, const char **name); >@@ -190,25 +187,6 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, > int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, > int32_t *level); > >-/* >- attach to a ctdb database >-*/ >-int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, >- int *tdb_flags); >- >-struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, >- struct timeval timeout, >- const char *name, >- uint8_t db_flags); >- >-/* a ctdb call function */ >-typedef int (*ctdb_fn_t)(struct ctdb_call_info *); >- >-/* >- setup a ctdb call function >-*/ >-int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id); >- > int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode); > >diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h >index d3e70b5e2fa..2f37db36e0c 100644 >--- a/ctdb/include/ctdb_private.h >+++ b/ctdb/include/ctdb_private.h >@@ -36,6 +36,8 @@ struct ctdb_tcp_array { > /* > an installed ctdb remote call > */ >+typedef int (*ctdb_fn_t)(struct ctdb_call_info *); >+ > struct 
ctdb_registered_call { > struct ctdb_registered_call *next, *prev; > uint32_t id; >diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c >index dc7836abb2e..4c67990c0b5 100644 >--- a/ctdb/server/ctdb_client.c >+++ b/ctdb/server/ctdb_client.c >@@ -1107,36 +1107,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, > return 0; > } > >-/* >- find the real path to a ltdb >- */ >-int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, >- const char **path) >-{ >- int ret; >- int32_t res; >- TDB_DATA data; >- >- data.dptr = (uint8_t *)&dbid; >- data.dsize = sizeof(dbid); >- >- ret = ctdb_control(ctdb, destnode, 0, >- CTDB_CONTROL_GETDBPATH, 0, data, >- mem_ctx, &data, &res, &timeout, NULL); >- if (ret != 0 || res != 0) { >- return -1; >- } >- >- (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); >- if ((*path) == NULL) { >- return -1; >- } >- >- talloc_free(data.dptr); >- >- return 0; >-} >- > /* > find the name of a db > */ >@@ -1233,122 +1203,6 @@ int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32 > return 0; > } > >-/* >- * Get db open flags >- */ >-int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, >- int *tdb_flags) >-{ >- TDB_DATA indata, outdata; >- int ret; >- int32_t res; >- >- indata.dptr = (uint8_t *)&db_id; >- indata.dsize = sizeof(db_id); >- >- ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, >- CTDB_CONTROL_DB_OPEN_FLAGS, 0, indata, >- ctdb, &outdata, &res, NULL, NULL); >- if (ret != 0 || res != 0) { >- D_ERR("ctdb control for db open flags failed\n"); >- return -1; >- } >- >- if (outdata.dsize != sizeof(int32_t)) { >- D_ERR(__location__ " expected %zi bytes, received %zi bytes\n", >- sizeof(int32_t), outdata.dsize); >- talloc_free(outdata.dptr); >- return -1; >- } >- >- *tdb_flags = *(int32_t *)outdata.dptr; >- talloc_free(outdata.dptr); >- return 0; >-} >- >-/* >- attach to a specific database - client call >-*/ >-struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, >- struct timeval timeout, >- const char *name, >- uint8_t db_flags) >-{ >- struct ctdb_db_context *ctdb_db; >- int ret; >- int tdb_flags; >- >- ctdb_db = ctdb_db_handle(ctdb, name); >- if (ctdb_db) { >- return ctdb_db; >- } >- >- ctdb_db = talloc_zero(ctdb, struct ctdb_db_context); >- CTDB_NO_MEMORY_NULL(ctdb, ctdb_db); >- >- ctdb_db->ctdb = ctdb; >- ctdb_db->db_name = talloc_strdup(ctdb_db, name); >- CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name); >- >- /* tell ctdb daemon to attach */ >- ret = ctdb_ctrl_createdb(ctdb, timeout, CTDB_CURRENT_NODE, >- ctdb_db, name, db_flags, &ctdb_db->db_id); >- if (ret != 0) { >- DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name)); >- talloc_free(ctdb_db); >- return NULL; >- } >- >- ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path); >- if (ret != 0) { >- DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name)); >- talloc_free(ctdb_db); >- return NULL; >- } >- >- ret = ctdb_ctrl_db_open_flags(ctdb, ctdb_db->db_id, &tdb_flags); >- if (ret != 0) { >- D_ERR("Failed to get tdb_flags for database '%s'\n", name); >- talloc_free(ctdb_db); >- return NULL; >- } >- >- ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path, 0, tdb_flags, >- O_RDWR, 0); >- if (ctdb_db->ltdb == NULL) { >- ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path); >- talloc_free(ctdb_db); >- return NULL; >- } >- >- ctdb_db->db_flags = db_flags; >- >- 
DLIST_ADD(ctdb->db_list, ctdb_db); >- >- /* add well known functions */ >- ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC); >- ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC); >- ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC); >- >- return ctdb_db; >-} >- >-/* >- setup a call for a database >- */ >-int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id) >-{ >- struct ctdb_registered_call *call; >- >- /* register locally */ >- call = talloc(ctdb_db, struct ctdb_registered_call); >- call->fn = fn; >- call->id = id; >- >- DLIST_ADD(ctdb_db->calls, call); >- return 0; >-} >- > /* Freeze all databases */ > int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode) >-- >2.25.1 > > >From 81501d83f3e81fa874e85d20fe947350f49fa096 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 21 Feb 2020 11:04:14 +1100 >Subject: [PATCH 28/38] ctdb-protocol: Add control flag > CTDB_CTRL_FLAG_ATTACH_RECOVERY > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 17ed0425904a98624284d351ab7617b3e02c0f7b) >--- > ctdb/protocol/protocol.h | 1 + > 1 file changed, 1 insertion(+) > >diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h >index 43175ae3a95..04a651018be 100644 >--- a/ctdb/protocol/protocol.h >+++ b/ctdb/protocol/protocol.h >@@ -944,6 +944,7 @@ struct ctdb_req_control { > #define CTDB_CTRL_FLAG_OPCODE_SPECIFIC 0xFFFF0000 > /* Ugly overloading of this field... */ > #define CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE 0x00010000 >+#define CTDB_CTRL_FLAG_ATTACH_RECOVERY 0x00020000 > uint32_t flags; > struct ctdb_req_control_data rdata; > }; >-- >2.25.1 > > >From 9ce65cca8d9568638a2c574d2195f624a5a00ae6 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 21 Feb 2020 11:13:05 +1100 >Subject: [PATCH 29/38] ctdb-recovery: Use CTDB_CTRL_FLAG_ATTACH_RECOVERY to > attach during recovery > >ctdb_ctrl_createdb() is only called by the recovery daemon, so this is >a safe, temporary change. This is temporary because >ctdb_ctrl_createdb(), create_missing_remote_databases() and >create_missing_local_databases() will all go away soon. > >Note that this doesn't cause a change in behaviour. The main daemon >will still only defer attaches from non-recoverd processes during >recovery. 
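What the flag buys, here and in the following patch, is that "this attach is part of a recovery" travels as an explicit bit in the control request instead of being inferred from the sender's PID, so the same request can later be issued from the recovery helper process. A small sketch of that shift follows; the request struct and helper functions are illustrative stand-ins, and only the flag name and its value 0x00020000 come from the patch series itself.

#include <stdbool.h>
#include <stdint.h>

#define CTDB_CTRL_FLAG_ATTACH_RECOVERY 0x00020000   /* from patch 28/38 */

/* Illustrative stand-in for a control request carrying flags */
struct sketch_attach_request {
        uint32_t flags;
        const char *db_name;
};

/* Sender side: mark the attach explicitly as recovery-driven */
static void sketch_request_recovery_attach(struct sketch_attach_request *req,
                                           const char *db_name)
{
        req->db_name = db_name;
        req->flags |= CTDB_CTRL_FLAG_ATTACH_RECOVERY;
}

/* Receiver side: the daemon no longer needs to know the recovery
 * daemon's PID; any sender that sets the flag may attach even while
 * recovery is active or before startup. */
static bool sketch_defer_attach(const struct sketch_attach_request *req,
                                bool in_recovery, bool before_startup)
{
        if (req->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) {
                return false;
        }
        return in_recovery || before_startup;
}
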
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 98e3d0db2bc5f33217e26fab1dfb4bb91eae534f) >--- > ctdb/server/ctdb_client.c | 13 +++++++++++-- > 1 file changed, 11 insertions(+), 2 deletions(-) > >diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c >index 4c67990c0b5..26055698568 100644 >--- a/ctdb/server/ctdb_client.c >+++ b/ctdb/server/ctdb_client.c >@@ -1160,8 +1160,17 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, > opcode = CTDB_CONTROL_DB_ATTACH; > } > >- ret = ctdb_control(ctdb, destnode, 0, opcode, 0, data, >- mem_ctx, &data, &res, &timeout, NULL); >+ ret = ctdb_control(ctdb, >+ destnode, >+ 0, >+ opcode, >+ CTDB_CTRL_FLAG_ATTACH_RECOVERY, >+ data, >+ mem_ctx, >+ &data, >+ &res, >+ &timeout, >+ NULL); > > if (ret != 0 || res != 0) { > return -1; >-- >2.25.1 > > >From f75adc0c0b7c7fcd34c35db5d9dcdf5cdc62e2f6 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Wed, 26 Feb 2020 11:50:09 +1100 >Subject: [PATCH 30/38] ctdb-daemon: Respect CTDB_CTRL_FLAG_ATTACH_RECOVERY > when attaching databases > >This is currently only set by the recovery daemon when it attaches >missing databases, so there is no obvious behaviour change. However, >attaching missing databases can now be moved to the recovery helper as >long as it sets this flag. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 7e5a8a4884ea87bb985fe0e2b65ff130fc2ba8aa) >--- > ctdb/server/ctdb_ltdb_server.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > >diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c >index a6709ff72de..e050b7304fe 100644 >--- a/ctdb/server/ctdb_ltdb_server.c >+++ b/ctdb/server/ctdb_ltdb_server.c >@@ -1135,7 +1135,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, > return -1; > } > >- if (client->pid != ctdb->recoverd_pid && >+ if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && > (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || > ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { > struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); >-- >2.25.1 > > >From 297b7ee6e2b70c6c7db69665994f60ffb91a39f7 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 21 Feb 2020 12:24:39 +1100 >Subject: [PATCH 31/38] ctdb-recovery: Replace use of ctdb_dbid_map with local > db_list > >This will be used to build a merged list of databases from all nodes, >allowing the recovery helper to create missing databases. > >It would be possible to also include the db_name field in this >structure but that would cause a lot of churn. This field is used >locally in the recovery of each database so can continue to live in >the relevant state structure(s). 
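The point of carrying a per-database list of attached nodes becomes visible a couple of patches later: once every database records where it is already attached, the nodes where it is missing are just the set difference against the full node list. Here is a minimal sketch of that computation using illustrative types; the actual patches keep this state in the db/db_list structures and drive the attach with DB_ATTACH controls.

#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-in for one entry of the merged database list */
struct sketch_db {
        uint32_t db_id;
        const uint32_t *attached_pnns;  /* nodes that reported this db */
        size_t num_attached;
};

/* Fill 'missing' (capacity num_all) with the nodes from 'all_pnns'
 * that do not appear in the database's attached list; these are the
 * nodes where the database still has to be created. */
static size_t sketch_missing_nodes(const struct sketch_db *db,
                                   const uint32_t *all_pnns,
                                   size_t num_all,
                                   uint32_t *missing)
{
        size_t count = 0;
        size_t i, j;

        for (i = 0; i < num_all; i++) {
                for (j = 0; j < db->num_attached; j++) {
                        if (all_pnns[i] == db->attached_pnns[j]) {
                                break;
                        }
                }
                if (j == db->num_attached) {
                        missing[count] = all_pnns[i];
                        count++;
                }
        }

        return count;
}
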
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 4c0b9c36050a0ed8a180d4ac1853224089528e8e) >--- > ctdb/server/ctdb_recovery_helper.c | 176 ++++++++++++++++++++++++++--- > 1 file changed, 161 insertions(+), 15 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 1f3b58312c4..df96240d8da 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -27,6 +27,7 @@ > #include <libgen.h> > > #include "lib/tdb_wrap/tdb_wrap.h" >+#include "lib/util/dlinklist.h" > #include "lib/util/sys_rw.h" > #include "lib/util/time.h" > #include "lib/util/tevent_unix.h" >@@ -169,6 +170,130 @@ static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) > } > } > >+/* >+ * Database list functions >+ * >+ * Simple, naive implementation that could be updated to a db_hash or similar >+ */ >+ >+struct db { >+ struct db *prev, *next; >+ >+ uint32_t db_id; >+ uint32_t db_flags; >+ uint32_t *pnn_list; >+ unsigned int num_nodes; >+}; >+ >+struct db_list { >+ unsigned int num_dbs; >+ struct db *db; >+ unsigned int num_nodes; >+}; >+ >+static struct db_list *db_list_init(TALLOC_CTX *mem_ctx, unsigned int num_nodes) >+{ >+ struct db_list *l; >+ >+ l = talloc_zero(mem_ctx, struct db_list); >+ l->num_nodes = num_nodes; >+ >+ return l; >+} >+ >+static struct db *db_list_find(struct db_list *dblist, uint32_t db_id) >+{ >+ struct db *db; >+ >+ if (dblist == NULL) { >+ return NULL; >+ } >+ >+ db = dblist->db; >+ while (db != NULL && db->db_id != db_id) { >+ db = db->next; >+ } >+ >+ return db; >+} >+ >+static int db_list_add(struct db_list *dblist, >+ uint32_t db_id, >+ uint32_t db_flags, >+ uint32_t node) >+{ >+ struct db *db = NULL; >+ >+ if (dblist == NULL) { >+ return EINVAL; >+ } >+ >+ db = talloc_zero(dblist, struct db); >+ if (db == NULL) { >+ return ENOMEM; >+ } >+ >+ db->db_id = db_id; >+ db->db_flags = db_flags; >+ db->pnn_list = talloc_zero_array(db, uint32_t, dblist->num_nodes); >+ if (db->pnn_list == NULL) { >+ talloc_free(db); >+ return ENOMEM; >+ } >+ db->pnn_list[0] = node; >+ db->num_nodes = 1; >+ >+ DLIST_ADD_END(dblist->db, db); >+ dblist->num_dbs++; >+ >+ return 0; >+} >+ >+static int db_list_check_and_add(struct db_list *dblist, >+ uint32_t db_id, >+ uint32_t db_flags, >+ uint32_t node) >+{ >+ struct db *db = NULL; >+ int ret; >+ >+ /* >+ * These flags are masked out because they are only set on a >+ * node when a client attaches to that node, so they might not >+ * be set yet. They can't be passed as part of the attch, so >+ * they're no use here. 
>+ */ >+ db_flags &= ~(CTDB_DB_FLAGS_READONLY | CTDB_DB_FLAGS_STICKY); >+ >+ if (dblist == NULL) { >+ return EINVAL; >+ } >+ >+ db = db_list_find(dblist, db_id); >+ if (db == NULL) { >+ ret = db_list_add(dblist, db_id, db_flags, node); >+ return ret; >+ } >+ >+ if (db->db_flags != db_flags) { >+ D_ERR("Incompatible database flags for 0x%"PRIx32" " >+ "(0x%"PRIx32" != 0x%"PRIx32")\n", >+ db_id, >+ db_flags, >+ db->db_flags); >+ return EINVAL; >+ } >+ >+ if (db->num_nodes >= dblist->num_nodes) { >+ return EINVAL; >+ } >+ >+ db->pnn_list[db->num_nodes] = node; >+ db->num_nodes++; >+ >+ return 0; >+} >+ > /* > * Recovery database functions > */ >@@ -2014,7 +2139,7 @@ static bool recover_db_recv(struct tevent_req *req) > > struct db_recovery_state { > struct tevent_context *ev; >- struct ctdb_dbid_map *dbmap; >+ struct db_list *dblist; > unsigned int num_replies; > unsigned int num_failed; > }; >@@ -2022,7 +2147,7 @@ struct db_recovery_state { > struct db_recovery_one_state { > struct tevent_req *req; > struct ctdb_client_context *client; >- struct ctdb_dbid_map *dbmap; >+ struct db_list *dblist; > struct ctdb_tunable_list *tun_list; > struct node_list *nlist; > uint32_t generation; >@@ -2036,14 +2161,14 @@ static void db_recovery_one_done(struct tevent_req *subreq); > static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > struct tevent_context *ev, > struct ctdb_client_context *client, >- struct ctdb_dbid_map *dbmap, >+ struct db_list *dblist, > struct ctdb_tunable_list *tun_list, > struct node_list *nlist, > uint32_t generation) > { > struct tevent_req *req, *subreq; > struct db_recovery_state *state; >- unsigned int i; >+ struct db *db; > > req = tevent_req_create(mem_ctx, &state, struct db_recovery_state); > if (req == NULL) { >@@ -2051,16 +2176,16 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > } > > state->ev = ev; >- state->dbmap = dbmap; >+ state->dblist = dblist; > state->num_replies = 0; > state->num_failed = 0; > >- if (dbmap->num == 0) { >+ if (dblist->num_dbs == 0) { > tevent_req_done(req); > return tevent_req_post(req, ev); > } > >- for (i=0; i<dbmap->num; i++) { >+ for (db = dblist->db; db != NULL; db = db->next) { > struct db_recovery_one_state *substate; > > substate = talloc_zero(state, struct db_recovery_one_state); >@@ -2070,12 +2195,12 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > > substate->req = req; > substate->client = client; >- substate->dbmap = dbmap; >+ substate->dblist = dblist; > substate->tun_list = tun_list; > substate->nlist = nlist; > substate->generation = generation; >- substate->db_id = dbmap->dbs[i].db_id; >- substate->db_flags = dbmap->dbs[i].flags; >+ substate->db_id = db->db_id; >+ substate->db_flags = db->db_flags; > > subreq = recover_db_send(state, > ev, >@@ -2138,7 +2263,7 @@ failed: > done: > state->num_replies += 1; > >- if (state->num_replies == state->dbmap->num) { >+ if (state->num_replies == state->dblist->num_dbs) { > tevent_req_done(req); > } > } >@@ -2387,7 +2512,7 @@ struct recovery_state { > struct node_list *nlist; > struct ctdb_tunable_list *tun_list; > struct ctdb_vnn_map *vnnmap; >- struct ctdb_dbid_map *dbmap; >+ struct db_list *dblist; > }; > > static void recovery_tunables_done(struct tevent_req *subreq); >@@ -2709,6 +2834,8 @@ static void recovery_dbmap_done(struct tevent_req *subreq) > req, struct recovery_state); > struct ctdb_reply_control *reply; > struct ctdb_req_control request; >+ struct ctdb_dbid_map *dbmap = NULL; >+ unsigned int j; > int ret; > bool status; > >@@ 
-2721,13 +2848,32 @@ static void recovery_dbmap_done(struct tevent_req *subreq) > return; > } > >- ret = ctdb_reply_control_get_dbmap(reply, state, &state->dbmap); >+ state->dblist = db_list_init(state, state->nlist->count); >+ if (tevent_req_nomem(state->dblist, req)) { >+ D_ERR("memory allocation error\n"); >+ return; >+ } >+ >+ ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); > if (ret != 0) { > D_ERR("control GET_DBMAP failed, ret=%d\n", ret); > tevent_req_error(req, ret); > return; > } > >+ for (j = 0; j < dbmap->num; j++) { >+ ret = db_list_check_and_add(state->dblist, >+ dbmap->dbs[j].db_id, >+ dbmap->dbs[j].flags, >+ state->destnode); >+ if (ret != 0) { >+ D_ERR("failed to add database list entry, ret=%d\n", >+ ret); >+ tevent_req_error(req, ret); >+ return; >+ } >+ } >+ > ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); > subreq = ctdb_client_control_multi_send(state, > state->ev, >@@ -2898,7 +3044,7 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) > subreq = db_recovery_send(state, > state->ev, > state->client, >- state->dbmap, >+ state->dblist, > state->tun_list, > state->nlist, > state->vnnmap->generation); >@@ -2921,7 +3067,7 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) > status = db_recovery_recv(subreq, &count); > TALLOC_FREE(subreq); > >- D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); >+ D_ERR("%d of %d databases recovered\n", count, state->dblist->num_dbs); > > if (! status) { > subreq = ban_node_send(state, >-- >2.25.1 > > >From 5c45da6421de5ed3aee2cc19151ea72195139802 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 21 Feb 2020 16:10:05 +1100 >Subject: [PATCH 32/38] ctdb-recovery: GET_DBMAP from all nodes > >This builds a complete list of databases across the cluster so it can >be used to create databases on the nodes where they are missing. 
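The merge this commit performs can be pictured as folding every node's database map into one cluster-wide list, recording for each database which nodes already have it and giving up if two nodes disagree on the flags. The following is a self-contained sketch under illustrative, fixed-capacity types; the patch itself additionally masks out the client-set READONLY and STICKY flags before comparing and reports failures per node via the control error list.

#include <stddef.h>
#include <stdint.h>

#define SKETCH_MAX_DBS   32     /* sketch-only capacities */
#define SKETCH_MAX_NODES 32

struct sketch_db_entry {
        uint32_t db_id;
        uint32_t flags;
        uint32_t nodes[SKETCH_MAX_NODES];   /* nodes attached to this db */
        size_t num_nodes;
};

struct sketch_node_dbmap {          /* one node's own database map */
        uint32_t pnn;
        size_t num_dbs;
        uint32_t db_ids[SKETCH_MAX_DBS];
        uint32_t db_flags[SKETCH_MAX_DBS];
};

/* Fold all per-node maps into 'merged' (capacity SKETCH_MAX_DBS,
 * *num_merged initialised to 0 by the caller); returns 0 on success
 * or -1 on a flag mismatch or capacity overflow. */
static int sketch_merge_dbmaps(struct sketch_db_entry *merged,
                               size_t *num_merged,
                               const struct sketch_node_dbmap *maps,
                               size_t num_nodes)
{
        size_t n, d, m;

        for (n = 0; n < num_nodes; n++) {
                for (d = 0; d < maps[n].num_dbs; d++) {
                        for (m = 0; m < *num_merged; m++) {
                                if (merged[m].db_id == maps[n].db_ids[d]) {
                                        break;
                                }
                        }

                        if (m == *num_merged) {
                                /* First node reporting this database */
                                if (m == SKETCH_MAX_DBS) {
                                        return -1;
                                }
                                merged[m].db_id = maps[n].db_ids[d];
                                merged[m].flags = maps[n].db_flags[d];
                                merged[m].num_nodes = 0;
                                *num_merged += 1;
                        } else if (merged[m].flags != maps[n].db_flags[d]) {
                                /* Nodes disagree on the database flags */
                                return -1;
                        }

                        if (merged[m].num_nodes == SKETCH_MAX_NODES) {
                                return -1;
                        }
                        merged[m].nodes[merged[m].num_nodes] = maps[n].pnn;
                        merged[m].num_nodes += 1;
                }
        }

        return 0;
}
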
> >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit c6f74e590f602e2ed38fe293468770a5e669aefa) >--- > ctdb/server/ctdb_recovery_helper.c | 77 +++++++++++++++++++++--------- > 1 file changed, 54 insertions(+), 23 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index df96240d8da..d5a264df5d2 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -2817,9 +2817,13 @@ static void recovery_capabilities_done(struct tevent_req *subreq) > talloc_free(reply); > > ctdb_req_control_get_dbmap(&request); >- subreq = ctdb_client_control_send(state, state->ev, state->client, >- state->destnode, TIMEOUT(), >- &request); >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, >+ state->client, >+ state->nlist->pnn_list, >+ state->nlist->count, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return; > } >@@ -2832,18 +2836,34 @@ static void recovery_dbmap_done(struct tevent_req *subreq) > subreq, struct tevent_req); > struct recovery_state *state = tevent_req_data( > req, struct recovery_state); >- struct ctdb_reply_control *reply; >+ struct ctdb_reply_control **reply; > struct ctdb_req_control request; >- struct ctdb_dbid_map *dbmap = NULL; >- unsigned int j; >+ int *err_list; >+ unsigned int i, j; > int ret; > bool status; > >- status = ctdb_client_control_recv(subreq, &ret, state, &reply); >+ status = ctdb_client_control_multi_recv(subreq, >+ &ret, >+ state, >+ &err_list, >+ &reply); > TALLOC_FREE(subreq); > if (! status) { >- D_ERR("control GET_DBMAP failed to node %u, ret=%d\n", >- state->destnode, ret); >+ int ret2; >+ uint32_t pnn; >+ >+ ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, >+ state->nlist->count, >+ err_list, >+ &pnn); >+ if (ret2 != 0) { >+ D_ERR("control GET_DBMAP failed on node %u," >+ " ret=%d\n", pnn, ret2); >+ } else { >+ D_ERR("control GET_DBMAP failed, ret=%d\n", >+ ret); >+ } > tevent_req_error(req, ret); > return; > } >@@ -2854,24 +2874,35 @@ static void recovery_dbmap_done(struct tevent_req *subreq) > return; > } > >- ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); >- if (ret != 0) { >- D_ERR("control GET_DBMAP failed, ret=%d\n", ret); >- tevent_req_error(req, ret); >- return; >- } >+ for (i = 0; i < state->nlist->count; i++) { >+ struct ctdb_dbid_map *dbmap = NULL; >+ uint32_t pnn; > >- for (j = 0; j < dbmap->num; j++) { >- ret = db_list_check_and_add(state->dblist, >- dbmap->dbs[j].db_id, >- dbmap->dbs[j].flags, >- state->destnode); >+ pnn = state->nlist->pnn_list[i]; >+ >+ ret = ctdb_reply_control_get_dbmap(reply[i], state, &dbmap); > if (ret != 0) { >- D_ERR("failed to add database list entry, ret=%d\n", >- ret); >- tevent_req_error(req, ret); >+ D_ERR("control GET_DBMAP failed on node %u\n", >+ pnn); >+ tevent_req_error(req, EPROTO); > return; > } >+ >+ for (j = 0; j < dbmap->num; j++) { >+ ret = db_list_check_and_add(state->dblist, >+ dbmap->dbs[j].db_id, >+ dbmap->dbs[j].flags, >+ pnn); >+ if (ret != 0) { >+ D_ERR("failed to add database list entry, " >+ "ret=%d\n", >+ ret); >+ tevent_req_error(req, ret); >+ return; >+ } >+ } >+ >+ TALLOC_FREE(dbmap); > } > > ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); >-- >2.25.1 > > >From 380130009d2c76383ac5e87548e0ca9d1e6171ff Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Fri, 21 Feb 2020 16:51:10 +1100 >Subject: 
[PATCH 33/38] ctdb-recovery: Pass db structure for each database > recovery > >Instead of db_id and db_flags. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 1bdfeb3fdc06947a607957ab3d114f97bad5d7d7) >--- > ctdb/server/ctdb_recovery_helper.c | 59 ++++++++++++++---------------- > 1 file changed, 28 insertions(+), 31 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index d5a264df5d2..2b77542245a 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -1672,8 +1672,7 @@ struct recover_db_state { > struct ctdb_client_context *client; > struct ctdb_tunable_list *tun_list; > struct node_list *nlist; >- uint32_t db_id; >- uint8_t db_flags; >+ struct db *db; > > uint32_t destnode; > struct ctdb_transdb transdb; >@@ -1698,8 +1697,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, > struct ctdb_tunable_list *tun_list, > struct node_list *nlist, > uint32_t generation, >- uint32_t db_id, >- uint8_t db_flags) >+ struct db *db) > { > struct tevent_req *req, *subreq; > struct recover_db_state *state; >@@ -1714,14 +1712,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, > state->client = client; > state->tun_list = tun_list; > state->nlist = nlist; >- state->db_id = db_id; >- state->db_flags = db_flags; >+ state->db = db; > > state->destnode = ctdb_client_pnn(client); >- state->transdb.db_id = db_id; >+ state->transdb.db_id = db->db_id; > state->transdb.tid = generation; > >- ctdb_req_control_get_dbname(&request, db_id); >+ ctdb_req_control_get_dbname(&request, db->db_id); > subreq = ctdb_client_control_send(state, ev, client, state->destnode, > TIMEOUT(), &request); > if (tevent_req_nomem(subreq, req)) { >@@ -1747,7 +1744,7 @@ static void recover_db_name_done(struct tevent_req *subreq) > TALLOC_FREE(subreq); > if (! 
status) { > D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", >- state->db_id, ret); >+ state->db->db_id, ret); > tevent_req_error(req, ret); > return; > } >@@ -1755,14 +1752,14 @@ static void recover_db_name_done(struct tevent_req *subreq) > ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); > if (ret != 0) { > D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", >- state->db_id, ret); >+ state->db->db_id, ret); > tevent_req_error(req, EPROTO); > return; > } > > talloc_free(reply); > >- ctdb_req_control_getdbpath(&request, state->db_id); >+ ctdb_req_control_getdbpath(&request, state->db->db_id); > subreq = ctdb_client_control_send(state, state->ev, state->client, > state->destnode, TIMEOUT(), > &request); >@@ -1802,7 +1799,7 @@ static void recover_db_path_done(struct tevent_req *subreq) > > talloc_free(reply); > >- ctdb_req_control_db_freeze(&request, state->db_id); >+ ctdb_req_control_db_freeze(&request, state->db->db_id); > subreq = ctdb_client_control_multi_send(state, > state->ev, > state->client, >@@ -1873,6 +1870,7 @@ static void recover_db_transaction_started(struct tevent_req *subreq) > struct recover_db_state *state = tevent_req_data( > req, struct recover_db_state); > int *err_list; >+ uint32_t flags; > int ret; > bool status; > >@@ -1899,28 +1897,31 @@ static void recover_db_transaction_started(struct tevent_req *subreq) > return; > } > >- state->recdb = recdb_create(state, state->db_id, state->db_name, >+ flags = state->db->db_flags; >+ state->recdb = recdb_create(state, >+ state->db->db_id, >+ state->db_name, > state->db_path, > state->tun_list->database_hash_size, >- state->db_flags & CTDB_DB_FLAGS_PERSISTENT); >+ flags & CTDB_DB_FLAGS_PERSISTENT); > if (tevent_req_nomem(state->recdb, req)) { > return; > } > >- if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || >- (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { >+ if ((flags & CTDB_DB_FLAGS_PERSISTENT) || >+ (flags & CTDB_DB_FLAGS_REPLICATED)) { > subreq = collect_highseqnum_db_send(state, > state->ev, > state->client, > state->nlist, >- state->db_id, >+ state->db->db_id, > state->recdb); > } else { > subreq = collect_all_db_send(state, > state->ev, > state->client, > state->nlist, >- state->db_id, >+ state->db->db_id, > state->recdb); > } > if (tevent_req_nomem(subreq, req)) { >@@ -1939,8 +1940,8 @@ static void recover_db_collect_done(struct tevent_req *subreq) > int ret; > bool status; > >- if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || >- (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { >+ if ((state->db->db_flags & CTDB_DB_FLAGS_PERSISTENT) || >+ (state->db->db_flags & CTDB_DB_FLAGS_REPLICATED)) { > status = collect_highseqnum_db_recv(subreq, &ret); > } else { > status = collect_all_db_recv(subreq, &ret); >@@ -2076,7 +2077,7 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) > return; > } > >- ctdb_req_control_db_thaw(&request, state->db_id); >+ ctdb_req_control_db_thaw(&request, state->db->db_id); > subreq = ctdb_client_control_multi_send(state, > state->ev, > state->client, >@@ -2151,8 +2152,7 @@ struct db_recovery_one_state { > struct ctdb_tunable_list *tun_list; > struct node_list *nlist; > uint32_t generation; >- uint32_t db_id; >- uint8_t db_flags; >+ struct db *db; > int num_fails; > }; > >@@ -2199,8 +2199,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > substate->tun_list = tun_list; > substate->nlist = nlist; > substate->generation = generation; >- substate->db_id = db->db_id; >- substate->db_flags = db->db_flags; >+ substate->db = 
db; > > subreq = recover_db_send(state, > ev, >@@ -2208,14 +2207,13 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, > tun_list, > nlist, > generation, >- substate->db_id, >- substate->db_flags); >+ substate->db); > if (tevent_req_nomem(subreq, req)) { > return tevent_req_post(req, ev); > } > tevent_req_set_callback(subreq, db_recovery_one_done, > substate); >- D_NOTICE("recover database 0x%08x\n", substate->db_id); >+ D_NOTICE("recover database 0x%08x\n", substate->db->db_id); > } > > return req; >@@ -2246,14 +2244,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) > substate->tun_list, > substate->nlist, > substate->generation, >- substate->db_id, >- substate->db_flags); >+ substate->db); > if (tevent_req_nomem(subreq, req)) { > goto failed; > } > tevent_req_set_callback(subreq, db_recovery_one_done, substate); > D_NOTICE("recover database 0x%08x, attempt %d\n", >- substate->db_id, substate->num_fails+1); >+ substate->db->db_id, substate->num_fails+1); > return; > } > >-- >2.25.1 > > >From 50d9ab28addb231d85a605064d59f6aade827bd1 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Mon, 24 Feb 2020 10:26:34 +1100 >Subject: [PATCH 34/38] ctdb-recovery: Fetch database name from all nodes where > it is attached > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit e6e63f8fb8194634135bf34cda18f6cc8ff69a7c) >--- > ctdb/server/ctdb_recovery_helper.c | 80 +++++++++++++++++++++++++----- > 1 file changed, 67 insertions(+), 13 deletions(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 2b77542245a..0fbafe45fb6 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -1656,7 +1656,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) > > /** > * For each database do the following: >- * - Get DB name >+ * - Get DB name from all nodes > * - Get DB path > * - Freeze database on all nodes > * - Start transaction on all nodes >@@ -1719,8 +1719,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, > state->transdb.tid = generation; > > ctdb_req_control_get_dbname(&request, db->db_id); >- subreq = ctdb_client_control_send(state, ev, client, state->destnode, >- TIMEOUT(), &request); >+ subreq = ctdb_client_control_multi_send(state, >+ ev, >+ client, >+ state->db->pnn_list, >+ state->db->num_nodes, >+ TIMEOUT(), >+ &request); > if (tevent_req_nomem(subreq, req)) { > return tevent_req_post(req, ev); > } >@@ -1735,26 +1740,75 @@ static void recover_db_name_done(struct tevent_req *subreq) > subreq, struct tevent_req); > struct recover_db_state *state = tevent_req_data( > req, struct recover_db_state); >- struct ctdb_reply_control *reply; >+ struct ctdb_reply_control **reply; > struct ctdb_req_control request; >+ int *err_list; >+ unsigned int i; > int ret; > bool status; > >- status = ctdb_client_control_recv(subreq, &ret, state, &reply); >+ status = ctdb_client_control_multi_recv(subreq, >+ &ret, >+ state, >+ &err_list, >+ &reply); > TALLOC_FREE(subreq); > if (! 
status) { >- D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", >- state->db->db_id, ret); >+ int ret2; >+ uint32_t pnn; >+ >+ ret2 = ctdb_client_control_multi_error(state->db->pnn_list, >+ state->db->num_nodes, >+ err_list, >+ &pnn); >+ if (ret2 != 0) { >+ D_ERR("control GET_DBNAME failed on node %u," >+ " ret=%d\n", >+ pnn, >+ ret2); >+ } else { >+ D_ERR("control GET_DBNAME failed, ret=%d\n", >+ ret); >+ } > tevent_req_error(req, ret); > return; > } > >- ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); >- if (ret != 0) { >- D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", >- state->db->db_id, ret); >- tevent_req_error(req, EPROTO); >- return; >+ for (i = 0; i < state->db->num_nodes; i++) { >+ const char *db_name; >+ uint32_t pnn; >+ >+ pnn = state->nlist->pnn_list[i]; >+ >+ ret = ctdb_reply_control_get_dbname(reply[i], >+ state, >+ &db_name); >+ if (ret != 0) { >+ D_ERR("control GET_DBNAME failed on node %u " >+ "for db=0x%x, ret=%d\n", >+ pnn, >+ state->db->db_id, >+ ret); >+ tevent_req_error(req, EPROTO); >+ return; >+ } >+ >+ if (state->db_name == NULL) { >+ state->db_name = db_name; >+ continue; >+ } >+ >+ if (strcmp(state->db_name, db_name) != 0) { >+ D_ERR("Incompatible database name for 0x%"PRIx32" " >+ "(%s != %s) on node %"PRIu32"\n", >+ state->db->db_id, >+ db_name, >+ state->db_name, >+ pnn); >+ node_list_ban_credits(state->nlist, pnn); >+ tevent_req_error(req, ret); >+ return; >+ } > } > > talloc_free(reply); >-- >2.25.1 > > >From 5cecc9688d13ffd499a5eec28f352acac47efcb0 Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Mon, 24 Feb 2020 11:31:33 +1100 >Subject: [PATCH 35/38] ctdb-recovery: Create database on nodes where it is > missing > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 76a8174279f42486b36cc41d5831d4e6613f172e) >--- > ctdb/server/ctdb_recovery_helper.c | 178 ++++++++++++++++++++++++++++- > 1 file changed, 177 insertions(+), 1 deletion(-) > >diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c >index 0fbafe45fb6..f10e60104ae 100644 >--- a/ctdb/server/ctdb_recovery_helper.c >+++ b/ctdb/server/ctdb_recovery_helper.c >@@ -294,6 +294,150 @@ static int db_list_check_and_add(struct db_list *dblist, > return 0; > } > >+/* >+ * Create database on nodes where it is missing >+ */ >+ >+struct db_create_missing_state { >+ struct tevent_context *ev; >+ struct ctdb_client_context *client; >+ >+ struct node_list *nlist; >+ >+ const char *db_name; >+ uint32_t *missing_pnn_list; >+ int missing_num_nodes; >+}; >+ >+static void db_create_missing_done(struct tevent_req *subreq); >+ >+static struct tevent_req *db_create_missing_send( >+ TALLOC_CTX *mem_ctx, >+ struct tevent_context *ev, >+ struct ctdb_client_context *client, >+ struct node_list *nlist, >+ const char *db_name, >+ struct db *db) >+{ >+ struct tevent_req *req, *subreq; >+ struct db_create_missing_state *state; >+ struct ctdb_req_control request; >+ unsigned int i, j; >+ >+ req = tevent_req_create(mem_ctx, >+ &state, >+ struct db_create_missing_state); >+ if (req == NULL) { >+ return NULL; >+ } >+ >+ state->ev = ev; >+ state->client = client; >+ state->nlist = nlist; >+ state->db_name = db_name; >+ >+ if (nlist->count == db->num_nodes) { >+ tevent_req_done(req); >+ return tevent_req_post(req, ev); >+ } >+ >+ state->missing_pnn_list = talloc_array(mem_ctx, uint32_t, nlist->count); >+ if 
(tevent_req_nomem(state->missing_pnn_list, req)) { >+ return tevent_req_post(req, ev); >+ } >+ >+ for (i = 0; i < nlist->count; i++) { >+ uint32_t pnn = nlist->pnn_list[i] ; >+ >+ for (j = 0; j < db->num_nodes; j++) { >+ if (pnn == db->pnn_list[j]) { >+ break; >+ } >+ } >+ >+ if (j < db->num_nodes) { >+ continue; >+ } >+ >+ DBG_INFO("Create database %s on node %u\n", >+ state->db_name, >+ pnn); >+ state->missing_pnn_list[state->missing_num_nodes] = pnn; >+ state->missing_num_nodes++; >+ } >+ >+ if (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) { >+ ctdb_req_control_db_attach_persistent(&request, db_name); >+ } else if (db->db_flags & CTDB_DB_FLAGS_REPLICATED) { >+ ctdb_req_control_db_attach_replicated(&request, db_name); >+ } else { >+ ctdb_req_control_db_attach(&request, db_name); >+ } >+ request.flags = CTDB_CTRL_FLAG_ATTACH_RECOVERY; >+ subreq = ctdb_client_control_multi_send(state, >+ state->ev, >+ state->client, >+ state->missing_pnn_list, >+ state->missing_num_nodes, >+ TIMEOUT(), >+ &request); >+ if (tevent_req_nomem(subreq, req)) { >+ return tevent_req_post(req, ev); >+ } >+ tevent_req_set_callback(subreq, db_create_missing_done, req); >+ >+ return req; >+} >+ >+static void db_create_missing_done(struct tevent_req *subreq) >+{ >+ struct tevent_req *req = tevent_req_callback_data( >+ subreq, struct tevent_req); >+ struct db_create_missing_state *state = tevent_req_data( >+ req, struct db_create_missing_state); >+ int *err_list; >+ int ret; >+ bool status; >+ >+ status = ctdb_client_control_multi_recv(subreq, >+ &ret, >+ NULL, >+ &err_list, >+ NULL); >+ TALLOC_FREE(subreq); >+ if (! status) { >+ int ret2; >+ uint32_t pnn; >+ >+ ret2 = ctdb_client_control_multi_error( >+ state->missing_pnn_list, >+ state->missing_num_nodes, >+ err_list, >+ &pnn); >+ if (ret2 != 0) { >+ D_ERR("control DB_ATTACH failed for db %s" >+ " on node %u, ret=%d\n", >+ state->db_name, >+ pnn, >+ ret2); >+ node_list_ban_credits(state->nlist, pnn); >+ } else { >+ D_ERR("control DB_ATTACH failed for db %s, ret=%d\n", >+ state->db_name, >+ ret); >+ } >+ tevent_req_error(req, ret); >+ return; >+ } >+ >+ tevent_req_done(req); >+} >+ >+static bool db_create_missing_recv(struct tevent_req *req, int *perr) >+{ >+ return generic_recv(req, perr); >+} >+ > /* > * Recovery database functions > */ >@@ -1657,6 +1801,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) > /** > * For each database do the following: > * - Get DB name from all nodes >+ * - Attach database on missing nodes > * - Get DB path > * - Freeze database on all nodes > * - Start transaction on all nodes >@@ -1682,6 +1827,7 @@ struct recover_db_state { > }; > > static void recover_db_name_done(struct tevent_req *subreq); >+static void recover_db_create_missing_done(struct tevent_req *subreq); > static void recover_db_path_done(struct tevent_req *subreq); > static void recover_db_freeze_done(struct tevent_req *subreq); > static void recover_db_transaction_started(struct tevent_req *subreq); >@@ -1741,7 +1887,6 @@ static void recover_db_name_done(struct tevent_req *subreq) > struct recover_db_state *state = tevent_req_data( > req, struct recover_db_state); > struct ctdb_reply_control **reply; >- struct ctdb_req_control request; > int *err_list; > unsigned int i; > int ret; >@@ -1813,6 +1958,37 @@ static void recover_db_name_done(struct tevent_req *subreq) > > talloc_free(reply); > >+ subreq = db_create_missing_send(state, >+ state->ev, >+ state->client, >+ state->nlist, >+ state->db_name, >+ state->db); >+ >+ if (tevent_req_nomem(subreq, req)) { 
>+ return; >+ } >+ tevent_req_set_callback(subreq, recover_db_create_missing_done, req); >+} >+ >+static void recover_db_create_missing_done(struct tevent_req *subreq) >+{ >+ struct tevent_req *req = tevent_req_callback_data( >+ subreq, struct tevent_req); >+ struct recover_db_state *state = tevent_req_data( >+ req, struct recover_db_state); >+ struct ctdb_req_control request; >+ int ret; >+ bool status; >+ >+ /* Could sanity check the db_id here */ >+ status = db_create_missing_recv(subreq, &ret); >+ TALLOC_FREE(subreq); >+ if (! status) { >+ tevent_req_error(req, ret); >+ return; >+ } >+ > ctdb_req_control_getdbpath(&request, state->db->db_id); > subreq = ctdb_client_control_send(state, state->ev, state->client, > state->destnode, TIMEOUT(), >-- >2.25.1 > > >From 69af2b0f089ee51211e9f0467ba233974e6a9c2f Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Mon, 24 Feb 2020 19:51:19 +1100 >Subject: [PATCH 36/38] ctdb-recovery: Remove old code for creating missing > databases > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 3a66d181b6f6199fca362fcb0aa06513645b589d) >--- > ctdb/server/ctdb_recoverd.c | 161 ------------------------------------ > 1 file changed, 161 deletions(-) > >diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c >index 857736e30c8..68748aee70c 100644 >--- a/ctdb/server/ctdb_recoverd.c >+++ b/ctdb/server/ctdb_recoverd.c >@@ -424,140 +424,6 @@ static int set_recovery_mode(struct ctdb_context *ctdb, > return 0; > } > >-/* >- ensure all other nodes have attached to any databases that we have >- */ >-static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, >- uint32_t pnn, struct ctdb_dbid_map_old *dbmap, TALLOC_CTX *mem_ctx) >-{ >- unsigned int i, j, db; >- int ret; >- struct ctdb_dbid_map_old *remote_dbmap; >- >- /* verify that all other nodes have all our databases */ >- for (j=0; j<nodemap->num; j++) { >- /* we don't need to ourself ourselves */ >- if (nodemap->nodes[j].pnn == pnn) { >- continue; >- } >- /* don't check nodes that are unavailable */ >- if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { >- continue; >- } >- >- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, >- mem_ctx, &remote_dbmap); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); >- return -1; >- } >- >- /* step through all local databases */ >- for (db=0; db<dbmap->num;db++) { >- const char *name; >- >- >- for (i=0;i<remote_dbmap->num;i++) { >- if (dbmap->dbs[db].db_id == remote_dbmap->dbs[i].db_id) { >- break; >- } >- } >- /* the remote node already have this database */ >- if (i!=remote_dbmap->num) { >- continue; >- } >- /* ok so we need to create this database */ >- ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), pnn, >- dbmap->dbs[db].db_id, mem_ctx, >- &name); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", pnn)); >- return -1; >- } >- ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), >- nodemap->nodes[j].pnn, >- mem_ctx, name, >- dbmap->dbs[db].flags, NULL); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name)); >- return -1; >- } >- } >- } >- >- return 0; >-} >- >- >-/* >- ensure we are attached to any databases that anyone else is attached to >- */ >-static int create_missing_local_databases(struct ctdb_context 
*ctdb, struct ctdb_node_map_old *nodemap, >- uint32_t pnn, struct ctdb_dbid_map_old **dbmap, TALLOC_CTX *mem_ctx) >-{ >- unsigned int i, j, db; >- int ret; >- struct ctdb_dbid_map_old *remote_dbmap; >- >- /* verify that we have all database any other node has */ >- for (j=0; j<nodemap->num; j++) { >- /* we don't need to ourself ourselves */ >- if (nodemap->nodes[j].pnn == pnn) { >- continue; >- } >- /* don't check nodes that are unavailable */ >- if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { >- continue; >- } >- >- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, >- mem_ctx, &remote_dbmap); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); >- return -1; >- } >- >- /* step through all databases on the remote node */ >- for (db=0; db<remote_dbmap->num;db++) { >- const char *name; >- >- for (i=0;i<(*dbmap)->num;i++) { >- if (remote_dbmap->dbs[db].db_id == (*dbmap)->dbs[i].db_id) { >- break; >- } >- } >- /* we already have this db locally */ >- if (i!=(*dbmap)->num) { >- continue; >- } >- /* ok so we need to create this database and >- rebuild dbmap >- */ >- ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, >- remote_dbmap->dbs[db].db_id, mem_ctx, &name); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", >- nodemap->nodes[j].pnn)); >- return -1; >- } >- ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, >- mem_ctx, name, >- remote_dbmap->dbs[db].flags, NULL); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name)); >- return -1; >- } >- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, dbmap); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to reread dbmap on node %u\n", pnn)); >- return -1; >- } >- } >- } >- >- return 0; >-} >- > /* > update flags on all active nodes > */ >@@ -1165,7 +1031,6 @@ static int do_recovery(struct ctdb_recoverd *rec, > struct ctdb_context *ctdb = rec->ctdb; > unsigned int i; > int ret; >- struct ctdb_dbid_map_old *dbmap; > bool self_ban; > > DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n")); >@@ -1245,32 +1110,6 @@ static int do_recovery(struct ctdb_recoverd *rec, > > DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node)); > >- /* get a list of all databases */ >- ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn)); >- goto fail; >- } >- >- /* we do the db creation before we set the recovery mode, so the freeze happens >- on all databases we will be dealing with. 
*/ >- >- /* verify that we have all the databases any other node has */ >- ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n")); >- goto fail; >- } >- >- /* verify that all other nodes have all our databases */ >- ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx); >- if (ret != 0) { >- DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n")); >- goto fail; >- } >- DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n")); >- >- > /* Retrieve capabilities from all connected nodes */ > ret = update_capabilities(rec, nodemap); > if (ret!=0) { >-- >2.25.1 > > >From 3c584144f520cf45262cb19ed0f0bac2beb4e60f Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 25 Feb 2020 06:20:32 +1100 >Subject: [PATCH 37/38] ctdb-daemon: Remove more unused old client database > functions > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 052f1bdb9cf78f53f584edd32f81ae8e01e8e86e) >--- > ctdb/include/ctdb_client.h | 12 ----- > ctdb/server/ctdb_client.c | 105 ------------------------------------- > 2 files changed, 117 deletions(-) > >diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h >index d1dce1e68d8..198a8a38dbb 100644 >--- a/ctdb/include/ctdb_client.h >+++ b/ctdb/include/ctdb_client.h >@@ -165,10 +165,6 @@ int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, > int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode, uint32_t recmaster); > >-int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, >- uint32_t destnode, TALLOC_CTX *mem_ctx, >- struct ctdb_dbid_map_old **dbmap); >- > int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode, TALLOC_CTX *mem_ctx, > struct ctdb_node_map_old **nodemap); >@@ -176,14 +172,6 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, > int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, > uint32_t destnode, uint32_t *runstate); > >-int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, >- uint32_t destnode, uint32_t dbid, >- TALLOC_CTX *mem_ctx, const char **name); >- >-int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, >- uint32_t destnode, TALLOC_CTX *mem_ctx, >- const char *name, uint8_t db_flags, uint32_t *db_id); >- > int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, > int32_t *level); > >diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c >index 26055698568..67455745ede 100644 >--- a/ctdb/server/ctdb_client.c >+++ b/ctdb/server/ctdb_client.c >@@ -1029,30 +1029,6 @@ int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, ui > } > > >-/* >- get a list of databases off a remote node >- */ >-int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, >- TALLOC_CTX *mem_ctx, struct ctdb_dbid_map_old **dbmap) >-{ >- int ret; >- TDB_DATA outdata; >- int32_t res; >- >- ret = ctdb_control(ctdb, destnode, 0, >- CTDB_CONTROL_GET_DBMAP, 0, tdb_null, >- mem_ctx, &outdata, &res, &timeout, NULL); >- if (ret != 0 || res != 0) { >- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, 
res)); >- return -1; >- } >- >- *dbmap = (struct ctdb_dbid_map_old *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize); >- talloc_free(outdata.dptr); >- >- return 0; >-} >- > /* > get a list of nodes (vnn and flags ) from a remote node > */ >@@ -1107,87 +1083,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, > return 0; > } > >-/* >- find the name of a db >- */ >-int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, >- const char **name) >-{ >- int ret; >- int32_t res; >- TDB_DATA data; >- >- data.dptr = (uint8_t *)&dbid; >- data.dsize = sizeof(dbid); >- >- ret = ctdb_control(ctdb, destnode, 0, >- CTDB_CONTROL_GET_DBNAME, 0, data, >- mem_ctx, &data, &res, &timeout, NULL); >- if (ret != 0 || res != 0) { >- return -1; >- } >- >- (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); >- if ((*name) == NULL) { >- return -1; >- } >- >- talloc_free(data.dptr); >- >- return 0; >-} >- >-/* >- create a database >- */ >-int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, >- uint32_t destnode, TALLOC_CTX *mem_ctx, >- const char *name, uint8_t db_flags, uint32_t *db_id) >-{ >- int ret; >- int32_t res; >- TDB_DATA data; >- uint32_t opcode; >- >- data.dptr = discard_const(name); >- data.dsize = strlen(name)+1; >- >- if (db_flags & CTDB_DB_FLAGS_PERSISTENT) { >- opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT; >- } else if (db_flags & CTDB_DB_FLAGS_REPLICATED) { >- opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED; >- } else { >- opcode = CTDB_CONTROL_DB_ATTACH; >- } >- >- ret = ctdb_control(ctdb, >- destnode, >- 0, >- opcode, >- CTDB_CTRL_FLAG_ATTACH_RECOVERY, >- data, >- mem_ctx, >- &data, >- &res, >- &timeout, >- NULL); >- >- if (ret != 0 || res != 0) { >- return -1; >- } >- >- if (data.dsize != sizeof(uint32_t)) { >- TALLOC_FREE(data.dptr); >- return -1; >- } >- if (db_id != NULL) { >- *db_id = *(uint32_t *)data.dptr; >- } >- talloc_free(data.dptr); >- >- return 0; >-} >- > /* > get debug level on a node > */ >-- >2.25.1 > > >From 68cb93bbfdf997961c1d23070014c1eba3fcd22a Mon Sep 17 00:00:00 2001 >From: Martin Schwenke <martin@meltin.net> >Date: Tue, 25 Feb 2020 17:32:56 +1100 >Subject: [PATCH 38/38] ctdb-daemon: Don't allow attach from recovery if > recovery is not active > >Neither the recovery daemon nor the recovery helper should attach >databases outside of the recovery process. > >BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 > >Signed-off-by: Martin Schwenke <martin@meltin.net> >Reviewed-by: Amitay Isaacs <amitay@gmail.com> >(cherry picked from commit 147afe77de372ddb9c180228d6fe1b04cca4610f) >--- > ctdb/server/ctdb_ltdb_server.c | 7 +++++++ > 1 file changed, 7 insertions(+) > >diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c >index e050b7304fe..ce3569fe7b1 100644 >--- a/ctdb/server/ctdb_ltdb_server.c >+++ b/ctdb/server/ctdb_ltdb_server.c >@@ -1135,6 +1135,13 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, > return -1; > } > >+ if ((c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && >+ ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) { >+ DBG_ERR("Attach from recovery refused because " >+ "recovery is not active\n"); >+ return -1; >+ } >+ > if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && > (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || > ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { >-- >2.25.1 >
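
Note on patch 38: the new check in ctdb_control_db_attach() means an attach request carrying CTDB_CTRL_FLAG_ATTACH_RECOVERY is honoured only while the recovery mode is CTDB_RECOVERY_ACTIVE, while an ordinary client attach continues to be refused during recovery or before startup. The standalone C sketch below condenses that decision into a single predicate for illustration only; the enum values, the FLAG_ATTACH_RECOVERY constant and the attach_allowed() helper are invented here, and the second branch simplifies the real control handler, whose remaining conditions are not visible in this hunk.

	/*
	 * Hypothetical, condensed model of the attach decision enforced by
	 * patch 38.  None of the names below are the daemon's own.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	enum recovery_mode { RECOVERY_NORMAL, RECOVERY_ACTIVE };
	enum runstate { RUNSTATE_INIT, RUNSTATE_STARTUP, RUNSTATE_RUNNING };

	/* stand-in for CTDB_CTRL_FLAG_ATTACH_RECOVERY */
	#define FLAG_ATTACH_RECOVERY 0x1

	/* Return true if an attach request should be allowed. */
	static bool attach_allowed(unsigned int flags,
				   enum recovery_mode recmode,
				   enum runstate runstate)
	{
		if (flags & FLAG_ATTACH_RECOVERY) {
			/* Recovery-time attach only while recovery is active */
			return recmode == RECOVERY_ACTIVE;
		}

		/* Normal client attach refused during recovery or before startup */
		if (recmode == RECOVERY_ACTIVE || runstate < RUNSTATE_STARTUP) {
			return false;
		}
		return true;
	}

	int main(void)
	{
		printf("recovery attach while normal: %s\n",
		       attach_allowed(FLAG_ATTACH_RECOVERY, RECOVERY_NORMAL,
				      RUNSTATE_RUNNING) ? "allowed" : "refused");
		printf("client attach while running: %s\n",
		       attach_allowed(0, RECOVERY_NORMAL, RUNSTATE_RUNNING)
		       ? "allowed" : "refused");
		return 0;
	}

Compiled and run (for example with cc -Wall sketch.c && ./a.out), the sketch reports "refused" for a recovery-flagged attach outside recovery and "allowed" for a normal client attach while running, which is the behaviour the patch description calls for.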
Flags: amitay: review+