From 201777eaa2bc78229909f0f301768e79e0ba29a5 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 4 Sep 2019 14:14:22 +1000 Subject: [PATCH 01/38] ctdb-client: Fix some typos in debug messages tdb_sore -> tdb_store SCHDULE_FOR_DELETION -> SCHEDULE_FOR_DELETION Switch to modern debug macros while touching the lines. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Tue Sep 17 05:52:15 UTC 2019 on sn-devel-184 (cherry picked from commit 84f544b55f235e2f08596bf4b7854460af008f88) --- ctdb/client/client_db.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ctdb/client/client_db.c b/ctdb/client/client_db.c index dfa8d970de5..a008f2ad63d 100644 --- a/ctdb/client/client_db.c +++ b/ctdb/client/client_db.c @@ -1540,9 +1540,9 @@ struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx, ret = tdb_store(h->db->ltdb->tdb, h->key, rec, TDB_REPLACE); if (ret != 0) { - DEBUG(DEBUG_ERR, - ("fetch_lock delete: %s tdb_sore failed, %s\n", - h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); + D_ERR("fetch_lock delete: %s tdb_store failed, %s\n", + h->db->db_name, + tdb_errorstr(h->db->ltdb->tdb)); tevent_req_error(req, EIO); return tevent_req_post(req, ev); } @@ -1576,9 +1576,9 @@ static void ctdb_delete_record_done(struct tevent_req *subreq) status = ctdb_client_control_recv(subreq, &ret, NULL, NULL); TALLOC_FREE(subreq); if (! 
status) { - DEBUG(DEBUG_ERR, - ("delete_record: %s SCHDULE_FOR_DELETION failed, " - "ret=%d\n", state->h->db->db_name, ret)); + D_ERR("delete_record: %s SCHEDULE_FOR_DELETION failed, ret=%d\n", + state->h->db->db_name, + ret); tevent_req_error(req, ret); return; } -- 2.25.1 From f1ede2508254d1419a3d7530d6a577635b1be248 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 12:13:53 +1100 Subject: [PATCH 02/38] ctdb-protocol: Drop code related to obsolete controls Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 688567f080156892270cbfb2907cd712cb77cb7a) --- ctdb/protocol/protocol_client.c | 9 ----- ctdb/protocol/protocol_control.c | 60 -------------------------------- 2 files changed, 69 deletions(-) diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c index 9aa32a9bba7..a39dd730e23 100644 --- a/ctdb/protocol/protocol_client.c +++ b/ctdb/protocol/protocol_client.c @@ -424,8 +424,6 @@ int ctdb_reply_control_db_attach(struct ctdb_reply_control *reply, return reply->status; } -/* CTDB_CONTROL_SET_CALL */ - /* CTDB_CONTROL_TRAVERSE_START */ void ctdb_req_control_traverse_start(struct ctdb_req_control *request, @@ -718,8 +716,6 @@ int ctdb_reply_control_shutdown(struct ctdb_reply_control *reply) return ctdb_reply_control_generic(reply, CTDB_CONTROL_SHUTDOWN); } -/* CTDB_CONTROL_GET_MONMODE */ - /* CTDB_CONTROL_TCP_CLIENT */ void ctdb_req_control_tcp_client(struct ctdb_req_control *request, @@ -1170,9 +1166,6 @@ int ctdb_reply_control_try_delete_records(struct ctdb_reply_control *reply, return reply->status; } -/* CTDB_CONTROL_ENABLE_MONITOR */ -/* CTDB_CONTROL_DISABLE_MONITOR */ - /* CTDB_CONTROL_ADD_PUBLIC_IP */ void ctdb_req_control_add_public_ip(struct ctdb_req_control *request, @@ -1855,8 +1848,6 @@ int ctdb_reply_control_set_db_readonly(struct ctdb_reply_control *reply) return ctdb_reply_control_generic(reply, CTDB_CONTROL_SET_DB_READONLY); } -/* CTDB_CONTROL_CHECK_SRVIDS */ - /* 
CTDB_CONTROL_TRAVERSE_START_EXT */ void ctdb_req_control_traverse_start_ext(struct ctdb_req_control *request, diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c index 0b88b5c8b5a..b2d17611108 100644 --- a/ctdb/protocol/protocol_control.c +++ b/ctdb/protocol/protocol_control.c @@ -90,9 +90,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) len = ctdb_string_len(&cd->data.db_name); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: len = ctdb_traverse_start_len(cd->data.traverse_start); break; @@ -145,9 +142,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: len = ctdb_connection_len(cd->data.conn); break; @@ -221,12 +215,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) len = ctdb_rec_buffer_len(cd->data.recbuf); break; - case CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: len = ctdb_addr_info_len(cd->data.addr_info); break; @@ -338,9 +326,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) len = ctdb_uint32_len(&cd->data.db_id); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: len = ctdb_traverse_start_ext_len(cd->data.traverse_start_ext); break; @@ -466,9 +451,6 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, ctdb_string_push(&cd->data.db_name, buf, &np); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: ctdb_traverse_start_push(cd->data.traverse_start, buf, &np); break; @@ -635,9 +617,6 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, ctdb_uint32_push(&cd->data.db_id, buf, &np); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: 
ctdb_traverse_start_ext_push(cd->data.traverse_start_ext, buf, &np); @@ -757,9 +736,6 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, &cd->data.db_name, &np); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: ret = ctdb_traverse_start_pull(buf, buflen, mem_ctx, &cd->data.traverse_start, &np); @@ -957,9 +933,6 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: ret = ctdb_traverse_start_ext_pull(buf, buflen, mem_ctx, &cd->data.traverse_start_ext, @@ -1104,9 +1077,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) len = ctdb_uint32_len(&cd->data.db_id); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: break; @@ -1154,9 +1124,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: break; @@ -1224,12 +1191,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) len = ctdb_rec_buffer_len(cd->data.recbuf); break; - case CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: break; @@ -1286,12 +1247,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) len = ctdb_ban_state_len(cd->data.ban_state); break; - case CTDB_CONTROL_SET_DB_PRIORITY: - break; - - case CTDB_CONTROL_GET_DB_PRIORITY: - break; - case CTDB_CONTROL_REGISTER_NOTIFY: break; @@ -1336,9 +1291,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) case CTDB_CONTROL_SET_DB_READONLY: break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: break; @@ -1515,9 +1467,6 @@ static void ctdb_reply_control_data_push(struct 
ctdb_reply_control_data *cd, ctdb_ban_state_push(cd->data.ban_state, buf, &np); break; - case CTDB_CONTROL_GET_DB_PRIORITY: - break; - case CTDB_CONTROL_GET_DB_SEQNUM: ctdb_uint64_push(&cd->data.seqnum, buf, &np); break; @@ -1538,9 +1487,6 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, ctdb_statistics_list_push(cd->data.stats_list, buf, &np); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_GET_DB_STATISTICS: ctdb_db_statistics_push(cd->data.dbstats, buf, &np); break; @@ -1697,9 +1643,6 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, &cd->data.ban_state, &np); break; - case CTDB_CONTROL_GET_DB_PRIORITY: - break; - case CTDB_CONTROL_GET_DB_SEQNUM: ret = ctdb_uint64_pull(buf, buflen, &cd->data.seqnum, &np); break; @@ -1724,9 +1667,6 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, &cd->data.stats_list, &np); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_GET_DB_STATISTICS: ret = ctdb_db_statistics_pull(buf, buflen, mem_ctx, &cd->data.dbstats, &np); -- 2.25.1 From b2314e47fb2dd840bda33b7f62d6c6fd1cc2583c Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 12:28:11 +1100 Subject: [PATCH 03/38] ctdb-tests: Drop code related to obsolete controls Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 913bd331f65e9fe3d7cb16e041cd37b01987841f) --- ctdb/tests/src/protocol_common_ctdb.c | 78 --------------------------- 1 file changed, 78 deletions(-) diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c index 6a6573486a1..4aa4cfc5bde 100644 --- a/ctdb/tests/src/protocol_common_ctdb.c +++ b/ctdb/tests/src/protocol_common_ctdb.c @@ -203,9 +203,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, assert(cd->data.db_name != NULL); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: cd->data.traverse_start = talloc(mem_ctx, struct
ctdb_traverse_start); assert(cd->data.traverse_start != NULL); @@ -264,9 +261,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: cd->data.conn = talloc(mem_ctx, struct ctdb_connection); assert(cd->data.conn != NULL); @@ -364,12 +358,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); break; - case CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: cd->data.addr_info = talloc(mem_ctx, struct ctdb_addr_info); assert(cd->data.addr_info != NULL); @@ -385,12 +373,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_GET_CAPABILITIES: break; - case CTDB_CONTROL_START_PERSISTENT_UPDATE: - break; - - case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: - break; - case CTDB_CONTROL_RECD_PING: break; @@ -510,9 +492,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, cd->data.db_id = rand32(); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: cd->data.traverse_start_ext = talloc(mem_ctx, struct ctdb_traverse_start_ext); assert(cd->data.traverse_start_ext != NULL); @@ -671,9 +650,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, verify_ctdb_string(&cd->data.db_name, &cd2->data.db_name); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: verify_ctdb_traverse_start(cd->data.traverse_start, cd2->data.traverse_start); @@ -728,9 +704,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: verify_ctdb_connection(cd->data.conn, cd2->data.conn); break; @@ -805,12 +778,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); break; - case 
CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: verify_ctdb_addr_info(cd->data.addr_info, cd2->data.addr_info); break; @@ -822,12 +789,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, case CTDB_CONTROL_GET_CAPABILITIES: break; - case CTDB_CONTROL_START_PERSISTENT_UPDATE: - break; - - case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: - break; - case CTDB_CONTROL_RECD_PING: break; @@ -928,9 +889,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, assert(cd->data.db_id == cd2->data.db_id); break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: verify_ctdb_traverse_start_ext(cd->data.traverse_start_ext, cd2->data.traverse_start_ext); @@ -1111,9 +1069,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, cd->data.db_id = rand32(); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: break; @@ -1163,9 +1118,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: break; @@ -1243,12 +1195,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); break; - case CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: break; @@ -1259,12 +1205,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, cd->data.caps = rand32(); break; - case CTDB_CONTROL_START_PERSISTENT_UPDATE: - break; - - case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: - break; - case CTDB_CONTROL_RECD_PING: break; @@ -1369,9 +1309,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_SET_DB_READONLY: break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: break; @@ -1492,9 +1429,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data 
*cd, assert(cd->data.db_id == cd2->data.db_id); break; - case CTDB_CONTROL_SET_CALL: - break; - case CTDB_CONTROL_TRAVERSE_START: break; @@ -1542,9 +1476,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_SHUTDOWN: break; - case CTDB_CONTROL_GET_MONMODE: - break; - case CTDB_CONTROL_TCP_CLIENT: break; @@ -1613,12 +1544,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); break; - case CTDB_CONTROL_ENABLE_MONITOR: - break; - - case CTDB_CONTROL_DISABLE_MONITOR: - break; - case CTDB_CONTROL_ADD_PUBLIC_IP: break; @@ -1723,9 +1648,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_SET_DB_READONLY: break; - case CTDB_CONTROL_CHECK_SRVIDS: - break; - case CTDB_CONTROL_TRAVERSE_START_EXT: break; -- 2.25.1 From 3e6c89a451a787b97131977faf26e15b00b40829 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 11:57:24 +1100 Subject: [PATCH 04/38] ctdb-protocol: Add new control VACUUM_FETCH Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 0872c52ef0497f96f53318cf7e4d31be0854adde) --- ctdb/protocol/protocol.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h index b868553f6e8..e47daeadba1 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -373,6 +373,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_CHECK_PID_SRVID = 151, CTDB_CONTROL_TUNNEL_REGISTER = 152, CTDB_CONTROL_TUNNEL_DEREGISTER = 153, + CTDB_CONTROL_VACUUM_FETCH = 154, }; #define MAX_COUNT_BUCKETS 16 -- 2.25.1 From 2c60b2158239c267544439294337f154bfd9f6c6 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 11:57:44 +1100 Subject: [PATCH 05/38] ctdb-protocol: Add marshalling for new control VACUUM_FETCH Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 
b71d8cd80f84169bacf2dd1e753e468a305c50ce) --- ctdb/protocol/protocol_api.h | 4 ++++ ctdb/protocol/protocol_client.c | 24 ++++++++++++++++++++++++ ctdb/protocol/protocol_control.c | 22 ++++++++++++++++++++++ ctdb/protocol/protocol_debug.c | 1 + 4 files changed, 51 insertions(+) diff --git a/ctdb/protocol/protocol_api.h b/ctdb/protocol/protocol_api.h index 6104c10e7b5..cf4c4635dd4 100644 --- a/ctdb/protocol/protocol_api.h +++ b/ctdb/protocol/protocol_api.h @@ -603,6 +603,10 @@ void ctdb_req_control_tunnel_deregister(struct ctdb_req_control *request, uint64_t tunnel_id); int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply); +void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, + struct ctdb_rec_buffer *recbuf); +int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply); + /* From protocol/protocol_debug.c */ void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp); diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c index a39dd730e23..d5f6a222fe4 100644 --- a/ctdb/protocol/protocol_client.c +++ b/ctdb/protocol/protocol_client.c @@ -2333,3 +2333,27 @@ int ctdb_reply_control_tunnel_deregister(struct ctdb_reply_control *reply) return reply->status; } + +/* CTDB_CONTROL_VACUUM_FETCH */ + +void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, + struct ctdb_rec_buffer *recbuf) +{ + request->opcode = CTDB_CONTROL_VACUUM_FETCH; + request->pad = 0; + request->srvid = 0; + request->client_id = 0; + request->flags = 0; + + request->rdata.opcode = CTDB_CONTROL_VACUUM_FETCH; + request->rdata.data.recbuf = recbuf; +} + +int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply) +{ + if (reply->rdata.opcode != CTDB_CONTROL_VACUUM_FETCH) { + return EPROTO; + } + + return reply->status; +} diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c index b2d17611108..1cc985a71a7 100644 --- a/ctdb/protocol/protocol_control.c +++ 
b/ctdb/protocol/protocol_control.c @@ -407,6 +407,10 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + + case CTDB_CONTROL_VACUUM_FETCH: + len = ctdb_rec_buffer_len(cd->data.recbuf); + break; } return len; @@ -682,6 +686,10 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, case CTDB_CONTROL_CHECK_PID_SRVID: ctdb_pid_srvid_push(cd->data.pid_srvid, buf, &np); break; + + case CTDB_CONTROL_VACUUM_FETCH: + ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); + break; } *npush = np; @@ -1006,6 +1014,11 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, ret = ctdb_pid_srvid_pull(buf, buflen, mem_ctx, &cd->data.pid_srvid, &np); break; + + case CTDB_CONTROL_VACUUM_FETCH: + ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, + &cd->data.recbuf, &np); + break; } if (ret != 0) { @@ -1363,6 +1376,9 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + + case CTDB_CONTROL_VACUUM_FETCH: + break; } return len; @@ -1517,6 +1533,9 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_CHECK_PID_SRVID: break; + + case CTDB_CONTROL_VACUUM_FETCH: + break; } *npush = np; @@ -1701,6 +1720,9 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, case CTDB_CONTROL_CHECK_PID_SRVID: break; + + case CTDB_CONTROL_VACUUM_FETCH: + break; } if (ret != 0) { diff --git a/ctdb/protocol/protocol_debug.c b/ctdb/protocol/protocol_debug.c index a34f5a86947..97903ea98f4 100644 --- a/ctdb/protocol/protocol_debug.c +++ b/ctdb/protocol/protocol_debug.c @@ -242,6 +242,7 @@ static void ctdb_opcode_print(uint32_t opcode, FILE *fp) { CTDB_CONTROL_CHECK_PID_SRVID, "CHECK_PID_SRVID" }, { CTDB_CONTROL_TUNNEL_REGISTER, "TUNNEL_REGISTER" }, { CTDB_CONTROL_TUNNEL_DEREGISTER, "TUNNEL_DEREGISTER" }, + { CTDB_CONTROL_VACUUM_FETCH, "VACUUM_FETCH" }, { MAP_END, "" }, }; -- 2.25.1 
From 9fe916b4651d7accddad397bba55ea8973fdbd42 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 17:28:49 +1100 Subject: [PATCH 06/38] ctdb-tests: Add marshalling tests for new control Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 36f9b4953a8def40681a6f02f6576795a1ba5fbe) --- ctdb/tests/cunit/protocol_test_101.sh | 2 +- ctdb/tests/src/protocol_common_ctdb.c | 15 +++++++++++++++ ctdb/tests/src/protocol_ctdb_test.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ctdb/tests/cunit/protocol_test_101.sh b/ctdb/tests/cunit/protocol_test_101.sh index 36751d4fbe7..a0bf9d08754 100755 --- a/ctdb/tests/cunit/protocol_test_101.sh +++ b/ctdb/tests/cunit/protocol_test_101.sh @@ -2,7 +2,7 @@ . "${TEST_SCRIPTS_DIR}/unit.sh" -last_control=153 +last_control=154 generate_control_output () { diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c index 4aa4cfc5bde..b02976b5d67 100644 --- a/ctdb/tests/src/protocol_common_ctdb.c +++ b/ctdb/tests/src/protocol_common_ctdb.c @@ -588,6 +588,12 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + + case CTDB_CONTROL_VACUUM_FETCH: + cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer); + assert(cd->data.recbuf != NULL); + fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); + break; } } @@ -974,6 +980,10 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + + case CTDB_CONTROL_VACUUM_FETCH: + verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); + break; } } @@ -1368,6 +1378,9 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + case CTDB_CONTROL_VACUUM_FETCH: + break; + } } @@ -1703,6 +1716,8 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_TUNNEL_DEREGISTER: break; + case CTDB_CONTROL_VACUUM_FETCH: + break; } } diff --git 
a/ctdb/tests/src/protocol_ctdb_test.c b/ctdb/tests/src/protocol_ctdb_test.c index 9eb25d96186..3ebf15dff6c 100644 --- a/ctdb/tests/src/protocol_ctdb_test.c +++ b/ctdb/tests/src/protocol_ctdb_test.c @@ -284,7 +284,7 @@ PROTOCOL_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster, PROTOCOL_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster, CTDB_REPLY_DMASTER); -#define NUM_CONTROLS 154 +#define NUM_CONTROLS 155 PROTOCOL_CTDB2_TEST(struct ctdb_req_control_data, ctdb_req_control_data); PROTOCOL_CTDB2_TEST(struct ctdb_reply_control_data, ctdb_reply_control_data); -- 2.25.1 From 7ae70cc7424d781578b7540d37689049288e3140 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 15:30:13 +1100 Subject: [PATCH 07/38] ctdb-daemon: Add implementation of VACUUM_FETCH control Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit da617f90d90151f955ee354c57bdc4bc6f6498f2) --- ctdb/include/ctdb_private.h | 3 ++ ctdb/server/ctdb_control.c | 3 ++ ctdb/server/ctdb_freeze.c | 9 ++++- ctdb/server/ctdb_ltdb_server.c | 6 ++++ ctdb/server/ctdb_vacuum.c | 66 ++++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 7f160c0c9db..d7b568d6c0c 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -359,6 +359,7 @@ struct ctdb_db_context { struct revokechild_handle *revokechild_active; struct ctdb_persistent_state *persistent_state; struct trbt_tree *delete_queue; + struct trbt_tree *fetch_queue; struct trbt_tree *sticky_records; int (*ctdb_ltdb_store_fn)(struct ctdb_db_context *ctdb_db, TDB_DATA key, @@ -998,6 +999,8 @@ void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db, const struct ctdb_ltdb_header *hdr, const TDB_DATA key); +int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata); + /* from eventscript.c */ int ctdb_start_eventd(struct ctdb_context *ctdb); diff --git 
a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 6c91e211660..0174f303f14 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -729,6 +729,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_TUNNEL_DEREGISTER: return ctdb_control_tunnel_deregister(ctdb, client_id, srvid); + case CTDB_CONTROL_VACUUM_FETCH: + return ctdb_control_vacuum_fetch(ctdb, indata); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_freeze.c b/ctdb/server/ctdb_freeze.c index b4b99a0e5c9..06aeacfd939 100644 --- a/ctdb/server/ctdb_freeze.c +++ b/ctdb/server/ctdb_freeze.c @@ -869,10 +869,17 @@ int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata) if (ctdb_db_volatile(ctdb_db)) { talloc_free(ctdb_db->delete_queue); + talloc_free(ctdb_db->fetch_queue); ctdb_db->delete_queue = trbt_create(ctdb_db, 0); if (ctdb_db->delete_queue == NULL) { DEBUG(DEBUG_ERR, (__location__ " Failed to re-create " - "the vacuum tree.\n")); + "the delete queue.\n")); + return -1; + } + ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); + if (ctdb_db->fetch_queue == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Failed to re-create " + "the fetch queue.\n")); return -1; } } diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index 022baf62d92..1ccf60832e1 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -770,6 +770,11 @@ static int ctdb_local_attach(struct ctdb_context *ctdb, const char *db_name, CTDB_NO_MEMORY(ctdb, ctdb_db->delete_queue); } + ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); + if (ctdb_db->fetch_queue == NULL) { + CTDB_NO_MEMORY(ctdb, ctdb_db->fetch_queue); + } + ctdb_db->ctdb_ltdb_store_fn = ctdb_ltdb_store_server; } @@ -1272,6 +1277,7 @@ int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata, /* Disable vacuuming and drop all vacuuming data */ 
talloc_free(ctdb_db->vacuum_handle); talloc_free(ctdb_db->delete_queue); + talloc_free(ctdb_db->fetch_queue); /* Terminate any deferred fetch */ talloc_free(ctdb_db->deferred_fetch); diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 4fd11e3738c..6f28fa89cc9 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -41,6 +41,8 @@ #include "common/common.h" #include "common/logging.h" +#include "protocol/protocol_api.h" + #define TIMELIMIT() timeval_current_ofs(10, 0) enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT}; @@ -117,6 +119,11 @@ struct delete_records_list { struct vacuum_data *vdata; }; +struct fetch_record_data { + TDB_DATA key; + uint8_t keydata[1]; +}; + static int insert_record_into_delete_queue(struct ctdb_db_context *ctdb_db, const struct ctdb_ltdb_header *hdr, TDB_DATA key); @@ -1573,3 +1580,62 @@ void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db, return; } + +static int vacuum_fetch_parser(uint32_t reqid, + struct ctdb_ltdb_header *header, + TDB_DATA key, TDB_DATA data, + void *private_data) +{ + struct ctdb_db_context *ctdb_db = talloc_get_type_abort( + private_data, struct ctdb_db_context); + struct fetch_record_data *rd; + size_t len; + uint32_t hash; + + len = offsetof(struct fetch_record_data, keydata) + key.dsize; + + rd = (struct fetch_record_data *)talloc_size(ctdb_db->fetch_queue, + len); + if (rd == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Memory error\n")); + return -1; + } + talloc_set_name_const(rd, "struct fetch_record_data"); + + rd->key.dsize = key.dsize; + rd->key.dptr = rd->keydata; + memcpy(rd->keydata, key.dptr, key.dsize); + + hash = ctdb_hash(&key); + + trbt_insert32(ctdb_db->fetch_queue, hash, rd); + + return 0; +} + +int32_t ctdb_control_vacuum_fetch(struct ctdb_context *ctdb, TDB_DATA indata) +{ + struct ctdb_rec_buffer *recbuf; + struct ctdb_db_context *ctdb_db; + size_t npull; + int ret; + + ret = 
ctdb_rec_buffer_pull(indata.dptr, indata.dsize, ctdb, &recbuf, + &npull); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Invalid data in vacuum_fetch\n")); + return -1; + } + + ctdb_db = find_ctdb_db(ctdb, recbuf->db_id); + if (ctdb_db == NULL) { + talloc_free(recbuf); + DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x\n", + recbuf->db_id)); + return -1; + } + + ret = ctdb_rec_buffer_traverse(recbuf, vacuum_fetch_parser, ctdb_db); + talloc_free(recbuf); + return ret; +} -- 2.25.1 From 708223d1d28719374c6f8dbb5bbbbd3cc6be9954 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 17:00:40 +1100 Subject: [PATCH 08/38] ctdb-vacuum: Add processing of fetch queue Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 86521837b684df3b7c5a0a1e3b7e606c8b91f63e) --- ctdb/server/ctdb_vacuum.c | 192 +++++++++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 6f28fa89cc9..410ef8bf722 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -317,6 +317,181 @@ static int delete_marshall_traverse(void *param, void *data) return 0; } +struct fetch_queue_state { + struct ctdb_db_context *ctdb_db; + int count; +}; + +struct fetch_record_migrate_state { + struct fetch_queue_state *fetch_queue; + TDB_DATA key; +}; + +static void fetch_record_migrate_callback(struct ctdb_client_call_state *state) +{ + struct fetch_record_migrate_state *fetch = talloc_get_type_abort( + state->async.private_data, struct fetch_record_migrate_state); + struct fetch_queue_state *fetch_queue = fetch->fetch_queue; + struct ctdb_ltdb_header hdr; + struct ctdb_call call = { 0 }; + int ret; + + ret = ctdb_call_recv(state, &call); + fetch_queue->count--; + if (ret != 0) { + D_ERR("Failed to migrate record for vacuuming\n"); + goto done; + } + + ret = tdb_chainlock_nonblock(fetch_queue->ctdb_db->ltdb->tdb, + fetch->key); + if (ret != 0) { + goto done; + } + + 
ret = tdb_parse_record(fetch_queue->ctdb_db->ltdb->tdb, + fetch->key, + vacuum_record_parser, + &hdr); + + tdb_chainunlock(fetch_queue->ctdb_db->ltdb->tdb, fetch->key); + + if (ret != 0) { + goto done; + } + + D_INFO("Vacuum Fetch record, key=%.*s\n", + (int)fetch->key.dsize, + fetch->key.dptr); + + (void) ctdb_local_schedule_for_deletion(fetch_queue->ctdb_db, + &hdr, + fetch->key); + +done: + talloc_free(fetch); +} + +static int fetch_record_parser(TDB_DATA key, TDB_DATA data, void *private_data) +{ + struct ctdb_ltdb_header *header = + (struct ctdb_ltdb_header *)private_data; + + if (data.dsize < sizeof(struct ctdb_ltdb_header)) { + return -1; + } + + memcpy(header, data.dptr, sizeof(*header)); + return 0; +} + +/** + * traverse function for the traversal of the fetch_queue. + * + * Send a record migration request. + */ +static int fetch_queue_traverse(void *param, void *data) +{ + struct fetch_record_data *rd = talloc_get_type_abort( + data, struct fetch_record_data); + struct fetch_queue_state *fetch_queue = + (struct fetch_queue_state *)param; + struct ctdb_db_context *ctdb_db = fetch_queue->ctdb_db; + struct ctdb_client_call_state *state; + struct fetch_record_migrate_state *fetch; + struct ctdb_call call = { 0 }; + struct ctdb_ltdb_header header; + int ret; + + ret = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, rd->key); + if (ret != 0) { + return 0; + } + + ret = tdb_parse_record(ctdb_db->ltdb->tdb, + rd->key, + fetch_record_parser, + &header); + + tdb_chainunlock(ctdb_db->ltdb->tdb, rd->key); + + if (ret != 0) { + goto skipped; + } + + if (header.dmaster == ctdb_db->ctdb->pnn) { + /* If the record is already migrated, skip */ + goto skipped; + } + + fetch = talloc_zero(ctdb_db, struct fetch_record_migrate_state); + if (fetch == NULL) { + D_ERR("Failed to setup fetch record migrate state\n"); + return 0; + } + + fetch->fetch_queue = fetch_queue; + + fetch->key.dsize = rd->key.dsize; + fetch->key.dptr = talloc_memdup(fetch, rd->key.dptr, rd->key.dsize); + if 
(fetch->key.dptr == NULL) { + D_ERR("Memory error in fetch_queue_traverse\n"); + talloc_free(fetch); + return 0; + } + + call.call_id = CTDB_NULL_FUNC; + call.flags = CTDB_IMMEDIATE_MIGRATION | + CTDB_CALL_FLAG_VACUUM_MIGRATION; + call.key = fetch->key; + + state = ctdb_call_send(ctdb_db, &call); + if (state == NULL) { + DEBUG(DEBUG_ERR, ("Failed to setup vacuum fetch call\n")); + talloc_free(fetch); + return 0; + } + + state->async.fn = fetch_record_migrate_callback; + state->async.private_data = fetch; + + fetch_queue->count++; + + return 0; + +skipped: + D_INFO("Skipped Fetch record, key=%.*s\n", + (int)rd->key.dsize, + rd->key.dptr); + return 0; +} + +/** + * Traverse the fetch. + * Records are migrated to the local node and + * added to delete queue for further processing. + */ +static void ctdb_process_fetch_queue(struct ctdb_db_context *ctdb_db) +{ + struct fetch_queue_state state; + int ret; + + state.ctdb_db = ctdb_db; + state.count = 0; + + ret = trbt_traversearray32(ctdb_db->fetch_queue, 1, + fetch_queue_traverse, &state); + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " Error traversing " + "the fetch queue.\n")); + } + + /* Wait for all migrations to complete */ + while (state.count > 0) { + tevent_loop_once(ctdb_db->ctdb->ev); + } +} + /** * traverse function for the traversal of the delete_queue, * the fast-path vacuuming list. @@ -998,8 +1173,10 @@ fail: /** * Vacuum a DB: * - Always do the fast vacuuming run, which traverses - * the in-memory delete queue: these records have been - * scheduled for deletion. + * - the in-memory fetch queue: these records have been + * scheduled for migration + * - the in-memory delete queue: these records have been + * scheduled for deletion. * - Only if explicitly requested, the database is traversed * in order to use the traditional heuristics on empty records * to trigger deletion. 
@@ -1070,6 +1247,8 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, ctdb_vacuum_traverse_db(ctdb_db, vdata); } + ctdb_process_fetch_queue(ctdb_db); + ctdb_process_delete_queue(ctdb_db, vdata); ctdb_process_vacuum_fetch_lists(ctdb_db, vdata); @@ -1309,10 +1488,17 @@ static void ctdb_vacuum_event(struct tevent_context *ev, ctdb_db->delete_queue = trbt_create(ctdb_db, 0); if (ctdb_db->delete_queue == NULL) { /* fatal here? ... */ - ctdb_fatal(ctdb, "Out of memory when re-creating vacuum tree " + ctdb_fatal(ctdb, "Out of memory when re-creating delete queue " "in parent context. Shutting down\n"); } + talloc_free(ctdb_db->fetch_queue); + ctdb_db->fetch_queue = trbt_create(ctdb_db, 0); + if (ctdb_db->fetch_queue == NULL) { + ctdb_fatal(ctdb, "Out of memory when re-create fetch queue " + " in parent context. Shutting down\n"); + } + tevent_add_timer(ctdb->ev, child_ctx, timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, 0), vacuum_child_timeout, child_ctx); -- 2.25.1 From ecf1598f1d9e8bf770e82c991b02ca461cdb2e07 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 17:01:21 +1100 Subject: [PATCH 09/38] ctdb-vacuum: Replace VACUUM_FETCH message with control Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 498932c0e8e8614bd52f3270c4d63e2b5f9e26a4) --- ctdb/server/ctdb_vacuum.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 410ef8bf722..ddaef863045 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -839,6 +839,7 @@ static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db, { unsigned int i; struct ctdb_context *ctdb = ctdb_db->ctdb; + int ret, res; for (i = 0; i < ctdb->num_nodes; i++) { TDB_DATA data; @@ -857,17 +858,16 @@ static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db, ctdb_db->db_name)); data = ctdb_marshall_finish(vfl); - if 
(ctdb_client_send_message(ctdb, ctdb->nodes[i]->pnn, - CTDB_SRVID_VACUUM_FETCH, - data) != 0) - { - DEBUG(DEBUG_ERR, (__location__ " Failed to send vacuum " - "fetch message to %u\n", + + ret = ctdb_control(ctdb, ctdb->nodes[i]->pnn, 0, + CTDB_CONTROL_VACUUM_FETCH, 0, + data, NULL, NULL, &res, NULL, NULL); + if (ret != 0 || res != 0) { + DEBUG(DEBUG_ERR, ("Failed to send vacuum " + "fetch control to node %u\n", ctdb->nodes[i]->pnn)); } } - - return; } /** @@ -1197,7 +1197,7 @@ fail: * - The vacuum_fetch lists * (one for each other lmaster node): * The records in this list are sent for deletion to - * their lmaster in a bulk VACUUM_FETCH message. + * their lmaster in a bulk VACUUM_FETCH control. * * The lmaster then migrates all these records to itelf * so that they can be vacuumed there. -- 2.25.1 From ea40b47835702b02df87253278e5d37a62aed82d Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 17:13:35 +1100 Subject: [PATCH 10/38] ctdb-recoverd: Drop VACUUM_FETCH message handling This is now implemented in the ctdb daemon using VACUUM_FETCH control. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit fc81729dd2d8eddea1e60e22b183894d6541c7dc) --- ctdb/server/ctdb_recoverd.c | 149 ------------------------------------ 1 file changed, 149 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 3d5b727715a..bbaf1270558 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -574,152 +574,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node return 0; } -/* - called when a vacuum fetch has completed - just free it and do the next one - */ -static void vacuum_fetch_callback(struct ctdb_client_call_state *state) -{ - talloc_free(state); -} - - -/** - * Process one elements of the vacuum fetch list: - * Migrate it over to us with the special flag - * CTDB_CALL_FLAG_VACUUM_MIGRATION.
- */ -static bool vacuum_fetch_process_one(struct ctdb_db_context *ctdb_db, - uint32_t pnn, - struct ctdb_rec_data_old *r) -{ - struct ctdb_client_call_state *state; - TDB_DATA data; - struct ctdb_ltdb_header *hdr; - struct ctdb_call call; - - ZERO_STRUCT(call); - call.call_id = CTDB_NULL_FUNC; - call.flags = CTDB_IMMEDIATE_MIGRATION; - call.flags |= CTDB_CALL_FLAG_VACUUM_MIGRATION; - - call.key.dptr = &r->data[0]; - call.key.dsize = r->keylen; - - /* ensure we don't block this daemon - just skip a record if we can't get - the chainlock */ - if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, call.key) != 0) { - return true; - } - - data = tdb_fetch(ctdb_db->ltdb->tdb, call.key); - if (data.dptr == NULL) { - tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); - return true; - } - - if (data.dsize < sizeof(struct ctdb_ltdb_header)) { - free(data.dptr); - tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); - return true; - } - - hdr = (struct ctdb_ltdb_header *)data.dptr; - if (hdr->dmaster == pnn) { - /* its already local */ - free(data.dptr); - tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); - return true; - } - - free(data.dptr); - - state = ctdb_call_send(ctdb_db, &call); - tdb_chainunlock(ctdb_db->ltdb->tdb, call.key); - if (state == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to setup vacuum fetch call\n")); - return false; - } - state->async.fn = vacuum_fetch_callback; - state->async.private_data = NULL; - - return true; -} - - -/* - handler for vacuum fetch -*/ -static void vacuum_fetch_handler(uint64_t srvid, TDB_DATA data, - void *private_data) -{ - struct ctdb_recoverd *rec = talloc_get_type( - private_data, struct ctdb_recoverd); - struct ctdb_context *ctdb = rec->ctdb; - struct ctdb_marshall_buffer *recs; - unsigned int i; - int ret; - TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - const char *name; - struct ctdb_dbid_map_old *dbmap=NULL; - uint8_t db_flags = 0; - struct ctdb_db_context *ctdb_db; - struct ctdb_rec_data_old *r; - - recs = (struct ctdb_marshall_buffer 
*)data.dptr; - - if (recs->count == 0) { - goto done; - } - - /* work out if the database is persistent */ - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from local node\n")); - goto done; - } - - for (i=0;inum;i++) { - if (dbmap->dbs[i].db_id == recs->db_id) { - db_flags = dbmap->dbs[i].flags; - break; - } - } - if (i == dbmap->num) { - DEBUG(DEBUG_ERR, (__location__ " Unable to find db_id 0x%x on local node\n", recs->db_id)); - goto done; - } - - /* find the name of this database */ - if (ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, recs->db_id, tmp_ctx, &name) != 0) { - DEBUG(DEBUG_ERR,(__location__ " Failed to get name of db 0x%x\n", recs->db_id)); - goto done; - } - - /* attach to it */ - ctdb_db = ctdb_attach(ctdb, CONTROL_TIMEOUT(), name, db_flags); - if (ctdb_db == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to attach to database '%s'\n", name)); - goto done; - } - - r = (struct ctdb_rec_data_old *)&recs->data[0]; - while (recs->count) { - bool ok; - - ok = vacuum_fetch_process_one(ctdb_db, rec->ctdb->pnn, r); - if (!ok) { - break; - } - - r = (struct ctdb_rec_data_old *)(r->length + (uint8_t *)r); - recs->count--; - } - -done: - talloc_free(tmp_ctx); -} - - /* * handler for database detach */ @@ -3147,9 +3001,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) /* when we are asked to puch out a flag change */ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec); - /* register a message port for vacuum fetch */ - ctdb_client_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec); - /* register a message port for reloadnodes */ ctdb_client_set_message_handler(ctdb, CTDB_SRVID_RELOAD_NODES, reload_nodes_handler, rec); -- 2.25.1 From def9c781591aaf33b37ab7a0fe3153d2f7c3fe82 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 16 Feb 2018 17:17:38 +1100 
Subject: [PATCH 11/38] ctdb-recoverd: No need for database detach handler The only reason for recoverd attaching to databases was to migrate records to the local node as part of vacuuming. Recovery daemon does not take part in database vacuuming any more. The actual database recovery is handled via the recovery_helper and recovery daemon should not need to attach to the databases any more. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit c6427dddf5425b267d8c09e8df18653a48679646) --- ctdb/server/ctdb_ltdb_server.c | 8 -------- ctdb/server/ctdb_recoverd.c | 35 ---------------------------------- 2 files changed, 43 deletions(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index 1ccf60832e1..970eb54b00b 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1266,14 +1266,6 @@ int32_t ctdb_control_db_detach(struct ctdb_context *ctdb, TDB_DATA indata, return -1; } - /* Detach database from recoverd */ - if (ctdb_daemon_send_message(ctdb, ctdb->pnn, - CTDB_SRVID_DETACH_DATABASE, - indata) != 0) { - DEBUG(DEBUG_ERR, ("Unable to detach DB from recoverd\n")); - return -1; - } - /* Disable vacuuming and drop all vacuuming data */ talloc_free(ctdb_db->vacuum_handle); talloc_free(ctdb_db->delete_queue); diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index bbaf1270558..857736e30c8 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -574,36 +574,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node return 0; } -/* - * handler for database detach - */ -static void detach_database_handler(uint64_t srvid, TDB_DATA data, - void *private_data) -{ - struct ctdb_recoverd *rec = talloc_get_type( - private_data, struct ctdb_recoverd); - struct ctdb_context *ctdb = rec->ctdb; - uint32_t db_id; - struct ctdb_db_context *ctdb_db; - - if (data.dsize != sizeof(db_id)) { - return; - } - db_id = *(uint32_t *)data.dptr; 
- - ctdb_db = find_ctdb_db(ctdb, db_id); - if (ctdb_db == NULL) { - /* database is not attached */ - return; - } - - DLIST_REMOVE(ctdb->db_list, ctdb_db); - - DEBUG(DEBUG_NOTICE, ("Detached from database '%s'\n", - ctdb_db->db_name)); - talloc_free(ctdb_db); -} - /* called when ctdb_wait_timeout should finish */ @@ -3024,11 +2994,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) CTDB_SRVID_DISABLE_RECOVERIES, disable_recoveries_handler, rec); - /* register a message port for detaching database */ - ctdb_client_set_message_handler(ctdb, - CTDB_SRVID_DETACH_DATABASE, - detach_database_handler, rec); - for (;;) { TALLOC_CTX *mem_ctx = talloc_new(ctdb); struct timeval start; -- 2.25.1 From 8bea2828efde8d4bc2761fd0b5c674927967f6c4 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 6 Jun 2018 15:47:13 +0200 Subject: [PATCH 12/38] ctdb-daemon: Avoid memory leak when packet is deferred Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 680df07630a94b3e76edefe98ee0986e7e5e1f12) --- ctdb/server/ctdb_server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 4b4c2e9896f..1470b00dba5 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -374,8 +374,9 @@ static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header return; } q->ctdb = ctdb; - q->hdr = talloc_memdup(ctdb, hdr, hdr->length); + q->hdr = talloc_memdup(q, hdr, hdr->length); if (q->hdr == NULL) { + talloc_free(q); DEBUG(DEBUG_ERR,("Error copying deferred packet to self\n")); return; } -- 2.25.1 From c19bbeb3cdb2e23a8cddeb93027d2fa6e97bdae1 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 1 Oct 2019 15:05:10 +1000 Subject: [PATCH 13/38] ctdb-vacuum: Avoid processing any more packets All the vacuum operations if required have an event loop to ensure completion of pending operations. 
Once all the steps are complete, there is no reason to process any more packets. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit d0cc9edc05b6218a8e20a0a8009cbb9918ff4d02) --- ctdb/server/ctdb_vacuum.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index ddaef863045..79dced38bf5 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -1257,9 +1257,6 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, talloc_free(tmp_ctx); - /* this ensures we run our event queue */ - ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE); - return 0; } -- 2.25.1 From e9d2b18a1b2d79ba9b297604e06cff2ee1b2b7bb Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 30 Jul 2019 10:34:03 +1000 Subject: [PATCH 14/38] ctdb-protocol: Add new control CTDB_CONTROL_DB_VACUUM Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit b314835341e4028f0770fa7f9a37d2d21448ddfd) --- ctdb/protocol/protocol.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h index e47daeadba1..43175ae3a95 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -374,6 +374,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_TUNNEL_REGISTER = 152, CTDB_CONTROL_TUNNEL_DEREGISTER = 153, CTDB_CONTROL_VACUUM_FETCH = 154, + CTDB_CONTROL_DB_VACUUM = 155, }; #define MAX_COUNT_BUCKETS 16 @@ -852,6 +853,12 @@ struct ctdb_pid_srvid { uint64_t srvid; }; +struct ctdb_db_vacuum { + uint32_t db_id; + bool full_vacuum_run; + +}; + struct ctdb_req_control_data { uint32_t opcode; union { @@ -889,6 +896,7 @@ struct ctdb_req_control_data { struct ctdb_traverse_start_ext *traverse_start_ext; struct ctdb_traverse_all_ext *traverse_all_ext; struct ctdb_pid_srvid *pid_srvid; + struct ctdb_db_vacuum *db_vacuum; } data; }; -- 2.25.1 From 67cef10770048fe9f096659d34cf50ea22f2e140 Mon Sep 17 00:00:00 
2001 From: Martin Schwenke Date: Tue, 30 Jul 2019 16:59:37 +1000 Subject: [PATCH 15/38] ctdb-protocol: Add marshalling for struct ctdb_db_vacuum Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit a896486b62bbcf9915727ba7bfc768fb5383f0c7) --- ctdb/protocol/protocol_private.h | 10 +++++ ctdb/protocol/protocol_types.c | 63 ++++++++++++++++++++++++++++ ctdb/tests/src/protocol_common.c | 13 ++++++ ctdb/tests/src/protocol_common.h | 4 ++ ctdb/tests/src/protocol_types_test.c | 2 + 5 files changed, 92 insertions(+) diff --git a/ctdb/protocol/protocol_private.h b/ctdb/protocol/protocol_private.h index 1c3e56fcb7a..b151e64ef09 100644 --- a/ctdb/protocol/protocol_private.h +++ b/ctdb/protocol/protocol_private.h @@ -83,6 +83,16 @@ void ctdb_pulldb_ext_push(struct ctdb_pulldb_ext *in, uint8_t *buf, int ctdb_pulldb_ext_pull(uint8_t *buf, size_t buflen, TALLOC_CTX *mem_ctx, struct ctdb_pulldb_ext **out, size_t *npull); +size_t ctdb_db_vacuum_len(struct ctdb_db_vacuum *in); +void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in, + uint8_t *buf, + size_t *npush); +int ctdb_db_vacuum_pull(uint8_t *buf, + size_t buflen, + TALLOC_CTX *mem_ctx, + struct ctdb_db_vacuum **out, + size_t *npull); + size_t ctdb_traverse_start_len(struct ctdb_traverse_start *in); void ctdb_traverse_start_push(struct ctdb_traverse_start *in, uint8_t *buf, size_t *npush); diff --git a/ctdb/protocol/protocol_types.c b/ctdb/protocol/protocol_types.c index d9388b74b1e..fb288635234 100644 --- a/ctdb/protocol/protocol_types.c +++ b/ctdb/protocol/protocol_types.c @@ -1240,6 +1240,69 @@ fail: return ret; } +size_t ctdb_db_vacuum_len(struct ctdb_db_vacuum *in) +{ + return ctdb_uint32_len(&in->db_id) + + ctdb_bool_len(&in->full_vacuum_run); +} + +void ctdb_db_vacuum_push(struct ctdb_db_vacuum *in, + uint8_t *buf, + size_t *npush) +{ + size_t offset = 0, np; + + ctdb_uint32_push(&in->db_id, buf+offset, &np); + offset += np; + + ctdb_bool_push(&in->full_vacuum_run, buf+offset, &np); + 
offset += np; + + *npush = offset; +} + +int ctdb_db_vacuum_pull(uint8_t *buf, + size_t buflen, + TALLOC_CTX *mem_ctx, + struct ctdb_db_vacuum **out, + size_t *npull) +{ + struct ctdb_db_vacuum *val; + size_t offset = 0, np; + int ret; + + val = talloc(mem_ctx, struct ctdb_db_vacuum); + if (val == NULL) { + return ENOMEM; + } + + ret = ctdb_uint32_pull(buf+offset, + buflen-offset, + &val->db_id, + &np); + if (ret != 0) { + goto fail;; + } + offset += np; + + ret = ctdb_bool_pull(buf+offset, + buflen-offset, + &val->full_vacuum_run, + &np); + if (ret != 0) { + goto fail; + } + offset += np; + + *out = val; + *npull = offset; + return 0; + +fail: + talloc_free(val); + return ret; +} + size_t ctdb_ltdb_header_len(struct ctdb_ltdb_header *in) { return ctdb_uint64_len(&in->rsn) + diff --git a/ctdb/tests/src/protocol_common.c b/ctdb/tests/src/protocol_common.c index 73e9a8c5d0d..2030b4bb5e5 100644 --- a/ctdb/tests/src/protocol_common.c +++ b/ctdb/tests/src/protocol_common.c @@ -310,6 +310,19 @@ void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1, assert(p1->srvid == p2->srvid); } +void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p) +{ + fill_ctdb_uint32(&p->db_id); + fill_ctdb_bool(&p->full_vacuum_run); +} + +void verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1, + struct ctdb_db_vacuum *p2) +{ + verify_ctdb_uint32(&p1->db_id, &p2->db_id); + verify_ctdb_bool(&p1->full_vacuum_run, &p2->full_vacuum_run); +} + void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p) { p->rsn = rand64(); diff --git a/ctdb/tests/src/protocol_common.h b/ctdb/tests/src/protocol_common.h index ec00cf97b63..2b4fb6a07a9 100644 --- a/ctdb/tests/src/protocol_common.h +++ b/ctdb/tests/src/protocol_common.h @@ -64,6 +64,10 @@ void fill_ctdb_pulldb_ext(TALLOC_CTX *mem_ctx, struct ctdb_pulldb_ext *p); void verify_ctdb_pulldb_ext(struct ctdb_pulldb_ext *p1, struct ctdb_pulldb_ext *p2); +void fill_ctdb_db_vacuum(TALLOC_CTX *mem_ctx, struct ctdb_db_vacuum *p); +void 
verify_ctdb_db_vacuum(struct ctdb_db_vacuum *p1, + struct ctdb_db_vacuum *p2); + void fill_ctdb_ltdb_header(struct ctdb_ltdb_header *p); void verify_ctdb_ltdb_header(struct ctdb_ltdb_header *p1, struct ctdb_ltdb_header *p2); diff --git a/ctdb/tests/src/protocol_types_test.c b/ctdb/tests/src/protocol_types_test.c index e607d06b820..e9cf4debe89 100644 --- a/ctdb/tests/src/protocol_types_test.c +++ b/ctdb/tests/src/protocol_types_test.c @@ -38,6 +38,7 @@ PROTOCOL_TYPE3_TEST(struct ctdb_dbid, ctdb_dbid); PROTOCOL_TYPE3_TEST(struct ctdb_dbid_map, ctdb_dbid_map); PROTOCOL_TYPE3_TEST(struct ctdb_pulldb, ctdb_pulldb); PROTOCOL_TYPE3_TEST(struct ctdb_pulldb_ext, ctdb_pulldb_ext); +PROTOCOL_TYPE3_TEST(struct ctdb_db_vacuum, ctdb_db_vacuum); PROTOCOL_TYPE1_TEST(struct ctdb_ltdb_header, ctdb_ltdb_header); PROTOCOL_TYPE3_TEST(struct ctdb_rec_data, ctdb_rec_data); PROTOCOL_TYPE3_TEST(struct ctdb_rec_buffer, ctdb_rec_buffer); @@ -143,6 +144,7 @@ int main(int argc, char *argv[]) TEST_FUNC(ctdb_dbid_map)(); TEST_FUNC(ctdb_pulldb)(); TEST_FUNC(ctdb_pulldb_ext)(); + TEST_FUNC(ctdb_db_vacuum)(); TEST_FUNC(ctdb_ltdb_header)(); TEST_FUNC(ctdb_rec_data)(); TEST_FUNC(ctdb_rec_buffer)(); -- 2.25.1 From d653d75e3af6d77bba9ea9afbb939574717e3680 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 30 Jul 2019 10:52:05 +1000 Subject: [PATCH 16/38] ctdb-protocol: Add marshalling for control DB_VACUUM Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 496204feb0e2b6eb2f3d9a74e45596a3e74ad9b1) --- ctdb/protocol/protocol_api.h | 4 ++++ ctdb/protocol/protocol_client.c | 24 ++++++++++++++++++++++++ ctdb/protocol/protocol_control.c | 25 +++++++++++++++++++++++++ ctdb/protocol/protocol_debug.c | 1 + ctdb/tests/cunit/protocol_test_101.sh | 2 +- ctdb/tests/src/protocol_common_ctdb.c | 15 +++++++++++++++ ctdb/tests/src/protocol_ctdb_test.c | 2 +- 7 files changed, 71 insertions(+), 2 deletions(-) diff --git a/ctdb/protocol/protocol_api.h 
b/ctdb/protocol/protocol_api.h index cf4c4635dd4..c2cd4a76289 100644 --- a/ctdb/protocol/protocol_api.h +++ b/ctdb/protocol/protocol_api.h @@ -607,6 +607,10 @@ void ctdb_req_control_vacuum_fetch(struct ctdb_req_control *request, struct ctdb_rec_buffer *recbuf); int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply); +void ctdb_req_control_db_vacuum(struct ctdb_req_control *request, + struct ctdb_db_vacuum *db_vacuum); +int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply); + /* From protocol/protocol_debug.c */ void ctdb_packet_print(uint8_t *buf, size_t buflen, FILE *fp); diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c index d5f6a222fe4..84dc55a34a3 100644 --- a/ctdb/protocol/protocol_client.c +++ b/ctdb/protocol/protocol_client.c @@ -2357,3 +2357,27 @@ int ctdb_reply_control_vacuum_fetch(struct ctdb_reply_control *reply) return reply->status; } + +/* CTDB_CONTROL_DB_VACUUM */ + +void ctdb_req_control_db_vacuum(struct ctdb_req_control *request, + struct ctdb_db_vacuum *db_vacuum) +{ + request->opcode = CTDB_CONTROL_DB_VACUUM; + request->pad = 0; + request->srvid = 0; + request->client_id = 0; + request->flags = 0; + + request->rdata.opcode = CTDB_CONTROL_DB_VACUUM; + request->rdata.data.db_vacuum = db_vacuum; +} + +int ctdb_reply_control_db_vacuum(struct ctdb_reply_control *reply) +{ + if (reply->rdata.opcode != CTDB_CONTROL_DB_VACUUM) { + return EPROTO; + } + + return reply->status; +} diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c index 1cc985a71a7..a25c9b1cfe0 100644 --- a/ctdb/protocol/protocol_control.c +++ b/ctdb/protocol/protocol_control.c @@ -411,6 +411,10 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) case CTDB_CONTROL_VACUUM_FETCH: len = ctdb_rec_buffer_len(cd->data.recbuf); break; + + case CTDB_CONTROL_DB_VACUUM: + len = ctdb_db_vacuum_len(cd->data.db_vacuum); + break; } return len; @@ -690,6 +694,10 @@ static void 
ctdb_req_control_data_push(struct ctdb_req_control_data *cd, case CTDB_CONTROL_VACUUM_FETCH: ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); break; + + case CTDB_CONTROL_DB_VACUUM: + ctdb_db_vacuum_push(cd->data.db_vacuum, buf, &np); + break; } *npush = np; @@ -1019,6 +1027,14 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, &cd->data.recbuf, &np); break; + + case CTDB_CONTROL_DB_VACUUM: + ret = ctdb_db_vacuum_pull(buf, + buflen, + mem_ctx, + &cd->data.db_vacuum, + &np); + break; } if (ret != 0) { @@ -1379,6 +1395,9 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) case CTDB_CONTROL_VACUUM_FETCH: break; + + case CTDB_CONTROL_DB_VACUUM: + break; } return len; @@ -1536,6 +1555,9 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_VACUUM_FETCH: break; + + case CTDB_CONTROL_DB_VACUUM: + break; } *npush = np; @@ -1723,6 +1745,9 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, case CTDB_CONTROL_VACUUM_FETCH: break; + + case CTDB_CONTROL_DB_VACUUM: + break; } if (ret != 0) { diff --git a/ctdb/protocol/protocol_debug.c b/ctdb/protocol/protocol_debug.c index 97903ea98f4..3fe78b13162 100644 --- a/ctdb/protocol/protocol_debug.c +++ b/ctdb/protocol/protocol_debug.c @@ -243,6 +243,7 @@ static void ctdb_opcode_print(uint32_t opcode, FILE *fp) { CTDB_CONTROL_TUNNEL_REGISTER, "TUNNEL_REGISTER" }, { CTDB_CONTROL_TUNNEL_DEREGISTER, "TUNNEL_DEREGISTER" }, { CTDB_CONTROL_VACUUM_FETCH, "VACUUM_FETCH" }, + { CTDB_CONTROL_DB_VACUUM, "DB_VACUUM" }, { MAP_END, "" }, }; diff --git a/ctdb/tests/cunit/protocol_test_101.sh b/ctdb/tests/cunit/protocol_test_101.sh index a0bf9d08754..6b07dc017ba 100755 --- a/ctdb/tests/cunit/protocol_test_101.sh +++ b/ctdb/tests/cunit/protocol_test_101.sh @@ -2,7 +2,7 @@ . 
"${TEST_SCRIPTS_DIR}/unit.sh" -last_control=154 +last_control=155 generate_control_output () { diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c index b02976b5d67..6989010cd6f 100644 --- a/ctdb/tests/src/protocol_common_ctdb.c +++ b/ctdb/tests/src/protocol_common_ctdb.c @@ -594,6 +594,12 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, assert(cd->data.recbuf != NULL); fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); break; + + case CTDB_CONTROL_DB_VACUUM: + cd->data.db_vacuum = talloc(mem_ctx, struct ctdb_db_vacuum); + assert(cd->data.db_vacuum != NULL); + fill_ctdb_db_vacuum(mem_ctx, cd->data.db_vacuum); + break; } } @@ -984,6 +990,10 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, case CTDB_CONTROL_VACUUM_FETCH: verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); break; + + case CTDB_CONTROL_DB_VACUUM: + verify_ctdb_db_vacuum(cd->data.db_vacuum, cd2->data.db_vacuum); + break; } } @@ -1381,6 +1391,8 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_VACUUM_FETCH: break; + case CTDB_CONTROL_DB_VACUUM: + break; } } @@ -1718,6 +1730,9 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_VACUUM_FETCH: break; + + case CTDB_CONTROL_DB_VACUUM: + break; } } diff --git a/ctdb/tests/src/protocol_ctdb_test.c b/ctdb/tests/src/protocol_ctdb_test.c index 3ebf15dff6c..b13cd5491d1 100644 --- a/ctdb/tests/src/protocol_ctdb_test.c +++ b/ctdb/tests/src/protocol_ctdb_test.c @@ -284,7 +284,7 @@ PROTOCOL_CTDB4_TEST(struct ctdb_req_dmaster, ctdb_req_dmaster, PROTOCOL_CTDB4_TEST(struct ctdb_reply_dmaster, ctdb_reply_dmaster, CTDB_REPLY_DMASTER); -#define NUM_CONTROLS 155 +#define NUM_CONTROLS 156 PROTOCOL_CTDB2_TEST(struct ctdb_req_control_data, ctdb_req_control_data); PROTOCOL_CTDB2_TEST(struct ctdb_reply_control_data, ctdb_reply_control_data); -- 2.25.1 From 06cf521a1cbdd1f1c682cc8a789d9e60e8cd16dd Mon Sep 17 00:00:00 2001 From: Martin Schwenke 
Date: Fri, 4 Oct 2019 12:06:21 +1000 Subject: [PATCH 17/38] ctdb-vacuum: Simplify recording of in-progress vacuuming child There can only be one, so simplify the logic. Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 5539edfdbe69d1d5f084d06753cce8ed6e524999) --- ctdb/include/ctdb_private.h | 2 +- ctdb/server/ctdb_vacuum.c | 20 ++++++++------------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index d7b568d6c0c..19b8bb98d8f 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -318,7 +318,7 @@ struct ctdb_context { TALLOC_CTX *banning_ctx; - struct ctdb_vacuum_child_context *vacuumers; + struct ctdb_vacuum_child_context *vacuumer; /* mapping from pid to ctdb_client * */ struct ctdb_client_pid_list *client_pids; diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 79dced38bf5..255898c84d9 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -48,7 +48,6 @@ enum vacuum_child_status { VACUUM_RUNNING, VACUUM_OK, VACUUM_ERROR, VACUUM_TIMEOUT}; struct ctdb_vacuum_child_context { - struct ctdb_vacuum_child_context *next, *prev; struct ctdb_vacuum_handle *vacuum_handle; /* fd child writes status to */ int fd[2]; @@ -59,7 +58,6 @@ struct ctdb_vacuum_child_context { struct ctdb_vacuum_handle { struct ctdb_db_context *ctdb_db; - struct ctdb_vacuum_child_context *child_ctx; uint32_t fast_path_count; }; @@ -1325,7 +1323,7 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx) child_ctx->vacuum_handle->fast_path_count++; } - DLIST_REMOVE(ctdb->vacuumers, child_ctx); + ctdb->vacuumer = NULL; tevent_add_timer(ctdb->ev, child_ctx->vacuum_handle, timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), @@ -1407,7 +1405,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, * same time. 
If there is vacuuming child process active, delay * new vacuuming event to stagger vacuuming events. */ - if (ctdb->vacuumers != NULL) { + if (ctdb->vacuumer != NULL) { tevent_add_timer(ctdb->ev, vacuum_handle, timeval_current_ofs(0, 500*1000), ctdb_vacuum_event, vacuum_handle); @@ -1475,7 +1473,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, child_ctx->status = VACUUM_RUNNING; child_ctx->start_time = timeval_current(); - DLIST_ADD(ctdb->vacuumers, child_ctx); + ctdb->vacuumer = child_ctx; talloc_set_destructor(child_ctx, vacuum_child_destructor); /* @@ -1506,19 +1504,17 @@ static void ctdb_vacuum_event(struct tevent_context *ev, TEVENT_FD_READ, vacuum_child_handler, child_ctx); tevent_fd_set_auto_close(fde); - vacuum_handle->child_ctx = child_ctx; child_ctx->vacuum_handle = vacuum_handle; } void ctdb_stop_vacuuming(struct ctdb_context *ctdb) { - /* Simply free them all. */ - while (ctdb->vacuumers) { - DEBUG(DEBUG_INFO, ("Aborting vacuuming for %s (%i)\n", - ctdb->vacuumers->vacuum_handle->ctdb_db->db_name, - (int)ctdb->vacuumers->child_pid)); + if (ctdb->vacuumer != NULL) { + D_INFO("Aborting vacuuming for %s (%i)\n", + ctdb->vacuumer->vacuum_handle->ctdb_db->db_name, + (int)ctdb->vacuumer->child_pid); /* vacuum_child_destructor kills it, removes from list */ - talloc_free(ctdb->vacuumers); + talloc_free(ctdb->vacuumer); } } -- 2.25.1 From 657252c84a9d54470079fc2410deece682d0cb73 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 30 Jul 2019 14:16:13 +1000 Subject: [PATCH 18/38] ctdb-daemon: Factor out code to create vacuuming child This changes the behaviour for some failures from exiting to simply attempting to schedule the next run. 
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 13cedaf0195c6bda3a3820aedb1ee65f36dfc23e) --- ctdb/server/ctdb_vacuum.c | 134 ++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 48 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 255898c84d9..7d13c0a4222 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -1377,28 +1377,23 @@ static void vacuum_child_handler(struct tevent_context *ev, /* * this event is called every time we need to start a new vacuum process */ -static void ctdb_vacuum_event(struct tevent_context *ev, - struct tevent_timer *te, - struct timeval t, void *private_data) +static int vacuum_db_child(TALLOC_CTX *mem_ctx, + struct ctdb_db_context *ctdb_db, + bool full_vacuum_run, + struct ctdb_vacuum_child_context **out) { - struct ctdb_vacuum_handle *vacuum_handle = talloc_get_type(private_data, struct ctdb_vacuum_handle); - struct ctdb_db_context *ctdb_db = vacuum_handle->ctdb_db; struct ctdb_context *ctdb = ctdb_db->ctdb; struct ctdb_vacuum_child_context *child_ctx; struct tevent_fd *fde; - bool full_vacuum_run = false; int ret; /* we don't vacuum if we are in recovery mode, or db frozen */ if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || ctdb_db_frozen(ctdb_db)) { - DEBUG(DEBUG_INFO, ("Not vacuuming %s (%s)\n", ctdb_db->db_name, - ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ? - "in recovery" : "frozen")); - tevent_add_timer(ctdb->ev, vacuum_handle, - timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), - ctdb_vacuum_event, vacuum_handle); - return; + D_INFO("Not vacuuming %s (%s)\n", ctdb_db->db_name, + ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE ? + "in recovery" : "frozen"); + return EAGAIN; } /* Do not allow multiple vacuuming child processes to be active at the @@ -1406,35 +1401,22 @@ static void ctdb_vacuum_event(struct tevent_context *ev, * new vacuuming event to stagger vacuuming events. 
*/ if (ctdb->vacuumer != NULL) { - tevent_add_timer(ctdb->ev, vacuum_handle, - timeval_current_ofs(0, 500*1000), - ctdb_vacuum_event, vacuum_handle); - return; + return EBUSY; } - child_ctx = talloc(vacuum_handle, struct ctdb_vacuum_child_context); + child_ctx = talloc_zero(mem_ctx, struct ctdb_vacuum_child_context); if (child_ctx == NULL) { - DEBUG(DEBUG_CRIT, (__location__ " Failed to allocate child context for vacuuming of %s\n", ctdb_db->db_name)); - ctdb_fatal(ctdb, "Out of memory when crating vacuum child context. Shutting down\n"); + DBG_ERR("Failed to allocate child context for vacuuming of %s\n", + ctdb_db->db_name); + return ENOMEM; } ret = pipe(child_ctx->fd); if (ret != 0) { talloc_free(child_ctx); - DEBUG(DEBUG_ERR, ("Failed to create pipe for vacuum child process.\n")); - tevent_add_timer(ctdb->ev, vacuum_handle, - timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), - ctdb_vacuum_event, vacuum_handle); - return; - } - - if (vacuum_handle->fast_path_count >= - ctdb->tunable.vacuum_fast_path_count) { - if (ctdb->tunable.vacuum_fast_path_count > 0) { - full_vacuum_run = true; - } - vacuum_handle->fast_path_count = 0; + D_ERR("Failed to create pipe for vacuum child process.\n"); + return EAGAIN; } child_ctx->child_pid = ctdb_fork(ctdb); @@ -1442,11 +1424,8 @@ static void ctdb_vacuum_event(struct tevent_context *ev, close(child_ctx->fd[0]); close(child_ctx->fd[1]); talloc_free(child_ctx); - DEBUG(DEBUG_ERR, ("Failed to fork vacuum child process.\n")); - tevent_add_timer(ctdb->ev, vacuum_handle, - timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), - ctdb_vacuum_event, vacuum_handle); - return; + D_ERR("Failed to fork vacuum child process.\n"); + return EAGAIN; } @@ -1454,11 +1433,15 @@ static void ctdb_vacuum_event(struct tevent_context *ev, char cc = 0; close(child_ctx->fd[0]); - DEBUG(DEBUG_INFO,("Vacuuming child process %d for db %s started\n", getpid(), ctdb_db->db_name)); + D_INFO("Vacuuming child process %d for db %s started\n", + getpid(), + 
ctdb_db->db_name); prctl_set_comment("ctdb_vacuum"); - if (switch_from_server_to_client(ctdb) != 0) { - DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch vacuum daemon into client mode. Shutting down.\n")); - _exit(1); + ret = switch_from_server_to_client(ctdb); + if (ret != 0) { + DBG_ERR("ERROR: failed to switch vacuum daemon " + "into client mode.\n"); + return EIO; } cc = ctdb_vacuum_and_repack_db(ctdb_db, full_vacuum_run); @@ -1482,9 +1465,8 @@ static void ctdb_vacuum_event(struct tevent_context *ev, talloc_free(ctdb_db->delete_queue); ctdb_db->delete_queue = trbt_create(ctdb_db, 0); if (ctdb_db->delete_queue == NULL) { - /* fatal here? ... */ - ctdb_fatal(ctdb, "Out of memory when re-creating delete queue " - "in parent context. Shutting down\n"); + DBG_ERR("Out of memory when re-creating vacuum tree\n"); + return ENOMEM; } talloc_free(ctdb_db->fetch_queue); @@ -1495,16 +1477,72 @@ static void ctdb_vacuum_event(struct tevent_context *ev, } tevent_add_timer(ctdb->ev, child_ctx, - timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, 0), + timeval_current_ofs(ctdb->tunable.vacuum_max_run_time, + 0), vacuum_child_timeout, child_ctx); - DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child vacuum process\n", child_ctx->fd[0])); + DBG_DEBUG(" Created PIPE FD:%d to child vacuum process\n", + child_ctx->fd[0]); fde = tevent_add_fd(ctdb->ev, child_ctx, child_ctx->fd[0], TEVENT_FD_READ, vacuum_child_handler, child_ctx); tevent_fd_set_auto_close(fde); - child_ctx->vacuum_handle = vacuum_handle; + child_ctx->vacuum_handle = ctdb_db->vacuum_handle; + + *out = child_ctx; + return 0; +} + +static void ctdb_vacuum_event(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval t, void *private_data) +{ + struct ctdb_vacuum_handle *vacuum_handle = talloc_get_type( + private_data, struct ctdb_vacuum_handle); + struct ctdb_db_context *ctdb_db = vacuum_handle->ctdb_db; + struct ctdb_context *ctdb = ctdb_db->ctdb; + struct ctdb_vacuum_child_context 
*child_ctx = NULL; + uint32_t fast_path_max = ctdb->tunable.vacuum_fast_path_count; + bool full_vacuum_run = false; + int ret; + + if (vacuum_handle->fast_path_count >= fast_path_max) { + if (fast_path_max > 0) { + full_vacuum_run = true; + } + vacuum_handle->fast_path_count = 0; + } + + ret = vacuum_db_child(vacuum_handle, + ctdb_db, + full_vacuum_run, + &child_ctx); + + if (ret == 0) { + return; + } + + switch (ret) { + case EBUSY: + /* Stagger */ + tevent_add_timer(ctdb->ev, + vacuum_handle, + timeval_current_ofs(0, 500*1000), + ctdb_vacuum_event, + vacuum_handle); + break; + + default: + /* Temporary failure, schedule next attempt */ + tevent_add_timer(ctdb->ev, + vacuum_handle, + timeval_current_ofs( + get_vacuum_interval(ctdb_db), 0), + ctdb_vacuum_event, + vacuum_handle); + } + } void ctdb_stop_vacuuming(struct ctdb_context *ctdb) -- 2.25.1 From 1f34c5e5c8f796791aae40bdc9ae9afc6bca8a1e Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 15 Oct 2019 16:36:44 +1100 Subject: [PATCH 19/38] ctdb-vacuum: Only schedule next vacuum event if vacuuming is scheduled At the moment vacuuming is always scheduled.
Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d462d64cdf001fd5d1cbf2a109df62e087ad0c49) --- ctdb/server/ctdb_vacuum.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 7d13c0a4222..910751d59e0 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -54,6 +54,7 @@ struct ctdb_vacuum_child_context { pid_t child_pid; enum vacuum_child_status status; struct timeval start_time; + bool scheduled; }; struct ctdb_vacuum_handle { @@ -1325,9 +1326,14 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx) ctdb->vacuumer = NULL; - tevent_add_timer(ctdb->ev, child_ctx->vacuum_handle, - timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), - ctdb_vacuum_event, child_ctx->vacuum_handle); + if (child_ctx->scheduled) { + tevent_add_timer( + ctdb->ev, + child_ctx->vacuum_handle, + timeval_current_ofs(get_vacuum_interval(ctdb_db), 0), + ctdb_vacuum_event, + child_ctx->vacuum_handle); + } return 0; } @@ -1379,6 +1385,7 @@ static void vacuum_child_handler(struct tevent_context *ev, */ static int vacuum_db_child(TALLOC_CTX *mem_ctx, struct ctdb_db_context *ctdb_db, + bool scheduled, bool full_vacuum_run, struct ctdb_vacuum_child_context **out) { @@ -1454,6 +1461,7 @@ static int vacuum_db_child(TALLOC_CTX *mem_ctx, close(child_ctx->fd[1]); child_ctx->status = VACUUM_RUNNING; + child_ctx->scheduled = scheduled; child_ctx->start_time = timeval_current(); ctdb->vacuumer = child_ctx; @@ -1516,6 +1524,7 @@ static void ctdb_vacuum_event(struct tevent_context *ev, ret = vacuum_db_child(vacuum_handle, ctdb_db, + true, full_vacuum_run, &child_ctx); -- 2.25.1 From 9f74111b4b279b846d2beb6ce3ff8aa7f681f926 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 30 Jul 2019 14:17:11 +1000 Subject: [PATCH 20/38] ctdb-daemon: Implement DB_VACUUM control Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked 
from commit 41a41d5f3e2b8e16e25221e14939dc5962997ac7) --- ctdb/include/ctdb_private.h | 5 +++ ctdb/server/ctdb_control.c | 9 ++++ ctdb/server/ctdb_vacuum.c | 89 +++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 19b8bb98d8f..d3e70b5e2fa 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -986,6 +986,11 @@ int32_t ctdb_control_uptime(struct ctdb_context *ctdb, TDB_DATA *outdata); /* from ctdb_vacuum.c */ +int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, + bool *async_reply); + void ctdb_stop_vacuuming(struct ctdb_context *ctdb); int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db); diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 0174f303f14..d162268a178 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -33,6 +33,8 @@ #include "ctdb_private.h" #include "ctdb_client.h" +#include "protocol/protocol_private.h" + #include "common/reqid.h" #include "common/common.h" #include "common/logging.h" @@ -732,6 +734,13 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_VACUUM_FETCH: return ctdb_control_vacuum_fetch(ctdb, indata); + case CTDB_CONTROL_DB_VACUUM: { + struct ctdb_db_vacuum db_vacuum; + + CHECK_CONTROL_DATA_SIZE(ctdb_db_vacuum_len(&db_vacuum)); + return ctdb_control_db_vacuum(ctdb, c, indata, async_reply); + } + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 910751d59e0..2cc6aa53ecb 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -37,6 +37,8 @@ #include "ctdb_private.h" #include "ctdb_client.h" +#include "protocol/protocol_private.h" + #include "common/rb_tree.h" #include "common/common.h" #include "common/logging.h" @@ -1554,6 +1556,93 @@ static void 
ctdb_vacuum_event(struct tevent_context *ev, } +struct vacuum_control_state { + struct ctdb_vacuum_child_context *child_ctx; + struct ctdb_req_control_old *c; + struct ctdb_context *ctdb; +}; + +static int vacuum_control_state_destructor(struct vacuum_control_state *state) +{ + struct ctdb_vacuum_child_context *child_ctx = state->child_ctx; + int32_t status; + + status = (child_ctx->status == VACUUM_OK ? 0 : -1); + ctdb_request_control_reply(state->ctdb, state->c, NULL, status, NULL); + + return 0; +} + +int32_t ctdb_control_db_vacuum(struct ctdb_context *ctdb, + struct ctdb_req_control_old *c, + TDB_DATA indata, + bool *async_reply) +{ + struct ctdb_db_context *ctdb_db; + struct ctdb_vacuum_child_context *child_ctx = NULL; + struct ctdb_db_vacuum *db_vacuum; + struct vacuum_control_state *state; + size_t np; + int ret; + + ret = ctdb_db_vacuum_pull(indata.dptr, + indata.dsize, + ctdb, + &db_vacuum, + &np); + if (ret != 0) { + DBG_ERR("Invalid data\n"); + return -1; + } + + ctdb_db = find_ctdb_db(ctdb, db_vacuum->db_id); + if (ctdb_db == NULL) { + DBG_ERR("Unknown db id 0x%08x\n", db_vacuum->db_id); + talloc_free(db_vacuum); + return -1; + } + + state = talloc(ctdb, struct vacuum_control_state); + if (state == NULL) { + DBG_ERR("Memory allocation error\n"); + return -1; + } + + ret = vacuum_db_child(ctdb_db, + ctdb_db, + false, + db_vacuum->full_vacuum_run, + &child_ctx); + + talloc_free(db_vacuum); + + if (ret == 0) { + (void) talloc_steal(child_ctx, state); + + state->child_ctx = child_ctx; + state->c = talloc_steal(state, c); + state->ctdb = ctdb; + + talloc_set_destructor(state, vacuum_control_state_destructor); + + *async_reply = true; + return 0; + } + + talloc_free(state); + + switch (ret) { + case EBUSY: + DBG_WARNING("Vacuuming collision\n"); + break; + + default: + DBG_ERR("Temporary vacuuming failure, ret=%d\n", ret); + } + + return -1; +} + void ctdb_stop_vacuuming(struct ctdb_context *ctdb) { if (ctdb->vacuumer != NULL) { -- 2.25.1 From 
73767a2520823e57f128707f20578c7cafc76f30 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 27 Sep 2019 16:49:01 +1000 Subject: [PATCH 21/38] ctdb-client: Factor out function client_db_tdb() Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 439ef65d290357e513103530183091a9a6fed197) --- ctdb/client/client_db.c | 37 ++++++++++++++++++++---------------- ctdb/client/client_private.h | 4 ++++ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/ctdb/client/client_db.c b/ctdb/client/client_db.c index a008f2ad63d..0b06d6e5e52 100644 --- a/ctdb/client/client_db.c +++ b/ctdb/client/client_db.c @@ -37,6 +37,11 @@ #include "client/client_private.h" #include "client/client.h" +struct tdb_context *client_db_tdb(struct ctdb_db_context *db) +{ + return db->ltdb->tdb; +} + static struct ctdb_db_context *client_db_handle( struct ctdb_client_context *client, const char *db_name) @@ -835,11 +840,11 @@ int ctdb_db_traverse_local(struct ctdb_db_context *db, bool readonly, state.error = 0; if (readonly) { - ret = tdb_traverse_read(db->ltdb->tdb, + ret = tdb_traverse_read(client_db_tdb(db), ctdb_db_traverse_local_handler, &state); } else { - ret = tdb_traverse(db->ltdb->tdb, + ret = tdb_traverse(client_db_tdb(db), ctdb_db_traverse_local_handler, &state); } @@ -1105,14 +1110,14 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *db, TDB_DATA key, size_t np; int ret; - rec = tdb_fetch(db->ltdb->tdb, key); + rec = tdb_fetch(client_db_tdb(db), key); if (rec.dsize < sizeof(struct ctdb_ltdb_header)) { /* No record present */ if (rec.dptr != NULL) { free(rec.dptr); } - if (tdb_error(db->ltdb->tdb) != TDB_ERR_NOEXIST) { + if (tdb_error(client_db_tdb(db)) != TDB_ERR_NOEXIST) { return EIO; } @@ -1235,18 +1240,18 @@ static int ctdb_fetch_lock_check(struct tevent_req *req) int ret, err = 0; bool do_migrate = false; - ret = tdb_chainlock(h->db->ltdb->tdb, h->key); + ret = tdb_chainlock(client_db_tdb(h->db), h->key); if (ret != 0) { DEBUG(DEBUG_ERR, 
("fetch_lock: %s tdb_chainlock failed, %s\n", - h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); + h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); err = EIO; goto failed; } - data = tdb_fetch(h->db->ltdb->tdb, h->key); + data = tdb_fetch(client_db_tdb(h->db), h->key); if (data.dptr == NULL) { - if (tdb_error(h->db->ltdb->tdb) == TDB_ERR_NOEXIST) { + if (tdb_error(client_db_tdb(h->db)) == TDB_ERR_NOEXIST) { goto migrate; } else { err = EIO; @@ -1297,11 +1302,11 @@ failed: if (data.dptr != NULL) { free(data.dptr); } - ret = tdb_chainunlock(h->db->ltdb->tdb, h->key); + ret = tdb_chainunlock(client_db_tdb(h->db), h->key); if (ret != 0) { DEBUG(DEBUG_ERR, ("fetch_lock: %s tdb_chainunlock failed, %s\n", - h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); + h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); return EIO; } @@ -1377,11 +1382,11 @@ static int ctdb_record_handle_destructor(struct ctdb_record_handle *h) { int ret; - ret = tdb_chainunlock(h->db->ltdb->tdb, h->key); + ret = tdb_chainunlock(client_db_tdb(h->db), h->key); if (ret != 0) { DEBUG(DEBUG_ERR, ("fetch_lock: %s tdb_chainunlock failed, %s\n", - h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); + h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); } free(h->data.dptr); return 0; @@ -1487,11 +1492,11 @@ int ctdb_store_record(struct ctdb_record_handle *h, TDB_DATA data) rec[1].dsize = data.dsize; rec[1].dptr = data.dptr; - ret = tdb_storev(h->db->ltdb->tdb, h->key, rec, 2, TDB_REPLACE); + ret = tdb_storev(client_db_tdb(h->db), h->key, rec, 2, TDB_REPLACE); if (ret != 0) { DEBUG(DEBUG_ERR, ("store_record: %s tdb_storev failed, %s\n", - h->db->db_name, tdb_errorstr(h->db->ltdb->tdb))); + h->db->db_name, tdb_errorstr(client_db_tdb(h->db)))); return EIO; } @@ -1538,11 +1543,11 @@ struct tevent_req *ctdb_delete_record_send(TALLOC_CTX *mem_ctx, rec.dsize = np; rec.dptr = header; - ret = tdb_store(h->db->ltdb->tdb, h->key, rec, TDB_REPLACE); + ret = tdb_store(client_db_tdb(h->db), h->key, rec, 
TDB_REPLACE); if (ret != 0) { D_ERR("fetch_lock delete: %s tdb_store failed, %s\n", h->db->db_name, - tdb_errorstr(h->db->ltdb->tdb)); + tdb_errorstr(client_db_tdb(h->db))); tevent_req_error(req, EIO); return tevent_req_post(req, ev); } diff --git a/ctdb/client/client_private.h b/ctdb/client/client_private.h index bb1705534e6..0bb2ad590ea 100644 --- a/ctdb/client/client_private.h +++ b/ctdb/client/client_private.h @@ -77,6 +77,10 @@ struct ctdb_tunnel_context { void ctdb_client_reply_call(struct ctdb_client_context *client, uint8_t *buf, size_t buflen, uint32_t reqid); +/* From client_db.c */ + +struct tdb_context *client_db_tdb(struct ctdb_db_context *db); + /* From client_message.c */ void ctdb_client_req_message(struct ctdb_client_context *client, -- 2.25.1 From 6744b82657cfbd7e32bb4d0bb630fc94ccc3006b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 13:59:42 +1100 Subject: [PATCH 22/38] ctdb-recovery: Fetched vnnmap is never used, so don't fetch it New vnnmap is constructed using the information from all the connected nodes. So there is no need to fetch the vnnmap from recovery master. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 072ff4d12b8f34766120ddae888d772e97bca491) --- ctdb/server/ctdb_recovery_helper.c | 40 ------------------------------ 1 file changed, 40 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 0597c507ba6..7356ebdb062 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2048,7 +2048,6 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) * * - Get tunables * - Get nodemap - * - Get vnnmap * - Get capabilities from all nodes * - Get dbmap * - Set RECOVERY_ACTIVE @@ -2076,7 +2075,6 @@ struct recovery_state { static void recovery_tunables_done(struct tevent_req *subreq); static void recovery_nodemap_done(struct tevent_req *subreq); -static void recovery_vnnmap_done(struct tevent_req *subreq); static void recovery_capabilities_done(struct tevent_req *subreq); static void recovery_dbmap_done(struct tevent_req *subreq); static void recovery_active_done(struct tevent_req *subreq); @@ -2199,43 +2197,6 @@ static void recovery_nodemap_done(struct tevent_req *subreq) return; } - ctdb_req_control_getvnnmap(&request); - subreq = ctdb_client_control_send(state, state->ev, state->client, - state->destnode, TIMEOUT(), - &request); - if (tevent_req_nomem(subreq, req)) { - return; - } - tevent_req_set_callback(subreq, recovery_vnnmap_done, req); -} - -static void recovery_vnnmap_done(struct tevent_req *subreq) -{ - struct tevent_req *req = tevent_req_callback_data( - subreq, struct tevent_req); - struct recovery_state *state = tevent_req_data( - req, struct recovery_state); - struct ctdb_reply_control *reply; - struct ctdb_req_control request; - bool status; - int ret; - - status = ctdb_client_control_recv(subreq, &ret, state, &reply); - TALLOC_FREE(subreq); - if (! 
status) { - D_ERR("control GETVNNMAP failed to node %u, ret=%d\n", - state->destnode, ret); - tevent_req_error(req, ret); - return; - } - - ret = ctdb_reply_control_getvnnmap(reply, state, &state->vnnmap); - if (ret != 0) { - D_ERR("control GETVNNMAP failed, ret=%d\n", ret); - tevent_req_error(req, ret); - return; - } - ctdb_req_control_get_capabilities(&request); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -2435,7 +2396,6 @@ static void recovery_active_done(struct tevent_req *subreq) vnnmap->generation = state->generation; - talloc_free(state->vnnmap); state->vnnmap = vnnmap; ctdb_req_control_start_recovery(&request); -- 2.25.1 From f31abdc66d5cb66eb9cf0815e71c16017014163f Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 15:07:21 +1100 Subject: [PATCH 23/38] ctdb-recovery: Consolidate node state This avoids passing multiple arguments to async computation. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 6e2f8756f1bce4dfc7fdc435e082f400116e29ec) --- ctdb/server/ctdb_recovery_helper.c | 585 +++++++++++++++++------------ 1 file changed, 346 insertions(+), 239 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 7356ebdb062..893cb15d9d6 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -70,6 +70,105 @@ static uint64_t srvid_next(void) return rec_srvid; } +/* + * Node related functions + */ + +struct node_list { + uint32_t *pnn_list; + uint32_t *caps; + uint32_t *ban_credits; + unsigned int size; + unsigned int count; +}; + +static struct node_list *node_list_init(TALLOC_CTX *mem_ctx, unsigned int size) +{ + struct node_list *nlist; + unsigned int i; + + nlist = talloc_zero(mem_ctx, struct node_list); + if (nlist == NULL) { + return NULL; + } + + nlist->pnn_list = talloc_array(nlist, uint32_t, size); + nlist->caps = talloc_zero_array(nlist, 
uint32_t, size); + nlist->ban_credits = talloc_zero_array(nlist, uint32_t, size); + + if (nlist->pnn_list == NULL || + nlist->caps == NULL || + nlist->ban_credits == NULL) { + talloc_free(nlist); + return NULL; + } + nlist->size = size; + + for (i=0; i<nlist->size; i++) { + nlist->pnn_list[i] = CTDB_UNKNOWN_PNN; + } + + return nlist; +} + +static bool node_list_add(struct node_list *nlist, uint32_t pnn) +{ + unsigned int i; + + if (nlist->count == nlist->size) { + return false; + } + + for (i=0; i<nlist->count; i++) { + if (nlist->pnn_list[i] == pnn) { + return false; + } + } + + nlist->pnn_list[nlist->count] = pnn; + nlist->count += 1; + + return true; +} + +static uint32_t *node_list_lmaster(struct node_list *nlist, + TALLOC_CTX *mem_ctx, + unsigned int *pnn_count) +{ + uint32_t *pnn_list; + unsigned int count, i; + + pnn_list = talloc_zero_array(mem_ctx, uint32_t, nlist->count); + if (pnn_list == NULL) { + return NULL; + } + + count = 0; + for (i=0; i<nlist->count; i++) { + if (!(nlist->caps[i] & CTDB_CAP_LMASTER)) { + continue; + } + + pnn_list[count] = nlist->pnn_list[i]; + count += 1; + } + + *pnn_count = count; + return pnn_list; +} + +static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) +{ + unsigned int i; + + for (i=0; i<nlist->count; i++) { + if (nlist->pnn_list[i] == pnn) { + nlist->ban_credits[i] += 1; + break; + } + } +} + /* * Recovery database functions */ @@ -665,9 +764,9 @@ struct push_database_old_state { struct ctdb_client_context *client; struct recdb_context *recdb; uint32_t *pnn_list; - int count; + unsigned int count; struct ctdb_rec_buffer *recbuf; - int index; + unsigned int index; }; static void push_database_old_push_done(struct tevent_req *subreq); @@ -676,7 +775,8 @@ static struct tevent_req *push_database_old_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, + uint32_t *pnn_list, + unsigned int count, struct recdb_context *recdb) { struct tevent_req *req, *subreq; @@ -769,7
+869,7 @@ struct push_database_new_state { struct ctdb_client_context *client; struct recdb_context *recdb; uint32_t *pnn_list; - int count; + unsigned int count; uint64_t srvid; uint32_t dmaster; int fd; @@ -787,7 +887,8 @@ static struct tevent_req *push_database_new_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, + uint32_t *pnn_list, + unsigned int count, struct recdb_context *recdb, int max_size) { @@ -991,7 +1092,8 @@ static void push_database_new_confirmed(struct tevent_req *subreq) struct ctdb_reply_control **reply; int *err_list; bool status; - int ret, i; + unsigned int i; + int ret; uint32_t num_records; status = ctdb_client_control_multi_recv(subreq, &ret, state, @@ -1062,7 +1164,7 @@ static struct tevent_req *push_database_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, uint32_t *caps, + struct node_list *nlist, struct ctdb_tunable_list *tun_list, struct recdb_context *recdb) { @@ -1070,7 +1172,7 @@ static struct tevent_req *push_database_send( struct push_database_state *state; uint32_t *old_list, *new_list; unsigned int old_count, new_count; - int i; + unsigned int i; req = tevent_req_create(mem_ctx, &state, struct push_database_state); if (req == NULL) { @@ -1082,21 +1184,19 @@ static struct tevent_req *push_database_send( old_count = 0; new_count = 0; - old_list = talloc_array(state, uint32_t, count); - new_list = talloc_array(state, uint32_t, count); + old_list = talloc_array(state, uint32_t, nlist->count); + new_list = talloc_array(state, uint32_t, nlist->count); if (tevent_req_nomem(old_list, req) || tevent_req_nomem(new_list,req)) { return tevent_req_post(req, ev); } - for (i=0; i<count; i++) { - uint32_t pnn = pnn_list[i]; - - if (caps[pnn] & CTDB_CAP_FRAGMENTED_CONTROLS) { - new_list[new_count] = pnn; + for (i=0; i<nlist->count; i++) { + if (nlist->caps[i] & CTDB_CAP_FRAGMENTED_CONTROLS) { + new_list[new_count] = nlist->pnn_list[i]; new_count += 1; } else { - old_list[old_count] = pnn; + old_list[old_count] = nlist->pnn_list[i]; old_count
+= 1; } } @@ -1183,12 +1283,10 @@ static bool push_database_recv(struct tevent_req *req, int *perr) struct collect_highseqnum_db_state { struct tevent_context *ev; struct ctdb_client_context *client; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; struct recdb_context *recdb; + uint32_t max_pnn; }; @@ -1199,8 +1297,8 @@ static struct tevent_req *collect_highseqnum_db_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t *ban_credits, uint32_t db_id, + struct node_list *nlist, + uint32_t db_id, struct recdb_context *recdb) { struct tevent_req *req, *subreq; @@ -1215,17 +1313,18 @@ static struct tevent_req *collect_highseqnum_db_send( state->ev = ev; state->client = client; - state->pnn_list = pnn_list; - state->count = count; - state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->recdb = recdb; ctdb_req_control_get_db_seqnum(&request, db_id); - subreq = ctdb_client_control_multi_send(mem_ctx, ev, client, - state->pnn_list, state->count, - TIMEOUT(), &request); + subreq = ctdb_client_control_multi_send(mem_ctx, + ev, + client, + nlist->pnn_list, + nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1244,8 +1343,10 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) struct ctdb_reply_control **reply; int *err_list; bool status; - int ret, i; + unsigned int i; + int ret; uint64_t seqnum, max_seqnum; + uint32_t max_caps; status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list, &reply); @@ -1254,8 +1355,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, err_list, + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, 
+ state->nlist->count, + err_list, &pnn); if (ret2 != 0) { D_ERR("control GET_DB_SEQNUM failed for db %s" @@ -1271,8 +1373,9 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) } max_seqnum = 0; - state->max_pnn = state->pnn_list[0]; - for (i=0; i<state->count; i++) { + state->max_pnn = state->nlist->pnn_list[0]; + max_caps = state->nlist->caps[0]; + for (i=0; i<state->nlist->count; i++) { ret = ctdb_reply_control_get_db_seqnum(reply[i], &seqnum); if (ret != 0) { tevent_req_error(req, EPROTO); @@ -1281,7 +1384,8 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) if (max_seqnum < seqnum) { max_seqnum = seqnum; - state->max_pnn = state->pnn_list[i]; + state->max_pnn = state->nlist->pnn_list[i]; + max_caps = state->nlist->caps[i]; } } @@ -1290,9 +1394,11 @@ static void collect_highseqnum_db_seqnum_done(struct tevent_req *subreq) D_INFO("Pull persistent db %s from node %d with seqnum 0x%"PRIx64"\n", recdb_name(state->recdb), state->max_pnn, max_seqnum); - subreq = pull_database_send(state, state->ev, state->client, + subreq = pull_database_send(state, + state->ev, + state->client, state->max_pnn, - state->caps[state->max_pnn], + max_caps, state->recdb); if (tevent_req_nomem(subreq, req)) { return; @@ -1313,7 +1419,7 @@ static void collect_highseqnum_db_pulldb_done(struct tevent_req *subreq) status = pull_database_recv(subreq, &ret); TALLOC_FREE(subreq); if (!
status) { - state->ban_credits[state->max_pnn] += 1; + node_list_ban_credits(state->nlist, state->max_pnn); tevent_req_error(req, ret); return; } @@ -1333,14 +1439,12 @@ static bool collect_highseqnum_db_recv(struct tevent_req *req, int *perr) struct collect_all_db_state { struct tevent_context *ev; struct ctdb_client_context *client; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; struct recdb_context *recdb; + struct ctdb_pulldb pulldb; - int index; + unsigned int index; }; static void collect_all_db_pulldb_done(struct tevent_req *subreq); @@ -1349,13 +1453,12 @@ static struct tevent_req *collect_all_db_send( TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - uint32_t *pnn_list, int count, uint32_t *caps, - uint32_t *ban_credits, uint32_t db_id, + struct node_list *nlist, + uint32_t db_id, struct recdb_context *recdb) { struct tevent_req *req, *subreq; struct collect_all_db_state *state; - uint32_t pnn; req = tevent_req_create(mem_ctx, &state, struct collect_all_db_state); @@ -1365,17 +1468,17 @@ static struct tevent_req *collect_all_db_send( state->ev = ev; state->client = client; - state->pnn_list = pnn_list; - state->count = count; - state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->recdb = recdb; state->index = 0; - pnn = state->pnn_list[state->index]; - - subreq = pull_database_send(state, ev, client, pnn, caps[pnn], recdb); + subreq = pull_database_send(state, + ev, + client, + nlist->pnn_list[state->index], + nlist->caps[state->index], + recdb); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1390,28 +1493,30 @@ static void collect_all_db_pulldb_done(struct tevent_req *subreq) subreq, struct tevent_req); struct collect_all_db_state *state = tevent_req_data( req, struct collect_all_db_state); - uint32_t pnn; int ret; bool status; status = pull_database_recv(subreq, 
&ret); TALLOC_FREE(subreq); if (! status) { - pnn = state->pnn_list[state->index]; - state->ban_credits[pnn] += 1; + node_list_ban_credits(state->nlist, + state->nlist->pnn_list[state->index]); tevent_req_error(req, ret); return; } state->index += 1; - if (state->index == state->count) { + if (state->index == state->nlist->count) { tevent_req_done(req); return; } - pnn = state->pnn_list[state->index]; - subreq = pull_database_send(state, state->ev, state->client, - pnn, state->caps[pnn], state->recdb); + subreq = pull_database_send(state, + state->ev, + state->client, + state->nlist->pnn_list[state->index], + state->nlist->caps[state->index], + state->recdb); if (tevent_req_nomem(subreq, req)) { return; } @@ -1441,10 +1546,7 @@ struct recover_db_state { struct tevent_context *ev; struct ctdb_client_context *client; struct ctdb_tunable_list *tun_list; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t db_id; uint8_t db_flags; @@ -1469,11 +1571,10 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, struct ctdb_tunable_list *tun_list, - uint32_t *pnn_list, int count, - uint32_t *caps, - uint32_t *ban_credits, + struct node_list *nlist, uint32_t generation, - uint32_t db_id, uint8_t db_flags) + uint32_t db_id, + uint8_t db_flags) { struct tevent_req *req, *subreq; struct recover_db_state *state; @@ -1487,10 +1588,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->ev = ev; state->client = client; state->tun_list = tun_list; - state->pnn_list = pnn_list; - state->count = count; - state->caps = caps; - state->ban_credits = ban_credits; + state->nlist = nlist; state->db_id = db_id; state->db_flags = db_flags; @@ -1580,10 +1678,13 @@ static void recover_db_path_done(struct tevent_req *subreq) talloc_free(reply); ctdb_req_control_db_freeze(&request, state->db_id); - subreq = ctdb_client_control_multi_send(state, 
state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1608,14 +1709,16 @@ static void recover_db_freeze_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, err_list, + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, &pnn); if (ret2 != 0) { D_ERR("control FREEZE_DB failed for db %s" " on node %u, ret=%d\n", state->db_name, pnn, ret2); - state->ban_credits[pnn] += 1; + + node_list_ban_credits(state->nlist, pnn); } else { D_ERR("control FREEZE_DB failed for db %s, ret=%d\n", state->db_name, ret); @@ -1625,10 +1728,13 @@ static void recover_db_freeze_done(struct tevent_req *subreq) } ctdb_req_control_db_transaction_start(&request, &state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1652,9 +1758,10 @@ static void recover_db_transaction_started(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control TRANSACTION_DB failed for db=%s" " on node %u, ret=%d\n", @@ -1677,17 +1784,19 @@ static void recover_db_transaction_started(struct tevent_req *subreq) if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { - subreq = collect_highseqnum_db_send( - state, state->ev, state->client, - 
state->pnn_list, state->count, state->caps, - state->ban_credits, state->db_id, - state->recdb); + subreq = collect_highseqnum_db_send(state, + state->ev, + state->client, + state->nlist, + state->db_id, + state->recdb); } else { - subreq = collect_all_db_send( - state, state->ev, state->client, - state->pnn_list, state->count, state->caps, - state->ban_credits, state->db_id, - state->recdb); + subreq = collect_all_db_send(state, + state->ev, + state->client, + state->nlist, + state->db_id, + state->recdb); } if (tevent_req_nomem(subreq, req)) { return; @@ -1718,10 +1827,13 @@ static void recover_db_collect_done(struct tevent_req *subreq) } ctdb_req_control_wipe_database(&request, &state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1745,9 +1857,10 @@ static void recover_db_wipedb_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control WIPEDB failed for db %s on node %u," " ret=%d\n", state->db_name, pnn, ret2); @@ -1759,9 +1872,11 @@ static void recover_db_wipedb_done(struct tevent_req *subreq) return; } - subreq = push_database_send(state, state->ev, state->client, - state->pnn_list, state->count, - state->caps, state->tun_list, + subreq = push_database_send(state, + state->ev, + state->client, + state->nlist, + state->tun_list, state->recdb); if (tevent_req_nomem(subreq, req)) { return; @@ -1789,10 +1904,13 @@ static void recover_db_pushdb_done(struct tevent_req *subreq) TALLOC_FREE(state->recdb); ctdb_req_control_db_transaction_commit(&request, 
&state->transdb); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1817,9 +1935,10 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control DB_TRANSACTION_COMMIT failed for db %s" " on node %u, ret=%d\n", @@ -1833,10 +1952,13 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) } ctdb_req_control_db_thaw(&request, state->db_id); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -1860,9 +1982,10 @@ static void recover_db_thaw_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control DB_THAW failed for db %s on node %u," " ret=%d\n", state->db_name, pnn, ret2); @@ -1901,10 +2024,7 @@ struct db_recovery_one_state { struct ctdb_client_context *client; struct ctdb_dbid_map *dbmap; struct ctdb_tunable_list *tun_list; - uint32_t *pnn_list; - int count; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; uint32_t generation; uint32_t db_id; uint8_t db_flags; @@ -1918,9 +2038,7 @@ static struct tevent_req 
*db_recovery_send(TALLOC_CTX *mem_ctx, struct ctdb_client_context *client, struct ctdb_dbid_map *dbmap, struct ctdb_tunable_list *tun_list, - uint32_t *pnn_list, int count, - uint32_t *caps, - uint32_t *ban_credits, + struct node_list *nlist, uint32_t generation) { struct tevent_req *req, *subreq; @@ -1954,17 +2072,18 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->client = client; substate->dbmap = dbmap; substate->tun_list = tun_list; - substate->pnn_list = pnn_list; - substate->count = count; - substate->caps = caps; - substate->ban_credits = ban_credits; + substate->nlist = nlist; substate->generation = generation; substate->db_id = dbmap->dbs[i].db_id; substate->db_flags = dbmap->dbs[i].flags; - subreq = recover_db_send(state, ev, client, tun_list, - pnn_list, count, caps, ban_credits, - generation, substate->db_id, + subreq = recover_db_send(state, + ev, + client, + tun_list, + nlist, + generation, + substate->db_id, substate->db_flags); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); @@ -1996,11 +2115,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) substate->num_fails += 1; if (substate->num_fails < NUM_RETRIES) { - subreq = recover_db_send(state, state->ev, substate->client, + subreq = recover_db_send(state, + state->ev, + substate->client, substate->tun_list, - substate->pnn_list, substate->count, - substate->caps, substate->ban_credits, - substate->generation, substate->db_id, + substate->nlist, + substate->generation, + substate->db_id, substate->db_flags); if (tevent_req_nomem(subreq, req)) { goto failed; @@ -2062,12 +2183,8 @@ struct recovery_state { struct tevent_context *ev; struct ctdb_client_context *client; uint32_t generation; - uint32_t *pnn_list; - unsigned int count; uint32_t destnode; - struct ctdb_node_map *nodemap; - uint32_t *caps; - uint32_t *ban_credits; + struct node_list *nlist; struct ctdb_tunable_list *tun_list; struct ctdb_vnn_map *vnnmap; struct ctdb_dbid_map 
*dbmap; @@ -2165,6 +2282,8 @@ static void recovery_nodemap_done(struct tevent_req *subreq) req, struct recovery_state); struct ctdb_reply_control *reply; struct ctdb_req_control request; + struct ctdb_node_map *nodemap; + unsigned int i; bool status; int ret; @@ -2177,31 +2296,34 @@ static void recovery_nodemap_done(struct tevent_req *subreq) return; } - ret = ctdb_reply_control_get_nodemap(reply, state, &state->nodemap); + ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); if (ret != 0) { D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); tevent_req_error(req, ret); return; } - state->count = list_of_active_nodes(state->nodemap, CTDB_UNKNOWN_PNN, - state, &state->pnn_list); - if (state->count <= 0) { - tevent_req_error(req, ENOMEM); + state->nlist = node_list_init(state, nodemap->num); + if (tevent_req_nomem(state->nlist, req)) { return; } - state->ban_credits = talloc_zero_array(state, uint32_t, - state->nodemap->num); - if (tevent_req_nomem(state->ban_credits, req)) { - return; + for (i=0; i<nodemap->num; i++) { + if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + continue; + } + + node_list_add(state->nlist, nodemap->node[i].pnn); } ctdb_req_control_get_capabilities(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2228,9 +2350,10 @@ static void recovery_capabilities_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("control GET_CAPABILITIES failed on node %u," " ret=%d\n", pnn, ret2); @@ -2242,25 +2365,18 @@ static void recovery_capabilities_done(struct
tevent_req *subreq) return; } - /* Make the array size same as nodemap */ - state->caps = talloc_zero_array(state, uint32_t, - state->nodemap->num); - if (tevent_req_nomem(state->caps, req)) { - return; - } - - for (i=0; i<state->count; i++) { - uint32_t pnn; + for (i=0; i<state->nlist->count; i++) { + uint32_t caps; - pnn = state->pnn_list[i]; - ret = ctdb_reply_control_get_capabilities(reply[i], - &state->caps[pnn]); + ret = ctdb_reply_control_get_capabilities(reply[i], &caps); if (ret != 0) { D_ERR("control GET_CAPABILITIES failed on node %u\n", - pnn); + state->nlist->pnn_list[i]); tevent_req_error(req, EPROTO); return; } + + state->nlist->caps[i] = caps; } talloc_free(reply); @@ -2303,10 +2419,13 @@ static void recovery_dbmap_done(struct tevent_req *subreq) } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2323,7 +2442,6 @@ static void recovery_active_done(struct tevent_req *subreq) struct ctdb_vnn_map *vnnmap; int *err_list; int ret; - unsigned int count, i; bool status; status = ctdb_client_control_multi_recv(subreq, &ret, NULL, &err_list, @@ -2333,9 +2451,10 @@ static void recovery_active_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to set recovery mode ACTIVE on node %u," " ret=%d\n", pnn, ret2); @@ -2350,48 +2469,20 @@ static void recovery_active_done(struct tevent_req *subreq) D_ERR("Set recovery mode to ACTIVE\n"); /* Calculate new VNNMAP */ - count = 0; - for (i=0; i<state->nodemap->num; i++) { - if
(state->nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { - continue; - } - if (!(state->caps[i] & CTDB_CAP_LMASTER)) { - continue; - } - count += 1; - } - - if (count == 0) { - D_WARNING("No active lmasters found. Adding recmaster anyway\n"); - } - vnnmap = talloc_zero(state, struct ctdb_vnn_map); if (tevent_req_nomem(vnnmap, req)) { return; } - vnnmap->size = (count == 0 ? 1 : count); - vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size); + vnnmap->map = node_list_lmaster(state->nlist, vnnmap, &vnnmap->size); if (tevent_req_nomem(vnnmap->map, req)) { return; } - if (count == 0) { + if (vnnmap->size == 0) { + D_WARNING("No active lmasters found. Adding recmaster anyway\n"); vnnmap->map[0] = state->destnode; - } else { - count = 0; - for (i=0; i<state->nodemap->num; i++) { - if (state->nodemap->node[i].flags & - NODE_FLAGS_INACTIVE) { - continue; - } - if (!(state->caps[i] & CTDB_CAP_LMASTER)) { - continue; - } - - vnnmap->map[count] = state->nodemap->node[i].pnn; - count += 1; - } + vnnmap->size = 1; } vnnmap->generation = state->generation; @@ -2399,10 +2490,13 @@ static void recovery_active_done(struct tevent_req *subreq) state->vnnmap = vnnmap; ctdb_req_control_start_recovery(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2427,9 +2521,10 @@ static void recovery_start_recovery_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to run start_recovery event on node %u," " ret=%d\n", pnn, ret2); @@ -2444,10 +2539,13 @@ static void
recovery_start_recovery_done(struct tevent_req *subreq) D_ERR("start_recovery event finished\n"); ctdb_req_control_setvnnmap(&request, state->vnnmap); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2471,9 +2569,10 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to update VNNMAP on node %u, ret=%d\n", pnn, ret2); @@ -2486,10 +2585,12 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) D_NOTICE("updated VNNMAP\n"); - subreq = db_recovery_send(state, state->ev, state->client, - state->dbmap, state->tun_list, - state->pnn_list, state->count, - state->caps, state->ban_credits, + subreq = db_recovery_send(state, + state->ev, + state->client, + state->dbmap, + state->tun_list, + state->nlist, state->vnnmap->generation); if (tevent_req_nomem(subreq, req)) { return; @@ -2522,12 +2623,10 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) return; } - for (i=0; i<state->count; i++) { - uint32_t pnn; - pnn = state->pnn_list[i]; - if (state->ban_credits[pnn] > max_credits) { - max_pnn = pnn; - max_credits = state->ban_credits[pnn]; + for (i=0; i<state->nlist->count; i++) { + if (state->nlist->ban_credits[i] > max_credits) { + max_pnn = state->nlist->pnn_list[i]; + max_credits = state->nlist->ban_credits[i]; } } @@ -2563,10 +2662,13 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_NORMAL); - subreq = ctdb_client_control_multi_send(state,
state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2617,9 +2719,10 @@ static void recovery_normal_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to set recovery mode NORMAL on node %u," " ret=%d\n", pnn, ret2); @@ -2634,10 +2737,13 @@ static void recovery_normal_done(struct tevent_req *subreq) D_ERR("Set recovery mode to NORMAL\n"); ctdb_req_control_end_recovery(&request); - subreq = ctdb_client_control_multi_send(state, state->ev, + subreq = ctdb_client_control_multi_send(state, + state->ev, state->client, - state->pnn_list, state->count, - TIMEOUT(), &request); + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2661,9 +2767,10 @@ static void recovery_end_recovery_done(struct tevent_req *subreq) int ret2; uint32_t pnn; - ret2 = ctdb_client_control_multi_error(state->pnn_list, - state->count, - err_list, &pnn); + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); if (ret2 != 0) { D_ERR("failed to run recovered event on node %u," " ret=%d\n", pnn, ret2); -- 2.25.1 From 0ac2da85acec21ae454ae6981bb1d4e983a3badf Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 18 Feb 2020 16:17:00 +1100 Subject: [PATCH 24/38] ctdb-recovery: Don't trust nodemap obtained from local node It's possible to have a node stopped, but recovery master not yet updated flags on the local ctdb daemon when recovery is started. So do not trust the list of active nodes obtained from the local node. 
Query the connected nodes to calculate the list of active nodes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit c6a0ff1bed0265e44fd6135d16bfc41919fe5bf5) --- ctdb/server/ctdb_recovery_helper.c | 116 ++++++++++++++++++++++++++++- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 893cb15d9d6..5f38d55e50e 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2168,7 +2168,7 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) * Run the parallel database recovery * * - Get tunables - * - Get nodemap + * - Get nodemap from all nodes * - Get capabilities from all nodes * - Get dbmap * - Set RECOVERY_ACTIVE @@ -2192,6 +2192,7 @@ struct recovery_state { static void recovery_tunables_done(struct tevent_req *subreq); static void recovery_nodemap_done(struct tevent_req *subreq); +static void recovery_nodemap_verify(struct tevent_req *subreq); static void recovery_capabilities_done(struct tevent_req *subreq); static void recovery_dbmap_done(struct tevent_req *subreq); static void recovery_active_done(struct tevent_req *subreq); @@ -2309,13 +2310,122 @@ static void recovery_nodemap_done(struct tevent_req *subreq) } for (i=0; i<nodemap->num; i++) { - if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + bool ok; + + if (nodemap->node[i].flags & NODE_FLAGS_DISCONNECTED) { continue; } - node_list_add(state->nlist, nodemap->node[i].pnn); + ok = node_list_add(state->nlist, nodemap->node[i].pnn); + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } } + talloc_free(nodemap); + talloc_free(reply); + + /* Verify flags by getting local node information from each node */ + ctdb_req_control_get_nodemap(&request); + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(),
+ &request); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, recovery_nodemap_verify, req); +} + +static void recovery_nodemap_verify(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct recovery_state *state = tevent_req_data( + req, struct recovery_state); + struct ctdb_req_control request; + struct ctdb_reply_control **reply; + struct node_list *nlist; + unsigned int i; + int *err_list; + int ret; + bool status; + + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); + TALLOC_FREE(subreq); + if (! status) { + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_NODEMAP failed on node %u," + " ret=%d\n", pnn, ret2); + } else { + D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); + } + tevent_req_error(req, ret); + return; + } + + nlist = node_list_init(state, state->nlist->size); + if (tevent_req_nomem(nlist, req)) { + return; + } + + for (i=0; inlist->count; i++) { + struct ctdb_node_map *nodemap = NULL; + uint32_t pnn, flags; + unsigned int j; + bool ok; + + pnn = state->nlist->pnn_list[i]; + ret = ctdb_reply_control_get_nodemap(reply[i], + state, + &nodemap); + if (ret != 0) { + D_ERR("control GET_NODEMAP failed on node %u\n", pnn); + tevent_req_error(req, EPROTO); + return; + } + + flags = NODE_FLAGS_DISCONNECTED; + for (j=0; jnum; j++) { + if (nodemap->node[j].pnn == pnn) { + flags = nodemap->node[j].flags; + break; + } + } + + TALLOC_FREE(nodemap); + + if (flags & NODE_FLAGS_INACTIVE) { + continue; + } + + ok = node_list_add(nlist, pnn); + if (!ok) { + tevent_req_error(req, EINVAL); + return; + } + } + + talloc_free(reply); + + talloc_free(state->nlist); + state->nlist = nlist; + ctdb_req_control_get_capabilities(&request); subreq = ctdb_client_control_multi_send(state, state->ev, -- 2.25.1 
From 981a9d56be37d75128b66ef1d69f033ce5b9c322 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 2 Mar 2020 16:16:26 +1100 Subject: [PATCH 25/38] ctdb-recovery: Refactor banning a node into separate computation If a node is marked for banning, confirm that it's not become inactive during the recovery. If yes, then don't ban the node. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 1c56d6413f86cc15ebac232f39ef1e2a53ae4297) --- ctdb/server/ctdb_recovery_helper.c | 261 +++++++++++++++++++++++------ 1 file changed, 208 insertions(+), 53 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 5f38d55e50e..1f3b58312c4 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2163,6 +2163,206 @@ static bool db_recovery_recv(struct tevent_req *req, unsigned int *count) return true; } +struct ban_node_state { + struct tevent_context *ev; + struct ctdb_client_context *client; + struct ctdb_tunable_list *tun_list; + struct node_list *nlist; + uint32_t destnode; + + uint32_t max_pnn; +}; + +static bool ban_node_check(struct tevent_req *req); +static void ban_node_check_done(struct tevent_req *subreq); +static void ban_node_done(struct tevent_req *subreq); + +static struct tevent_req *ban_node_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct ctdb_client_context *client, + struct ctdb_tunable_list *tun_list, + struct node_list *nlist) +{ + struct tevent_req *req; + struct ban_node_state *state; + bool ok; + + req = tevent_req_create(mem_ctx, &state, struct ban_node_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->client = client; + state->tun_list = tun_list; + state->nlist = nlist; + state->destnode = ctdb_client_pnn(client); + + /* Bans are not enabled */ + if (state->tun_list->enable_bans == 0) { + D_ERR("Bans are not enabled\n"); + tevent_req_done(req); + 
return tevent_req_post(req, ev); + } + + ok = ban_node_check(req); + if (!ok) { + return tevent_req_post(req, ev); + } + + return req; +} + +static bool ban_node_check(struct tevent_req *req) +{ + struct tevent_req *subreq; + struct ban_node_state *state = tevent_req_data( + req, struct ban_node_state); + struct ctdb_req_control request; + unsigned max_credits = 0, i; + + for (i=0; i<state->nlist->count; i++) { + if (state->nlist->ban_credits[i] > max_credits) { + state->max_pnn = state->nlist->pnn_list[i]; + max_credits = state->nlist->ban_credits[i]; + } + } + + if (max_credits < NUM_RETRIES) { + tevent_req_done(req); + return false; + } + + ctdb_req_control_get_nodemap(&request); + subreq = ctdb_client_control_send(state, + state->ev, + state->client, + state->max_pnn, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return false; + } + tevent_req_set_callback(subreq, ban_node_check_done, req); + + return true; +} + +static void ban_node_check_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct ban_node_state *state = tevent_req_data( + req, struct ban_node_state); + struct ctdb_reply_control *reply; + struct ctdb_node_map *nodemap; + struct ctdb_req_control request; + struct ctdb_ban_state ban; + unsigned int i; + int ret; + bool ok; + + ok = ctdb_client_control_recv(subreq, &ret, state, &reply); + TALLOC_FREE(subreq); + if (!ok) { + D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n", + state->max_pnn, ret); + tevent_req_error(req, ret); + return; + } + + ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap); + if (ret != 0) { + D_ERR("control GET_NODEMAP failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + for (i=0; i<nodemap->num; i++) { + if (nodemap->node[i].pnn != state->max_pnn) { + continue; + } + + /* If the node became inactive, reset ban_credits */ + if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) { + unsigned int j; + + for (j=0;
j<state->nlist->count; j++) { + if (state->nlist->pnn_list[j] == + state->max_pnn) { + state->nlist->ban_credits[j] = 0; + break; + } + } + state->max_pnn = CTDB_UNKNOWN_PNN; + } + } + + talloc_free(nodemap); + talloc_free(reply); + + /* If node becames inactive during recovery, pick next */ + if (state->max_pnn == CTDB_UNKNOWN_PNN) { + (void) ban_node_check(req); + return; + } + + ban = (struct ctdb_ban_state) { + .pnn = state->max_pnn, + .time = state->tun_list->recovery_ban_period, + }; + + D_ERR("Banning node %u for %u seconds\n", ban.pnn, ban.time); + + ctdb_req_control_set_ban_state(&request, &ban); + subreq = ctdb_client_control_send(state, + state->ev, + state->client, + ban.pnn, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, ban_node_done, req); +} + +static void ban_node_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct node_ban_state *state = tevent_req_data( + req, struct node_ban_state); + struct ctdb_reply_control *reply; + int ret; + bool status; + + status = ctdb_client_control_recv(subreq, &ret, state, &reply); + TALLOC_FREE(subreq); + if (! status) { + tevent_req_error(req, ret); + return; + } + + ret = ctdb_reply_control_set_ban_state(reply); + if (ret != 0) { + D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); + tevent_req_error(req, ret); + return; + } + + talloc_free(reply); + tevent_req_done(req); +} + +static bool ban_node_recv(struct tevent_req *req, int *perr) +{ + if (tevent_req_is_unix_error(req, perr)) { + return false; + } + + return true; +} /* * Run the parallel database recovery @@ -2724,50 +2924,15 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); if (!
status) { - uint32_t max_pnn = CTDB_UNKNOWN_PNN, max_credits = 0; - unsigned int i; - - /* Bans are not enabled */ - if (state->tun_list->enable_bans == 0) { - tevent_req_error(req, EIO); + subreq = ban_node_send(state, + state->ev, + state->client, + state->tun_list, + state->nlist); + if (tevent_req_nomem(subreq, req)) { return; } - - for (i=0; i<state->nlist->count; i++) { - if (state->nlist->ban_credits[i] > max_credits) { - max_pnn = state->nlist->pnn_list[i]; - max_credits = state->nlist->ban_credits[i]; - } - } - - /* If pulling database fails multiple times */ - if (max_credits >= NUM_RETRIES) { - struct ctdb_ban_state ban_state = { - .pnn = max_pnn, - .time = state->tun_list->recovery_ban_period, - }; - - D_ERR("Banning node %u for %u seconds\n", - ban_state.pnn, - ban_state.time); - - ctdb_req_control_set_ban_state(&request, - &ban_state); - subreq = ctdb_client_control_send(state, - state->ev, - state->client, - ban_state.pnn, - TIMEOUT(), - &request); - if (tevent_req_nomem(subreq, req)) { - return; - } - tevent_req_set_callback(subreq, - recovery_failed_done, - req); - } else { - tevent_req_error(req, EIO); - } + tevent_req_set_callback(subreq, recovery_failed_done, req); return; } @@ -2789,25 +2954,15 @@ static void recovery_failed_done(struct tevent_req *subreq) { struct tevent_req *req = tevent_req_callback_data( subreq, struct tevent_req); - struct recovery_state *state = tevent_req_data( - req, struct recovery_state); - struct ctdb_reply_control *reply; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ban_node_recv(subreq, &ret); TALLOC_FREE(subreq); if (!
status) { D_ERR("failed to ban node, ret=%d\n", ret); - goto done; } - ret = ctdb_reply_control_set_ban_state(reply); - if (ret != 0) { - D_ERR("control SET_BAN_STATE failed, ret=%d\n", ret); - } - -done: tevent_req_error(req, EIO); } -- 2.25.1 From fb977f726aeca56126a504e922ded88645a788ee Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 20 Feb 2020 13:48:13 +1100 Subject: [PATCH 26/38] ctdb-daemon: Fix database attach deferral logic Commit 3cc230b5eeca749ab68d19cfda969f72c269f1f6 says: Dont allow clients to connect to databases untile we are well past and through the initial recovery phase It is unclear what this commit was attempting to do. The commit message implies that more attaches should be deferred but the code change adds a conjunction that causes less attaches to be deferred. In particular, no attaches will be deferred after startup is complete. This seems wrong. To implement what seems to be stated in the commit message an "or" needs to be used so that non-recovery daemon attaches are deferred either when in recovery or before startup is complete. Making this change highlights that attaches need to be allowed during the "startup" event because this is when smbd is started. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c6c89495fbe9b6f238d10a538eccc92b937a69de) --- ctdb/server/ctdb_ltdb_server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index 970eb54b00b..a6709ff72de 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,9 +1135,9 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } - if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE && - client->pid != ctdb->recoverd_pid && - ctdb->runstate < CTDB_RUNSTATE_RUNNING) { + if (client->pid != ctdb->recoverd_pid && + (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || + ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); if (da_ctx == NULL) { -- 2.25.1 From fa14e812efc0ae395819d943f5b22a227f47d56a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 26 Feb 2020 17:03:49 +1100 Subject: [PATCH 27/38] ctdb-daemon: Remove unused old client database functions BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit fc23cd1b9cdd1d70067491614b16e616291e8ff2) --- ctdb/include/ctdb_client.h | 22 ------ ctdb/include/ctdb_private.h | 2 + ctdb/server/ctdb_client.c | 146 ------------------------------------ 3 files changed, 2 insertions(+), 168 deletions(-) diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h index ef4950ab533..d1dce1e68d8 100644 --- a/ctdb/include/ctdb_client.h +++ b/ctdb/include/ctdb_client.h @@ -176,9 +176,6 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *runstate); -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct 
timeval timeout, - uint32_t destnode, uint32_t dbid, - TALLOC_CTX *mem_ctx, const char **path); int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **name); @@ -190,25 +187,6 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level); -/* - attach to a ctdb database -*/ -int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, - int *tdb_flags); - -struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, - struct timeval timeout, - const char *name, - uint8_t db_flags); - -/* a ctdb call function */ -typedef int (*ctdb_fn_t)(struct ctdb_call_info *); - -/* - setup a ctdb call function -*/ -int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id); - int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode); diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index d3e70b5e2fa..2f37db36e0c 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -36,6 +36,8 @@ struct ctdb_tcp_array { /* an installed ctdb remote call */ +typedef int (*ctdb_fn_t)(struct ctdb_call_info *); + struct ctdb_registered_call { struct ctdb_registered_call *next, *prev; uint32_t id; diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index dc7836abb2e..4c67990c0b5 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1107,36 +1107,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, return 0; } -/* - find the real path to a ltdb - */ -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, - const char **path) -{ - int ret; - int32_t res; - TDB_DATA data; - - data.dptr = (uint8_t *)&dbid; - data.dsize = sizeof(dbid); - - ret = ctdb_control(ctdb, destnode, 0, - 
CTDB_CONTROL_GETDBPATH, 0, data, - mem_ctx, &data, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - return -1; - } - - (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); - if ((*path) == NULL) { - return -1; - } - - talloc_free(data.dptr); - - return 0; -} - /* find the name of a db */ @@ -1233,122 +1203,6 @@ int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32 return 0; } -/* - * Get db open flags - */ -int ctdb_ctrl_db_open_flags(struct ctdb_context *ctdb, uint32_t db_id, - int *tdb_flags) -{ - TDB_DATA indata, outdata; - int ret; - int32_t res; - - indata.dptr = (uint8_t *)&db_id; - indata.dsize = sizeof(db_id); - - ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, - CTDB_CONTROL_DB_OPEN_FLAGS, 0, indata, - ctdb, &outdata, &res, NULL, NULL); - if (ret != 0 || res != 0) { - D_ERR("ctdb control for db open flags failed\n"); - return -1; - } - - if (outdata.dsize != sizeof(int32_t)) { - D_ERR(__location__ " expected %zi bytes, received %zi bytes\n", - sizeof(int32_t), outdata.dsize); - talloc_free(outdata.dptr); - return -1; - } - - *tdb_flags = *(int32_t *)outdata.dptr; - talloc_free(outdata.dptr); - return 0; -} - -/* - attach to a specific database - client call -*/ -struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, - struct timeval timeout, - const char *name, - uint8_t db_flags) -{ - struct ctdb_db_context *ctdb_db; - int ret; - int tdb_flags; - - ctdb_db = ctdb_db_handle(ctdb, name); - if (ctdb_db) { - return ctdb_db; - } - - ctdb_db = talloc_zero(ctdb, struct ctdb_db_context); - CTDB_NO_MEMORY_NULL(ctdb, ctdb_db); - - ctdb_db->ctdb = ctdb; - ctdb_db->db_name = talloc_strdup(ctdb_db, name); - CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name); - - /* tell ctdb daemon to attach */ - ret = ctdb_ctrl_createdb(ctdb, timeout, CTDB_CURRENT_NODE, - ctdb_db, name, db_flags, &ctdb_db->db_id); - if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name)); - talloc_free(ctdb_db); - return 
NULL; - } - - ret = ctdb_ctrl_getdbpath(ctdb, timeout, CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path); - if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name)); - talloc_free(ctdb_db); - return NULL; - } - - ret = ctdb_ctrl_db_open_flags(ctdb, ctdb_db->db_id, &tdb_flags); - if (ret != 0) { - D_ERR("Failed to get tdb_flags for database '%s'\n", name); - talloc_free(ctdb_db); - return NULL; - } - - ctdb_db->ltdb = tdb_wrap_open(ctdb_db, ctdb_db->db_path, 0, tdb_flags, - O_RDWR, 0); - if (ctdb_db->ltdb == NULL) { - ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path); - talloc_free(ctdb_db); - return NULL; - } - - ctdb_db->db_flags = db_flags; - - DLIST_ADD(ctdb->db_list, ctdb_db); - - /* add well known functions */ - ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC); - ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC); - ctdb_set_call(ctdb_db, ctdb_fetch_with_header_func, CTDB_FETCH_WITH_HEADER_FUNC); - - return ctdb_db; -} - -/* - setup a call for a database - */ -int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id) -{ - struct ctdb_registered_call *call; - - /* register locally */ - call = talloc(ctdb_db, struct ctdb_registered_call); - call->fn = fn; - call->id = id; - - DLIST_ADD(ctdb_db->calls, call); - return 0; -} - /* Freeze all databases */ int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode) -- 2.25.1 From 81501d83f3e81fa874e85d20fe947350f49fa096 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 11:04:14 +1100 Subject: [PATCH 28/38] ctdb-protocol: Add control flag CTDB_CTRL_FLAG_ATTACH_RECOVERY BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 17ed0425904a98624284d351ab7617b3e02c0f7b) --- ctdb/protocol/protocol.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h 
index 43175ae3a95..04a651018be 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -944,6 +944,7 @@ struct ctdb_req_control { #define CTDB_CTRL_FLAG_OPCODE_SPECIFIC 0xFFFF0000 /* Ugly overloading of this field... */ #define CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE 0x00010000 +#define CTDB_CTRL_FLAG_ATTACH_RECOVERY 0x00020000 uint32_t flags; struct ctdb_req_control_data rdata; }; -- 2.25.1 From 9ce65cca8d9568638a2c574d2195f624a5a00ae6 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 11:13:05 +1100 Subject: [PATCH 29/38] ctdb-recovery: Use CTDB_CTRL_FLAG_ATTACH_RECOVERY to attach during recovery ctdb_ctrl_createdb() is only called by the recovery daemon, so this is a safe, temporary change. This is temporary because ctdb_ctrl_createdb(), create_missing_remote_databases() and create_missing_local_databases() will all go away soon. Note that this doesn't cause a change in behaviour. The main daemon will still only defer attaches from non-recoverd processes during recovery. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 98e3d0db2bc5f33217e26fab1dfb4bb91eae534f) --- ctdb/server/ctdb_client.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index 4c67990c0b5..26055698568 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1160,8 +1160,17 @@ int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, opcode = CTDB_CONTROL_DB_ATTACH; } - ret = ctdb_control(ctdb, destnode, 0, opcode, 0, data, - mem_ctx, &data, &res, &timeout, NULL); + ret = ctdb_control(ctdb, + destnode, + 0, + opcode, + CTDB_CTRL_FLAG_ATTACH_RECOVERY, + data, + mem_ctx, + &data, + &res, + &timeout, + NULL); if (ret != 0 || res != 0) { return -1; -- 2.25.1 From f75adc0c0b7c7fcd34c35db5d9dcdf5cdc62e2f6 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Wed, 26 Feb 2020 11:50:09 +1100 Subject: [PATCH 30/38] ctdb-daemon: Respect CTDB_CTRL_FLAG_ATTACH_RECOVERY when attaching databases This is currently only set by the recovery daemon when it attaches missing databases, so there is no obvious behaviour change. However, attaching missing databases can now be moved to the recovery helper as long as it sets this flag. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 7e5a8a4884ea87bb985fe0e2b65ff130fc2ba8aa) --- ctdb/server/ctdb_ltdb_server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index a6709ff72de..e050b7304fe 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,7 +1135,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } - if (client->pid != ctdb->recoverd_pid && + if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context); -- 2.25.1 From 297b7ee6e2b70c6c7db69665994f60ffb91a39f7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 12:24:39 +1100 Subject: [PATCH 31/38] ctdb-recovery: Replace use of ctdb_dbid_map with local db_list This will be used to build a merged list of databases from all nodes, allowing the recovery helper to create missing databases. It would be possible to also include the db_name field in this structure but that would cause a lot of churn. This field is used locally in the recovery of each database so can continue to live in the relevant state structure(s). 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 4c0b9c36050a0ed8a180d4ac1853224089528e8e) --- ctdb/server/ctdb_recovery_helper.c | 176 ++++++++++++++++++++++++++--- 1 file changed, 161 insertions(+), 15 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 1f3b58312c4..df96240d8da 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -27,6 +27,7 @@ #include #include "lib/tdb_wrap/tdb_wrap.h" +#include "lib/util/dlinklist.h" #include "lib/util/sys_rw.h" #include "lib/util/time.h" #include "lib/util/tevent_unix.h" @@ -169,6 +170,130 @@ static void node_list_ban_credits(struct node_list *nlist, uint32_t pnn) } } +/* + * Database list functions + * + * Simple, naive implementation that could be updated to a db_hash or similar + */ + +struct db { + struct db *prev, *next; + + uint32_t db_id; + uint32_t db_flags; + uint32_t *pnn_list; + unsigned int num_nodes; +}; + +struct db_list { + unsigned int num_dbs; + struct db *db; + unsigned int num_nodes; +}; + +static struct db_list *db_list_init(TALLOC_CTX *mem_ctx, unsigned int num_nodes) +{ + struct db_list *l; + + l = talloc_zero(mem_ctx, struct db_list); + l->num_nodes = num_nodes; + + return l; +} + +static struct db *db_list_find(struct db_list *dblist, uint32_t db_id) +{ + struct db *db; + + if (dblist == NULL) { + return NULL; + } + + db = dblist->db; + while (db != NULL && db->db_id != db_id) { + db = db->next; + } + + return db; +} + +static int db_list_add(struct db_list *dblist, + uint32_t db_id, + uint32_t db_flags, + uint32_t node) +{ + struct db *db = NULL; + + if (dblist == NULL) { + return EINVAL; + } + + db = talloc_zero(dblist, struct db); + if (db == NULL) { + return ENOMEM; + } + + db->db_id = db_id; + db->db_flags = db_flags; + db->pnn_list = talloc_zero_array(db, uint32_t, dblist->num_nodes); + if (db->pnn_list == NULL) { + 
talloc_free(db); + return ENOMEM; + } + db->pnn_list[0] = node; + db->num_nodes = 1; + + DLIST_ADD_END(dblist->db, db); + dblist->num_dbs++; + + return 0; +} + +static int db_list_check_and_add(struct db_list *dblist, + uint32_t db_id, + uint32_t db_flags, + uint32_t node) +{ + struct db *db = NULL; + int ret; + + /* + * These flags are masked out because they are only set on a + * node when a client attaches to that node, so they might not + * be set yet. They can't be passed as part of the attach, so + * they're no use here. + */ + db_flags &= ~(CTDB_DB_FLAGS_READONLY | CTDB_DB_FLAGS_STICKY); + + if (dblist == NULL) { + return EINVAL; + } + + db = db_list_find(dblist, db_id); + if (db == NULL) { + ret = db_list_add(dblist, db_id, db_flags, node); + return ret; + } + + if (db->db_flags != db_flags) { + D_ERR("Incompatible database flags for 0x%"PRIx32" " + "(0x%"PRIx32" != 0x%"PRIx32")\n", + db_id, + db_flags, + db->db_flags); + return EINVAL; + } + + if (db->num_nodes >= dblist->num_nodes) { + return EINVAL; + } + + db->pnn_list[db->num_nodes] = node; + db->num_nodes++; + + return 0; +} + /* * Recovery database functions */ @@ -2014,7 +2139,7 @@ static bool recover_db_recv(struct tevent_req *req) struct db_recovery_state { struct tevent_context *ev; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; unsigned int num_replies; unsigned int num_failed; }; @@ -2022,7 +2147,7 @@ struct db_recovery_state { struct db_recovery_one_state { struct tevent_req *req; struct ctdb_client_context *client; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; struct ctdb_tunable_list *tun_list; struct node_list *nlist; uint32_t generation; @@ -2036,14 +2161,14 @@ static void db_recovery_one_done(struct tevent_req *subreq); static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, struct ctdb_client_context *client, - struct ctdb_dbid_map *dbmap, + struct db_list *dblist, struct ctdb_tunable_list *tun_list, struct node_list *nlist, uint32_t
generation) { struct tevent_req *req, *subreq; struct db_recovery_state *state; - unsigned int i; + struct db *db; req = tevent_req_create(mem_ctx, &state, struct db_recovery_state); if (req == NULL) { @@ -2051,16 +2176,16 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, } state->ev = ev; - state->dbmap = dbmap; + state->dblist = dblist; state->num_replies = 0; state->num_failed = 0; - if (dbmap->num == 0) { + if (dblist->num_dbs == 0) { tevent_req_done(req); return tevent_req_post(req, ev); } - for (i=0; i<dbmap->num; i++) { + for (db = dblist->db; db != NULL; db = db->next) { struct db_recovery_one_state *substate; substate = talloc_zero(state, struct db_recovery_one_state); @@ -2070,12 +2195,12 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->req = req; substate->client = client; - substate->dbmap = dbmap; + substate->dblist = dblist; substate->tun_list = tun_list; substate->nlist = nlist; substate->generation = generation; - substate->db_id = dbmap->dbs[i].db_id; - substate->db_flags = dbmap->dbs[i].flags; + substate->db_id = db->db_id; + substate->db_flags = db->db_flags; subreq = recover_db_send(state, ev, @@ -2138,7 +2263,7 @@ failed: done: state->num_replies += 1; - if (state->num_replies == state->dbmap->num) { + if (state->num_replies == state->dblist->num_dbs) { tevent_req_done(req); } } @@ -2387,7 +2512,7 @@ struct recovery_state { struct node_list *nlist; struct ctdb_tunable_list *tun_list; struct ctdb_vnn_map *vnnmap; - struct ctdb_dbid_map *dbmap; + struct db_list *dblist; }; static void recovery_tunables_done(struct tevent_req *subreq); @@ -2709,6 +2834,8 @@ static void recovery_dbmap_done(struct tevent_req *subreq) req, struct recovery_state); struct ctdb_reply_control *reply; struct ctdb_req_control request; + struct ctdb_dbid_map *dbmap = NULL; + unsigned int j; int ret; bool status; @@ -2721,13 +2848,32 @@ static void recovery_dbmap_done(struct tevent_req *subreq) return; } - ret =
ctdb_reply_control_get_dbmap(reply, state, &state->dbmap); + state->dblist = db_list_init(state, state->nlist->count); + if (tevent_req_nomem(state->dblist, req)) { + D_ERR("memory allocation error\n"); + return; + } + + ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); if (ret != 0) { D_ERR("control GET_DBMAP failed, ret=%d\n", ret); tevent_req_error(req, ret); return; } + for (j = 0; j < dbmap->num; j++) { + ret = db_list_check_and_add(state->dblist, + dbmap->dbs[j].db_id, + dbmap->dbs[j].flags, + state->destnode); + if (ret != 0) { + D_ERR("failed to add database list entry, ret=%d\n", + ret); + tevent_req_error(req, ret); + return; + } + } + ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); subreq = ctdb_client_control_multi_send(state, state->ev, @@ -2898,7 +3044,7 @@ static void recovery_vnnmap_update_done(struct tevent_req *subreq) subreq = db_recovery_send(state, state->ev, state->client, - state->dbmap, + state->dblist, state->tun_list, state->nlist, state->vnnmap->generation); @@ -2921,7 +3067,7 @@ static void recovery_db_recovery_done(struct tevent_req *subreq) status = db_recovery_recv(subreq, &count); TALLOC_FREE(subreq); - D_ERR("%d of %d databases recovered\n", count, state->dbmap->num); + D_ERR("%d of %d databases recovered\n", count, state->dblist->num_dbs); if (! status) { subreq = ban_node_send(state, -- 2.25.1 From 5c45da6421de5ed3aee2cc19151ea72195139802 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 16:10:05 +1100 Subject: [PATCH 32/38] ctdb-recovery: GET_DBMAP from all nodes This builds a complete list of databases across the cluster so it can be used to create databases on the nodes where they are missing. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c6f74e590f602e2ed38fe293468770a5e669aefa) --- ctdb/server/ctdb_recovery_helper.c | 77 +++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index df96240d8da..d5a264df5d2 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -2817,9 +2817,13 @@ static void recovery_capabilities_done(struct tevent_req *subreq) talloc_free(reply); ctdb_req_control_get_dbmap(&request); - subreq = ctdb_client_control_send(state, state->ev, state->client, - state->destnode, TIMEOUT(), - &request); + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->nlist->pnn_list, + state->nlist->count, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return; } @@ -2832,18 +2836,34 @@ static void recovery_dbmap_done(struct tevent_req *subreq) subreq, struct tevent_req); struct recovery_state *state = tevent_req_data( req, struct recovery_state); - struct ctdb_reply_control *reply; + struct ctdb_reply_control **reply; struct ctdb_req_control request; - struct ctdb_dbid_map *dbmap = NULL; - unsigned int j; + int *err_list; + unsigned int i, j; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); TALLOC_FREE(subreq); if (! 
status) { - D_ERR("control GET_DBMAP failed to node %u, ret=%d\n", - state->destnode, ret); + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->nlist->pnn_list, + state->nlist->count, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_DBMAP failed on node %u," + " ret=%d\n", pnn, ret2); + } else { + D_ERR("control GET_DBMAP failed, ret=%d\n", + ret); + } tevent_req_error(req, ret); return; } @@ -2854,24 +2874,35 @@ static void recovery_dbmap_done(struct tevent_req *subreq) return; } - ret = ctdb_reply_control_get_dbmap(reply, state, &dbmap); - if (ret != 0) { - D_ERR("control GET_DBMAP failed, ret=%d\n", ret); - tevent_req_error(req, ret); - return; - } + for (i = 0; i < state->nlist->count; i++) { + struct ctdb_dbid_map *dbmap = NULL; + uint32_t pnn; - for (j = 0; j < dbmap->num; j++) { - ret = db_list_check_and_add(state->dblist, - dbmap->dbs[j].db_id, - dbmap->dbs[j].flags, - state->destnode); + pnn = state->nlist->pnn_list[i]; + + ret = ctdb_reply_control_get_dbmap(reply[i], state, &dbmap); if (ret != 0) { - D_ERR("failed to add database list entry, ret=%d\n", - ret); - tevent_req_error(req, ret); + D_ERR("control GET_DBMAP failed on node %u\n", + pnn); + tevent_req_error(req, EPROTO); return; } + + for (j = 0; j < dbmap->num; j++) { + ret = db_list_check_and_add(state->dblist, + dbmap->dbs[j].db_id, + dbmap->dbs[j].flags, + pnn); + if (ret != 0) { + D_ERR("failed to add database list entry, " + "ret=%d\n", + ret); + tevent_req_error(req, ret); + return; + } + } + + TALLOC_FREE(dbmap); } ctdb_req_control_set_recmode(&request, CTDB_RECOVERY_ACTIVE); -- 2.25.1 From 380130009d2c76383ac5e87548e0ca9d1e6171ff Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 21 Feb 2020 16:51:10 +1100 Subject: [PATCH 33/38] ctdb-recovery: Pass db structure for each database recovery Instead of db_id and db_flags. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 1bdfeb3fdc06947a607957ab3d114f97bad5d7d7) --- ctdb/server/ctdb_recovery_helper.c | 59 ++++++++++++++---------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index d5a264df5d2..2b77542245a 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -1672,8 +1672,7 @@ struct recover_db_state { struct ctdb_client_context *client; struct ctdb_tunable_list *tun_list; struct node_list *nlist; - uint32_t db_id; - uint8_t db_flags; + struct db *db; uint32_t destnode; struct ctdb_transdb transdb; @@ -1698,8 +1697,7 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, struct ctdb_tunable_list *tun_list, struct node_list *nlist, uint32_t generation, - uint32_t db_id, - uint8_t db_flags) + struct db *db) { struct tevent_req *req, *subreq; struct recover_db_state *state; @@ -1714,14 +1712,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->client = client; state->tun_list = tun_list; state->nlist = nlist; - state->db_id = db_id; - state->db_flags = db_flags; + state->db = db; state->destnode = ctdb_client_pnn(client); - state->transdb.db_id = db_id; + state->transdb.db_id = db->db_id; state->transdb.tid = generation; - ctdb_req_control_get_dbname(&request, db_id); + ctdb_req_control_get_dbname(&request, db->db_id); subreq = ctdb_client_control_send(state, ev, client, state->destnode, TIMEOUT(), &request); if (tevent_req_nomem(subreq, req)) { @@ -1747,7 +1744,7 @@ static void recover_db_name_done(struct tevent_req *subreq) TALLOC_FREE(subreq); if (! 
status) { D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db_id, ret); + state->db->db_id, ret); tevent_req_error(req, ret); return; } @@ -1755,14 +1752,14 @@ static void recover_db_name_done(struct tevent_req *subreq) ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); if (ret != 0) { D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db_id, ret); + state->db->db_id, ret); tevent_req_error(req, EPROTO); return; } talloc_free(reply); - ctdb_req_control_getdbpath(&request, state->db_id); + ctdb_req_control_getdbpath(&request, state->db->db_id); subreq = ctdb_client_control_send(state, state->ev, state->client, state->destnode, TIMEOUT(), &request); @@ -1802,7 +1799,7 @@ static void recover_db_path_done(struct tevent_req *subreq) talloc_free(reply); - ctdb_req_control_db_freeze(&request, state->db_id); + ctdb_req_control_db_freeze(&request, state->db->db_id); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -1873,6 +1870,7 @@ static void recover_db_transaction_started(struct tevent_req *subreq) struct recover_db_state *state = tevent_req_data( req, struct recover_db_state); int *err_list; + uint32_t flags; int ret; bool status; @@ -1899,28 +1897,31 @@ static void recover_db_transaction_started(struct tevent_req *subreq) return; } - state->recdb = recdb_create(state, state->db_id, state->db_name, + flags = state->db->db_flags; + state->recdb = recdb_create(state, + state->db->db_id, + state->db_name, state->db_path, state->tun_list->database_hash_size, - state->db_flags & CTDB_DB_FLAGS_PERSISTENT); + flags & CTDB_DB_FLAGS_PERSISTENT); if (tevent_req_nomem(state->recdb, req)) { return; } - if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || - (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { + if ((flags & CTDB_DB_FLAGS_PERSISTENT) || + (flags & CTDB_DB_FLAGS_REPLICATED)) { subreq = collect_highseqnum_db_send(state, state->ev, state->client, state->nlist, - state->db_id, + state->db->db_id, 
state->recdb); } else { subreq = collect_all_db_send(state, state->ev, state->client, state->nlist, - state->db_id, + state->db->db_id, state->recdb); } if (tevent_req_nomem(subreq, req)) { @@ -1939,8 +1940,8 @@ static void recover_db_collect_done(struct tevent_req *subreq) int ret; bool status; - if ((state->db_flags & CTDB_DB_FLAGS_PERSISTENT) || - (state->db_flags & CTDB_DB_FLAGS_REPLICATED)) { + if ((state->db->db_flags & CTDB_DB_FLAGS_PERSISTENT) || + (state->db->db_flags & CTDB_DB_FLAGS_REPLICATED)) { status = collect_highseqnum_db_recv(subreq, &ret); } else { status = collect_all_db_recv(subreq, &ret); @@ -2076,7 +2077,7 @@ static void recover_db_transaction_committed(struct tevent_req *subreq) return; } - ctdb_req_control_db_thaw(&request, state->db_id); + ctdb_req_control_db_thaw(&request, state->db->db_id); subreq = ctdb_client_control_multi_send(state, state->ev, state->client, @@ -2151,8 +2152,7 @@ struct db_recovery_one_state { struct ctdb_tunable_list *tun_list; struct node_list *nlist; uint32_t generation; - uint32_t db_id; - uint8_t db_flags; + struct db *db; int num_fails; }; @@ -2199,8 +2199,7 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, substate->tun_list = tun_list; substate->nlist = nlist; substate->generation = generation; - substate->db_id = db->db_id; - substate->db_flags = db->db_flags; + substate->db = db; subreq = recover_db_send(state, ev, @@ -2208,14 +2207,13 @@ static struct tevent_req *db_recovery_send(TALLOC_CTX *mem_ctx, tun_list, nlist, generation, - substate->db_id, - substate->db_flags); + substate->db); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } tevent_req_set_callback(subreq, db_recovery_one_done, substate); - D_NOTICE("recover database 0x%08x\n", substate->db_id); + D_NOTICE("recover database 0x%08x\n", substate->db->db_id); } return req; @@ -2246,14 +2244,13 @@ static void db_recovery_one_done(struct tevent_req *subreq) substate->tun_list, substate->nlist, 
substate->generation, - substate->db_id, - substate->db_flags); + substate->db); if (tevent_req_nomem(subreq, req)) { goto failed; } tevent_req_set_callback(subreq, db_recovery_one_done, substate); D_NOTICE("recover database 0x%08x, attempt %d\n", - substate->db_id, substate->num_fails+1); + substate->db->db_id, substate->num_fails+1); return; } -- 2.25.1 From 50d9ab28addb231d85a605064d59f6aade827bd1 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 10:26:34 +1100 Subject: [PATCH 34/38] ctdb-recovery: Fetch database name from all nodes where it is attached BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit e6e63f8fb8194634135bf34cda18f6cc8ff69a7c) --- ctdb/server/ctdb_recovery_helper.c | 80 +++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 2b77542245a..0fbafe45fb6 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -1656,7 +1656,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) /** * For each database do the following: - * - Get DB name + * - Get DB name from all nodes * - Get DB path * - Freeze database on all nodes * - Start transaction on all nodes @@ -1719,8 +1719,13 @@ static struct tevent_req *recover_db_send(TALLOC_CTX *mem_ctx, state->transdb.tid = generation; ctdb_req_control_get_dbname(&request, db->db_id); - subreq = ctdb_client_control_send(state, ev, client, state->destnode, - TIMEOUT(), &request); + subreq = ctdb_client_control_multi_send(state, + ev, + client, + state->db->pnn_list, + state->db->num_nodes, + TIMEOUT(), + &request); if (tevent_req_nomem(subreq, req)) { return tevent_req_post(req, ev); } @@ -1735,26 +1740,75 @@ static void recover_db_name_done(struct tevent_req *subreq) subreq, struct tevent_req); struct recover_db_state *state = tevent_req_data( 
req, struct recover_db_state); - struct ctdb_reply_control *reply; + struct ctdb_reply_control **reply; struct ctdb_req_control request; + int *err_list; + unsigned int i; int ret; bool status; - status = ctdb_client_control_recv(subreq, &ret, state, &reply); + status = ctdb_client_control_multi_recv(subreq, + &ret, + state, + &err_list, + &reply); TALLOC_FREE(subreq); if (! status) { - D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db->db_id, ret); + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error(state->db->pnn_list, + state->db->num_nodes, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control GET_DBNAME failed on node %u," + " ret=%d\n", + pnn, + ret2); + } else { + D_ERR("control GET_DBNAME failed, ret=%d\n", + ret); + } tevent_req_error(req, ret); return; } - ret = ctdb_reply_control_get_dbname(reply, state, &state->db_name); - if (ret != 0) { - D_ERR("control GET_DBNAME failed for db=0x%x, ret=%d\n", - state->db->db_id, ret); - tevent_req_error(req, EPROTO); - return; + for (i = 0; i < state->db->num_nodes; i++) { + const char *db_name; + uint32_t pnn; + + pnn = state->nlist->pnn_list[i]; + + ret = ctdb_reply_control_get_dbname(reply[i], + state, + &db_name); + if (ret != 0) { + D_ERR("control GET_DBNAME failed on node %u " + "for db=0x%x, ret=%d\n", + pnn, + state->db->db_id, + ret); + tevent_req_error(req, EPROTO); + return; + } + + if (state->db_name == NULL) { + state->db_name = db_name; + continue; + } + + if (strcmp(state->db_name, db_name) != 0) { + D_ERR("Incompatible database name for 0x%"PRIx32" " + "(%s != %s) on node %"PRIu32"\n", + state->db->db_id, + db_name, + state->db_name, + pnn); + node_list_ban_credits(state->nlist, pnn); + tevent_req_error(req, ret); + return; + } } talloc_free(reply); -- 2.25.1 From 5cecc9688d13ffd499a5eec28f352acac47efcb0 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 11:31:33 +1100 Subject: [PATCH 35/38] ctdb-recovery: Create database on nodes where it 
is missing BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 76a8174279f42486b36cc41d5831d4e6613f172e) --- ctdb/server/ctdb_recovery_helper.c | 178 ++++++++++++++++++++++++++++- 1 file changed, 177 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recovery_helper.c b/ctdb/server/ctdb_recovery_helper.c index 0fbafe45fb6..f10e60104ae 100644 --- a/ctdb/server/ctdb_recovery_helper.c +++ b/ctdb/server/ctdb_recovery_helper.c @@ -294,6 +294,150 @@ static int db_list_check_and_add(struct db_list *dblist, return 0; } +/* + * Create database on nodes where it is missing + */ + +struct db_create_missing_state { + struct tevent_context *ev; + struct ctdb_client_context *client; + + struct node_list *nlist; + + const char *db_name; + uint32_t *missing_pnn_list; + int missing_num_nodes; +}; + +static void db_create_missing_done(struct tevent_req *subreq); + +static struct tevent_req *db_create_missing_send( + TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct ctdb_client_context *client, + struct node_list *nlist, + const char *db_name, + struct db *db) +{ + struct tevent_req *req, *subreq; + struct db_create_missing_state *state; + struct ctdb_req_control request; + unsigned int i, j; + + req = tevent_req_create(mem_ctx, + &state, + struct db_create_missing_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->client = client; + state->nlist = nlist; + state->db_name = db_name; + + if (nlist->count == db->num_nodes) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + state->missing_pnn_list = talloc_array(mem_ctx, uint32_t, nlist->count); + if (tevent_req_nomem(state->missing_pnn_list, req)) { + return tevent_req_post(req, ev); + } + + for (i = 0; i < nlist->count; i++) { + uint32_t pnn = nlist->pnn_list[i] ; + + for (j = 0; j < db->num_nodes; j++) { + if (pnn == db->pnn_list[j]) { + break; + } + } + + if (j < db->num_nodes) { + 
continue; + } + + DBG_INFO("Create database %s on node %u\n", + state->db_name, + pnn); + state->missing_pnn_list[state->missing_num_nodes] = pnn; + state->missing_num_nodes++; + } + + if (db->db_flags & CTDB_DB_FLAGS_PERSISTENT) { + ctdb_req_control_db_attach_persistent(&request, db_name); + } else if (db->db_flags & CTDB_DB_FLAGS_REPLICATED) { + ctdb_req_control_db_attach_replicated(&request, db_name); + } else { + ctdb_req_control_db_attach(&request, db_name); + } + request.flags = CTDB_CTRL_FLAG_ATTACH_RECOVERY; + subreq = ctdb_client_control_multi_send(state, + state->ev, + state->client, + state->missing_pnn_list, + state->missing_num_nodes, + TIMEOUT(), + &request); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, db_create_missing_done, req); + + return req; +} + +static void db_create_missing_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct db_create_missing_state *state = tevent_req_data( + req, struct db_create_missing_state); + int *err_list; + int ret; + bool status; + + status = ctdb_client_control_multi_recv(subreq, + &ret, + NULL, + &err_list, + NULL); + TALLOC_FREE(subreq); + if (! 
status) { + int ret2; + uint32_t pnn; + + ret2 = ctdb_client_control_multi_error( + state->missing_pnn_list, + state->missing_num_nodes, + err_list, + &pnn); + if (ret2 != 0) { + D_ERR("control DB_ATTACH failed for db %s" + " on node %u, ret=%d\n", + state->db_name, + pnn, + ret2); + node_list_ban_credits(state->nlist, pnn); + } else { + D_ERR("control DB_ATTACH failed for db %s, ret=%d\n", + state->db_name, + ret); + } + tevent_req_error(req, ret); + return; + } + + tevent_req_done(req); +} + +static bool db_create_missing_recv(struct tevent_req *req, int *perr) +{ + return generic_recv(req, perr); +} + /* * Recovery database functions */ @@ -1657,6 +1801,7 @@ static bool collect_all_db_recv(struct tevent_req *req, int *perr) /** * For each database do the following: * - Get DB name from all nodes + * - Attach database on missing nodes * - Get DB path * - Freeze database on all nodes * - Start transaction on all nodes @@ -1682,6 +1827,7 @@ struct recover_db_state { }; static void recover_db_name_done(struct tevent_req *subreq); +static void recover_db_create_missing_done(struct tevent_req *subreq); static void recover_db_path_done(struct tevent_req *subreq); static void recover_db_freeze_done(struct tevent_req *subreq); static void recover_db_transaction_started(struct tevent_req *subreq); @@ -1741,7 +1887,6 @@ static void recover_db_name_done(struct tevent_req *subreq) struct recover_db_state *state = tevent_req_data( req, struct recover_db_state); struct ctdb_reply_control **reply; - struct ctdb_req_control request; int *err_list; unsigned int i; int ret; @@ -1813,6 +1958,37 @@ static void recover_db_name_done(struct tevent_req *subreq) talloc_free(reply); + subreq = db_create_missing_send(state, + state->ev, + state->client, + state->nlist, + state->db_name, + state->db); + + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, recover_db_create_missing_done, req); +} + +static void recover_db_create_missing_done(struct 
tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct recover_db_state *state = tevent_req_data( + req, struct recover_db_state); + struct ctdb_req_control request; + int ret; + bool status; + + /* Could sanity check the db_id here */ + status = db_create_missing_recv(subreq, &ret); + TALLOC_FREE(subreq); + if (! status) { + tevent_req_error(req, ret); + return; + } + ctdb_req_control_getdbpath(&request, state->db->db_id); subreq = ctdb_client_control_send(state, state->ev, state->client, state->destnode, TIMEOUT(), -- 2.25.1 From 69af2b0f089ee51211e9f0467ba233974e6a9c2f Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Feb 2020 19:51:19 +1100 Subject: [PATCH 36/38] ctdb-recovery: Remove old code for creating missing databases BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3a66d181b6f6199fca362fcb0aa06513645b589d) --- ctdb/server/ctdb_recoverd.c | 161 ------------------------------------ 1 file changed, 161 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 857736e30c8..68748aee70c 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -424,140 +424,6 @@ static int set_recovery_mode(struct ctdb_context *ctdb, return 0; } -/* - ensure all other nodes have attached to any databases that we have - */ -static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, - uint32_t pnn, struct ctdb_dbid_map_old *dbmap, TALLOC_CTX *mem_ctx) -{ - unsigned int i, j, db; - int ret; - struct ctdb_dbid_map_old *remote_dbmap; - - /* verify that all other nodes have all our databases */ - for (j=0; j<nodemap->num; j++) { - /* we don't need to ourself ourselves */ - if (nodemap->nodes[j].pnn == pnn) { - continue; - } - /* don't check nodes that are unavailable */ - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; -
} - - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, &remote_dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); - return -1; - } - - /* step through all local databases */ - for (db=0; db<dbmap->num;db++) { - const char *name; - - - for (i=0;i<remote_dbmap->num;i++) { - if (dbmap->dbs[db].db_id == remote_dbmap->dbs[i].db_id) { - break; - } - } - /* the remote node already have this database */ - if (i!=remote_dbmap->num) { - continue; - } - /* ok so we need to create this database */ - ret = ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), pnn, - dbmap->dbs[db].db_id, mem_ctx, - &name); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", pnn)); - return -1; - } - ret = ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), - nodemap->nodes[j].pnn, - mem_ctx, name, - dbmap->dbs[db].flags, NULL); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create remote db:%s\n", name)); - return -1; - } - } - } - - return 0; -} - - -/* - ensure we are attached to any databases that anyone else is attached to - */ -static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap, - uint32_t pnn, struct ctdb_dbid_map_old **dbmap, TALLOC_CTX *mem_ctx) -{ - unsigned int i, j, db; - int ret; - struct ctdb_dbid_map_old *remote_dbmap; - - /* verify that we have all database any other node has */ - for (j=0; j<nodemap->num; j++) { - /* we don't need to ourself ourselves */ - if (nodemap->nodes[j].pnn == pnn) { - continue; - } - /* don't check nodes that are unavailable */ - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - continue; - } - - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - mem_ctx, &remote_dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node %u\n", pnn)); - return -1; - } - - /* step through all databases on the remote node */ - for (db=0; db<remote_dbmap->num;db++) { - const char
*name; - - for (i=0;i<(*dbmap)->num;i++) { - if (remote_dbmap->dbs[db].db_id == (*dbmap)->dbs[i].db_id) { - break; - } - } - /* we already have this db locally */ - if (i!=(*dbmap)->num) { - continue; - } - /* ok so we need to create this database and - rebuild dbmap - */ - ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, - remote_dbmap->dbs[db].db_id, mem_ctx, &name); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbname from node %u\n", - nodemap->nodes[j].pnn)); - return -1; - } - ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), pnn, - mem_ctx, name, - remote_dbmap->dbs[db].flags, NULL); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create local db:%s\n", name)); - return -1; - } - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to reread dbmap on node %u\n", pnn)); - return -1; - } - } - } - - return 0; -} - /* update flags on all active nodes */ @@ -1165,7 +1031,6 @@ static int do_recovery(struct ctdb_recoverd *rec, struct ctdb_context *ctdb = rec->ctdb; unsigned int i; int ret; - struct ctdb_dbid_map_old *dbmap; bool self_ban; DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n")); @@ -1245,32 +1110,6 @@ static int do_recovery(struct ctdb_recoverd *rec, DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node)); - /* get a list of all databases */ - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn)); - goto fail; - } - - /* we do the db creation before we set the recovery mode, so the freeze happens - on all databases we will be dealing with. 
*/ - - /* verify that we have all the databases any other node has */ - ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n")); - goto fail; - } - - /* verify that all other nodes have all our databases */ - ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n")); - goto fail; - } - DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n")); - - /* Retrieve capabilities from all connected nodes */ ret = update_capabilities(rec, nodemap); if (ret!=0) { -- 2.25.1 From 3c584144f520cf45262cb19ed0f0bac2beb4e60f Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 25 Feb 2020 06:20:32 +1100 Subject: [PATCH 37/38] ctdb-daemon: Remove more unused old client database functions BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 052f1bdb9cf78f53f584edd32f81ae8e01e8e86e) --- ctdb/include/ctdb_client.h | 12 ----- ctdb/server/ctdb_client.c | 105 ------------------------------------- 2 files changed, 117 deletions(-) diff --git a/ctdb/include/ctdb_client.h b/ctdb/include/ctdb_client.h index d1dce1e68d8..198a8a38dbb 100644 --- a/ctdb/include/ctdb_client.h +++ b/ctdb/include/ctdb_client.h @@ -165,10 +165,6 @@ int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster); -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - struct ctdb_dbid_map_old **dbmap); - int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_node_map_old **nodemap); @@ -176,14 +172,6 @@ int 
ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, struct timeval timeout, int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *runstate); -int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, uint32_t dbid, - TALLOC_CTX *mem_ctx, const char **name); - -int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - const char *name, uint8_t db_flags, uint32_t *db_id); - int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level); diff --git a/ctdb/server/ctdb_client.c b/ctdb/server/ctdb_client.c index 26055698568..67455745ede 100644 --- a/ctdb/server/ctdb_client.c +++ b/ctdb/server/ctdb_client.c @@ -1029,30 +1029,6 @@ int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, ui } -/* - get a list of databases off a remote node - */ -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - TALLOC_CTX *mem_ctx, struct ctdb_dbid_map_old **dbmap) -{ - int ret; - TDB_DATA outdata; - int32_t res; - - ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_GET_DBMAP, 0, tdb_null, - mem_ctx, &outdata, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res)); - return -1; - } - - *dbmap = (struct ctdb_dbid_map_old *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize); - talloc_free(outdata.dptr); - - return 0; -} - /* get a list of nodes (vnn and flags ) from a remote node */ @@ -1107,87 +1083,6 @@ int ctdb_ctrl_get_runstate(struct ctdb_context *ctdb, return 0; } -/* - find the name of a db - */ -int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, - const char **name) -{ - int ret; - int32_t res; - TDB_DATA data; - - data.dptr = (uint8_t *)&dbid; - data.dsize = sizeof(dbid); - 
- ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_GET_DBNAME, 0, data, - mem_ctx, &data, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - return -1; - } - - (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); - if ((*name) == NULL) { - return -1; - } - - talloc_free(data.dptr); - - return 0; -} - -/* - create a database - */ -int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, - uint32_t destnode, TALLOC_CTX *mem_ctx, - const char *name, uint8_t db_flags, uint32_t *db_id) -{ - int ret; - int32_t res; - TDB_DATA data; - uint32_t opcode; - - data.dptr = discard_const(name); - data.dsize = strlen(name)+1; - - if (db_flags & CTDB_DB_FLAGS_PERSISTENT) { - opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT; - } else if (db_flags & CTDB_DB_FLAGS_REPLICATED) { - opcode = CTDB_CONTROL_DB_ATTACH_REPLICATED; - } else { - opcode = CTDB_CONTROL_DB_ATTACH; - } - - ret = ctdb_control(ctdb, - destnode, - 0, - opcode, - CTDB_CTRL_FLAG_ATTACH_RECOVERY, - data, - mem_ctx, - &data, - &res, - &timeout, - NULL); - - if (ret != 0 || res != 0) { - return -1; - } - - if (data.dsize != sizeof(uint32_t)) { - TALLOC_FREE(data.dptr); - return -1; - } - if (db_id != NULL) { - *db_id = *(uint32_t *)data.dptr; - } - talloc_free(data.dptr); - - return 0; -} - /* get debug level on a node */ -- 2.25.1 From 68cb93bbfdf997961c1d23070014c1eba3fcd22a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 25 Feb 2020 17:32:56 +1100 Subject: [PATCH 38/38] ctdb-daemon: Don't allow attach from recovery if recovery is not active Neither the recovery daemon nor the recovery helper should attach databases outside of the recovery process. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14294 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 147afe77de372ddb9c180228d6fe1b04cca4610f) --- ctdb/server/ctdb_ltdb_server.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index e050b7304fe..ce3569fe7b1 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -1135,6 +1135,13 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, return -1; } + if ((c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && + ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) { + DBG_ERR("Attach from recovery refused because " + "recovery is not active\n"); + return -1; + } + if (!(c->flags & CTDB_CTRL_FLAG_ATTACH_RECOVERY) && (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE || ctdb->runstate < CTDB_RUNSTATE_STARTUP)) { -- 2.25.1