From 6c59a85d5d18995bd2ee976d05e91634b840ecce Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 14 Feb 2018 14:29:18 +1100 Subject: [PATCH 01/11] ctdb-daemon: Add invalid_records flag to ctdb_db_context If a node becomes INACTIVE, then all the records in volatile databases are invalidated. This avoids the need to include records from such nodes during subsequent recovery after the node comes out INACTIVE state. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 71896fddf10a92237d332779ccbb26c059caa649) --- ctdb/include/ctdb_private.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index b3d2e146dcf..ae154a08a0c 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -386,6 +386,7 @@ struct ctdb_db_context { uint32_t freeze_transaction_id; uint32_t generation; + bool invalid_records; bool push_started; void *push_state; -- 2.19.1 From eddd33fae4fb831c5d6a0a0e4ae136c483927191 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 14 Feb 2018 14:27:32 +1100 Subject: [PATCH 02/11] ctdb-daemon: Don't pull any records if records are invalidated This avoids unnecessary work during recovery to pull records from nodes that were INACTIVE just before the recovery. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 040401ca3abfa266261130f6c5ae4e9718f19cd7) --- ctdb/server/ctdb_recover.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index fc64037b95f..77c614df643 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -279,6 +279,11 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT ctdb_db->db_name, ctdb_db->unhealthy_reason)); } + /* If the records are invalid, we are done */ + if (ctdb_db->invalid_records) { + goto done; + } + if (ctdb_lockdb_mark(ctdb_db) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entire db - failing\n")); return -1; @@ -293,6 +298,7 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT ctdb_lockdb_unmark(ctdb_db); +done: outdata->dptr = (uint8_t *)params.pulldata; outdata->dsize = params.len; @@ -388,6 +394,11 @@ int32_t ctdb_control_db_pull(struct ctdb_context *ctdb, state.srvid = pulldb_ext->srvid; state.num_records = 0; + /* If the records are invalid, we are done */ + if (ctdb_db->invalid_records) { + goto done; + } + if (ctdb_lockdb_mark(ctdb_db) != 0) { DEBUG(DEBUG_ERR, (__location__ " Failed to get lock on entire db - failing\n")); @@ -422,6 +433,7 @@ int32_t ctdb_control_db_pull(struct ctdb_context *ctdb, ctdb_lockdb_unmark(ctdb_db); +done: outdata->dptr = talloc_size(outdata, sizeof(uint32_t)); if (outdata->dptr == NULL) { DEBUG(DEBUG_ERR, (__location__ " Memory allocation error\n")); -- 2.19.1 From 5191a8d20c7b7da4251907a5b2ba9a3fad5cb84b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 14 Feb 2018 14:19:44 +1100 Subject: [PATCH 03/11] ctdb-daemon: Invalidate records if a node becomes INACTIVE BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit c4ec99b1d3f1c5bff83bf66e3fd64d45a8be7441) --- ctdb/server/ctdb_freeze.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_freeze.c b/ctdb/server/ctdb_freeze.c index c41fc7d53ee..10841efa1b9 100644 --- a/ctdb/server/ctdb_freeze.c +++ b/ctdb/server/ctdb_freeze.c @@ -140,6 +140,9 @@ static int ctdb_db_freeze_handle_destructor(struct ctdb_db_freeze_handle *h) ctdb_db->freeze_mode = CTDB_FREEZE_NONE; ctdb_db->freeze_handle = NULL; + /* Clear invalid records flag */ + ctdb_db->invalid_records = false; + talloc_free(h->lreq); return 0; } @@ -393,6 +396,19 @@ static int db_freeze_waiter_destructor(struct ctdb_db_freeze_waiter *w) return 0; } +/** + * Invalidate the records in the database. + * This only applies to volatile databases. + */ +static int db_invalidate(struct ctdb_db_context *ctdb_db, void *private_data) +{ + if (ctdb_db_volatile(ctdb_db)) { + ctdb_db->invalid_records = true; + } + + return 0; +} + /** * Count the number of databases */ @@ -436,13 +452,17 @@ static int db_freeze(struct ctdb_db_context *ctdb_db, void *private_data) } /* - start the freeze process for a certain priority + start the freeze process for all databases + This is only called from ctdb_control_freeze(), which is called + only on node becoming INACTIVE. So mark the records invalid. */ static void ctdb_start_freeze(struct ctdb_context *ctdb) { struct ctdb_freeze_handle *h; int ret; + ctdb_db_iterator(ctdb, db_invalidate, NULL); + if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) { int count = 0; @@ -534,6 +554,8 @@ static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w) /* freeze all the databases + This control is only used when freezing database on node becoming INACTIVE. + So mark the records invalid in ctdb_start_freeze(). */ int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply) -- 2.19.1 From 12b4b905e2e7cc8245944b5dd899c6038e230aca Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Mon, 24 Sep 2018 16:17:19 +1000 Subject: [PATCH 04/11] ctdb-tests: Add recovery record resurrection test for volatile databases Ensure that deleted records and vacuumed records are not resurrected from recently inactive nodes. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit dcc9935995a5a7b40df64653a605d1af89075bd1) --- .../simple/69_recovery_resurrect_deleted.sh | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 ctdb/tests/simple/69_recovery_resurrect_deleted.sh diff --git a/ctdb/tests/simple/69_recovery_resurrect_deleted.sh b/ctdb/tests/simple/69_recovery_resurrect_deleted.sh new file mode 100755 index 00000000000..95e79fdd491 --- /dev/null +++ b/ctdb/tests/simple/69_recovery_resurrect_deleted.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +test_info() +{ + cat < Date: Wed, 14 Feb 2018 14:50:40 +1100 Subject: [PATCH 05/11] ctdb-vacuum: Simplify the deletion of vacuumed records The 3-phase deletion of vacuumed records was introduced to overcome the problem of record(s) resurrection during recovery. This problem is now handled by avoiding the records from recently INACTIVE nodes in the recovery process. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 202b9027ba44eee33c2fde2332126be10f719423) --- ctdb/server/ctdb_vacuum.c | 252 ++------------------------------------ 1 file changed, 9 insertions(+), 243 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index e7491164262..5aa0ca7dcc0 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -308,118 +308,6 @@ static int delete_marshall_traverse(void *param, void *data) return 0; } -/** - * Variant of delete_marshall_traverse() that bumps the - * RSN of each traversed record in the database. - * - * This is needed to ensure that when rolling out our - * empty record copy before remote deletion, we as the - * record's dmaster keep a higher RSN than the non-dmaster - * nodes. This is needed to prevent old copies from - * resurrection in recoveries. - */ -static int delete_marshall_traverse_first(void *param, void *data) -{ - struct delete_record_data *dd = talloc_get_type(data, struct delete_record_data); - struct delete_records_list *recs = talloc_get_type(param, struct delete_records_list); - struct ctdb_db_context *ctdb_db = dd->ctdb_db; - struct ctdb_context *ctdb = ctdb_db->ctdb; - struct ctdb_ltdb_header header; - uint32_t lmaster; - uint32_t hash = ctdb_hash(&(dd->key)); - int res; - - res = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, dd->key); - if (res != 0) { - recs->vdata->count.delete_list.skipped++; - recs->vdata->count.delete_list.left--; - talloc_free(dd); - return 0; - } - - /* - * Verify that the record is still empty, its RSN has not - * changed and that we are still its lmaster and dmaster. - */ - - res = tdb_parse_record(ctdb_db->ltdb->tdb, dd->key, - vacuum_record_parser, &header); - if (res != 0) { - goto skip; - } - - if (header.flags & CTDB_REC_RO_FLAGS) { - DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] " - "on database db[%s] has read-only flags. " - "skipping.\n", - hash, ctdb_db->db_name)); - goto skip; - } - - if (header.dmaster != ctdb->pnn) { - DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] " - "on database db[%s] has been migrated away. " - "skipping.\n", - hash, ctdb_db->db_name)); - goto skip; - } - - if (header.rsn != dd->hdr.rsn) { - DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] " - "on database db[%s] seems to have been " - "migrated away and back again (with empty " - "data). skipping.\n", - hash, ctdb_db->db_name)); - goto skip; - } - - lmaster = ctdb_lmaster(ctdb_db->ctdb, &dd->key); - - if (lmaster != ctdb->pnn) { - DEBUG(DEBUG_INFO, (__location__ ": not lmaster for record in " - "delete list (key hash [0x%08x], db[%s]). " - "Strange! skipping.\n", - hash, ctdb_db->db_name)); - goto skip; - } - - /* - * Increment the record's RSN to ensure the dmaster (i.e. the current - * node) has the highest RSN of the record in the cluster. - * This is to prevent old record copies from resurrecting in recoveries - * if something should fail during the deletion process. - * Note that ctdb_ltdb_store_server() increments the RSN if called - * on the record's dmaster. - */ - - res = ctdb_ltdb_store(ctdb_db, dd->key, &header, tdb_null); - if (res != 0) { - DEBUG(DEBUG_ERR, (__location__ ": Failed to store record with " - "key hash [0x%08x] on database db[%s].\n", - hash, ctdb_db->db_name)); - goto skip; - } - - tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key); - - goto done; - -skip: - tdb_chainunlock(ctdb_db->ltdb->tdb, dd->key); - - recs->vdata->count.delete_list.skipped++; - recs->vdata->count.delete_list.left--; - talloc_free(dd); - dd = NULL; - -done: - if (dd == NULL) { - return 0; - } - - return delete_marshall_traverse(param, data); -} - /** * traverse function for the traversal of the delete_queue, * the fast-path vacuuming list. @@ -602,12 +490,10 @@ static int delete_record_traverse(void *param, void *data) goto skip; } - if (header.rsn != dd->hdr.rsn + 1) { + if (header.rsn != dd->hdr.rsn) { /* * The record has been migrated off the node and back again. * But not requeued for deletion. Skip it. - * (Note that the first marshall traverse has bumped the RSN - * on disk.) */ DEBUG(DEBUG_INFO, (__location__ ": record with hash [0x%08x] " "on database db[%s] seems to have been " @@ -805,15 +691,9 @@ static void ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db, * at least some of these records previously from the former dmasters * with the vacuum fetch message. * - * This last step is implemented as a 3-phase process to protect from - * races leading to data corruption: - * - * 1) Send the lmaster's copy to all other active nodes with the - * RECEIVE_RECORDS control: The remote nodes store the lmaster's copy. - * 2) Send the records that could successfully be stored remotely - * in step #1 to all active nodes with the TRY_DELETE_RECORDS + * 1) Send the records to all active nodes with the TRY_DELETE_RECORDS * control. The remote notes delete their local copy. - * 3) The lmaster locally deletes its copies of all records that + * 2) The lmaster locally deletes its copies of all records that * could successfully be deleted remotely in step #2. */ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, @@ -861,17 +741,9 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, num_active_nodes = talloc_get_size(active_nodes)/sizeof(*active_nodes); /* - * Now delete the records all active nodes in a three-phase process: - * 1) send all active remote nodes the current empty copy with this - * node as DMASTER - * 2) if all nodes could store the new copy, - * tell all the active remote nodes to delete all their copy - * 3) if all remote nodes deleted their record copy, delete it locally - */ - - /* - * Step 1: - * Send currently empty record copy to all active nodes for storing. + * Now delete the records all active nodes in a two-phase process: + * 1) tell all active remote nodes to delete all their copy + * 2) if all remote nodes deleted their record copy, delete it locally */ recs = talloc_zero(tmp_ctx, struct delete_records_list); @@ -879,122 +751,16 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); goto done; } - recs->records = (struct ctdb_marshall_buffer *) - talloc_zero_size(recs, - offsetof(struct ctdb_marshall_buffer, data)); - if (recs->records == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); - goto done; - } - recs->records->db_id = ctdb_db->db_id; - recs->vdata = vdata; /* - * traverse the tree of all records we want to delete and - * create a blob we can send to the other nodes. - * - * We call delete_marshall_traverse_first() to bump the - * records' RSNs in the database, to ensure we (as dmaster) - * keep the highest RSN of the records in the cluster. - */ - ret = trbt_traversearray32(vdata->delete_list, 1, - delete_marshall_traverse_first, recs); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Error traversing the " - "delete list for first marshalling.\n")); - goto done; - } - - indata = ctdb_marshall_finish(recs->records); - - for (i = 0; i < num_active_nodes; i++) { - struct ctdb_marshall_buffer *records; - struct ctdb_rec_data_old *rec; - int32_t res; - TDB_DATA outdata; - - ret = ctdb_control(ctdb, active_nodes[i], 0, - CTDB_CONTROL_RECEIVE_RECORDS, 0, - indata, recs, &outdata, &res, - NULL, NULL); - if (ret != 0 || res != 0) { - DEBUG(DEBUG_ERR, ("Error storing record copies on " - "node %u: ret[%d] res[%d]\n", - active_nodes[i], ret, res)); - goto done; - } - - /* - * outdata contains the list of records coming back - * from the node: These are the records that the - * remote node could not store. We remove these from - * the list to process further. - */ - records = (struct ctdb_marshall_buffer *)outdata.dptr; - rec = (struct ctdb_rec_data_old *)&records->data[0]; - while (records->count-- > 1) { - TDB_DATA reckey, recdata; - struct ctdb_ltdb_header *rechdr; - struct delete_record_data *dd; - - reckey.dptr = &rec->data[0]; - reckey.dsize = rec->keylen; - recdata.dptr = &rec->data[reckey.dsize]; - recdata.dsize = rec->datalen; - - if (recdata.dsize < sizeof(struct ctdb_ltdb_header)) { - DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n")); - goto done; - } - rechdr = (struct ctdb_ltdb_header *)recdata.dptr; - recdata.dptr += sizeof(*rechdr); - recdata.dsize -= sizeof(*rechdr); - - dd = (struct delete_record_data *)trbt_lookup32( - vdata->delete_list, - ctdb_hash(&reckey)); - if (dd != NULL) { - /* - * The other node could not store the record - * copy and it is the first node that failed. - * So we should remove it from the tree and - * update statistics. - */ - talloc_free(dd); - vdata->count.delete_list.remote_error++; - vdata->count.delete_list.left--; - } else { - DEBUG(DEBUG_ERR, (__location__ " Failed to " - "find record with hash 0x%08x coming " - "back from RECEIVE_RECORDS " - "control in delete list.\n", - ctdb_hash(&reckey))); - vdata->count.delete_list.local_error++; - vdata->count.delete_list.left--; - } - - rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec); - } - } - - if (vdata->count.delete_list.left == 0) { - goto success; - } - - /* - * Step 2: - * Send the remaining records to all active nodes for deletion. - * - * The lmaster's (i.e. our) copies of these records have been stored - * successfully on the other nodes. + * Step 1: + * Send all records to all active nodes for deletion. */ /* * Create a marshall blob from the remaining list of records to delete. */ - talloc_free(recs->records); - recs->records = (struct ctdb_marshall_buffer *) talloc_zero_size(recs, offsetof(struct ctdb_marshall_buffer, data)); @@ -1089,7 +855,7 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, } /* - * Step 3: + * Step 2: * Delete the remaining records locally. * * These records have successfully been deleted on all -- 2.19.1 From 5d9edeb776f11f19918b4ce3b6833af7e60a4f57 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 14 Feb 2018 15:18:17 +1100 Subject: [PATCH 06/11] ctdb-vacuum: Fix the incorrect counting of remote errors If a node fails to delete a record in TRY_DELETE_RECORDS control during vacuuming, then it's possible that other nodes also may fail to delete a record. So instead of deleting the record from RB tree on first failure, keep track of the remote failures. Update delete_list.remote_error and delete_list.left statistics only once per record during the delete_record_traverse. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit ef052397173522ac2dd0d0bd9660a18a13a3e4fc) --- ctdb/server/ctdb_vacuum.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 5aa0ca7dcc0..8faf803efb9 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -107,6 +107,7 @@ struct delete_record_data { struct ctdb_context *ctdb; struct ctdb_db_context *ctdb_db; struct ctdb_ltdb_header hdr; + uint32_t remote_fail_count; TDB_DATA key; uint8_t keydata[1]; }; @@ -149,6 +150,7 @@ static int insert_delete_record_data_into_tree(struct ctdb_context *ctdb, memcpy(dd->keydata, key.dptr, key.dsize); dd->hdr = *hdr; + dd->remote_fail_count = 0; hash = ctdb_hash(&key); @@ -451,6 +453,13 @@ static int delete_record_traverse(void *param, void *data) uint32_t lmaster; uint32_t hash = ctdb_hash(&(dd->key)); + if (dd->remote_fail_count > 0) { + vdata->count.delete_list.remote_error++; + vdata->count.delete_list.left--; + talloc_free(dd); + return 0; + } + res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key); if (res != 0) { DEBUG(DEBUG_ERR, @@ -828,22 +837,17 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, ctdb_hash(&reckey)); if (dd != NULL) { /* - * The other node could not delete the - * record and it is the first node that - * failed. So we should remove it from - * the tree and update statistics. + * The remote node could not delete the + * record. Since other remote nodes can + * also fail, we just mark the record. */ - talloc_free(dd); - vdata->count.delete_list.remote_error++; - vdata->count.delete_list.left--; + dd->remote_fail_count++; } else { DEBUG(DEBUG_ERR, (__location__ " Failed to " "find record with hash 0x%08x coming " "back from TRY_DELETE_RECORDS " "control in delete list.\n", ctdb_hash(&reckey))); - vdata->count.delete_list.local_error++; - vdata->count.delete_list.left--; } rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec); -- 2.19.1 From 4f26c5945d9cb74e824977f2dae134763110876a Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 14 Feb 2018 15:23:07 +1100 Subject: [PATCH 07/11] ctdb-vacuum: Remove unnecessary check for zero records in delete list Since no records are deleted from RB tree during step 1, there is no need for the check. Run step 2 unconditionally. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit e15cdc652d76b37c58cd114215f00500991bc6b4) --- ctdb/server/ctdb_vacuum.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 8faf803efb9..2194b7f4da7 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -854,10 +854,6 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, } } - if (vdata->count.delete_list.left == 0) { - goto success; - } - /* * Step 2: * Delete the remaining records locally. @@ -873,8 +869,6 @@ static void ctdb_process_delete_list(struct ctdb_db_context *ctdb_db, "delete list for deletion.\n")); } -success: - if (vdata->count.delete_list.left != 0) { DEBUG(DEBUG_ERR, (__location__ " Vaccum db[%s] error: " "there are %u records left for deletion after " -- 2.19.1 From 2856b528c111b3c805bbd7afd5882f2b2a9ef92b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 12:04:32 +1100 Subject: [PATCH 08/11] ctdb-daemon: Drop implementation of RECEIVE_RECORDS control BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit d18385ea2aa93770996214d056a384a0244e7d73) --- ctdb/include/ctdb_private.h | 2 - ctdb/server/ctdb_control.c | 2 +- ctdb/server/ctdb_recover.c | 199 ------------------------------------ 3 files changed, 1 insertion(+), 202 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index ae154a08a0c..ea00bb12128 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -821,8 +821,6 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata); -int32_t ctdb_control_receive_records(struct ctdb_context *ctdb, - TDB_DATA indata, TDB_DATA *outdata); int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata); diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 848010e2310..c260b924529 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -650,7 +650,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_reload_public_ips(ctdb, c, async_reply); case CTDB_CONTROL_RECEIVE_RECORDS: - return ctdb_control_receive_records(ctdb, indata, outdata); + return control_not_implemented("RECEIVE_RECORDS", NULL); case CTDB_CONTROL_DB_DETACH: return ctdb_control_db_detach(ctdb, indata, client_id); diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 77c614df643..f05052e8466 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -1330,205 +1330,6 @@ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA inda return 0; } -/** - * Store a record as part of the vacuum process: - * This is called from the RECEIVE_RECORD control which - * the lmaster uses to send the current empty copy - * to all nodes for storing, before it lets the other - * nodes delete the records in the second phase with - * the TRY_DELETE_RECORDS control. - * - * Only store if we are not lmaster or dmaster, and our - * rsn is <= the provided rsn. Use non-blocking locks. - * - * return 0 if the record was successfully stored. - * return !0 if the record still exists in the tdb after returning. - */ -static int store_tdb_record(struct ctdb_context *ctdb, - struct ctdb_db_context *ctdb_db, - struct ctdb_rec_data_old *rec) -{ - TDB_DATA key, data, data2; - struct ctdb_ltdb_header *hdr, *hdr2; - int ret; - - key.dsize = rec->keylen; - key.dptr = &rec->data[0]; - data.dsize = rec->datalen; - data.dptr = &rec->data[rec->keylen]; - - if (ctdb_lmaster(ctdb, &key) == ctdb->pnn) { - DEBUG(DEBUG_INFO, (__location__ " Called store_tdb_record " - "where we are lmaster\n")); - return -1; - } - - if (data.dsize != sizeof(struct ctdb_ltdb_header)) { - DEBUG(DEBUG_ERR, (__location__ " Bad record size\n")); - return -1; - } - - hdr = (struct ctdb_ltdb_header *)data.dptr; - - /* use a non-blocking lock */ - if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, key) != 0) { - DEBUG(DEBUG_INFO, (__location__ " Failed to lock chain in non-blocking mode\n")); - return -1; - } - - data2 = tdb_fetch(ctdb_db->ltdb->tdb, key); - if (data2.dptr == NULL || data2.dsize < sizeof(struct ctdb_ltdb_header)) { - if (tdb_store(ctdb_db->ltdb->tdb, key, data, 0) == -1) { - DEBUG(DEBUG_ERR, (__location__ "Failed to store record\n")); - ret = -1; - goto done; - } - DEBUG(DEBUG_INFO, (__location__ " Stored record\n")); - ret = 0; - goto done; - } - - hdr2 = (struct ctdb_ltdb_header *)data2.dptr; - - if (hdr2->rsn > hdr->rsn) { - DEBUG(DEBUG_INFO, (__location__ " Skipping record with " - "rsn=%llu - called with rsn=%llu\n", - (unsigned long long)hdr2->rsn, - (unsigned long long)hdr->rsn)); - ret = -1; - goto done; - } - - /* do not allow vacuuming of records that have readonly flags set. */ - if (hdr->flags & CTDB_REC_RO_FLAGS) { - DEBUG(DEBUG_INFO,(__location__ " Skipping record with readonly " - "flags set\n")); - ret = -1; - goto done; - } - if (hdr2->flags & CTDB_REC_RO_FLAGS) { - DEBUG(DEBUG_INFO,(__location__ " Skipping record with readonly " - "flags set\n")); - ret = -1; - goto done; - } - - if (hdr2->dmaster == ctdb->pnn) { - DEBUG(DEBUG_INFO, (__location__ " Attempted to store record " - "where we are the dmaster\n")); - ret = -1; - goto done; - } - - if (tdb_store(ctdb_db->ltdb->tdb, key, data, 0) != 0) { - DEBUG(DEBUG_INFO,(__location__ " Failed to store record\n")); - ret = -1; - goto done; - } - - ret = 0; - -done: - tdb_chainunlock(ctdb_db->ltdb->tdb, key); - free(data2.dptr); - return ret; -} - - - -/** - * Try to store all these records as part of the vacuuming process - * and return the records we failed to store. - */ -int32_t ctdb_control_receive_records(struct ctdb_context *ctdb, - TDB_DATA indata, TDB_DATA *outdata) -{ - struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr; - struct ctdb_db_context *ctdb_db; - int i; - struct ctdb_rec_data_old *rec; - struct ctdb_marshall_buffer *records; - - if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) { - DEBUG(DEBUG_ERR, - (__location__ " invalid data in receive_records\n")); - return -1; - } - - ctdb_db = find_ctdb_db(ctdb, reply->db_id); - if (!ctdb_db) { - DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x\n", - reply->db_id)); - return -1; - } - - DEBUG(DEBUG_DEBUG, ("starting receive_records of %u records for " - "dbid 0x%x\n", reply->count, reply->db_id)); - - /* create a blob to send back the records we could not store */ - records = (struct ctdb_marshall_buffer *) - talloc_zero_size(outdata, - offsetof(struct ctdb_marshall_buffer, data)); - if (records == NULL) { - DEBUG(DEBUG_ERR, (__location__ " Out of memory\n")); - return -1; - } - records->db_id = ctdb_db->db_id; - - rec = (struct ctdb_rec_data_old *)&reply->data[0]; - for (i=0; icount; i++) { - TDB_DATA key, data; - - key.dptr = &rec->data[0]; - key.dsize = rec->keylen; - data.dptr = &rec->data[key.dsize]; - data.dsize = rec->datalen; - - if (data.dsize < sizeof(struct ctdb_ltdb_header)) { - DEBUG(DEBUG_CRIT, (__location__ " bad ltdb record " - "in indata\n")); - talloc_free(records); - return -1; - } - - /* - * If we can not store the record we must add it to the reply - * so the lmaster knows it may not purge this record. - */ - if (store_tdb_record(ctdb, ctdb_db, rec) != 0) { - size_t old_size; - struct ctdb_ltdb_header *hdr; - - hdr = (struct ctdb_ltdb_header *)data.dptr; - data.dptr += sizeof(*hdr); - data.dsize -= sizeof(*hdr); - - DEBUG(DEBUG_INFO, (__location__ " Failed to store " - "record with hash 0x%08x in vacuum " - "via RECEIVE_RECORDS\n", - ctdb_hash(&key))); - - old_size = talloc_get_size(records); - records = talloc_realloc_size(outdata, records, - old_size + rec->length); - if (records == NULL) { - DEBUG(DEBUG_ERR, (__location__ " Failed to " - "expand\n")); - return -1; - } - records->count++; - memcpy(old_size+(uint8_t *)records, rec, rec->length); - } - - rec = (struct ctdb_rec_data_old *)(rec->length + (uint8_t *)rec); - } - - *outdata = ctdb_marshall_finish(records); - - return 0; -} - - /* report capabilities */ -- 2.19.1 From 2f1db3f6e408f15bafa3c19be1c968d215cb485a Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 13:52:10 +1100 Subject: [PATCH 09/11] ctdb-protocol: Mark RECEIVE_RECORDS control obsolete BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 81dae71fa74bfd83a5701e4841b5a0a13cbe87a1) --- ctdb/protocol/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/protocol/protocol.h b/ctdb/protocol/protocol.h index 6abd0158b32..b868553f6e8 100644 --- a/ctdb/protocol/protocol.h +++ b/ctdb/protocol/protocol.h @@ -355,7 +355,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_SET_DB_STICKY = 133, CTDB_CONTROL_RELOAD_PUBLIC_IPS = 134, CTDB_CONTROL_TRAVERSE_ALL_EXT = 135, - CTDB_CONTROL_RECEIVE_RECORDS = 136, + CTDB_CONTROL_RECEIVE_RECORDS = 136, /* obsolete */ CTDB_CONTROL_IPREALLOCATED = 137, CTDB_CONTROL_GET_RUNSTATE = 138, CTDB_CONTROL_DB_DETACH = 139, -- 2.19.1 From 4c68912ae0b12db98e5d336e8d08c81c0148a30c Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 12:21:57 +1100 Subject: [PATCH 10/11] ctdb-protocol: Drop marshalling code for RECEIVE_RECORDS control BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 2f89bd96fb6c5e50cfc09604ceb6b96a94cb4f56) --- ctdb/protocol/protocol_api.h | 6 ------ ctdb/protocol/protocol_client.c | 29 ----------------------------- ctdb/protocol/protocol_control.c | 26 -------------------------- 3 files changed, 61 deletions(-) diff --git a/ctdb/protocol/protocol_api.h b/ctdb/protocol/protocol_api.h index 1cd5d7d66e6..6104c10e7b5 100644 --- a/ctdb/protocol/protocol_api.h +++ b/ctdb/protocol/protocol_api.h @@ -530,12 +530,6 @@ int ctdb_reply_control_set_db_sticky(struct ctdb_reply_control *reply); void ctdb_req_control_reload_public_ips(struct ctdb_req_control *request); int ctdb_reply_control_reload_public_ips(struct ctdb_reply_control *reply); -void ctdb_req_control_receive_records(struct ctdb_req_control *request, - struct ctdb_rec_buffer *recbuf); -int ctdb_reply_control_receive_records(struct ctdb_reply_control *reply, - TALLOC_CTX *mem_ctx, - struct ctdb_rec_buffer **recbuf); - void ctdb_req_control_ipreallocated(struct ctdb_req_control *request); int ctdb_reply_control_ipreallocated(struct ctdb_reply_control *reply); diff --git a/ctdb/protocol/protocol_client.c b/ctdb/protocol/protocol_client.c index a18af08e21a..9aa32a9bba7 100644 --- a/ctdb/protocol/protocol_client.c +++ b/ctdb/protocol/protocol_client.c @@ -1948,35 +1948,6 @@ int ctdb_reply_control_reload_public_ips(struct ctdb_reply_control *reply) /* CTDB_CONTROL_TRAVERSE_ALL_EXT */ -/* CTDB_CONTROL_RECEIVE_RECORDS */ - -void ctdb_req_control_receive_records(struct ctdb_req_control *request, - struct ctdb_rec_buffer *recbuf) -{ - request->opcode = CTDB_CONTROL_RECEIVE_RECORDS; - request->pad = 0; - request->srvid = 0; - request->client_id = 0; - request->flags = 0; - - request->rdata.opcode = CTDB_CONTROL_RECEIVE_RECORDS; - request->rdata.data.recbuf = recbuf; -} - -int ctdb_reply_control_receive_records(struct ctdb_reply_control *reply, - TALLOC_CTX *mem_ctx, - struct ctdb_rec_buffer **recbuf) -{ - if (reply->rdata.opcode != CTDB_CONTROL_RECEIVE_RECORDS) { - return EPROTO; - } - - if (reply->status == 0) { - *recbuf = talloc_steal(mem_ctx, reply->rdata.data.recbuf); - } - return reply->status; -} - /* CTDB_CONTROL_IPREALLOCATED */ void ctdb_req_control_ipreallocated(struct ctdb_req_control *request) diff --git a/ctdb/protocol/protocol_control.c b/ctdb/protocol/protocol_control.c index 12a78e1792d..0b88b5c8b5a 100644 --- a/ctdb/protocol/protocol_control.c +++ b/ctdb/protocol/protocol_control.c @@ -360,10 +360,6 @@ static size_t ctdb_req_control_data_len(struct ctdb_req_control_data *cd) len = ctdb_traverse_all_ext_len(cd->data.traverse_all_ext); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - len = ctdb_rec_buffer_len(cd->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; @@ -660,10 +656,6 @@ static void ctdb_req_control_data_push(struct ctdb_req_control_data *cd, &np); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); - break; - case CTDB_CONTROL_DB_DETACH: ctdb_uint32_push(&cd->data.db_id, buf, &np); break; @@ -988,11 +980,6 @@ static int ctdb_req_control_data_pull(uint8_t *buf, size_t buflen, &np); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, - &cd->data.recbuf, &np); - break; - case CTDB_CONTROL_DB_DETACH: ret = ctdb_uint32_pull(buf, buflen, &cd->data.db_id, &np); break; @@ -1368,10 +1355,6 @@ static size_t ctdb_reply_control_data_len(struct ctdb_reply_control_data *cd) case CTDB_CONTROL_TRAVERSE_ALL_EXT: break; - case CTDB_CONTROL_RECEIVE_RECORDS: - len = ctdb_rec_buffer_len(cd->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; @@ -1562,10 +1545,6 @@ static void ctdb_reply_control_data_push(struct ctdb_reply_control_data *cd, ctdb_db_statistics_push(cd->data.dbstats, buf, &np); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - ctdb_rec_buffer_push(cd->data.recbuf, buf, &np); - break; - case CTDB_CONTROL_GET_RUNSTATE: ctdb_uint32_push(&cd->data.runstate, buf, &np); break; @@ -1753,11 +1732,6 @@ static int ctdb_reply_control_data_pull(uint8_t *buf, size_t buflen, &cd->data.dbstats, &np); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - ret = ctdb_rec_buffer_pull(buf, buflen, mem_ctx, - &cd->data.recbuf, &np); - break; - case CTDB_CONTROL_GET_RUNSTATE: ret = ctdb_uint32_pull(buf, buflen, &cd->data.runstate, &np); break; -- 2.19.1 From d8c1f30f90e758b1da640034f62d849f3e03b970 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 15 Feb 2018 12:28:36 +1100 Subject: [PATCH 11/11] ctdb-tests: Drop code for RECEIVE_RECORDS control BUG: https://bugzilla.samba.org/show_bug.cgi?id=13641 Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 83b3c5670d85c607c1cf1ab8cfc2c967d4d16721) --- ctdb/tests/src/protocol_common_ctdb.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/ctdb/tests/src/protocol_common_ctdb.c b/ctdb/tests/src/protocol_common_ctdb.c index 03888f44e62..6a6573486a1 100644 --- a/ctdb/tests/src/protocol_common_ctdb.c +++ b/ctdb/tests/src/protocol_common_ctdb.c @@ -536,12 +536,6 @@ void fill_ctdb_req_control_data(TALLOC_CTX *mem_ctx, fill_ctdb_traverse_all_ext(mem_ctx, cd->data.traverse_all_ext); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer); - assert(cd->data.recbuf != NULL); - fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; @@ -958,10 +952,6 @@ void verify_ctdb_req_control_data(struct ctdb_req_control_data *cd, cd2->data.traverse_all_ext); break; - case CTDB_CONTROL_RECEIVE_RECORDS: - verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; @@ -1400,12 +1390,6 @@ void fill_ctdb_reply_control_data(TALLOC_CTX *mem_ctx, case CTDB_CONTROL_TRAVERSE_ALL_EXT: break; - case CTDB_CONTROL_RECEIVE_RECORDS: - cd->data.recbuf = talloc(mem_ctx, struct ctdb_rec_buffer); - assert(cd->data.recbuf != NULL); - fill_ctdb_rec_buffer(mem_ctx, cd->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; @@ -1758,10 +1742,6 @@ void verify_ctdb_reply_control_data(struct ctdb_reply_control_data *cd, case CTDB_CONTROL_TRAVERSE_ALL_EXT: break; - case CTDB_CONTROL_RECEIVE_RECORDS: - verify_ctdb_rec_buffer(cd->data.recbuf, cd2->data.recbuf); - break; - case CTDB_CONTROL_IPREALLOCATED: break; -- 2.19.1