From c491ad6532a3e9f664f96877d1ea1d00f5428016 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 11 Feb 2014 14:23:28 +1100 Subject: [PATCH 1/6] ctdb-vacuum: Track time for vacuuming in database statistics Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit a0628e317df76c7c38a7cca9c3090077fa352899) --- ctdb/include/ctdb_protocol.h | 3 +++ ctdb/server/ctdb_vacuum.c | 1 + ctdb/tools/ctdb.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/ctdb/include/ctdb_protocol.h b/ctdb/include/ctdb_protocol.h index 725e426..15d9fc5 100644 --- a/ctdb/include/ctdb_protocol.h +++ b/ctdb/include/ctdb_protocol.h @@ -717,6 +717,9 @@ struct ctdb_db_statistics { struct latency_counter latency; uint32_t buckets[MAX_COUNT_BUCKETS]; } locks; + struct { + struct latency_counter latency; + } vacuum; uint32_t db_ro_delegations; uint32_t db_ro_revokes; uint32_t hop_count_bucket[MAX_COUNT_BUCKETS]; diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 35f1fe1..8cd37aa 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -1364,6 +1364,7 @@ static int vacuum_child_destructor(struct ctdb_vacuum_child_context *child_ctx) struct ctdb_db_context *ctdb_db = child_ctx->vacuum_handle->ctdb_db; struct ctdb_context *ctdb = ctdb_db->ctdb; + CTDB_UPDATE_DB_LATENCY(ctdb_db, "vacuum", vacuum.latency, l); DEBUG(DEBUG_INFO,("Vacuuming took %.3f seconds for database %s\n", l, ctdb_db->db_name)); if (child_ctx->child_pid != -1) { diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 1ba2be1..1ec0748 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -725,6 +725,14 @@ static int control_dbstatistics(struct ctdb_context *ctdb, int argc, const char 0.0), dbstat->locks.latency.max, dbstat->locks.latency.num); + printf(" %-30s %.6f/%.6f/%.6f sec out of %d\n", + "vacuum_latency MIN/AVG/MAX", + dbstat->vacuum.latency.min, + (dbstat->vacuum.latency.num ? 
+ dbstat->vacuum.latency.total /dbstat->vacuum.latency.num : + 0.0), + dbstat->vacuum.latency.max, + dbstat->vacuum.latency.num); num_hot_keys = 0; for (i=0; i<dbstat->num_hot_keys; i++) { if (dbstat->hot_keys[i].count > 0) { -- 1.9.3 From c12548330443d9f5e24930ab47bb1ae98f1d781b Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 21 Feb 2014 14:58:00 +1100 Subject: [PATCH 2/6] ctdb-vacuum: Stagger vacuuming child processes This prevents multiple child processes being forked at the same time for vacuuming TDBs. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit e4597f8771f42cf315bd163c18b2f27147d3de5f) --- ctdb/server/ctdb_vacuum.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 8cd37aa..0f1de19 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -1451,6 +1451,17 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te, return; } + /* Do not allow multiple vacuuming child processes to be active at the + * same time. If there is vacuuming child process active, delay + * new vacuuming event to stagger vacuuming events. + */ + if (ctdb->vacuumers != NULL) { + event_add_timed(ctdb->ev, vacuum_handle, + timeval_current_ofs(0, 500*1000), + ctdb_vacuum_event, vacuum_handle); + return; + } + child_ctx = talloc(vacuum_handle, struct ctdb_vacuum_child_context); if (child_ctx == NULL) { DEBUG(DEBUG_CRIT, (__location__ " Failed to allocate child context for vacuuming of %s\n", ctdb_db->db_name)); -- 1.9.3 From 9e6c57d2c48f73cbb3abe68694144b21cfd7e7cc Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 14 Apr 2014 13:18:41 +1000 Subject: [PATCH 3/6] ctdb-vacuum: Use non-blocking lock when traversing delete queue This avoids vacuuming getting in the way of ctdb daemon to process record requests. 
Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit d35f512cd972ac1f732fe998b2179242d042082d) --- ctdb/server/ctdb_vacuum.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 0f1de19..892dc07 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -446,12 +446,8 @@ static int delete_queue_traverse(void *param, void *data) vdata->count.delete_queue.total++; - res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key); + res = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, dd->key); if (res != 0) { - DEBUG(DEBUG_ERR, - (__location__ " Error getting chainlock on record with " - "key hash [0x%08x] on database db[%s].\n", - hash, ctdb_db->db_name)); vdata->count.delete_queue.error++; return 0; } -- 1.9.3 From acf146c4e4133763f0cee3c2cd0b1ff1b8b1333a Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Thu, 6 Nov 2014 09:33:50 +1100 Subject: [PATCH 4/6] ctdb-vacuum: Use non-blocking lock when traversing delete tree This avoids vacuuming getting in the way of ctdb daemon to process record requests. 
Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit dbb1958284657f26a868705e5f9612bc377fd5e0) --- ctdb/server/ctdb_vacuum.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ctdb/server/ctdb_vacuum.c b/ctdb/server/ctdb_vacuum.c index 892dc07..d678ff9 100644 --- a/ctdb/server/ctdb_vacuum.c +++ b/ctdb/server/ctdb_vacuum.c @@ -317,12 +317,8 @@ static int delete_marshall_traverse_first(void *param, void *data) uint32_t hash = ctdb_hash(&(dd->key)); int res; - res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key); + res = tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, dd->key); if (res != 0) { - DEBUG(DEBUG_ERR, - (__location__ " Error getting chainlock on record with " - "key hash [0x%08x] on database db[%s].\n", - hash, ctdb_db->db_name)); recs->vdata->count.delete_list.skipped++; recs->vdata->count.delete_list.left--; talloc_free(dd); -- 1.9.3 From a9cc9607e534f8e5958def6221aaedebd391fa53 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Mon, 14 Apr 2014 14:53:25 +1000 Subject: [PATCH 5/6] ctdb-vacuum: Do not delete VACUUM MIGRATED records immediately Such records should be processed by the local vacuuming daemon to ensure that all the remote copies have been deleted first. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke (cherry picked from commit 257311e337065f089df688cbf261d2577949203d) --- ctdb/server/ctdb_ltdb_server.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index 8fb2bc7..9ac2217 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -115,6 +115,11 @@ static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db, * fails. So storing the empty record makes sure that we do not * need to change the client code. 
*/ + if ((header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED) && + (ctdb_db->ctdb->pnn == header->dmaster)) { + keep = true; + schedule_for_deletion = true; + } if (!(header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED)) { keep = true; } else if (ctdb_db->ctdb->pnn != header->dmaster) { -- 1.9.3 From 08abf653214e9663adf47518df93959167679a6a Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Wed, 23 Apr 2014 18:02:39 +1000 Subject: [PATCH 6/6] ctdb-recoverd: Process all the records for vacuum fetch in a loop Processing one migration request at a time is very slow and processing a batch of records can take longer than VacuumInterval. This causes subsequent vacuum fetch requests to be dropped. The dropped records can accumulate quickly and will cause the vacuum database traverse to be quite expensive. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke Autobuild-User(master): Amitay Isaacs Autobuild-Date(master): Fri Dec 5 17:06:58 CET 2014 on sn-devel-104 (cherry picked from commit 959b9ea0ef85c57ffc84d66a6e5e855868943391) --- ctdb/server/ctdb_recoverd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index fd07e64..f86f57e 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -910,9 +910,7 @@ static void vacuum_fetch_next(struct vacuum_info *v); */ static void vacuum_fetch_callback(struct ctdb_client_call_state *state) { - struct vacuum_info *v = talloc_get_type(state->async.private_data, struct vacuum_info); talloc_free(state); - vacuum_fetch_next(v); } @@ -977,8 +975,7 @@ static void vacuum_fetch_next(struct vacuum_info *v) return; } state->async.fn = vacuum_fetch_callback; - state->async.private_data = v; - return; + state->async.private_data = NULL; } talloc_free(v); -- 1.9.3