[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241105034428.578701-1-dongyangli@ddn.com>
Date: Tue, 5 Nov 2024 14:44:28 +1100
From: Li Dongyang <dongyangli@....com>
To: linux-ext4@...r.kernel.org
Cc: Andreas Dilger <adilger@...ger.ca>,
Alex Zhuravlev <bzzz@...mcloud.com>
Subject: [PATCH V2] jbd2: use rhashtable for revoke records during replay
Resizable hashtable should improve journal replay time when
we have million of revoke records.
Notice that rhashtable is used during replay only,
as removal with list_del() is less expensive and it's still used
during regular processing.
before:
1048576 records - 95 seconds
2097152 records - 580 seconds
after:
1048576 records - 2 seconds
2097152 records - 3 seconds
4194304 records - 7 seconds
Signed-off-by: Alex Zhuravlev <bzzz@...mcloud.com>
Signed-off-by: Li Dongyang <dongyangli@....com>
---
v1->v2:
include rhashtable header in jbd2.h
---
fs/jbd2/recovery.c | 4 +++
fs/jbd2/revoke.c | 65 +++++++++++++++++++++++++++++++-------------
include/linux/jbd2.h | 7 +++++
3 files changed, 57 insertions(+), 19 deletions(-)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 667f67342c52..d9287439171c 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -294,6 +294,10 @@ int jbd2_journal_recover(journal_t *journal)
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
+ err = jbd2_journal_init_recovery_revoke(journal);
+ if (err)
+ return err;
+
/*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..d6e96099e9c9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -90,6 +90,7 @@
#include <linux/bio.h>
#include <linux/log2.h>
#include <linux/hash.h>
+#include <linux/rhashtable.h>
#endif
static struct kmem_cache *jbd2_revoke_record_cache;
@@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_table_cache;
struct jbd2_revoke_record_s
{
- struct list_head hash;
+ union {
+ struct list_head hash;
+ struct rhash_head linkage;
+ };
tid_t sequence; /* Used for recovery only */
unsigned long long blocknr;
};
@@ -680,13 +684,22 @@ static void flush_descriptor(journal_t *journal,
* single block.
*/
+static const struct rhashtable_params revoke_rhashtable_params = {
+ .key_len = sizeof(unsigned long long),
+ .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr),
+ .head_offset = offsetof(struct jbd2_revoke_record_s, linkage),
+};
+
int jbd2_journal_set_revoke(journal_t *journal,
unsigned long long blocknr,
tid_t sequence)
{
struct jbd2_revoke_record_s *record;
+ gfp_t gfp_mask = GFP_NOFS;
+ int err;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (record) {
/* If we have multiple occurrences, only record the
* latest sequence number in the hashed record */
@@ -694,7 +707,22 @@ int jbd2_journal_set_revoke(journal_t *journal,
record->sequence = sequence;
return 0;
}
- return insert_revoke_hash(journal, blocknr, sequence);
+
+ if (journal_oom_retry)
+ gfp_mask |= __GFP_NOFAIL;
+ record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
+ if (!record)
+ return -ENOMEM;
+
+ record->sequence = sequence;
+ record->blocknr = blocknr;
+ err = rhashtable_lookup_insert_fast(&journal->j_revoke_rhtable,
+ &record->linkage,
+ revoke_rhashtable_params);
+ if (err)
+ kmem_cache_free(jbd2_revoke_record_cache, record);
+
+ return err;
}
/*
@@ -710,7 +738,8 @@ int jbd2_journal_test_revoke(journal_t *journal,
{
struct jbd2_revoke_record_s *record;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (!record)
return 0;
if (tid_gt(sequence, record->sequence))
@@ -718,6 +747,17 @@ int jbd2_journal_test_revoke(journal_t *journal,
return 1;
}
+int jbd2_journal_init_recovery_revoke(journal_t *journal)
+{
+ return rhashtable_init(&journal->j_revoke_rhtable,
+ &revoke_rhashtable_params);
+}
+
+static void jbd2_revoke_record_free(void *ptr, void *arg)
+{
+ kmem_cache_free(jbd2_revoke_record_cache, ptr);
+}
+
/*
* Finally, once recovery is over, we need to clear the revoke table so
* that it can be reused by the running filesystem.
@@ -725,19 +765,6 @@ int jbd2_journal_test_revoke(journal_t *journal,
void jbd2_journal_clear_revoke(journal_t *journal)
{
- int i;
- struct list_head *hash_list;
- struct jbd2_revoke_record_s *record;
- struct jbd2_revoke_table_s *revoke;
-
- revoke = journal->j_revoke;
-
- for (i = 0; i < revoke->hash_size; i++) {
- hash_list = &revoke->hash_table[i];
- while (!list_empty(hash_list)) {
- record = (struct jbd2_revoke_record_s*) hash_list->next;
- list_del(&record->hash);
- kmem_cache_free(jbd2_revoke_record_cache, record);
- }
- }
+ rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
+ jbd2_revoke_record_free, NULL);
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..2b0aa1e159b8 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
#include <linux/blkdev.h>
+#include <linux/rhashtable-types.h>
#include <crypto/hash.h>
#endif
@@ -1122,6 +1123,11 @@ struct journal_s
*/
struct jbd2_revoke_table_s *j_revoke_table[2];
+ /**
+ * @j_revoke_rhtable: rhashtable for revoke records during recovery
+ */
+ struct rhashtable j_revoke_rhtable;
+
/**
* @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
*/
@@ -1644,6 +1650,7 @@ extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
+extern int jbd2_journal_init_recovery_revoke(journal_t *);
extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal);
extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
--
2.47.0
Powered by blists - more mailing lists