[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <8760bcpdc8.fsf@openvz.org>
Date: Wed, 18 Oct 2017 20:36:55 +0300
From: Dmitry Monakhov <dmonakhov@...nvz.org>
To: linux-ext4@...r.kernel.org
CC: tytso@....edu
Subject: [PATCH] ext4: improve smp scalability for inode generation
->s_next_generation is protected by s_next_gen_lock, but its usage
pattern is very primitive and can be replaced with atomic ops.
This significantly improves the creation/unlink scenario on SMP systems;
for example, the lat_fs_create_unlink test [1] on a 2x E5-2680 (32 vCPU) system
shows a ~20% improvement.
| nr_tsk | wo/ patch | w/ patch |
|--------+-----------+----------|
| 1 | 137 | 140 |
| 2 | 224 | 233 |
| 4 | 356 | 372 |
| 8 | 439 | 519 |
| 16 | 443 | 585 |
| 32 | 598 | 695 |
| 64 | 559 | 707 |
| 128 | 385 | 437 |
Footnotes:
[1]https://github.com/dmonakhov/lmbench/blob/master/src/lat_fs_create_unlink.c
Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
---
fs/ext4/ext4.h | 3 +--
fs/ext4/ialloc.c | 4 +---
fs/ext4/ioctl.c | 6 ++----
fs/ext4/super.c | 8 ++++----
4 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e2abe01..6be1aa8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1392,8 +1392,7 @@ struct ext4_sb_info {
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
- spinlock_t s_next_gen_lock;
- u32 s_next_generation;
+ atomic_t s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ee82302..d12dabc 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1138,9 +1138,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_ino);
goto out;
}
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode->i_generation = atomic_inc_return(&sbi->s_next_generation);
/* Precompute checksum seed for inode metadata */
if (ext4_has_metadata_csum(sb)) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index afb66d4..7d8b1a5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -157,10 +157,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- inode_bl->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode_bl->i_generation = atomic_add_return(2, &sbi->s_next_generation);
+ inode->i_generation = inode_bl->i_generation -1;
ext4_discard_preallocations(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b104096..bfc6d2e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3419,7 +3419,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
int err = 0;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed;
-
+ u32 igen;
+
if ((data && !orig_data) || !sbi)
goto out_free_base;
@@ -3977,9 +3978,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
-
+ get_random_bytes(&igen, sizeof(u32));
+ atomic_set(&sbi->s_next_generation, igen);
setup_timer(&sbi->s_err_report, print_daily_error_info,
(unsigned long) sb);
--
1.8.3.1
Powered by blists - more mailing lists