[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <47270313.6060301@suse.de>
Date: Tue, 30 Oct 2007 18:10:27 +0800
From: Coly Li <coyli@...e.de>
To: linux-ext4@...r.kernel.org
Subject: [PATCH] ext4: dir inode reservation V2
Based on Mingming's feedback: there was a typo in the V1 patch which could break compilation.
This patch fixes that typo and also fixes the errors reported by scripts/checkpatch.pl.
BTW, it seems some duplicated functions were introduced into fs/ext4/inode.c in commit
92ae2b932ed127edff4354929c477f24112341b0 (maybe my mistake during git-pull). If any error is
reported from fs/ext4/inode.c, please fix it before applying this patch.
Thanks to Mingming for the feedback.
Signed-off-by: Coly Li <coyli@...e.de>
Cc: Andreas Dilger <adilger@....com>
Cc: Mingming Cao <cmm@...ibm.com>
---
fs/ext4/ialloc.c | 201 ++++++++++++++++++++++++++++++++++++++++++--
fs/ext4/super.c | 16 ++++
include/linux/ext4_fs.h | 8 ++
include/linux/ext4_fs_sb.h | 2 +
4 files changed, 218 insertions(+), 9 deletions(-)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index d775170..1c79ca4 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -7,9 +7,11 @@
* Universite Pierre et Marie Curie (Paris VI)
*
* BSD ufs-inspired inode and directory allocation by
- * Stephen Tweedie (sct@...hat.com), 1993
+ * Stephen Tweedie (sct@...hat.com), 1993
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@...p.rutgers.edu), 1995
+ * Dir inode reservation support by
+ * Coly Li (coyli@...e.de), 2007
*/
#include <linux/time.h>
@@ -130,6 +132,41 @@ error_out:
}
/*
+ * Try to give trailing, completely free reservation windows of a block
+ * group back to the "unused inode table" area after a directory inode
+ * has been freed (called from ext4_free_inode() when DIR_IRESERVE is set).
+ *
+ * Nothing is done unless the freed inode sits on an s_dir_ireserve_nr
+ * boundary (the reservation sizes are powers of two, so a mask test is
+ * used).  Otherwise the in-use part of the inode bitmap is walked
+ * backwards in s_dir_ireserve_nr sized windows; every window without a
+ * set bit is added to bg_itable_unused.  The walk stops at the first
+ * window that still contains an allocated inode.  Group 0 always keeps at
+ * least EXT4_DIR_IRESERVE_NORMAL inodes accounted as in use.
+ *
+ * @handle is not used here.  bg_checksum is not recomputed here.
+ *
+ * The caller must already hold the block group's sb_bgl_lock() spinlock.
+ */
+static void ext4_update_itable_unused(handle_t *handle, struct inode *inode,
+		struct ext4_group_desc *gdp, struct buffer_head *bitmap_bh)
+{
+	struct super_block *sb;
+	int bit, offset, start;
+	int free, group, ires;
+
+	sb = inode->i_sb;
+	ires = EXT4_SB(sb)->s_dir_ireserve_nr;
+	bit = (inode->i_ino - 1) % EXT4_INODES_PER_GROUP(sb);
+	/* only an inode on a reservation boundary triggers the rescan */
+	if (bit & (ires - 1))
+		return;
+	/* "free" is the size of the in-use part of the inode table */
+	free = EXT4_INODES_PER_GROUP(sb) - le16_to_cpu(gdp->bg_itable_unused);
+	if (free < ires)
+		return;
+	group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	do {
+		/*
+		 * The in-use size need not be a multiple of ires, so the
+		 * last window may be short: never start the bit search at
+		 * a negative offset.
+		 */
+		start = free > ires ? free - ires : 0;
+		offset = ext4_find_next_bit(bitmap_bh->b_data, free, start);
+		if (offset >= free)
+			free -= ires;	/* window is empty, release it */
+		else
+			break;		/* window still holds an inode */
+	} while (free > 0);
+	if (free < 0)
+		free = 0;
+	if (group == 0 && (free < EXT4_DIR_IRESERVE_NORMAL))
+		free = EXT4_DIR_IRESERVE_NORMAL;
+	gdp->bg_itable_unused = cpu_to_le16(
+				EXT4_INODES_PER_GROUP(sb) - free);
+}
+
+/*
* NOTE! When we get the inode, we're the only people
* that have access to it, and as such there are no
* race conditions we have to worry about. The inode
@@ -225,9 +262,13 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_inodes_count = cpu_to_le16(
le16_to_cpu(gdp->bg_free_inodes_count) + 1);
- if (is_directory)
+ if (is_directory) {
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ if (test_opt(sb, DIR_IRESERVE))
+ ext4_update_itable_unused(
+ handle, inode, gdp, bitmap_bh);
+ }
gdp->bg_checksum = ext4_group_desc_csum(sbi,
block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -264,9 +305,10 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
ext4_grpnum_t *best_group)
{
ext4_grpnum_t ngroups = EXT4_SB(sb)->s_groups_count;
+ int ires = EXT4_SB(sb)->s_dir_ireserve_nr;
unsigned int freei, avefreei;
- struct ext4_group_desc *desc, *best_desc = NULL;
- ext4_grpnum_t group;
+ struct ext4_group_desc *desc, *best_desc = NULL, *best_ires_desc = NULL;
+ ext4_grpnum_t group, best_ires_group = -1;
int ret = -1;
freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
@@ -285,7 +327,21 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
best_desc = desc;
ret = 0;
}
+ if (test_opt(sb, DIR_IRESERVE)) {
+ if ((best_ires_desc &&
+ (le16_to_cpu(desc->bg_itable_unused) >
+ le16_to_cpu(best_ires_desc->bg_itable_unused))) ||
+ ((!best_ires_desc) &&
+ (le16_to_cpu(desc->bg_itable_unused) >= ires))) {
+ best_ires_group = group;
+ best_ires_desc = desc;
+ ret = 0;
+ }
+ }
}
+ if (test_opt(sb, DIR_IRESERVE) && best_ires_desc)
+ *best_group = best_ires_group;
+
return ret;
}
@@ -354,6 +410,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
desc = ext4_get_group_desc(sb, grp, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
+ if (test_opt(sb, DIR_IRESERVE) &&
+ (le16_to_cpu(desc->bg_itable_unused)
+ < EXT4_SB(sb)->s_dir_ireserve_nr))
+ continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
@@ -390,6 +450,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
desc = ext4_get_group_desc(sb, *group, NULL);
if (!desc || !desc->bg_free_inodes_count)
continue;
+ if (test_opt(sb, DIR_IRESERVE) &&
+ (le16_to_cpu(desc->bg_itable_unused)
+ < EXT4_SB(sb)->s_dir_ireserve_nr))
+ continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
@@ -478,6 +542,105 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
return -1;
}
+/*
+ * Choose an inode bitmap offset for a new inode when directory inode
+ * reservation (DIR_IRESERVE) is enabled.
+ *
+ * Non-directory (@mode is not S_ISDIR): no bitmap or group descriptor is
+ * touched.  *ino is set to dir->i_ino % EXT4_INODES_PER_GROUP(sb) as the
+ * position to start searching from and *group is left unchanged.
+ *
+ * Directory: block groups are visited starting at *group and wrapping
+ * around, at most s_groups_count of them.  In the first group whose
+ * bg_itable_unused is at least s_dir_ireserve_nr, the first bit of the
+ * unused area is set in the inode bitmap (two attempts per group if the
+ * bit is lost to a concurrent allocator).  bg_itable_unused is then
+ * shrunk so that s_dir_ireserve_nr inodes starting at that bit are no
+ * longer counted as unused, and *group / *ino are set to the chosen
+ * group and bitmap offset.
+ *
+ * Returns 0 on success and -ENOSPC on every failure (no suitable group,
+ * bitmap read failure, journal error).
+ *
+ * NOTE(review): when a journal call fails after the bitmap bit has been
+ * set, the bit is not cleared again before returning -ENOSPC.
+ */
+static int ext4_ino_from_ireserve(handle_t *handle, struct inode *dir,
+		int mode, ext4_grpnum_t *group, unsigned long *ino)
+{
+	struct ext4_group_desc *gdp = NULL;
+	struct super_block *sb;
+	struct ext4_sb_info *sbi;
+	struct buffer_head *gdp_bh = NULL, *bitmap_bh = NULL;
+	int free;
+	int i;
+	int retries;
+	int gdp_dirty = 0;
+	unsigned long ires_ino;
+	int ires_group = *group;
+
+	sb = dir->i_sb;
+	sbi = EXT4_SB(sb);
+
+	/* if the inode number is not for directory,
+	 * only try to allocate after directory's inode
+	 */
+	if (!S_ISDIR(mode)) {
+		ires_ino = dir->i_ino % EXT4_INODES_PER_GROUP(sb);
+		goto find;
+	}
+
+	/* reserve inodes for new directory */
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		gdp = ext4_get_group_desc(sb, ires_group, &gdp_bh);
+		if (!gdp)
+			goto fail;
+		retries = 2;
+still_reserve_in_this_group:
+		if (le16_to_cpu(gdp->bg_itable_unused) >=
+				sbi->s_dir_ireserve_nr) {
+
+			brelse(bitmap_bh);
+			bitmap_bh = read_inode_bitmap(sb, ires_group);
+			if (!bitmap_bh)
+				goto fail;
+
+			BUFFER_TRACE(bitmap_bh, "get_write_access");
+			if (ext4_journal_get_write_access(
+					handle, bitmap_bh) != 0)
+				goto fail;
+			/* first bit of the still unused inode table area */
+			free = EXT4_INODES_PER_GROUP(sb) -
+				le16_to_cpu(gdp->bg_itable_unused);
+			if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, ires_group),
+						free, bitmap_bh->b_data)) {
+				/* we won it */
+				BUFFER_TRACE(bitmap_bh,
+					"call ext4_journal_dirty_metadata");
+				if (ext4_journal_dirty_metadata(handle,
+							bitmap_bh) != 0)
+					goto fail;
+				ires_ino = free;
+				goto find;
+			}
+			/* we lost it */
+			jbd2_journal_release_buffer(handle, bitmap_bh);
+			if (--retries > 0)
+				goto still_reserve_in_this_group;
+		}
+		if (++ires_group == sbi->s_groups_count)
+			ires_group = 0;
+	}
+	goto fail;
+find:
+	if (S_ISDIR(mode)) {
+		/* end of the window reserved for the new directory */
+		free = ires_ino + sbi->s_dir_ireserve_nr;
+		if (free > EXT4_INODES_PER_GROUP(sb))
+			free = EXT4_INODES_PER_GROUP(sb);
+
+		/*
+		 * Getting journal write access may block, so it has to
+		 * happen before the block group spinlock is taken.
+		 */
+		BUFFER_TRACE(gdp_bh, "call ext4_journal_get_write_access");
+		if (ext4_journal_get_write_access(handle, gdp_bh))
+			goto fail;
+
+		spin_lock(sb_bgl_lock(sbi, ires_group));
+		if ((EXT4_INODES_PER_GROUP(sb) - free) <
+				le16_to_cpu(gdp->bg_itable_unused)) {
+			/* on-disk field is little-endian */
+			gdp->bg_itable_unused = cpu_to_le16(
+				EXT4_INODES_PER_GROUP(sb) - free);
+			gdp_dirty = 1;
+		}
+		spin_unlock(sb_bgl_lock(sbi, ires_group));
+
+		if (gdp_dirty) {
+			BUFFER_TRACE(gdp_bh,
+				"call ext4_journal_dirty_metadata");
+			if (ext4_journal_dirty_metadata(handle, gdp_bh) != 0)
+				goto fail;
+		} else {
+			/* descriptor left untouched, drop the write access */
+			jbd2_journal_release_buffer(handle, gdp_bh);
+		}
+		brelse(bitmap_bh);
+		*group = ires_group;
+	}
+	*ino = ires_ino;
+	return 0;
+fail:
+	brelse(bitmap_bh);
+	return -ENOSPC;
+}
+
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
@@ -541,7 +704,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
goto fail;
ino = 0;
-
+ if (test_opt(sb, DIR_IRESERVE)) {
+ err = ext4_ino_from_ireserve(handle, dir,
+ mode, &group, &ino);
+ if ((!err) && S_ISDIR(mode))
+ goto got;
+ }
repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
@@ -633,6 +801,20 @@ got:
}
spin_lock(sb_bgl_lock(sbi, group));
+
+ if (test_opt(sb, DIR_IRESERVE)) {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ if (ino > free) {
+ free += sbi->s_dir_ireserve_nr;
+ free = (free + sbi->s_dir_ireserve_nr - 1) &
+ ~(sbi->s_dir_ireserve_nr - 1);
+ if (free > EXT4_INODES_PER_GROUP(sb))
+ free = EXT4_INODES_PER_GROUP(sb);
+ gdp->bg_itable_unused = cpu_to_le16(
+ EXT4_INODES_PER_GROUP(sb) - free);
+ }
+ }
/* If we didn't allocate from within the initialized part of the inode
* table then we need to initialize up to this inode. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
@@ -655,12 +837,13 @@ got:
/*
* Check the relative inode number against the last used
* relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
+ * we need to update the bg_itable_unused count. If
+ * directory inode reservation is enabled, try to make it
+ * align on a s_dir_ireserve_nr boundary.
*/
if (ino > free)
- gdp->bg_itable_unused =
- cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ gdp->bg_itable_unused = cpu_to_le16(
+ EXT4_INODES_PER_GROUP(sb) - ino);
}
gdp->bg_free_inodes_count =
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 37afc41..a9b87c3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -874,6 +874,7 @@ enum {
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_dir_ireserve_low, Opt_dir_ireserve_normal, Opt_dir_ireserve_high,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -919,6 +920,9 @@ static match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
+ {Opt_dir_ireserve_low, "dir_ireserve=low"},
+ {Opt_dir_ireserve_normal, "dir_ireserve=normal"},
+ {Opt_dir_ireserve_high, "dir_ireserve=high"},
{Opt_offusrjquota, "usrjquota="},
{Opt_usrjquota, "usrjquota=%s"},
{Opt_offgrpjquota, "grpjquota="},
@@ -1297,6 +1301,18 @@ clear_qf_name:
return 0;
sbi->s_stripe = option;
break;
+ case Opt_dir_ireserve_low:
+ set_opt(sbi->s_mount_opt, DIR_IRESERVE);
+ sbi->s_dir_ireserve_nr = EXT4_DIR_IRESERVE_LOW;
+ break;
+ case Opt_dir_ireserve_normal:
+ set_opt(sbi->s_mount_opt, DIR_IRESERVE);
+ sbi->s_dir_ireserve_nr = EXT4_DIR_IRESERVE_NORMAL;
+ break;
+ case Opt_dir_ireserve_high:
+ set_opt(sbi->s_mount_opt, DIR_IRESERVE);
+ sbi->s_dir_ireserve_nr = EXT4_DIR_IRESERVE_HIGH;
+ break;
default:
printk (KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" "
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 8d56b86..d9493e3 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -92,6 +92,13 @@ struct ext4_allocation_request {
#define EXT4_GOOD_OLD_FIRST_INO 11
/*
+ * Inodes reserved per new directory (dir_ireserve=low/normal/high)
+ */
+#define EXT4_DIR_IRESERVE_LOW 16
+#define EXT4_DIR_IRESERVE_NORMAL 64
+#define EXT4_DIR_IRESERVE_HIGH 128
+
+/*
* Maximal count of links to a file
*/
#define EXT4_LINK_MAX 65000
@@ -502,6 +509,7 @@ do { \
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_DELALLOC 0x2000000 /* Delalloc support */
#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
+#define EXT4_MOUNT_DIR_IRESERVE 0x10000000/* dir inode reservation support */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 4098d4f..fa5e866 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -147,6 +147,8 @@ struct ext4_sb_info {
/* locality groups */
struct ext4_locality_group *s_locality_groups;
+ /* directory inodes reservation number */
+ int s_dir_ireserve_nr;
};
#define EXT4_GROUP_INFO(sb, group) \
EXT4_SB(sb)->s_group_info[(group) >> EXT4_DESC_PER_BLOCK_BITS(sb)] \
--
Coly Li
SuSE PRC Labs
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists