[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1334863211-19504-4-git-send-email-tytso@mit.edu>
Date: Thu, 19 Apr 2012 15:20:11 -0400
From: Theodore Ts'o <tytso@....edu>
To: linux-fsdevel@...r.kernel.org
Cc: Ext4 Developers List <linux-ext4@...r.kernel.org>,
Theodore Ts'o <tytso@....edu>
Subject: [PATCH, RFC 3/3] ext4: use the O_HOT and O_COLD open flags to influence inode allocation
Wire up the O_HOT and O_COLD open flags so that, when an inode is
being created, they can influence which part of the disk gets used
on rotational storage devices.
Signed-off-by: "Theodore Ts'o" <tytso@....edu>
---
fs/ext4/ext4.h | 8 +++++++-
fs/ext4/ialloc.c | 33 +++++++++++++++++++++++++++------
fs/ext4/migrate.c | 2 +-
fs/ext4/namei.c | 15 +++++++++++----
4 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0e01e90..6539c9a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1553,6 +1553,12 @@ struct ext4_dir_entry_2 {
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
+ * Flags for ext4_new_inode()
+ */
+#define EXT4_NEWI_HOT 0x0001
+#define EXT4_NEWI_COLD 0x0002
+
+/*
* If we ever get support for fs block sizes > page_size, we'll need
* to remove the #if statements in the next two functions...
*/
@@ -1850,7 +1856,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
/* ialloc.c */
extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
const struct qstr *qstr, __u32 goal,
- uid_t *owner);
+ uid_t *owner, int flags);
extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 409c2ee..3dcc8c8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -363,7 +363,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group, umode_t mode,
- const struct qstr *qstr)
+ const struct qstr *qstr, int flags)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -508,13 +508,20 @@ fallback_retry:
}
static int find_group_other(struct super_block *sb, struct inode *parent,
- ext4_group_t *group, umode_t mode)
+ ext4_group_t *group, umode_t mode, int flags)
{
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
+ if ((flags & EXT4_NEWI_HOT) && (ngroups > 3) &&
+ (parent_group > ngroups / 3))
+ parent_group = 0;
+ if ((flags & EXT4_NEWI_COLD) && (ngroups > 3) &&
+ (parent_group < (2 * (ngroups / 3))))
+ parent_group = 2 * (ngroups / 3);
+
/*
* Try to place the inode is the same flex group as its
* parent. If we can't find space, use the Orlov algorithm to
@@ -550,7 +557,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
- return find_group_orlov(sb, parent, group, mode, NULL);
+ return find_group_orlov(sb, parent, group, mode, NULL, flags);
}
/*
@@ -614,7 +621,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* group to find a free inode.
*/
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
- const struct qstr *qstr, __u32 goal, uid_t *owner)
+ const struct qstr *qstr, __u32 goal, uid_t *owner,
+ int flags)
{
struct super_block *sb;
struct buffer_head *inode_bitmap_bh = NULL;
@@ -643,6 +651,19 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
ei = EXT4_I(inode);
sbi = EXT4_SB(sb);
+ if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
+ flags &= ~(EXT4_NEWI_HOT | EXT4_NEWI_COLD);
+
+ /*
+ * We will only allow the HOT flag if the user passes the
+ * reserved uid/gid check, or if she has CAP_SYS_RESOURCE
+ */
+ if ((flags & EXT4_NEWI_HOT) &&
+ !(sbi->s_resuid == current_fsuid() ||
+ ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
+ capable(CAP_SYS_RESOURCE)))
+ flags &= ~EXT4_NEWI_HOT;
+
if (!goal)
goal = sbi->s_inode_goal;
@@ -654,9 +675,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
}
if (S_ISDIR(mode))
- ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
+ ret2 = find_group_orlov(sb, dir, &group, mode, qstr, flags);
else
- ret2 = find_group_other(sb, dir, &group, mode);
+ ret2 = find_group_other(sb, dir, &group, mode, flags);
got_group:
EXT4_I(dir)->i_last_alloc_group = group;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f39f80f..2b3d65c 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -469,7 +469,7 @@ int ext4_ext_migrate(struct inode *inode)
owner[0] = inode->i_uid;
owner[1] = inode->i_gid;
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
- S_IFREG, NULL, goal, owner);
+ S_IFREG, NULL, goal, owner, 0);
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6f48ff8..222a419 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1742,6 +1742,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
handle_t *handle;
struct inode *inode;
int err, retries = 0;
+ int flags = 0;
dquot_initialize(dir);
@@ -1755,7 +1756,13 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+ if (op && op->open_flag & O_HOT)
+ flags |= EXT4_NEWI_HOT;
+ if (op && op->open_flag & O_COLD)
+ flags |= EXT4_NEWI_COLD;
+
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0,
+ NULL, flags);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
@@ -1791,7 +1798,7 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
+ inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -1831,7 +1838,7 @@ retry:
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
- &dentry->d_name, 0, NULL);
+ &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2278,7 +2285,7 @@ retry:
ext4_handle_sync(handle);
inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
- &dentry->d_name, 0, NULL);
+ &dentry->d_name, 0, NULL, 0);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
--
1.7.10.rc3
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists