Message-ID: <d9885f0f0711151205v1d891fa6k45740c14a7d6cbbe@mail.gmail.com>
Date: Thu, 15 Nov 2007 12:05:58 -0800
From: "Abhishek Rai" <abhishekrai@...gle.com>
To: "Abhishek Rai" <abhishekrai@...gle.com>, linux-ext4@...r.kernel.org
Subject: Re: ext3 metaclustering performance numbers and updated patch
Please ignore the previous patch; it has an issue with operator precedence
in ext3_get_grp_metacluster(). Try this one instead:
Signed-off-by: Abhishek Rai <abhishekrai@...gle.com>
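
A quick note on the bug being fixed, for anyone skimming the archive: the
operator precedence problem mentioned above is presumably the usual C
pitfall where '+' binds tighter than '>>', so an unparenthesized range
computation like "start + blocks >> 7" shifts the whole sum instead of
adding a shifted term. A minimal standalone sketch with made-up numbers
(illustrative only, not the actual ext3 code):

/* Hypothetical illustration of the '+' vs '>>' precedence pitfall. */
#include <stdio.h>

int main(void)
{
        int blocks_per_group = 32768;           /* e.g. 4KB block size */
        int mc_start = blocks_per_group / 2;    /* 16384 */

        /* Intended: 16384 + (32768 >> 7) == 16384 + 256 == 16640 */
        int intended = mc_start + (blocks_per_group >> 7);

        /*
         * Without parentheses the addition happens first:
         * (16384 + 32768) >> 7 == 384, a bogus metacluster end.
         */
        int buggy = mc_start + blocks_per_group >> 7;

        printf("intended=%d buggy=%d\n", intended, buggy);
        return 0;
}

Note that ext3_get_grp_metacluster() below computes
"*mc_start + (EXT3_BLOCKS_PER_GROUP(sb) >> 7)" with explicit parentheses,
so the metacluster range comes out as intended.
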
diff -uprdN linux-2.6.23mm1-clean/fs/ext3/balloc.c linux-2.6.23mm1-ext3mc/fs/ext3/balloc.c
--- linux-2.6.23mm1-clean/fs/ext3/balloc.c 2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/balloc.c 2007-11-15 11:23:51.000000000 -0800
@@ -711,6 +711,7 @@ bitmap_search_next_usable_block(ext3_grp
ext3_grpblk_t next;
struct journal_head *jh = bh2jh(bh);
+ BUG_ON(start > maxblocks);
while (start < maxblocks) {
next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
if (next >= maxblocks)
@@ -841,10 +842,12 @@ claim_block(spinlock_t *lock, ext3_grpbl
static ext3_grpblk_t
ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
- unsigned long *count, struct ext3_reserve_window *my_rsv)
+ int use_metacluster, unsigned long *count,
+ struct ext3_reserve_window *my_rsv)
{
ext3_fsblk_t group_first_block;
ext3_grpblk_t start, end;
+ ext3_grpblk_t mc_start, mc_end, start2 = -1, end2 = -1;
unsigned long num = 0;
/* we do allocation within the reservation window if we have a window */
@@ -872,12 +875,48 @@ ext3_try_to_allocate(struct super_block
}
BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
+ /* start must have been set to grp_goal if one still exists. */
+ BUG_ON(grp_goal >= 0 && start != grp_goal);
+
+ if (test_opt(sb, METACLUSTER) && !use_metacluster) {
+ ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+
+ /*
+ * If there is an overlap with metacluster range, adjust our
+ * range to remove overlap, splitting our range into two if
+ * needed.
+ */
+ if (mc_end > mc_start) {
+ if (mc_start <= start)
+ start = max_t(ext3_grpblk_t, start, mc_end);
+ else if (mc_end >= end)
+ end = min_t(ext3_grpblk_t, end, mc_start);
+ else {
+ start2 = mc_end;
+ end2 = end;
+ end = mc_start;
+ }
+ }
+ }
+
+ if (start >= end)
+ goto fail_access;
+
+ if (grp_goal > 0)
+ grp_goal = start;
repeat:
if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
grp_goal = find_next_usable_block(start, bitmap_bh, end);
- if (grp_goal < 0)
+ if (grp_goal < 0) {
+ if (start2 >= 0) {
+ start = start2;
+ end = end2;
+ start2 = -1;
+ goto repeat;
+ }
goto fail_access;
+ }
if (!my_rsv) {
int i;
@@ -898,8 +937,15 @@ repeat:
*/
start++;
grp_goal++;
- if (start >= end)
- goto fail_access;
+ if (start >= end) {
+ if (start2 < 0)
+ goto fail_access;
+
+ start = start2;
+ end = end2;
+ start2 = -1;
+ grp_goal = -1;
+ }
goto repeat;
}
num++;
@@ -1084,6 +1130,7 @@ static int alloc_new_reservation(struct
unsigned long size;
int ret;
spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+ ext3_grpblk_t mc_start, mc_end;
group_first_block = ext3_group_first_block_no(sb, group);
group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
@@ -1143,6 +1190,7 @@ static int alloc_new_reservation(struct
* To make sure the reservation window has a free bit inside it, we
* need to check the bitmap after we found a reservable window.
*/
+ ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
retry:
ret = find_next_reservable_window(search_head, my_rsv, sb,
start_block, group_end_block);
@@ -1170,6 +1218,11 @@ retry:
my_rsv->rsv_start - group_first_block,
bitmap_bh, group_end_block - group_first_block + 1);
+ if (first_free_block >= mc_start && first_free_block < mc_end) {
+ start_block = mc_end;
+ goto next;
+ }
+
if (first_free_block < 0) {
/*
* no free block left on the bitmap, no point
@@ -1195,6 +1248,7 @@ retry:
* start from where the free block is,
* we also shift the list head to where we stopped last time
*/
+next:
search_head = my_rsv;
spin_lock(rsv_lock);
goto retry;
@@ -1223,12 +1277,18 @@ static void try_to_extend_reservation(st
struct ext3_reserve_window_node *next_rsv;
struct rb_node *next;
spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+ ext3_grpblk_t mc_start, mc_end;
if (!spin_trylock(rsv_lock))
return;
next = rb_next(&my_rsv->rsv_node);
+ ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+
+ if (my_rsv->rsv_end >= mc_start && my_rsv->rsv_end < mc_end)
+ size += mc_end - 1 - my_rsv->rsv_end;
+
if (!next)
my_rsv->rsv_end += size;
else {
@@ -1274,7 +1334,7 @@ static void try_to_extend_reservation(st
static ext3_grpblk_t
ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
unsigned int group, struct buffer_head *bitmap_bh,
- ext3_grpblk_t grp_goal,
+ ext3_grpblk_t grp_goal, int use_metacluster,
struct ext3_reserve_window_node * my_rsv,
unsigned long *count, int *errp)
{
@@ -1305,7 +1365,8 @@ ext3_try_to_allocate_with_rsv(struct sup
*/
if (my_rsv == NULL ) {
ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
- grp_goal, count, NULL);
+ grp_goal, use_metacluster,
+ count, NULL);
goto out;
}
/*
@@ -1361,7 +1422,8 @@ ext3_try_to_allocate_with_rsv(struct sup
BUG();
}
ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
- grp_goal, &num, &my_rsv->rsv_window);
+ grp_goal, use_metacluster,
+ &num, &my_rsv->rsv_window);
if (ret >= 0) {
my_rsv->rsv_alloc_hit += num;
*count = num;
@@ -1455,6 +1517,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
int bgi; /* blockgroup iteration index */
int fatal = 0, err;
int performed_allocation = 0;
+ int use_metacluster = 0;
ext3_grpblk_t free_blocks; /* number of free blocks in a group */
struct super_block *sb;
struct ext3_group_desc *gdp;
@@ -1473,6 +1536,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
sb = inode->i_sb;
if (!sb) {
printk("ext3_new_block: nonexistent device");
+ *errp = -ENODEV;
return 0;
}
@@ -1487,6 +1551,11 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
ext3_debug("goal=%lu.\n", goal);
+
+ /* Caller should ensure this. */
+ BUG_ON(goal < le32_to_cpu(es->s_first_data_block) ||
+ goal >= le32_to_cpu(es->s_blocks_count));
+
/*
* Allocate a block from reservation only when
* filesystem is mounted with reservation(default,-o reservation), and
@@ -1507,9 +1576,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
/*
* First, test whether the goal block is free.
*/
- if (goal < le32_to_cpu(es->s_first_data_block) ||
- goal >= le32_to_cpu(es->s_blocks_count))
- goal = le32_to_cpu(es->s_first_data_block);
group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
EXT3_BLOCKS_PER_GROUP(sb);
goal_group = group_no;
@@ -1535,7 +1601,7 @@ retry_alloc:
goto io_error;
grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
group_no, bitmap_bh, grp_target_blk,
- my_rsv, &num, &fatal);
+ use_metacluster, my_rsv, &num, &fatal);
if (fatal)
goto out;
if (grp_alloc_blk >= 0)
@@ -1573,8 +1639,8 @@ retry_alloc:
* try to allocate block(s) from this group, without a goal(-1).
*/
grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
- group_no, bitmap_bh, -1, my_rsv,
- &num, &fatal);
+ group_no, bitmap_bh, -1,
+ use_metacluster, my_rsv, &num, &fatal);
if (fatal)
goto out;
if (grp_alloc_blk >= 0)
@@ -1593,6 +1659,10 @@ retry_alloc:
group_no = goal_group;
goto retry_alloc;
}
+ if (test_opt(sb, METACLUSTER) && use_metacluster == 0) {
+ use_metacluster = 1;
+ goto retry_alloc;
+ }
/* No space left on the device */
*errp = -ENOSPC;
goto out;
@@ -1713,6 +1783,161 @@ ext3_fsblk_t ext3_new_block(handle_t *ha
return ext3_new_blocks(handle, inode, goal, &count, errp);
}
+/*
+ * ext3_new_indirect_blocks() -- allocate indirect blocks for inode.
+ * @inode: file inode
+ * @count: target number of indirect blocks to allocate
+ * @new_blocks[]: used for returning block numbers allocated
+ *
+ * return: 0 on success, appropriate error code otherwise. Upon return, *count
+ * contains the number of blocks successfully allocated which is non-zero only
+ * in the success case.
+ *
+ * Allocate a maximum of *count indirect blocks from the indirect block metadata
+ * area of the inode's group and store the block numbers in new_blocks[]. Since
+ * the allocation is in a predetermined region of the block group, caller just
+ * needs to pass a group number here which is where the goal and/or the
+ * reservation window may fall.
+ */
+int ext3_new_indirect_blocks(handle_t *handle, struct inode *inode,
+ unsigned long group_no, unsigned long *count,
+ ext3_fsblk_t new_blocks[])
+{
+ struct super_block *sb;
+ struct ext3_sb_info *sbi;
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gdp_bh;
+ struct ext3_group_desc *gdp;
+ ext3_grpblk_t group_first_block; /* first block in the group */
+ ext3_grpblk_t free_blocks; /* number of free blocks in the group */
+ ext3_grpblk_t mc_start, mc_end;
+ int blk, done = 0;
+ int err = 0;
+
+ BUG_ON(*count > 3);
+
+ sb = inode->i_sb;
+ if (!sb) {
+ printk(KERN_INFO "ext3_new_indirect_blocks: "
+ "nonexistent device");
+ return -ENODEV;
+ }
+ BUG_ON(!test_opt(sb, METACLUSTER));
+ sbi = EXT3_SB(sb);
+
+ if (DQUOT_ALLOC_BLOCK(inode, *count))
+ return -EDQUOT;
+
+ if (!ext3_has_free_blocks(sbi)) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+ if (!gdp) {
+ err = -EIO;
+ goto out;
+ }
+
+ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+ if (free_blocks == 0) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ bitmap_bh = read_block_bitmap(sb, group_no);
+ if (!bitmap_bh) {
+ err = -EIO;
+ goto out;
+ }
+
+ /*
+ * Make sure we use undo access for the bitmap, because it is critical
+ * that we do the frozen_data COW on bitmap buffers in all cases even
+ * if the buffer is in BJ_Forget state in the committing transaction.
+ */
+ BUFFER_TRACE(bitmap_bh, "get undo access for new indirect block");
+ err = ext3_journal_get_undo_access(handle, bitmap_bh);
+ if (err)
+ goto out;
+
+ err = -ENOSPC;
+ group_first_block = ext3_group_first_block_no(sb, group_no);
+ ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+ blk = mc_start;
+
+ while (done < *count && blk < mc_end) {
+ if (!ext3_test_allocatable(blk, bitmap_bh)) {
+ /*
+ * Don't use find_next_usable_block() here as it may
+ * skip free blocks that are not close to the goal.
+ * Since our goal is always fixed (mc_start), we may
+ * be trying to allocate slightly far from it and that
+ * will be a problem.
+ */
+ blk = bitmap_search_next_usable_block(blk, bitmap_bh,
+ mc_end);
+ continue;
+ }
+ if (claim_block(sb_bgl_lock(sbi, group_no), blk,
+ bitmap_bh)) {
+ new_blocks[done++] = group_first_block + blk;
+ } else {
+ /*
+ * The block was allocated by another thread, or it
+ * was allocated and then freed by another thread
+ */
+ cpu_relax();
+ }
+ blk++;
+ }
+
+ if (!done) {
+ BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+ ext3_journal_release_buffer(handle, bitmap_bh);
+ goto out;
+ }
+
+ BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
+ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+ if (err)
+ goto out;
+
+ BUFFER_TRACE(gdp_bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, gdp_bh);
+ if (err)
+ goto out;
+
+ /*
+ * Caller is responsible for adding the new indirect block buffers
+ * to the journal list.
+ */
+
+ spin_lock(sb_bgl_lock(sbi, group_no));
+ gdp->bg_free_blocks_count =
+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - done);
+ spin_unlock(sb_bgl_lock(sbi, group_no));
+ percpu_counter_sub(&sbi->s_freeblocks_counter, done);
+
+ BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
+ err = ext3_journal_dirty_metadata(handle, gdp_bh);
+ sb->s_dirt = 1;
+ if (err)
+ goto out;
+
+out:
+ if (bitmap_bh)
+ brelse(bitmap_bh);
+
+ DQUOT_FREE_BLOCK(inode, *count - done);
+ *count = done;
+
+ if (err && err != -ENOSPC)
+ ext3_error(sb, "ext3_new_indirect_blocks", "error %d", err);
+
+ return err;
+}
+
/**
* ext3_count_free_blocks() -- count filesystem free blocks
* @sb: superblock
diff -uprdN linux-2.6.23mm1-clean/fs/ext3/inode.c linux-2.6.23mm1-ext3mc/fs/ext3/inode.c
--- linux-2.6.23mm1-clean/fs/ext3/inode.c 2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/inode.c 2007-11-15 11:21:07.000000000 -0800
@@ -39,7 +39,29 @@
#include "xattr.h"
#include "acl.h"
+typedef struct {
+ __le32 *p;
+ __le32 key;
+ struct buffer_head *bh;
+} Indirect;
+
+struct ext3_ind_read_info {
+ int count;
+ int seq_prefetch;
+ long size;
+ struct buffer_head *bh[0];
+};
+
+# define EXT3_IND_READ_INFO_SIZE(_c) \
+ (sizeof(struct ext3_ind_read_info) + \
+ sizeof(struct buffer_head *) * (_c))
+
+# define EXT3_IND_READ_MAX (32)
+
static int ext3_writepage_trans_blocks(struct inode *inode);
+static Indirect *ext3_read_indblocks(struct inode *inode, int iblock,
+ int depth, int offsets[4],
+ Indirect chain[4], int *err);
/*
* Test whether an inode is a fast symlink.
@@ -233,12 +255,6 @@ no_delete:
clear_inode(inode); /* We must guarantee clearing of inode... */
}
-typedef struct {
- __le32 *p;
- __le32 key;
- struct buffer_head *bh;
-} Indirect;
-
static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
{
p->key = *(p->p = v);
@@ -352,18 +368,21 @@ static int ext3_block_to_path(struct ino
* the whole chain, all way to the data (returns %NULL, *err == 0).
*/
static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
- Indirect chain[4], int *err)
+ Indirect chain[4], int ind_readahead, int *err)
{
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ int index;
*err = 0;
/* i_data is not going away, no lock needed */
add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
if (!p->key)
goto no_block;
- while (--depth) {
+ for (index = 0; index < depth - 1; index++) {
+ if (ind_readahead && depth > 2 && index == depth - 2)
+ break;
bh = sb_bread(sb, le32_to_cpu(p->key));
if (!bh)
goto failure;
@@ -396,7 +415,15 @@ no_block:
* It is used when heuristic for sequential allocation fails.
* Rules are:
* + if there is a block to the left of our position - allocate near it.
- * + if pointer will live in indirect block - allocate near that block.
+ * + if the METACLUSTER option is not specified, allocate the data
+ * block close to the metadata block. Otherwise, if pointer will live in
+ * indirect block, we cannot allocate near the indirect block since
+ * indirect blocks are allocated in a reserved area. Even if we allocate
+ * this block right after the preceding logical file block, we'll still
+ * have to incur extra seek due to the indirect block (unless we
+ * prefetch the indirect block separately). So for now (until
+ * prefetching is turned on), it's OK not to return a sequential goal -
+ * just put in the same cylinder group as the inode.
* + if pointer will live in inode - allocate in the same
* cylinder group.
*
@@ -421,9 +448,11 @@ static ext3_fsblk_t ext3_find_near(struc
return le32_to_cpu(*p);
}
- /* No such thing, so let's try location of indirect block */
- if (ind->bh)
- return ind->bh->b_blocknr;
+ if (!test_opt(inode->i_sb, METACLUSTER)) {
+ /* No such thing, so let's try location of indirect block */
+ if (ind->bh)
+ return ind->bh->b_blocknr;
+ }
/*
* It is going to be referred to from the inode itself? OK, just put it
@@ -475,8 +504,7 @@ static ext3_fsblk_t ext3_find_goal(struc
* @blks: number of data blocks to be mapped.
* @blocks_to_boundary: the offset in the indirect block
*
- * return the total number of blocks to be allocate, including the
- * direct and indirect blocks.
+ * return the total number of direct blocks to be allocated.
*/
static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
int blocks_to_boundary)
@@ -508,22 +536,39 @@ static int ext3_blks_to_allocate(Indirec
* ext3_alloc_blocks: multiple allocate blocks needed for a branch
* @indirect_blks: the number of blocks need to allocate for indirect
* blocks
- *
+ * @blks: the number of direct blocks to be allocated
* @new_blocks: on return it will store the new block numbers for
* the indirect blocks(if needed) and the first direct block,
- * @blks: on return it will store the total number of allocated
- * direct blocks
+ *
+ * returns the number of direct blocks allocated, error via *err, and
+ * new block numbers via new_blocks[]
*/
static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, int indirect_blks, int blks,
ext3_fsblk_t new_blocks[4], int *err)
{
+ struct super_block *sb;
+ struct ext3_super_block *es;
int target, i;
- unsigned long count = 0;
+ unsigned long count = 0, goal_group;
int index = 0;
ext3_fsblk_t current_block = 0;
int ret = 0;
+ BUG_ON(blks <= 0);
+
+ sb = inode->i_sb;
+ if (!sb) {
+ printk(KERN_INFO "ext3_alloc_blocks: nonexistent device");
+ *err = -ENODEV;
+ return 0;
+ }
+ es = EXT3_SB(sb)->s_es;
+
+ if (goal < le32_to_cpu(es->s_first_data_block) ||
+ goal >= le32_to_cpu(es->s_blocks_count))
+ goal = le32_to_cpu(es->s_first_data_block);
+
/*
* Here we try to allocate the requested multiple blocks at once,
* on a best-effort basis.
@@ -534,6 +579,41 @@ static int ext3_alloc_blocks(handle_t *h
*/
target = blks + indirect_blks;
+ /*
+ * Try to allocate indirect blocks in the metacluster region of block
+ * group in which goal falls. This should not only give us clustered
+ * metablock allocation, but also allocate new metablocks close to the
+ * corresponding data blocks (by putting them in the same block group).
+ * Note that allocation of indirect blocks is only guided by goal and
+ * not by reservation window since the goal mostly falls within the
+ * reservation window for sequential allocation.
+ * If the indirect blocks could not be allocated in this block group,
+ * we fall back to sequential allocation of indirect block alongside
+ * the data block instead of trying other block groups as that can
+ * separate indirect and data blocks too far out.
+ */
+ if (test_opt(sb, METACLUSTER) && indirect_blks) {
+ count = indirect_blks;
+ goal_group = (goal - le32_to_cpu(es->s_first_data_block)) /
+ EXT3_BLOCKS_PER_GROUP(sb);
+ *err = ext3_new_indirect_blocks(handle, inode, goal_group,
+ &count, new_blocks + index);
+ if (*err && *err != -ENOSPC) {
+ printk(KERN_ERR "ext3_alloc_blocks failed to allocate "
+ "indirect blocks: %d", *err);
+ goto failed_out;
+ } else if (*err == 0) {
+ BUG_ON(count == 0);
+ }
+ *err = 0;
+
+ if (count > 0) {
+ index += count;
+ target -= count;
+ BUG_ON(index > indirect_blks);
+ }
+ }
+
while (1) {
count = target;
/* allocating blocks for indirect blocks and direct blocks */
@@ -542,7 +622,7 @@ static int ext3_alloc_blocks(handle_t *h
goto failed_out;
target -= count;
- /* allocate blocks for indirect blocks */
+ /* store indirect block numbers we just allocated */
while (index < indirect_blks && count) {
new_blocks[index++] = current_block++;
count--;
@@ -570,10 +650,14 @@ failed_out:
* @inode: owner
* @indirect_blks: number of allocated indirect blocks
* @blks: number of allocated direct blocks
+ * @goal: goal for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
- * This function allocates blocks, zeroes out all but the last one,
+ * returns error and number of direct blocks allocated via *blks
+ *
+ * This function allocates indirect_blks + *blks, zeroes out all
+ * indirect blocks,
* links them into chain and (if we are synchronous) writes them to disk.
* In other words, it prepares a branch that can be spliced onto the
* inode. It stores the information about that chain in the branch[], in
@@ -799,17 +883,24 @@ int ext3_get_blocks_handle(handle_t *han
int blocks_to_boundary = 0;
int depth;
struct ext3_inode_info *ei = EXT3_I(inode);
- int count = 0;
+ int count = 0, ind_readahead;
ext3_fsblk_t first_block = 0;
-
+ BUG_ON(!create &&
+ iblock >= (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
goto out;
- partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+ ind_readahead = !create && depth > 2;
+ partial = ext3_get_branch(inode, depth, offsets, chain,
+ ind_readahead, &err);
+ if (!partial && ind_readahead)
+ partial = ext3_read_indblocks(inode, iblock, depth,
+ offsets, chain, &err);
/* Simplest case - block found, no allocation needed */
if (!partial) {
@@ -844,7 +935,7 @@ int ext3_get_blocks_handle(handle_t *han
}
/* Next simple case - plain lookup or failed read of indirect block */
- if (!create || err == -EIO)
+ if (!create || (err && err != -EAGAIN))
goto cleanup;
mutex_lock(&ei->truncate_mutex);
@@ -866,7 +957,8 @@ int ext3_get_blocks_handle(handle_t *han
brelse(partial->bh);
partial--;
}
- partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+ partial = ext3_get_branch(inode, depth, offsets, chain, 0,
+ &err);
if (!partial) {
count++;
mutex_unlock(&ei->truncate_mutex);
@@ -1974,7 +2066,7 @@ static Indirect *ext3_find_shared(struct
/* Make k index the deepest non-null offest + 1 */
for (k = depth; k > 1 && !offsets[k-1]; k--)
;
- partial = ext3_get_branch(inode, k, offsets, chain, &err);
+ partial = ext3_get_branch(inode, k, offsets, chain, 0, &err);
/* Writer: pointers */
if (!partial)
partial = chain + k-1;
@@ -3297,3 +3389,508 @@ int ext3_change_inode_journal_flag(struc
return err;
}
+
+/*
+ * ext3_ind_read_end_bio --
+ *
+ * bio callback for read IO issued from ext3_read_indblocks.
+ * Will be called only once, when all I/O has completed.
+ * Frees read_info and bio.
+ */
+static void ext3_ind_read_end_bio(struct bio *bio, int err)
+{
+ struct ext3_ind_read_info *read_info = bio->bi_private;
+ struct buffer_head *bh;
+ int uptodate = !err && test_bit(BIO_UPTODATE, &bio->bi_flags);
+ int i;
+
+ BUG_ON(read_info->count <= 0);
+
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+
+ for (i = 0; i < read_info->count; i++) {
+ bh = read_info->bh[i];
+ BUG_ON(bh == NULL);
+
+ if (err == -EOPNOTSUPP)
+ set_bit(BH_Eopnotsupp, &bh->b_state);
+
+ if (uptodate) {
+ BUG_ON(buffer_uptodate(bh));
+ BUG_ON(ext3_buffer_prefetch(bh));
+ set_buffer_uptodate(bh);
+ if (read_info->seq_prefetch)
+ ext3_set_buffer_prefetch(bh);
+ }
+
+ unlock_buffer(bh);
+ brelse(bh);
+ }
+
+ kfree(read_info);
+ bio_put(bio);
+}
+
+/*
+ * ext3_get_max_read --
+ * @inode: inode of file.
+ * @block: block number in file (starting from zero).
+ * @offset_in_dind_block: offset of the indirect block inside its
+ * parent doubly-indirect block.
+ *
+ * Compute the maximum no. of indirect blocks that can be read
+ * satisfying following constraints:
+ * - Don't read indirect blocks beyond the end of current
+ * doubly-indirect block.
+ * - Don't read beyond eof.
+ */
+static inline unsigned long ext3_get_max_read(const struct inode *inode,
+ int block,
+ int offset_in_dind_block)
+{
+ const struct super_block *sb = inode->i_sb;
+ unsigned long max_read;
+ unsigned long ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+ unsigned long ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
+ unsigned long blocks_in_file =
+ (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ unsigned long remaining_ind_blks_in_dind =
+ (ptrs >= offset_in_dind_block) ? (ptrs - offset_in_dind_block)
+ : 0;
+ unsigned long remaining_ind_blks_before_eof =
+ ((blocks_in_file - EXT3_NDIR_BLOCKS + ptrs - 1) >> ptrs_bits) -
+ ((block - EXT3_NDIR_BLOCKS) >> ptrs_bits);
+
+ BUG_ON(block >= blocks_in_file);
+
+ max_read = min_t(unsigned long, remaining_ind_blks_in_dind,
+ remaining_ind_blks_before_eof);
+
+ BUG_ON(max_read < 1);
+
+ return max_read;
+}
+
+static void ext3_read_indblocks_submit(struct bio **pbio,
+ struct ext3_ind_read_info **pread_info,
+ int *read_cnt, int seq_prefetch)
+{
+ struct bio *bio = *pbio;
+ struct ext3_ind_read_info *read_info = *pread_info;
+
+ BUG_ON(*read_cnt < 1);
+
+ read_info->seq_prefetch = seq_prefetch;
+ read_info->count = *read_cnt;
+ read_info->size = bio->bi_size;
+ bio->bi_private = read_info;
+ bio->bi_end_io = ext3_ind_read_end_bio;
+ submit_bio(READ, bio);
+
+ *pbio = NULL;
+ *pread_info = NULL;
+ *read_cnt = 0;
+}
+
+/*
+ * ext3_read_indblocks_async --
+ * @sb: super block
+ * @ind_blocks[]: array of indirect block numbers on disk
+ * @count: maximum number of indirect blocks to read
+ * @first_bh: buffer_head for indirect block ind_blocks[0], may be
+ * NULL
+ * @seq_prefetch: if this is part of a sequential prefetch and buffers'
+ * prefetch bit must be set.
+ * @blocks_done: number of blocks considered for prefetching.
+ *
+ * Issue a single bio request to read up to count buffers identified in
+ * ind_blocks[]. Fewer than count buffers may be read in some cases:
+ * - If a buffer is found to be uptodate and its prefetch bit is set, we
+ * don't look at any more buffers as they will most likely be in the cache.
+ * - We skip buffers we cannot lock without blocking (except for first_bh
+ * if specified).
+ * - We skip buffers beyond a certain range on disk.
+ *
+ * This function must issue read on first_bh if specified unless of course
+ * it's already uptodate.
+ */
+static int ext3_read_indblocks_async(struct super_block *sb,
+ __le32 ind_blocks[], int count,
+ struct buffer_head *first_bh,
+ int seq_prefetch,
+ unsigned long *blocks_done)
+{
+ struct buffer_head *bh;
+ struct bio *bio = NULL;
+ struct ext3_ind_read_info *read_info = NULL;
+ int read_cnt = 0, blk;
+ ext3_fsblk_t prev_blk = 0, io_start_blk = 0, curr;
+ int err = 0;
+
+ BUG_ON(count < 1);
+ /* Don't move this to ext3_get_max_read() since callers often need to
+ * trim the count returned by that function. So this bound must only
+ * be imposed at the last moment. */
+ count = min_t(unsigned long, count, EXT3_IND_READ_MAX);
+ *blocks_done = 0UL;
+
+ if (count == 1 && first_bh) {
+ lock_buffer(first_bh);
+ get_bh(first_bh);
+ first_bh->b_end_io = end_buffer_read_sync;
+ submit_bh(READ, first_bh);
+ *blocks_done = 1UL;
+ return 0;
+ }
+
+ for (blk = 0; blk < count; blk++) {
+ curr = le32_to_cpu(ind_blocks[blk]);
+
+ if (!curr)
+ continue;
+
+ if (io_start_blk > 0) {
+ if (max(io_start_blk, curr) - min(io_start_blk, curr) >=
+ EXT3_IND_READ_MAX)
+ continue;
+ }
+
+ if (prev_blk > 0 && curr != prev_blk + 1) {
+ ext3_read_indblocks_submit(&bio, &read_info,
+ &read_cnt, seq_prefetch);
+ prev_blk = 0;
+ break;
+ }
+
+ if (blk == 0 && first_bh) {
+ bh = first_bh;
+ get_bh(first_bh);
+ } else {
+ bh = sb_getblk(sb, curr);
+ if (unlikely(!bh)) {
+ err = -ENOMEM;
+ goto failure;
+ }
+ }
+
+ if (buffer_uptodate(bh)) {
+ if (ext3_buffer_prefetch(bh)) {
+ brelse(bh);
+ break;
+ }
+ brelse(bh);
+ continue;
+ }
+
+ /* Lock the buffer without blocking, skipping any buffers
+ * which would require us to block. first_bh when specified is
+ * an exception as caller typically wants it to be read for
+ * sure (e.g., ext3_read_indblocks_sync).
+ */
+ if (bh == first_bh) {
+ lock_buffer(bh);
+ } else if (test_set_buffer_locked(bh)) {
+ brelse(bh);
+ continue;
+ }
+
+ /* Check again with the buffer locked. */
+ if (buffer_uptodate(bh)) {
+ if (ext3_buffer_prefetch(bh)) {
+ unlock_buffer(bh);
+ brelse(bh);
+ break;
+ }
+ unlock_buffer(bh);
+ brelse(bh);
+ continue;
+ }
+
+ if (read_cnt == 0) {
+ /* read_info freed in ext3_ind_read_end_bio(). */
+ read_info = kmalloc(EXT3_IND_READ_INFO_SIZE(count),
+ GFP_KERNEL);
+ if (unlikely(!read_info)) {
+ err = -ENOMEM;
+ goto failure;
+ }
+
+ bio = bio_alloc(GFP_KERNEL, count);
+ if (unlikely(!bio)) {
+ err = -ENOMEM;
+ goto failure;
+ }
+ bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ }
+
+ if (bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))
+ < bh->b_size) {
+ brelse(bh);
+ if (read_cnt == 0)
+ goto failure;
+
+ break;
+ }
+
+ read_info->bh[read_cnt++] = bh;
+
+ prev_blk = curr;
+ if (io_start_blk == 0)
+ io_start_blk = curr;
+ }
+
+ if (read_cnt == 0)
+ goto done;
+
+ ext3_read_indblocks_submit(&bio, &read_info, &read_cnt, seq_prefetch);
+
+ *blocks_done = blk;
+ return 0;
+
+failure:
+ while (--read_cnt >= 0) {
+ unlock_buffer(read_info->bh[read_cnt]);
+ brelse(read_info->bh[read_cnt]);
+ }
+
+done:
+ if (read_info)
+ kfree(read_info);
+
+ if (bio)
+ bio_put(bio);
+
+ return err;
+}
+
+/*
+ * ext3_read_indblocks_sync --
+ * @sb: super block
+ * @ind_blocks[]: array of indirect block numbers on disk
+ * @count: maximum number of indirect blocks to read
+ * @first_bh: buffer_head for indirect block ind_blocks[0], must be
+ * non-NULL.
+ * @seq_prefetch: set prefetch bit of buffers, used when this is part of
+ * a sequential prefetch.
+ * @blocks_done: number of blocks considered for prefetching.
+ *
+ * Synchronously read at most count indirect blocks listed in
+ * ind_blocks[]. This function calls ext3_read_indblocks_async() to do all
+ * the hard work. It waits for read to complete on first_bh before
+ * returning.
+ */
+
+static int ext3_read_indblocks_sync(struct super_block *sb,
+ __le32 ind_blocks[], int count,
+ struct buffer_head *first_bh,
+ int seq_prefetch,
+ unsigned long *blocks_done)
+{
+ int err;
+
+ BUG_ON(count < 1);
+ BUG_ON(!first_bh);
+
+ err = ext3_read_indblocks_async(sb, ind_blocks, count, first_bh,
+ seq_prefetch, blocks_done);
+ if (err)
+ return err;
+
+ wait_on_buffer(first_bh);
+ if (!buffer_uptodate(first_bh))
+ err = -EIO;
+
+ /* if seq_prefetch != 0, ext3_read_indblocks_async() sets prefetch bit
+ * for all buffers, but the first buffer for sync IO is never a prefetch
+ * buffer since it's needed right away, so clear its prefetch bit here.
+ */
+ if (seq_prefetch)
+ ext3_clear_buffer_prefetch(first_bh);
+
+ BUG_ON(ext3_buffer_prefetch(first_bh));
+
+ return err;
+}
+
+/*
+ * ext3_read_indblocks --
+ *
+ * @inode: inode of file
+ * @iblock: block number inside file (starting from 0).
+ * @depth: depth of path from inode to data block.
+ * @offsets: array of offsets within blocks identified in 'chain'.
+ * @chain: array of Indirect with info about all levels of blocks until
+ * the data block.
+ * @err: error pointer.
+ *
+ * This function is called after reading all metablocks leading to 'iblock'
+ * except the (singly) indirect block. It reads the indirect block if not
+ * already in the cache and may also prefetch next few indirect blocks.
+ * It uses a combination of synchronous and asynchronous requests to
+ * accomplish this. We do prefetching even for random reads by reading
+ * ahead one indirect block since reads of size >=512KB have at least 12%
+ * chance of spanning two indirect blocks.
+ */
+
+static Indirect *ext3_read_indblocks(struct inode *inode, int iblock,
+ int depth, int offsets[4],
+ Indirect chain[4], int *err)
+{
+ struct super_block *sb = inode->i_sb;
+ struct buffer_head *first_bh, *prev_bh;
+ unsigned long max_read, blocks_done = 0;
+ __le32 *ind_blocks;
+
+ /* Must have doubly indirect block for prefetching indirect blocks. */
+ BUG_ON(depth <= 2);
+ BUG_ON(!chain[depth-2].key);
+
+ *err = 0;
+
+ /* Handle first block */
+ ind_blocks = chain[depth-2].p;
+ first_bh = sb_getblk(sb, le32_to_cpu(ind_blocks[0]));
+ if (unlikely(!first_bh)) {
+ printk(KERN_ERR "Failed to get block %u for sb %p\n",
+ le32_to_cpu(ind_blocks[0]), sb);
+ goto failure;
+ }
+
+ BUG_ON(first_bh->b_size != sb->s_blocksize);
+
+ if (buffer_uptodate(first_bh)) {
+ /* Found the buffer in cache, either it was accessed recently or
+ * it was prefetched while reading previous indirect block(s).
+ * We need to figure out if we need to prefetch the following
+ * indirect blocks.
+ */
+ if (!ext3_buffer_prefetch(first_bh)) {
+ /* Either we've seen this indirect block before while
+ * accessing another data block, or this is a random
+ * read. In the former case, we must have done the
+ * needful the first time we had a cache hit on this
+ * indirect block, in the latter case we obviously
+ * don't need to do any prefetching.
+ */
+ goto done;
+ }
+
+ max_read = ext3_get_max_read(inode, iblock,
+ offsets[depth-2]);
+
+ /* This indirect block is in the cache due to prefetching and
+ * this is its first cache hit, clear the prefetch bit and
+ * make sure the following blocks are also prefetched.
+ */
+ ext3_clear_buffer_prefetch(first_bh);
+
+ if (max_read >= 2) {
+ /* ext3_read_indblocks_async() stops at the first
+ * indirect block which has the prefetch bit set which
+ * will most likely be the very next indirect block.
+ */
+ ext3_read_indblocks_async(sb, &ind_blocks[1],
+ max_read - 1,
+ NULL, 1, &blocks_done);
+ }
+
+ } else {
+ /* Buffer is not in memory, we need to read it. If we are
+ * reading sequentially from the previous indirect block, we
+ * have just detected a sequential read and we must prefetch
+ * some indirect blocks for future.
+ */
+
+ max_read = ext3_get_max_read(inode, iblock,
+ offsets[depth-2]);
+
+ if ((ind_blocks - (__le32 *)chain[depth-2].bh->b_data) >= 1) {
+ prev_bh = sb_getblk(sb, le32_to_cpu(ind_blocks[-1]));
+ if (buffer_uptodate(prev_bh) &&
+ !ext3_buffer_prefetch(prev_bh)) {
+ /* Detected sequential read. */
+ brelse(prev_bh);
+
+ /* Sync read indirect block, also read the next
+ * few indirect blocks.
+ */
+ *err = ext3_read_indblocks_sync(sb, ind_blocks,
+ max_read, first_bh, 1,
+ &blocks_done);
+
+ if (*err)
+ goto out;
+
+ /* In case the very next indirect block is
+ * discontiguous by a non-trivial amount,
+ * ext3_read_indblocks_sync() above won't
+ * prefetch it (indicated by blocks_done < 2).
+ * So to help sequential read, schedule an
+ * async request for reading the next
+ * contiguous indirect block range (which
+ * in metaclustering case would be the next
+ * metacluster, without metaclustering it
+ * would be the next indirect block). This is
+ * expected to benefit the non-metaclustering
+ * case.
+ */
+ if (max_read >= 2 && blocks_done < 2)
+ ext3_read_indblocks_async(sb,
+ &ind_blocks[1],
+ max_read - 1,
+ NULL, 1, &blocks_done);
+
+ goto done;
+ }
+ brelse(prev_bh);
+ }
+
+ /* Either random read, or sequential detection failed above.
+ * We always prefetch the next indirect block in this case
+ * whenever possible.
+ * This is because for random reads of size ~512KB, there is
+ * >12% chance that a read will span two indirect blocks.
+ */
+ *err = ext3_read_indblocks_sync(sb, ind_blocks,
+ (max_read >= 2) ? 2 : 1,
+ first_bh, 0, &blocks_done);
+ if (*err)
+ goto out;
+ }
+
+done:
+ /* Reader: pointers */
+ if (!verify_chain(chain, &chain[depth - 2])) {
+ brelse(first_bh);
+ goto changed;
+ }
+ add_chain(&chain[depth - 1], first_bh,
+ (__le32*)first_bh->b_data + offsets[depth - 1]);
+ /* Reader: end */
+ if (!chain[depth - 1].key)
+ goto out;
+
+ BUG_ON(!buffer_uptodate(first_bh));
+ return NULL;
+
+changed:
+ *err = -EAGAIN;
+ goto out;
+failure:
+ *err = -EIO;
+out:
+ if (*err) {
+ ext3_debug("Error %d reading indirect blocks\n", *err);
+ return &chain[depth - 2];
+ } else
+ return &chain[depth - 1];
+}
+
diff -uprdN linux-2.6.23mm1-clean/fs/ext3/super.c linux-2.6.23mm1-ext3mc/fs/ext3/super.c
--- linux-2.6.23mm1-clean/fs/ext3/super.c 2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/super.c 2007-11-09 16:46:29.000000000 -0800
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_
else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
seq_puts(seq, ",data=writeback");
+ if (test_opt(sb, METACLUSTER))
+ seq_puts(seq, ",metacluster");
+
ext3_show_quota_options(seq, sb);
return 0;
@@ -758,7 +761,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
- Opt_grpquota
+ Opt_grpquota, Opt_metacluster
};
static match_table_t tokens = {
@@ -808,6 +811,7 @@ static match_table_t tokens = {
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
{Opt_barrier, "barrier=%u"},
+ {Opt_metacluster, "metacluster"},
{Opt_err, NULL},
{Opt_resize, "resize"},
};
@@ -1140,6 +1144,9 @@ clear_qf_name:
case Opt_bh:
clear_opt(sbi->s_mount_opt, NOBH);
break;
+ case Opt_metacluster:
+ set_opt(sbi->s_mount_opt, METACLUSTER);
+ break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
diff -uprdN linux-2.6.23mm1-clean/include/linux/ext3_fs.h linux-2.6.23mm1-ext3mc/include/linux/ext3_fs.h
--- linux-2.6.23mm1-clean/include/linux/ext3_fs.h 2007-10-17 18:31:43.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/include/linux/ext3_fs.h 2007-11-15 12:03:48.000000000 -0800
@@ -380,6 +380,7 @@ struct ext3_inode {
#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT3_MOUNT_METACLUSTER 0x400000 /* Indirect block clustering */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
@@ -493,6 +494,7 @@ struct ext3_super_block {
#ifdef __KERNEL__
#include <linux/ext3_fs_i.h>
#include <linux/ext3_fs_sb.h>
+#include <linux/buffer_head.h>
static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
{
return sb->s_fs_info;
@@ -722,6 +724,11 @@ struct dir_private_info {
__u32 next_hash;
};
+/* Special bh flag used by the metacluster readahead logic. */
+enum ext3_bh_state_bits {
+ EXT3_BH_PREFETCH = BH_JBD_Sentinel,
+};
+
/* calculate the first block number of the group */
static inline ext3_fsblk_t
ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
@@ -730,6 +737,24 @@ ext3_group_first_block_no(struct super_b
le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
}
+static inline void
+ext3_set_buffer_prefetch(struct buffer_head *bh)
+{
+ set_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
+static inline void
+ext3_clear_buffer_prefetch(struct buffer_head *bh)
+{
+ clear_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
+static inline int
+ext3_buffer_prefetch(struct buffer_head *bh)
+{
+ return test_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
/*
* Special error return code only used by dx_probe() and its callers.
*/
@@ -752,6 +777,9 @@ extern int ext3_bg_has_super(struct supe
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, int *errp);
+extern int ext3_new_indirect_blocks(handle_t *handle, struct inode *,
+ unsigned long group_no, unsigned long *,
+ ext3_fsblk_t new_blocks[]);
extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, unsigned long *count, int *errp);
extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
@@ -870,6 +898,31 @@ extern const struct inode_operations ext
extern const struct inode_operations ext3_symlink_inode_operations;
extern const struct inode_operations ext3_fast_symlink_inode_operations;
+/*
+ * ext3_get_grp_metacluster:
+ *
+ * Determines metacluster block range for all block groups of the file
+ * system.
+ *
+ * Number of metacluster blocks = blocks_per_group/128. This allows us
+ * to fit all indirect blocks in a block group with average file size of
+ * 256KB into the group's metacluster. We want to avoid having large
+ * metaclusters because then we'll run out of data blocks sooner, and when
+ * we are out of data blocks metaclustering goes for a toss.
+ *
+ */
+static inline void
+ext3_get_grp_metacluster(struct super_block *sb,
+ ext3_grpblk_t *mc_start,
+ ext3_grpblk_t *mc_end) /* exclusive */
+{
+ *mc_start = EXT3_BLOCKS_PER_GROUP(sb) / 2;
+ if (test_opt(sb, METACLUSTER)) {
+ *mc_end = *mc_start + (EXT3_BLOCKS_PER_GROUP(sb) >> 7);
+ } else {
+ *mc_end = *mc_start;
+ }
+}
#endif /* __KERNEL__ */
diff -uprdN linux-2.6.23mm1-clean/include/linux/jbd.h linux-2.6.23mm1-ext3mc/include/linux/jbd.h
--- linux-2.6.23mm1-clean/include/linux/jbd.h 2007-10-17 18:31:43.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/include/linux/jbd.h 2007-11-09 16:46:29.000000000 -0800
@@ -294,6 +294,7 @@ enum jbd_state_bits {
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
+ BH_JBD_Sentinel, /* Start bit for clients of jbd */
};
BUFFER_FNS(JBD, jbd)
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html