[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1270833270-14087-1-git-send-email-dmonakhov@openvz.org>
Date: Fri, 9 Apr 2010 21:14:30 +0400
From: Dmitry Monakhov <dmonakhov@...nvz.org>
To: linux-ext4@...r.kernel.org
Cc: aneesh.kumar@...ux.vnet.ibm.com, tytso@....edu,
Dmitry Monakhov <dmonakhov@...nvz.org>
Subject: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size
Zerrout trick allow us to optimize cases where it is more reasonable
to explicitly zeroout extent and mark it as initialized instead of
splitting to several small ones.
But this optimization is not acceptable is extent is beyond i_size
Because it is not possible to have initialized blocks after i_size.
Fsck treat this as incorrect inode size.
#BUG# (here suppose to be bug number, but bugzilla.kernel.org is too
dammit slow)
##TESTCASE
/***********************************************
gcc -Wall falloc_test.c -ofalloc_test
This testcase check write to fallocated space
mkfs.ext4 /dev/sdb1
mount /dev/sdb1 /mnt
./falloc_test /mnt/F1
umount /mnt
fsck.ext4 -f /dev/sdb1
***********************************************/
#define _GNU_SOURCE
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
int main(int argc, char **argv)
{
loff_t len, offset;
int fd, ret;
if (argc != 2) {
printf("Usage: %s <fname>\n", argv[0]);
return 1;
}
fd = open(argv[1], O_CREAT|O_RDWR, 0777);
/* 8192 is less than EXT4_EXT_ZERO_LEN */
ret = fallocate(fd, 0x1, 0, 8192);
if (ret)
return ret;
/* Provoke reserved space convertation */
return write(fd, "1", 1) != 1;
}
Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
---
fs/ext4/extents.c | 62 +++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 49 insertions(+), 13 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8bdee27..54a9b80 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1,3 +1,4 @@
+
/*
* Copyright (c) 2003-2006, Cluster File Systems, Inc, info@...sterfs.com
* Written by Alex Tomas <alex@...sterfs.com>
@@ -2540,9 +2541,11 @@ static void bi_complete(struct bio *bio, int error)
{
complete((struct completion *)bio->bi_private);
}
-
+#define ext4_ext_zeroout(inode,ext_nn) \
+ __ext4_ext_zeroout(inode, ext_nn, iblock, max_blocks)
/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+static int __ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex,
+ ext4_lblk_t iblock, unsigned int max_blocks)
{
int ret;
struct bio *bio;
@@ -2560,6 +2563,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
/* convert ee_pblock to 512 byte sectors */
ee_pblock = ee_pblock << (blkbits - 9);
+ printk("%s inode:%lu ext:[%d:%d] iblock:%d max:%d i_sz:%lld expand:%d\n",
+ __FUNCTION__, inode->i_ino,
+ le32_to_cpu(ex->ee_block), ee_len,
+ iblock, max_blocks, inode->i_size,
+ ((inode->i_size +blocksize -1) >> blkbits) < iblock + max_blocks);
+
while (ee_len > 0) {
if (ee_len > BIO_MAX_PAGES)
@@ -2631,11 +2640,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
int ret = 0;
+ int may_zeroout;
+ ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+ "block %llu, max_blocks %u ",
+ inode->i_ino, (unsigned long long)iblock, max_blocks);
depth = ext_depth(inode);
eh = path[depth].p_hdr;
@@ -2644,16 +2657,25 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully insde i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2684,7 +2706,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (allocated > max_blocks) {
unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
- if (allocated <= EXT4_EXT_ZERO_LEN) {
+ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
/*
* iblock == ee_block is handled by the zerouout
* at the beginning.
@@ -2760,7 +2782,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2784,8 +2806,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* update the extent length after successful insert of the
* split extent
*/
- orig_ex.ee_len = cpu_to_le16(ee_len -
- ext4_ext_get_actual_len(ex3));
+ ee_len -= ext4_ext_get_actual_len(ex3);
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
+
depth = newdepth;
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2834,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* otherwise give the extent a chance to merge to left
*/
if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
- iblock != ee_block) {
+ iblock != ee_block && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2878,7 +2903,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2938,14 +2963,16 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
- ext4_lblk_t ee_block;
+ ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock;
int err = 0;
+ int may_zeroout;
ext_debug("ext4_split_unwritten_extents: inode %lu,"
"iblock %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)iblock, max_blocks);
+
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -2953,10 +2980,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
+ eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_sb->s_blocksize_bits;
+
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ /*
+ * It is safe to convert extent to initialized via explicit
+ * zeroout only if extent is fully insde i_size or new_size.
+ */
+ may_zeroout = ee_block + ee_len <= iblock + max_blocks ||
+ ee_block + ee_len <= eof_block;
/*
* If the uninitialized extent begins at the same logical
@@ -2992,7 +3028,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -3063,7 +3099,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
- if (err == -ENOSPC) {
+ if (err == -ENOSPC && may_zeroout) {
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
--
1.6.6.1
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists