[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140115221951.GA12226@thunk.org>
Date: Wed, 15 Jan 2014 17:19:51 -0500
From: Theodore Ts'o <tytso@....edu>
To: "Darrick J. Wong" <darrick.wong@...cle.com>
Cc: linux-ext4@...r.kernel.org
Subject: Re: [PATCH 50/74] libext2fs: support allocating uninit blocks in
bmap2()
On Wed, Jan 15, 2014 at 01:11:22PM -0800, Darrick J. Wong wrote:
> > The other thing to note about this patch is that if you want to
> > implement fallocate, ext2fs_bmap2() is really the wrong tool to use.
> > I've been working on a program for work which pre-creates a bunch of
>
> I think that ext2fs_fallocate would be a good addition to the library. Is your
> program far enough along to share? fuse2fs would benefit greatly.
An ext2fs_fallocate() is way more difficult than what I've done, since
you have to deal with all sorts of corner cases where the file has
pre-existing sparse extents, which may or may not be initialized, and
making sure that it works in that case. Allocating blocks to a file
which you know started as a zero length file is in fact much easier.
Here are the key bits from my program:
/*
* This should eventually be cleaned up and put into libext2fs
* This is much faster than calling ext2fs_block_alloc_stats() for each
* block, since it requires recalculating the bg descriptor checksum
* for every single block that you allocate.
*/
static void ext2fs_block_alloc_stats_range(ext2_filsys fs, blk64_t blk,
blk_t num, int inuse)
{
int group;
#ifndef OMIT_COM_ERR
if (blk + num >= ext2fs_blocks_count(fs->super)) {
com_err("ext2fs_block_alloc_stats_range", 0,
"Illegal block range: %llu (%u) ",
(unsigned long long) blk, num);
return;
}
#endif
if (inuse == 0)
return;
if (inuse > 0) {
ext2fs_mark_block_bitmap_range2(fs->block_map, blk, num);
inuse = 1;
} else {
ext2fs_unmark_block_bitmap_range2(fs->block_map, blk, num);
inuse = -1;
}
while (num) {
group = ext2fs_group_of_blk2(fs, blk);
blk64_t last_blk = ext2fs_group_last_block2(fs, group);
blk_t n = num;
if (blk + num > last_blk)
n = last_blk - blk + 1;
ext2fs_bg_free_blocks_count_set(fs, group,
ext2fs_bg_free_blocks_count(fs, group) -
inuse*n/EXT2FS_CLUSTER_RATIO(fs));
ext2fs_bg_flags_clear(fs, group, EXT2_BG_BLOCK_UNINIT);
ext2fs_group_desc_csum_set(fs, group);
ext2fs_free_blocks_count_add(fs->super, -inuse * n);
ext2fs_mark_super_dirty(fs);
ext2fs_mark_bb_dirty(fs);
blk += n;
num -= n;
}
}
/*
* ext2fs_allocate_tables() is not optimally allocating blocks in all
* situations. We need to take a look at this at some point. For
* now, just replace it with something simple and stupid.
*/
errcode_t my_allocate_tables(ext2_filsys fs)
{
errcode_t retval;
dgrp_t i;
for (i = 0; i < fs->group_desc_count; i++) {
retval = ext2fs_new_block2(fs, goal, NULL, &goal);
if (retval)
return retval;
ext2fs_block_alloc_stats2(fs, goal, +1);
ext2fs_block_bitmap_loc_set(fs, i, goal);
}
for (i = 0; i < fs->group_desc_count; i++) {
retval = ext2fs_new_block2(fs, goal, NULL, &goal);
if (retval)
return retval;
ext2fs_block_alloc_stats2(fs, goal, +1);
ext2fs_inode_bitmap_loc_set(fs, i, goal);
}
for (i = 0; i < fs->group_desc_count; i++) {
blk64_t end = ext2fs_blocks_count(fs->super) - 1;
retval = ext2fs_get_free_blocks2(fs, goal, end,
fs->inode_blocks_per_group,
fs->block_map, &goal);
if (retval)
return retval;
ext2fs_block_alloc_stats_range(fs, goal,
fs->inode_blocks_per_group, +1);
ext2fs_inode_table_loc_set(fs, i, goal);
}
return 0;
}
/*
* Some of this could eventually get turned into fallocate, but that's
* actually a much more difficult and tricking thing to implement.
*/
static errcode_t mk_hugefile(ext2_filsys fs, unsigned int num,
ext2_ino_t dir, int idx, ext2_ino_t *ino)
{
errcode_t retval;
blk64_t lblk, blk, bend;
__u64 size;
unsigned int i;
struct ext2_inode inode;
ext2_extent_handle_t handle;
char fn[32];
retval = ext2fs_new_inode(fs, 0, LINUX_S_IFREG, NULL, ino);
if (retval)
return retval;
memset(&inode, 0, sizeof(struct ext2_inode));
inode.i_mode = LINUX_S_IFREG | 0600;
ext2fs_iblk_set(fs, &inode, num / EXT2FS_CLUSTER_RATIO(fs));
size = (__u64) num * fs->blocksize;
inode.i_size = size & 0xffffffff;
inode.i_size_high = (size >> 32);
inode.i_links_count = 1;
retval = ext2fs_write_new_inode(fs, *ino, &inode);
if (retval)
return retval;
ext2fs_inode_alloc_stats2(fs, *ino, +1, 0);
retval = ext2fs_extent_open2(fs, *ino, &inode, &handle);
if (retval)
return retval;
{
struct ext2_inode t;
ext2fs_read_inode(fs, *ino, &t);
printf("eo: i_size_high: %lu size: %llu\n", t.i_size_high,
EXT2_I_SIZE(&t));
}
lblk = 0;
while (num) {
blk64_t pblk, end;
blk_t n = num;
retval = ext2fs_find_first_zero_block_bitmap2(fs->block_map,
goal, ext2fs_blocks_count(fs->super) - 1, &end);
if (retval)
return ENOSPC;
goal = end;
retval = ext2fs_find_first_set_block_bitmap2(fs->block_map, goal,
ext2fs_blocks_count(fs->super) - 1, &bend);
if (bend == ENOENT)
bend = ext2fs_blocks_count(fs->super);
if (bend - goal < num)
n = bend - goal;
printf("goal %llu bend %llu num %u n %u\n", goal, bend, num, n);
pblk = goal;
num -= n;
goal += n;
ext2fs_block_alloc_stats_range(fs, pblk, n, +1);
while (n) {
blk_t l = n;
struct ext2fs_extent newextent;
{
struct ext2_inode t;
ext2fs_read_inode(fs, *ino, &t);
printf("i_size_high: %lu size: %llu\n", t.i_size_high,
EXT2_I_SIZE(&t));
}
if (l > EXT_INIT_MAX_LEN)
l = EXT_INIT_MAX_LEN;
newextent.e_len = l;
newextent.e_pblk = pblk;
newextent.e_lblk = lblk;
newextent.e_flags = 0;
printf("inserting extent: %llu %llu %u\n", lblk, pblk, l);
retval = ext2fs_extent_insert(handle,
EXT2_EXTENT_INSERT_AFTER, &newextent);
if (retval)
return retval;
pblk += l;
lblk += l;
n -= l;
}
}
{
struct ext2_inode t;
ext2fs_read_inode(fs, *ino, &t);
printf("i_size_high: %lu size: %llu\n", t.i_size_high,
EXT2_I_SIZE(&t));
}
sprintf(fn, "hugefile%05d", idx);
retry:
retval = ext2fs_link(fs, dir, fn, *ino, EXT2_FT_REG_FILE);
if (retval == EXT2_ET_DIR_NO_SPACE) {
retval = ext2fs_expand_dir(fs, dir);
if (retval)
goto errout;
goto retry;
}
if (retval)
goto errout;
errout:
if (handle)
ext2fs_extent_free(handle);
return retval;
}
Note that this requires some of the test patches I've been sending
out, since it uses ext2fs_find_first_{set,zero}_block_bitmap2().
There are also some bugs in the versions which I sent out; I'm working
on fixing them....
> That said, I've also found a couple of bugs in the extent code by implementing
> fallocate in such a stupid way. :) It turns out that if (a) we need to split
> an extent into three pieces (say we write to a block in the middle of an
> unwritten extent and don't want to convert the whole extent) and (b) either of
> the extent_insert calls requires us to split the extent block and (c) we ENOSPC
> while trying to allocate a new extent block, we don't put the extent tree back
> the way it was before the split, and all the blocks after that point are lost.
Well, I found a bug in extfs_extent_insert() which showed up when I
tried to implement the block allocation in an intelligent way. :-)
I'll send out that bug fix a bit.
> I will send patches to avoid this corruption by checking for enough space soon.
> I think your local git tree has patches in it that aren't on kernel.org yet, so
> I'll hold off until I see them show up.
Yeah, some of those patches still need some clean up, so I haven't
pushed my maint branch to kernel.org yet.
But anyway, the above code will give you an idea where I'm going ---
this is **way** faster than trying to allocate blocks using the
set_bmap() function. :-)
- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists