[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <85527CC4-DBCB-404E-8267-F87D33E6CCBC@dilger.ca>
Date: Fri, 3 Mar 2017 17:45:28 -0700
From: Andreas Dilger <adilger@...ger.ca>
To: Artem Blagodarenko <artem.blagodarenko@...il.com>
Cc: linux-ext4 <linux-ext4@...r.kernel.org>,
Alexey Lyashkov <alexey.lyashkov@...il.com>
Subject: Re: [PATCH, RFC] libext2fs: preload block group on request
On Mar 3, 2017, at 2:15 AM, Artem Blagodarenko <artem.blagodarenko@...il.com> wrote:
>
> From: Artem Blagodarenko <artem.blagodarenko@...gate.com>
>
> I send this for descussion. Requested by Andreas Dilger and Alexey Lyashkov.
>
>> But again - I may ask an Artem submit a patch to read GD by demand as =
>> it ready again, but it don=E2=80=99t like it due a complexity.
>
>> Sure, it would be good to send it to the list, just to see where =
>> complexity is and discuss how it might be fixed.
Artem,
thanks for the patch. This looks roughly like I expected. Comments inline:
> diff --git a/lib/ext2fs/blknum.c b/lib/ext2fs/blknum.c
> index ac80849..b171bac 100644
> --- a/lib/ext2fs/blknum.c
> +++ b/lib/ext2fs/blknum.c
> @@ -186,8 +186,51 @@ struct ext2_group_desc *ext2fs_group_desc(ext2_filsys fs,
> struct opaque_ext2_group_desc *gdp,
It might be a good idea to rename this variable to "gdt" so that it is more
clear that this is the pointer to the whole group descriptor table, rather
than just a single entry for the group.
> dgrp_t group)
> {
> int desc_size = EXT2_DESC_SIZE(fs->super) & ~7;
> -
> - return (struct ext2_group_desc *)((char *)gdp + group * desc_size);
> + struct ext2_group_desc *ret_gdp;
> + struct ext2_group_desc *tmp_gdp;
> + char *dest;
> + dgrp_t block;
> + blk64_t blk;
(style) these should be declared locally inside "if" block
> + int retval;
> + unsigned int i;
> + unsigned int groups_per_block = EXT2_DESC_PER_BLOCK(fs->super);
> +
> + ret_gdp = (struct ext2_group_desc *)((char *)gdp + group * desc_size);
> +
> + if ((ret_gdp->bg_flags & EXT2_BG_READ) == 0) {
The main problem I see with storing EXT2_BG_READ in the group descriptor
itself it will be written to disk. It would be equivalent to check the
non-zero status of some or all of the fields, using something like:
if (memcmp(zero_gdt, ret_gdp, sizeof(*ret_gdp)) != 0) {
> + block = group / groups_per_block;
> + blk = ext2fs_descriptor_block_loc2(fs, fs->group_block, block);
> + dest = (char *) gdp + fs->blocksize * block;
(style) no space after typecast
> + retval = io_channel_read_blk64(fs->io, blk, 1, dest);
> + if (retval)
> + return NULL;
> +
> + tmp_gdp = (struct ext2_group_desc *)dest;
(style) we could get rid of "dest" and have only "tmp_gdp", and just use:
tmp_gdp = (char *)gdp + fs->blocksize * block;
retval = io_channel_read_blk64(fs->io, blk, 1, tmp_gdp);
since the argument is "void *data" so no need to cast it.
> + for (i=0; i < groups_per_block; i++) {
(style) spaces around that '='
> + /*
> + * TDB: If recovery is from backup superblock, Clear
> + * _UNININT flags & reset bg_itable_unused to zero
Not sure what "TDB" means, since this is being done below?
> + */
> +#ifdef WORDS_BIGENDIAN
> + ext2fs_swap_group_desc2(fs, tmp_gdp);
> +#endif
> + tmp_gdp->bg_flags |= EXT2_BG_READ;
> + if (fs->orig_super == 0 &&
> + EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
> + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
ext2fs_has_group_desc_csum(fs)
> + ext2fs_bg_flags_clear(fs, block * groups_per_block + i, EXT2_BG_BLOCK_UNINIT);
Would be cleaner to calculate "start_group = block * blocks_per_group" once
at the start.
> + ext2fs_bg_flags_clear(fs, block * groups_per_block + i, EXT2_BG_INODE_UNINIT);
> + ext2fs_bg_itable_unused_set(fs, block * groups_per_block + i, 0);
This is nasty, since these are calling ext2fs_group_desc() recursively.
Better to just do this directly in this function:
tmp_gdp->bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
tmp_gdp->bg_flags &= ~EXT2_BG_INODE_UNINIT;
or have some static inline helper functions in blknum.c that take a single
gdp pointer argument instead of doing the lookup internally each time:
set_bg_itable_unused(tmp_gdp, start_group + i);
> + // The checksum will be reset later, but fix it here
> + // anyway to avoid printing a lot of spurious errors.
No C99 comments.
> + ext2fs_group_desc_csum_set(fs, block * groups_per_block + i);
When recovering from the backup superblocks this needs the call to:
if (fs->flags & EXT2_FLAG_RW)
ext2fs_mark_super_dirty(fs);
> + }
> + tmp_gdp = (struct ext2_group_desc *)((char *)tmp_gdp + EXT2_DESC_SIZE(fs->super));
(style) wrap at 80 columns
> + }
> + }
> + return ret_gdp;
> }
>
> /* Do the same but as an ext4 group desc for internal use here */
> diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
> index 27a7d3a..4858684 100644
> --- a/lib/ext2fs/ext2_fs.h
> +++ b/lib/ext2fs/ext2_fs.h
> @@ -202,6 +202,7 @@ struct ext4_group_desc
> #define EXT2_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not initialized */
> #define EXT2_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not initialized */
> #define EXT2_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
> +#define EXT2_BG_READ 0x0008 /* Block group was read from disk */
>
> /*
> * Data structures used by the directory indexing feature
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 8ff49ca..0ae5cbe 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -221,6 +221,7 @@ struct struct_ext2_filsys {
> int fragsize;
> dgrp_t group_desc_count;
> unsigned long desc_blocks;
> + blk64_t group_block;
> struct opaque_ext2_group_desc * group_desc;
> unsigned int inode_blocks_per_group;
> ext2fs_inode_bitmap inode_map;
> diff --git a/lib/ext2fs/openfs.c b/lib/ext2fs/openfs.c
> index ba39e01..5f6cda5 100644
> --- a/lib/ext2fs/openfs.c
> +++ b/lib/ext2fs/openfs.c
> @@ -121,11 +121,9 @@ errcode_t ext2fs_open2(const char *name, const char *io_options,
> blk64_t group_block, blk;
> char *dest, *cp;
> int group_zero_adjust = 0;
> -#ifdef WORDS_BIGENDIAN
> unsigned int groups_per_block;
> struct ext2_group_desc *gdp;
> int j;
> -#endif
>
> EXT2_CHECK_MAGIC(manager, EXT2_ET_MAGIC_IO_MANAGER);
>
> @@ -412,40 +410,27 @@ errcode_t ext2fs_open2(const char *name, const char *io_options,
> first_meta_bg, dest);
> if (retval)
> goto cleanup;
> -#ifdef WORDS_BIGENDIAN
> - gdp = (struct ext2_group_desc *) dest;
> for (j=0; j < groups_per_block*first_meta_bg; j++) {
> gdp = ext2fs_group_desc(fs, fs->group_desc, j);
Isn't this just loading the group descriptor blocks to be read in here?
This should just start the prefetch and nothing else. We might even try
moving the prefetch to the first call to ext2fs_group_desc() when GDT block
#1 is accessed for the first time so that the prefetch isn't started if
the GDT is never accessed. The drawback is that it could lose time when
the blocks could be prefetched from disk, but I suspect that interval would
be very small if the GDT is actually needed so it is probably a net win.
Otherwise, this will read up to 128MB for filesystems without meta_bg.
Possibly always reading in the first GDT block would be good, so that the
special reserved inodes are loaded always.
> - ext2fs_swap_group_desc2(fs, gdp);
> - }
> -#endif
> - dest += fs->blocksize*first_meta_bg;
> - }
> - for (i=first_meta_bg ; i < fs->desc_blocks; i++) {
> - blk = ext2fs_descriptor_block_loc2(fs, group_block, i);
> - retval = io_channel_read_blk64(fs->io, blk, 1, dest);
> - if (retval)
> - goto cleanup;
> #ifdef WORDS_BIGENDIAN
> - for (j=0; j < groups_per_block; j++) {
> - gdp = ext2fs_group_desc(fs, fs->group_desc,
> - i * groups_per_block + j);
> ext2fs_swap_group_desc2(fs, gdp);
> - }
> #endif
> - dest += fs->blocksize;
> + gdp->bg_flags |= EXT2_BG_READ;
> + }
> + dest += fs->blocksize*first_meta_bg;
> }
>
> + fs->group_block = group_block;
> fs->stride = fs->super->s_raid_stride;
>
> /*
> * If recovery is from backup superblock, Clear _UNININT flags &
> * reset bg_itable_unused to zero
> */
> - if (superblock > 1 && ext2fs_has_group_desc_csum(fs)) {
> + if (first_meta_bg && superblock > 1 && ext2fs_has_group_desc_csum(fs)) {
> dgrp_t group;
>
> - for (group = 0; group < fs->group_desc_count; group++) {
> + for (group = 0; group < groups_per_block*first_meta_bg; group++) {
> ext2fs_bg_flags_clear(fs, group, EXT2_BG_BLOCK_UNINIT);
> ext2fs_bg_flags_clear(fs, group, EXT2_BG_INODE_UNINIT);
> ext2fs_bg_itable_unused_set(fs, group, 0);
This is already implemented in the ext2fs_group_desc() access, so could be
removed here?
Cheers, Andreas
Download attachment "signature.asc" of type "application/pgp-signature" (196 bytes)
Powered by blists - more mailing lists