lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170125172346.GH14029@birch.djwong.org>
Date:   Wed, 25 Jan 2017 09:23:46 -0800
From:   "Darrick J. Wong" <darrick.wong@...cle.com>
To:     Artem Blagodarenko <artem.blagodarenko@...gate.com>
Cc:     linux-ext4@...r.kernel.org
Subject: Re: [PATCH 2/3] e2fsck: 3 level hash tree directory optimisation

On Tue, Jan 24, 2017 at 09:08:20PM +0300, Artem Blagodarenko wrote:
> e2fsck fixed for partitions with 3 level hash directries.
> Additional level is added to e2fsck -D codepath.
> 
> Signed-off-by: Artem Blagodarenko <artem.blagodarenko@...gate.com>
> ---
>  debugfs/htree.c                          |    3 +-
>  e2fsck/e2fsck.h                          |    1 +
>  e2fsck/pass2.c                           |   72 +++++++++++++-----
>  e2fsck/rehash.c                          |  124 ++++++++++++++++++++++++------
>  tests/d_fallocate_blkmap/expect          |    4 +-
>  tests/d_inline_dump/expect               |   12 ++--
>  tests/f_convert_bmap/expect.1            |    2 +-
>  tests/f_convert_bmap_and_extent/expect.1 |    2 +-
>  tests/f_create_symlinks/expect           |    8 +-
>  9 files changed, 170 insertions(+), 58 deletions(-)
> 
> diff --git a/debugfs/htree.c b/debugfs/htree.c
> index 54e55e2..8c18666 100644
> --- a/debugfs/htree.c
> +++ b/debugfs/htree.c
> @@ -287,7 +287,8 @@ void do_htree_dump(int argc, char *argv[])
>      fprintf(pager, "\t Indirect levels: %d\n", rootnode->indirect_levels);
>      fprintf(pager, "\t Flags: %d\n", rootnode->unused_flags);
> 
> -    ent = (struct ext2_dx_entry *) (buf + 24 + rootnode->info_length);
> +    ent = (struct ext2_dx_entry *)
> +        ((char *)rootnode + rootnode->info_length);
> 
>      htree_dump_int_node(current_fs, ino, &inode, rootnode, ent,
>                  buf + current_fs->blocksize,
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index f356810..a4efbdf 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -122,6 +122,7 @@ struct dx_dirblock_info {
>      blk64_t        phys;
>      int        flags;
>      blk64_t        parent;
> +    blk64_t        previous;
>      ext2_dirhash_t    min_hash;
>      ext2_dirhash_t    max_hash;
>      ext2_dirhash_t    node_min_hash;
> diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
> index 7ded7bb..9f5cd1b 100644
> --- a/e2fsck/pass2.c
> +++ b/e2fsck/pass2.c
> @@ -85,6 +85,40 @@ struct check_dir_struct {
>      unsigned long long next_ra_off;
>  };
> 
> +static void update_parents(struct dx_dir_info *dx_dir, int type)
> +{
> +    struct dx_dirblock_info *dx_db, *dx_parent, *dx_previous;
> +    int b;
> +
> +    for (b = 0, dx_db = dx_dir->dx_block;
> +         b < dx_dir->numblocks;
> +         b++, dx_db++) {
> +        dx_parent = &dx_dir->dx_block[dx_db->parent];
> +        if (dx_db->type != type ||
> +            !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))

The flags check is redundant given the two if clauses.

> +            continue;
> +
> +        /*
> +         * XXX Make sure dx_parent->min_hash > dx_db->min_hash

"XXX" in a comment makes me think this is unfinished...?

Granted, you /did/ just copy & paste the code as-is.  Maybe it's better
to let 15yo code sleep.

> +        */
> +        if (dx_db->flags & DX_FLAG_FIRST) {
> +            dx_parent->min_hash = dx_db->min_hash;
> +            if (dx_parent->previous) {
> +                dx_previous =
> +                    &dx_dir->dx_block[dx_parent->previous];
> +                dx_previous->node_max_hash =
> +                    dx_parent->min_hash;
> +            }
> +        }
> +        /*
> +         * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> +         */
> +        if (dx_db->flags & DX_FLAG_LAST) {
> +            dx_parent->max_hash = dx_db->max_hash;
> +        }
> +    }
> +}
> +
>  void e2fsck_pass2(e2fsck_t ctx)
>  {
>      struct ext2_super_block *sb = ctx->fs->super;
> @@ -182,24 +216,11 @@ void e2fsck_pass2(e2fsck_t ctx)
>           * Find all of the first and last leaf blocks, and
>           * update their parent's min and max hash values
>           */
> -        for (b=0, dx_db = dx_dir->dx_block;
> -             b < dx_dir->numblocks;
> -             b++, dx_db++) {
> -            if ((dx_db->type != DX_DIRBLOCK_LEAF) ||
> -                !(dx_db->flags & (DX_FLAG_FIRST | DX_FLAG_LAST)))
> -                continue;
> -            dx_parent = &dx_dir->dx_block[dx_db->parent];
> -            /*
> -             * XXX Make sure dx_parent->min_hash > dx_db->min_hash
> -             */
> -            if (dx_db->flags & DX_FLAG_FIRST)
> -                dx_parent->min_hash = dx_db->min_hash;
> -            /*
> -             * XXX Make sure dx_parent->max_hash < dx_db->max_hash
> -             */
> -            if (dx_db->flags & DX_FLAG_LAST)
> -                dx_parent->max_hash = dx_db->max_hash;
> -        }
> +        update_parents(dx_dir, DX_DIRBLOCK_LEAF);
> +
> +        /* for 3 level htree: update 2 level parent's min
> +         * and max hash values */
> +        update_parents(dx_dir, DX_DIRBLOCK_NODE);
> 
>          for (b=0, dx_db = dx_dir->dx_block;
>               b < dx_dir->numblocks;
> @@ -642,6 +663,10 @@ static void parse_int_node(ext2_filsys fs,
>              dx_db->flags |= DX_FLAG_REFERENCED;
>              dx_db->parent = db->blockcnt;
>          }
> +
> +        dx_db->previous =
> +            i ? ext2fs_le32_to_cpu(ent[i-1].block & 0x0ffffff) : 0;

Why the bit masking?  Is it ever the case that the top byte is set?

> +
>          if (hash < min_hash)
>              min_hash = hash;
>          if (hash > max_hash)
> @@ -949,6 +974,17 @@ static int check_dir_block(ext2_filsys fs,
>              return DIRENT_ABORT;
>      }
> 
> +    /* This will allow (at some point in the future) to punch out empty
> +     * directory blocks and reduce the space used by a directory that grows
> +     * very large and then the files are deleted. For now, all that is
> +     * needed is to avoid e2fsck filling in these holes as part of
> +     * feature flag. */
> +    if (db->blk == 0 &&
> +        EXT2_HAS_INCOMPAT_FEATURE(fs->super,
> +                      EXT4_FEATURE_INCOMPAT_LARGEDIR)) {

ext2fs_has_feature_large_dir()

> +        return 0;
> +    }
> +
>      if (db->blk == 0 && !inline_data_size) {
>          if (allocate_dir_block(ctx, db, buf, &cd->pctx))
>              return 0;
> diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
> index 22a58f3..6d7e3df 100644
> --- a/e2fsck/rehash.c
> +++ b/e2fsck/rehash.c
> @@ -603,6 +603,42 @@ static struct ext2_dx_entry
> *set_int_node(ext2_filsys fs, char *buf)
>      return (struct ext2_dx_entry *) limits;
>  }
> 
> +static int alloc_blocks(ext2_filsys fs,
> +            struct ext2_dx_countlimit **limit,
> +            struct ext2_dx_entry **prev_ent,
> +            struct ext2_dx_entry **next_ent,
> +            int *prev_offset, int *next_offset,
> +            struct out_dir *outdir, int i,
> +            int *prev_count, int *next_count)
> +{
> +    errcode_t    retval;
> +    char        *block_start;
> +
> +    if (*limit)
> +        (*limit)->limit = (*limit)->count =
> +            ext2fs_cpu_to_le16((*limit)->limit);
> +    *prev_ent = (struct ext2_dx_entry *) (outdir->buf + *prev_offset);
> +    (*prev_ent)->block = ext2fs_cpu_to_le32(outdir->num);
> +
> +    if (i != 1)
> +        (*prev_ent)->hash =
> +            ext2fs_cpu_to_le32(outdir->hashes[i]);
> +
> +    retval = get_next_block(fs, outdir, &block_start);
> +    if (retval)
> +        return retval;
> +
> +    *next_ent = set_int_node(fs, block_start);
> +    *limit = (struct ext2_dx_countlimit *)(*next_ent);
> +    if (next_offset)
> +        *next_offset = ((char *) *next_ent - outdir->buf);
> +
> +    *next_count = (*limit)->limit;
> +    (*prev_offset) += sizeof(struct ext2_dx_entry);
> +    (*prev_count)--;
> +return 0;

Indentation.  Speaking of which, when did all the tabs change to "   "?

> +}
> +
>  /*
>   * This function takes the leaf nodes which have been written in
>   * outdir, and populates the root node and any necessary interior nodes.
> @@ -612,13 +648,13 @@ static errcode_t calculate_tree(ext2_filsys fs,
>                  ext2_ino_t ino,
>                  ext2_ino_t parent)
>  {
> -    struct ext2_dx_root_info      *root_info;
> -    struct ext2_dx_entry         *root, *dx_ent = 0;
> -    struct ext2_dx_countlimit    *root_limit, *limit;
> +    struct ext2_dx_root_info    *root_info;
> +    struct ext2_dx_entry        *root, *int_ent, *dx_ent = 0;
> +    struct ext2_dx_countlimit    *root_limit, *int_limit, *limit;
>      errcode_t            retval;
>      char                * block_start;
> -    int                i, c1, c2, nblks;
> -    int                limit_offset, root_offset;
> +    int                i, c1, c2, c3, nblks;

I defer to Ted on this, but I wonder if all the htree state would all be
better off as a htree cursor structure rather than open coded stack
variables.  OTOH that's a lot of churn and I don't figure we're likely
to get to 4-level htree ever.

> +    int                limit_offset, int_offset, root_offset;
> 
>      root_info = set_root_node(fs, outdir->buf, ino, parent);
>      root_offset = limit_offset = ((char *) root_info - outdir->buf) +
> @@ -628,7 +664,7 @@ static errcode_t calculate_tree(ext2_filsys fs,
>      nblks = outdir->num;
> 
>      /* Write out the pointer blocks */
> -    if (nblks-1 <= c1) {
> +    if (nblks - 1 <= c1) {
>          /* Just write out the root block, and we're done */
>          root = (struct ext2_dx_entry *) (outdir->buf + root_offset);
>          for (i=1; i < nblks; i++) {
> @@ -639,31 +675,24 @@ static errcode_t calculate_tree(ext2_filsys fs,
>              root++;
>              c1--;
>          }
> -    } else {
> +    } else if (nblks - 1 <= (c1 * ((fs->blocksize - 8) /
> +                 sizeof(*root)))) {

Wishing these were proper helper functions...

if (nblks - 1 <= ext2fs_htree_intnode_maxrecs(c1)) {

>          c2 = 0;
> -        limit = 0;
> +        limit = NULL;
>          root_info->indirect_levels = 1;
>          for (i=1; i < nblks; i++) {
> -            if (c1 == 0)
> +            if (c2 == 0 && c1 == 0)
>                  return ENOSPC;
>              if (c2 == 0) {
> -                if (limit)
> -                    limit->limit = limit->count =
> -        ext2fs_cpu_to_le16(limit->limit);
> -                root = (struct ext2_dx_entry *)
> -                    (outdir->buf + root_offset);
> -                root->block = ext2fs_cpu_to_le32(outdir->num);
> -                if (i != 1)
> -                    root->hash =
> -            ext2fs_cpu_to_le32(outdir->hashes[i]);
> -                if ((retval =  get_next_block(fs, outdir,
> -                                  &block_start)))
> +                retval = alloc_blocks(fs, &limit,
> +                             &root,
> +                             &dx_ent,
> +                             &root_offset,
> +                             NULL,
> +                             outdir, i,
> +                             &c1, &c2);
> +                if (retval)
>                      return retval;
> -                dx_ent = set_int_node(fs, block_start);
> -                limit = (struct ext2_dx_countlimit *) dx_ent;
> -                c2 = limit->limit;
> -                root_offset += sizeof(struct ext2_dx_entry);
> -                c1--;
>              }
>              dx_ent->block = ext2fs_cpu_to_le32(i);
>              if (c2 != limit->limit)
> @@ -674,6 +703,51 @@ static errcode_t calculate_tree(ext2_filsys fs,
>          }
>          limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
>          limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +    } else {
> +        c2 = 0;
> +        c3 = 0;
> +        limit = NULL;
> +        int_limit = 0;
> +        root_info->indirect_levels = 2;
> +        for (i = 1; i < nblks; i++) {
> +            if (c3 == 0 && c2 == 0 && c1 == 0)
> +                return ENOSPC;
> +            if (c3 == 0 && c2 == 0) {
> +                retval = alloc_blocks(fs, &int_limit,
> +                             &root,
> +                             &int_ent,
> +                             &root_offset,
> +                             &int_offset,
> +                             outdir, i,
> +                             &c1, &c2);
> +                if (retval)
> +                    return retval;
> +            }
> +            if (c3 == 0) {
> +                retval = alloc_blocks(fs, &limit,
> +                             &int_ent,
> +                             &dx_ent,
> +                             &int_offset,
> +                             NULL,
> +                             outdir, i,
> +                             &c2, &c3);
> +                if (retval)
> +                    return retval;
> +
> +            }
> +            dx_ent->block = ext2fs_cpu_to_le32(i);
> +            if (c3 != limit->limit)
> +                dx_ent->hash =
> +                    ext2fs_cpu_to_le32(outdir->hashes[i]);
> +            dx_ent++;
> +            c3--;
> +        }
> +        int_limit->count = ext2fs_cpu_to_le16(limit->limit - c2);
> +        int_limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
> +        limit->count = ext2fs_cpu_to_le16(limit->limit - c3);
> +        limit->limit = ext2fs_cpu_to_le16(limit->limit);
> +
>      }
>      root_limit = (struct ext2_dx_countlimit *) (outdir->buf + limit_offset);
>      root_limit->count = ext2fs_cpu_to_le16(root_limit->limit - c1);
> diff --git a/tests/d_fallocate_blkmap/expect b/tests/d_fallocate_blkmap/expect
> index 8ce79ff..f588511 100644
> --- a/tests/d_fallocate_blkmap/expect
> +++ b/tests/d_fallocate_blkmap/expect
> @@ -18,7 +18,7 @@ debugfs: stat /a
>  Inode: 12   Type: regular    Mode:  0666   Flags: 0x0
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 40960
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0

These belong in the patch that removes Directory ACL.

--D

>  Links: 1   Blockcount: 82
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> @@ -30,7 +30,7 @@ debugfs: stat /b
>  Inode: 13   Type: regular    Mode:  0666   Flags: 0x0
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 10240000
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 20082
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> diff --git a/tests/d_inline_dump/expect b/tests/d_inline_dump/expect
> index c84f64d..f0ba471 100644
> --- a/tests/d_inline_dump/expect
> +++ b/tests/d_inline_dump/expect
> @@ -2,7 +2,7 @@
>  Inode: 13   Type: regular    Mode:  0644   Flags: 0x10000000
>  Generation: 3289262644    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 80
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec6b4:c72e3c00 -- Tue Jul 22 20:16:52 2014
> @@ -18,7 +18,7 @@ Size of inline data: 80
>  Inode: 18   Type: regular    Mode:  0644   Flags: 0x10000000
>  Generation: 3842229473    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 20
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec6b4:cafecc00 -- Tue Jul 22 20:16:52 2014
> @@ -35,7 +35,7 @@ Size of inline data: 60
>  Inode: 16   Type: directory    Mode:  0755   Flags: 0x10000000
>  Generation: 3842229469    Version: 0x00000000:00000004
>  User:     0   Group:     0   Size: 132
> -File ACL: 7    Directory ACL: 0
> +File ACL: 7
>  Links: 2   Blockcount: 8
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec6e3:27eac000 -- Tue Jul 22 20:17:39 2014
> @@ -51,7 +51,7 @@ Size of inline data: 132
>  Inode: 20   Type: directory    Mode:  0755   Flags: 0x10000000
>  Generation: 3710818931    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 60
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 2   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec6b4:ca0aa800 -- Tue Jul 22 20:16:52 2014
> @@ -68,7 +68,7 @@ Size of inline data: 60
>  Inode: 12   Type: symlink    Mode:  0777   Flags: 0x10000000
>  Generation: 3289262643    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 80
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec47f:724db800 -- Tue Jul 22 20:07:27 2014
> @@ -83,7 +83,7 @@ Fast link dest:
> "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
>  Inode: 19   Type: symlink    Mode:  0777   Flags: 0x0
>  Generation: 3842229474    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 20
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x53cec44c:a1fcc000 -- Tue Jul 22 20:06:36 2014
> diff --git a/tests/f_convert_bmap/expect.1 b/tests/f_convert_bmap/expect.1
> index 7d2ca86..0291f94 100644
> --- a/tests/f_convert_bmap/expect.1
> +++ b/tests/f_convert_bmap/expect.1
> @@ -2,7 +2,7 @@ debugfs: stat /a
>  Inode: 12   Type: regular    Mode:  0644   Flags: 0x0
>  Generation: 1573716129    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 524288
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 1030
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x5457f87a:62ae2980 -- Mon Nov  3 21:49:46 2014
> diff --git a/tests/f_convert_bmap_and_extent/expect.1
> b/tests/f_convert_bmap_and_extent/expect.1
> index 7af91aa..eb55db7 100644
> --- a/tests/f_convert_bmap_and_extent/expect.1
> +++ b/tests/f_convert_bmap_and_extent/expect.1
> @@ -2,7 +2,7 @@ debugfs: stat /a
>  Inode: 12   Type: regular    Mode:  0644   Flags: 0x0
>  Generation: 1573716129    Version: 0x00000000:00000001
>  User:     0   Group:     0   Size: 524288
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 1030
>  Fragment:  Address: 0    Number: 0    Size: 0
>   ctime: 0x5457f87a:62ae2980 -- Mon Nov  3 21:49:46 2014
> diff --git a/tests/f_create_symlinks/expect b/tests/f_create_symlinks/expect
> index dca6e92..4409385 100644
> --- a/tests/f_create_symlinks/expect
> +++ b/tests/f_create_symlinks/expect
> @@ -20,7 +20,7 @@ debugfs -R "stat /l_30" test.img
>  Inode: 12   Type: symlink    Mode:  0777   Flags: 0x0
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 31
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> @@ -29,7 +29,7 @@ debugfs -R "stat /l_70" test.img
>  Inode: 13   Type: symlink    Mode:  0777   Flags: 0x10000000
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 71
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 0
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> @@ -40,7 +40,7 @@ debugfs -R "stat /l_500" test.img
>  Inode: 14   Type: symlink    Mode:  0777   Flags: 0x80000
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 501
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 2
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> @@ -50,7 +50,7 @@ debugfs -R "stat /l_1023" test.img
>  Inode: 15   Type: symlink    Mode:  0777   Flags: 0x80000
>  Generation: 0    Version: 0x00000000:00000000
>  User:     0   Group:     0   Project:     0   Size: 1024
> -File ACL: 0    Directory ACL: 0
> +File ACL: 0
>  Links: 1   Blockcount: 2
>  Fragment:  Address: 0    Number: 0    Size: 0
>  Size of extra inode fields: 32
> --

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ