lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180628140657.GA5699@magnolia>
Date:   Thu, 28 Jun 2018 07:06:57 -0700
From:   "Darrick J. Wong" <darrick.wong@...cle.com>
To:     c17828 <artem.blagodarenko@...il.com>
Cc:     linux-ext4@...r.kernel.org, adilger.kernel@...ger.ca,
        alexey.lyashkov@...il.com,
        Andreas Dilger <andreas.dilger@...el.com>
Subject: Re: [PATCH] filefrag: Lustre changes to filefrag FIEMAP handling

On Wed, Jun 27, 2018 at 06:57:31PM +0300, c17828 wrote:
> From: Andreas Dilger <andreas.dilger@...el.com>
> 
> Add support for multiple-device filesystems by defining a new
> fe_device field in the fiemap_extent structure.  This allows
> printing the filesystem-relative or Linux block device number
> associated with each extent of a file.  If a single filesystem
> extent is mirrored to multiple block devices, the fe_device
> field can be used to disambiguate the multiple copies.
> 
> If the "-l" (device-logical) option is given to filefrag, then
> all extents for a particular device of a file are returned
> before returning extents for the next device.  This makes it
> easier to see if extent allocation within a single device is
> contiguous, instead of returning all of the blocks of a file
> interleaved in file-logical-offset order.
> 
> Change-Id: Icdefe4dbc319e6652c3d6641e1500f2cfaf63605
> Signed-off-by: Andreas Dilger <andreas.dilger@...el.com>
> Signed-off-by: Artem Blagodarenko <artem.blagodarenko@...il.com>
> ---
>  lib/ext2fs/fiemap.h |  7 +++++-
>  misc/filefrag.8.in  |  5 +++-
>  misc/filefrag.c     | 67 +++++++++++++++++++++++++++++++++++++++++++----------
>  3 files changed, 65 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/ext2fs/fiemap.h b/lib/ext2fs/fiemap.h
> index a331bc12..261a0b2b 100644
> --- a/lib/ext2fs/fiemap.h
> +++ b/lib/ext2fs/fiemap.h
> @@ -19,7 +19,8 @@ struct fiemap_extent {
>  	__u64 fe_length;   /* length in bytes for this extent */
>  	__u64 fe_reserved64[2];
>  	__u32 fe_flags;    /* FIEMAP_EXTENT_* flags for this extent */
> -	__u32 fe_reserved[3];
> +	__u32 fe_device;   /* device number (fs-specific if FIEMAP_EXTENT_NET)*/
> +	__u32 fe_reserved[2];
>  };
>  
>  struct fiemap {
> @@ -42,6 +43,7 @@ struct fiemap {
>  
>  #define FIEMAP_FLAG_SYNC	0x00000001 /* sync file data before map */
>  #define FIEMAP_FLAG_XATTR	0x00000002 /* map extended attribute tree */
> +#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
>  
>  #define FIEMAP_FLAGS_COMPAT	(FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
>  
> @@ -67,4 +69,7 @@ struct fiemap {
>  #define FIEMAP_EXTENT_SHARED		0x00002000 /* Space shared with other
>  						    * files. */
>  
> +/* Network filesystem flags - use a high bit, don't conflict with upstream */
> +#define FIEMAP_EXTENT_NET		0x80000000 /* Data stored remotely. */

Why does "NET" mean "look for a device number"?

What's the format for fe_device?  Arbitrary 32-bit cookie?  Kernel
dev_t?  Something else?

Is there a corresponding change for include/uapi/linux/fiemap.h?  Would
be nice if we could wire that up to iomap_fiemap <hint hint>

--D

> +
>  #endif /* _LINUX_FIEMAP_H */
> diff --git a/misc/filefrag.8.in b/misc/filefrag.8.in
> index 292b3b7d..fdba67f7 100644
> --- a/misc/filefrag.8.in
> +++ b/misc/filefrag.8.in
> @@ -8,7 +8,7 @@ filefrag \- report on file fragmentation
>  .BI \-b blocksize
>  ]
>  [
> -.B \-BeksvxX
> +.B \-BeklsvxX
>  ]
>  [
>  .I files...
> @@ -46,6 +46,9 @@ Print output in extent format, even for block-mapped files.
>  .BI \-k
>  Use 1024\-byte blocksize for output (identical to '\-b 1024').
>  .TP
> +.B \-l
> +Extents are displayed in device-logical offset order.
> +.TP
>  .B \-s
>  Sync the file before requesting the mapping.
>  .TP
> diff --git a/misc/filefrag.c b/misc/filefrag.c
> index 9c57ab93..78e13076 100644
> --- a/misc/filefrag.c
> +++ b/misc/filefrag.c
> @@ -54,14 +54,16 @@ int verbose = 0;
>  int blocksize;		/* Use specified blocksize (default 1kB) */
>  int sync_file = 0;	/* fsync file before getting the mapping */
>  int xattr_map = 0;	/* get xattr mapping */
> -int force_bmap;	/* force use of FIBMAP instead of FIEMAP */
> +int force_bmap;		/* force use of FIBMAP instead of FIEMAP */
>  int force_extent;	/* print output in extent format always */
> +int device_offset;	/* extents report device-relative offsets */
>  int logical_width = 8;
>  int physical_width = 10;
>  const char *ext_fmt = "%4d: %*llu..%*llu: %*llu..%*llu: %6llu: %s\n";
>  const char *hex_fmt = "%4d: %*llx..%*llx: %*llx..%*llx: %6llx: %s\n";
>  
> -#define FILEFRAG_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
> +#define FILEFRAG_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR |\
> +				      FIEMAP_FLAG_DEVICE_ORDER)
>  
>  #define FIBMAP		_IO(0x00, 1)	/* bmap access */
>  #define FIGETBSZ	_IO(0x00, 2)	/* get the block size used for bmap */
> @@ -120,10 +122,10 @@ static void print_extent_header(void)
>  {
>  	printf(" ext: %*s %*s length: %*s flags:\n",
>  	       logical_width * 2 + 3,
> -	       "logical_offset:",
> +	       device_offset ? "device_logical:" : "logical_offset:",
>  	       physical_width * 2 + 3, "physical_offset:",
> -	       physical_width + 1,
> -	       "expected:");
> +	       device_offset ? 5 : physical_width + 1,
> +	       device_offset ? " dev:" : "expected:");
>  }
>  
>  static void print_flag(__u32 *flags, __u32 mask, char *buf, const char *name)
> @@ -159,11 +161,11 @@ static void print_extent_info(struct fiemap_extent *fm_extent, int cur_ex,
>  		physical_blk = fm_extent->fe_physical >> blk_shift;
>  	}
>  
> -	if (expected)
> -		sprintf(flags, ext_fmt == hex_fmt ? "%*llx: " : "%*llu: ",
> +	if (device_offset)
> +		sprintf(flags, "%04x: ", fm_extent->fe_device);
> +	else if (expected)
> +		sprintf(flags, ext_fmt == hex_fmt ? "%*llx:" : "%*llu: ",
>  			physical_width, expected >> blk_shift);
> -	else
> -		sprintf(flags, "%.*s  ", physical_width, "                   ");
>  
>  	fe_flags = fm_extent->fe_flags;
>  	print_flag(&fe_flags, FIEMAP_EXTENT_LAST, flags, "last,");
> @@ -177,6 +179,8 @@ static void print_extent_info(struct fiemap_extent *fm_extent, int cur_ex,
>  	print_flag(&fe_flags, FIEMAP_EXTENT_UNWRITTEN, flags, "unwritten,");
>  	print_flag(&fe_flags, FIEMAP_EXTENT_MERGED, flags, "merged,");
>  	print_flag(&fe_flags, FIEMAP_EXTENT_SHARED, flags, "shared,");
> +	print_flag(&fe_flags, FIEMAP_EXTENT_NET, flags, "net,");
> +
>  	/* print any unknown flags as hex values */
>  	for (mask = 1; fe_flags != 0 && mask != 0; mask <<= 1) {
>  		char hex[6];
> @@ -217,6 +221,7 @@ static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
>  	unsigned int i;
>  	int fiemap_header_printed = 0;
>  	int tot_extents = 0, n = 0;
> +	int previous_device = 0;
>  	int last = 0;
>  	int rc;
>  
> @@ -228,6 +233,12 @@ static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
>  	if (xattr_map)
>  		flags |= FIEMAP_FLAG_XATTR;
>  
> +	if (device_offset) {
> +		flags |= FIEMAP_FLAG_DEVICE_ORDER;
> +		memset(fm_ext, 0, sizeof(struct fiemap_extent));
> +	}
> +
> +retry_wo_device_order:
>  	do {
>  		fiemap->fm_length = ~0ULL;
>  		fiemap->fm_flags = flags;
> @@ -242,6 +253,10 @@ static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
>  						"flags %x\n",
>  				       fiemap->fm_flags);
>  				fiemap_incompat_printed = 1;
> +			} else if (rc == EBADR && (fiemap->fm_flags &
> +						   FIEMAP_FLAG_DEVICE_ORDER)) {
> +				flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
> +				goto retry_wo_device_order;
>  			}
>  			return rc;
>  		}
> @@ -260,6 +275,9 @@ static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
>  					 fm_last.fe_length;
>  			expected = fm_last.fe_physical +
>  				   fm_ext[i].fe_logical - fm_last.fe_logical;
> +			if (previous_device != fm_ext[i].fe_device)
> +				previous_device = fm_ext[i].fe_device;
> +
>  			if (fm_ext[i].fe_logical != 0 &&
>  			    fm_ext[i].fe_physical != expected &&
>  			    fm_ext[i].fe_physical != expected_dense) {
> @@ -278,8 +296,20 @@ static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
>  			n++;
>  		}
>  
> -		fiemap->fm_start = (fm_ext[i - 1].fe_logical +
> -				    fm_ext[i - 1].fe_length);
> +		/* For DEVICE_ORDER mappings, if EXTENT_LAST not yet found then
> +		 * fm_start needs to be the same as it was for earlier ioctl.
> +		 * The first extent is used to pass the end offset and device
> +		 * of the last FIEMAP call.  Otherwise, we ask for extents
> +		 * starting from where the last mapping ended. */
> +		if (flags & FIEMAP_FLAG_DEVICE_ORDER) {
> +			fm_ext[0].fe_logical =	fm_ext[i - 1].fe_logical +
> +						fm_ext[i - 1].fe_length;
> +			fm_ext[0].fe_device =	fm_ext[i - 1].fe_device;
> +			fiemap->fm_start =	0;
> +		} else {
> +			fiemap->fm_start =	fm_ext[i - 1].fe_logical +
> +						fm_ext[i - 1].fe_length;
> +		}
>  	} while (last == 0);
>  
>  	*num_extents = tot_extents;
> @@ -303,6 +333,8 @@ static int filefrag_fibmap(int fd, int blk_shift, int *num_extents,
>  	memset(&fm_ext, 0, sizeof(fm_ext));
>  	memset(&fm_last, 0, sizeof(fm_last));
>  	if (force_extent) {
> +		memset(&fm_ext, 0, sizeof(fm_ext));
> +		fm_ext.fe_device = st->st_dev;
>  		fm_ext.fe_flags = FIEMAP_EXTENT_MERGED;
>  	}
>  
> @@ -437,6 +469,13 @@ static int frag_report(const char *filename)
>  			is_ext2 = 1;
>  	}
>  
> +	/* Check if filesystem is Lustre.  Always print in extent format
> +	 * with 1kB blocks, using the device-relative logical offsets. */
> +	if (fsinfo.f_type == LUSTRE_SUPER_MAGIC) {
> +		device_offset = 1;
> +		blocksize = blocksize ?: 1024;
> +	}
> +
>  	if (is_ext2) {
>  		long cylgroups = div_ceil(fsinfo.f_blocks, blksize * 8);
>  
> @@ -524,10 +563,11 @@ int main(int argc, char**argv)
>  	char **cpp;
>  	int rc = 0, c;
>  
> -	while ((c = getopt(argc, argv, "Bb::eksvxX")) != EOF) {
> +	while ((c = getopt(argc, argv, "Bb::eklsvxX")) != EOF) {
>  		switch (c) {
>  		case 'B':
>  			force_bmap++;
> +			force_extent = 0;
>  			break;
>  		case 'b':
>  			if (optarg) {
> @@ -567,6 +607,9 @@ int main(int argc, char**argv)
>  		case 'k':
>  			blocksize = 1024;
>  			break;
> +		case 'l':
> +			device_offset++;
> +			break;
>  		case 's':
>  			sync_file++;
>  			break;
> -- 
> 2.14.3
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ