[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140711182558.GG10417@birch.djwong.org>
Date: Fri, 11 Jul 2014 11:25:58 -0700
From: "Darrick J. Wong" <darrick.wong@...cle.com>
To: "Theodore Ts'o" <tytso@....edu>
Cc: Ext4 Developers List <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH] mke2fs: add support to align hugefiles relative to
beginning of the disk
On Tue, Jul 08, 2014 at 08:44:51PM -0400, Theodore Ts'o wrote:
> Add the mke2fs.conf configuration option which causes the hugefiles to
> be aligned to the beginning of the disk. This is important if the the
> reason for aligning the hugefiles is to support hard-drive specific
> features such as Shingled Magnetic Recording (SMR).
>
> Signed-off-by: Theodore Ts'o <tytso@....edu>
> ---
> misc/mk_hugefiles.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++--
> misc/mke2fs.c | 2 +-
> misc/mke2fs.conf.5.in | 7 +++
> misc/mke2fs.h | 2 +-
> 4 files changed, 157 insertions(+), 8 deletions(-)
>
> diff --git a/misc/mk_hugefiles.c b/misc/mk_hugefiles.c
> index b7a9840..ea42b6c 100644
> --- a/misc/mk_hugefiles.c
> +++ b/misc/mk_hugefiles.c
> @@ -3,9 +3,11 @@
> */
>
> #define _XOPEN_SOURCE 600 /* for inclusion of PATH_MAX in Solaris */
> +#define _BSD_SOURCE /* for makedev() and major() */
>
> #include "config.h"
> #include <stdio.h>
> +#include <stdarg.h>
> #include <string.h>
> #include <strings.h>
> #include <fcntl.h>
> @@ -60,6 +62,141 @@ static char *fn_buf;
> static char *fn_numbuf;
> int zero_hugefile = 1;
>
> +#define SYSFS_PATH_LEN 256
> +typedef char sysfs_path_t[SYSFS_PATH_LEN];
> +
> +#ifndef HAVE_SNPRINTF
> +/*
> + * We are very careful to avoid needing to worry about buffer
> + * overflows, so we don't really need to use snprintf() except as an
> + * additional safety check. So if snprintf() is not present, it's
> + * safe to fall back to vsprintf(). This provides portability since
> + * vsprintf() is guaranteed by C89, while snprintf() is only
> + * guaranteed by C99 --- which for example, Microsoft Visual Studio
> + * has *still* not bothered to implement. :-/ (Not that I expect
> + * mke2fs to be ported to MS Visual Studio any time soon, but
> + * libext2fs *does* get built on Microsoft platforms, and we might
> + * want to move this into libext2fs some day.)
> + */
> +static int my_snprintf(char *str, size_t size, const char *format, ...)
> +{
> + va_list ap;
> + int ret;
> +
> + va_start(ap, format);
> + ret = vsprintf(str, format, ap);
> + va_end(ap);
> + return ret;
> +}
> +
> +#define snprintf my_snprintf
> +#endif
> +
> +/*
> + * Fall back to Linux's definitions of makedev and major are needed.
> + * The search_sysfs_block() function is highly unlikely to work on
> + * non-Linux systems anyway.
> + */
> +#ifndef makedev
> +#define makedev(maj, min) (((maj) << 8) + (min))
> +#endif
> +
> +static char *search_sysfs_block(dev_t devno, sysfs_path_t ret_path)
> +{
> + struct dirent *de, *p_de;
> + DIR *dir = NULL, *p_dir = NULL;
> + FILE *f;
> + sysfs_path_t path, p_path;
> + unsigned int major, minor;
> + char *ret = ret_path;
> +
> + ret_path[0] = 0;
> + if ((dir = opendir("/sys/block")) == NULL)
> + return NULL;
> + while ((de = readdir(dir)) != NULL) {
> + if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..") ||
> + strlen(de->d_name) > sizeof(path)-32)
> + continue;
> + snprintf(path, SYSFS_PATH_LEN,
> + "/sys/block/%s/dev", de->d_name);
> + f = fopen(path, "r");
> + if (f &&
> + (fscanf(f, "%u:%u", &major, &minor) == 2)) {
> + fclose(f); f = NULL;
> + if (makedev(major, minor) == devno) {
> + snprintf(ret_path, SYSFS_PATH_LEN,
> + "/sys/block/%s", de->d_name);
> + goto success;
> + }
> +#ifdef major
> + if (major(devno) != major)
> + continue;
> +#endif
> + }
> + if (f)
> + fclose(f);
> +
> + snprintf(path, SYSFS_PATH_LEN, "/sys/block/%s", de->d_name);
> +
> + if (p_dir)
> + closedir(p_dir);
> + if ((p_dir = opendir(path)) == NULL)
> + continue;
> + while ((p_de = readdir(p_dir)) != NULL) {
> + if (!strcmp(p_de->d_name, ".") ||
> + !strcmp(p_de->d_name, "..") ||
> + (strlen(p_de->d_name) >
> + SYSFS_PATH_LEN - strlen(path) - 32))
> + continue;
> + snprintf(p_path, SYSFS_PATH_LEN, "%s/%s/dev",
> + path, p_de->d_name);
> +
> + f = fopen(p_path, "r");
> + if (f &&
> + (fscanf(f, "%u:%u", &major, &minor) == 2) &&
> + (((major << 8) + minor) == devno)) {
> + fclose(f);
> + snprintf(ret_path, SYSFS_PATH_LEN, "%s/%s",
> + path, p_de->d_name);
> + goto success;
> + }
> + if (f)
> + fclose(f);
> + }
> + }
> + ret = NULL;
> +success:
> + if (dir)
> + closedir(dir);
> + if (p_dir)
> + closedir(p_dir);
> + return ret;
> +}
> +
> +static blk64_t get_partition_start(const char *device_name)
> +{
> + unsigned long long start;
> + sysfs_path_t path;
> + struct stat st;
> + FILE *f;
> + char *cp;
> + int n;
> +
> + if ((stat(device_name, &st) < 0) || !S_ISBLK(st.st_mode))
> + return 0;
> +
> + cp = search_sysfs_block(st.st_rdev, path);
> + if (!cp)
> + return 0;
> + strncat(path, "/start", SYSFS_PATH_LEN);
The third argument is the maximum number of bytes to concatenate from the
second argument ("/start"). Though it's unlikely that we'll ever find anything
in /sys/block approaching 255 characters, we might as well guard against stack
corruption:
if (strlen(path) > SYSFS_PATH_LEN - strlen("/start") - 1)
return 0;
strcat(path, "/start");
Oh, I guess coverity is complaining about this too.
Though FWIW, "find /sys | while read f; do echo "$f" | wc -c; done | sort -g |
tail -n 5" spits out "133" as the longest sysfs path ever. I guess that could
be much longer on some multi-node NUMA box or whatever.
<shrug> /me codes up a fix, tosses it on the patch pile.
--D
> + f = fopen(path, "r");
> + if (!f)
> + return 0;
> + n = fscanf(f, "%llu", &start);
> + fclose(f);
> + return (n == 1) ? start : 0;
> +}
> +
> static errcode_t create_directory(ext2_filsys fs, char *dir,
> ext2_ino_t *ret_ino)
>
> @@ -310,24 +447,26 @@ static blk64_t get_start_block(ext2_filsys fs, blk64_t slack)
> return blk;
> }
>
> -static blk64_t round_up_align(blk64_t b, unsigned long align)
> +static blk64_t round_up_align(blk64_t b, unsigned long align,
> + blk64_t part_offset)
> {
> unsigned long m;
>
> if (align == 0)
> return b;
> - m = b % align;
> + part_offset = part_offset % align;
> + m = (b + part_offset) % align;
> if (m)
> b += align - m;
> return b;
> }
>
> -errcode_t mk_hugefiles(ext2_filsys fs)
> +errcode_t mk_hugefiles(ext2_filsys fs, const char *device_name)
> {
> unsigned long i;
> ext2_ino_t dir;
> errcode_t retval;
> - blk64_t fs_blocks;
> + blk64_t fs_blocks, part_offset;
> unsigned long align;
> int d, dsize;
> char *t;
> @@ -348,7 +487,10 @@ errcode_t mk_hugefiles(ext2_filsys fs)
> t = get_string_from_profile(fs_types, "hugefiles_align", "0");
> align = parse_num_blocks2(t, fs->super->s_log_block_size);
> free(t);
> - num_blocks = round_up_align(num_blocks, align);
> + if (get_bool_from_profile(fs_types, "hugefiles_align_disk", 0))
> + part_offset = get_partition_start(device_name) /
> + (fs->blocksize / 512);
> + num_blocks = round_up_align(num_blocks, align, 0);
> zero_hugefile = get_bool_from_profile(fs_types, "zero_hugefiles",
> zero_hugefile);
>
> @@ -400,7 +542,7 @@ errcode_t mk_hugefiles(ext2_filsys fs)
> num_slack += calc_overhead(fs, num_blocks) * num_files;
> num_slack += (num_files / 16) + 1; /* space for dir entries */
> goal = get_start_block(fs, num_slack);
> - goal = round_up_align(goal, align);
> + goal = round_up_align(goal, align, part_offset);
>
> if ((num_blocks ? num_blocks : fs_blocks) >
> (0x80000000UL / fs->blocksize))
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index ecd47e6..da77e3a 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -2913,7 +2913,7 @@ no_journal:
> EXT4_FEATURE_RO_COMPAT_QUOTA))
> create_quota_inodes(fs);
>
> - retval = mk_hugefiles(fs);
> + retval = mk_hugefiles(fs, device_name);
> if (retval)
> com_err(program_name, retval, "while creating huge files");
>
> diff --git a/misc/mke2fs.conf.5.in b/misc/mke2fs.conf.5.in
> index 8e25892..19458ac 100644
> --- a/misc/mke2fs.conf.5.in
> +++ b/misc/mke2fs.conf.5.in
> @@ -480,6 +480,13 @@ files. It also forces the size of huge files to be a multiple of the
> requested alignment. If this relation is not specified, no alignment
> requirement will be imposed on the huge files.
> .TP
> +.I hugefiles_align_disk
> +Thie relations specifies whether the alignment should be relative to the
> +beginning of the hard drive (assuming that the starting offset of the
> +partition is available to mke2fs). The default value is false, which
> +if will cause hugefile alignment to be relative to the beginning of the
> +file system.
> +.TP
> .I hugefiles_name
> This relation specifies the base file name for the huge files.
> .TP
> diff --git a/misc/mke2fs.h b/misc/mke2fs.h
> index 9fa6bfe..ce72cb3 100644
> --- a/misc/mke2fs.h
> +++ b/misc/mke2fs.h
> @@ -24,7 +24,7 @@ extern int get_bool_from_profile(char **types, const char *opt, int def_val);
> extern int int_log10(unsigned long long arg);
>
> /* mk_hugefiles.c */
> -extern errcode_t mk_hugefiles(ext2_filsys fs);
> +extern errcode_t mk_hugefiles(ext2_filsys fs, const char *device_name);
>
>
>
> --
> 2.0.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists