[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87plpkhg8m.fsf@mailhost.krisman.be>
Date: Tue, 03 Sep 2024 12:08:41 -0400
From: Gabriel Krisman Bertazi <krisman@...e.de>
To: André Almeida <andrealmeid@...lia.com>
Cc: Hugh Dickins <hughd@...gle.com>, Andrew Morton
<akpm@...ux-foundation.org>, Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
krisman@...nel.org, linux-mm@...ck.org, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org, kernel-dev@...lia.com, Daniel Rosenberg
<drosen@...gle.com>, smcv@...labora.com, Christoph Hellwig <hch@....de>
Subject: Re: [PATCH v2 5/8] tmpfs: Add casefold lookup support
André Almeida <andrealmeid@...lia.com> writes:
> Enable casefold lookup in tmpfs, based on the enconding defined by
> userspace. That means that instead of comparing byte per byte a file
> name, it compares to a case-insensitive equivalent of the Unicode
> string.
>
> * Dcache handling
>
> There's a special need when dealing with case-insensitive dentries.
> First of all, we currently invalidated every negative casefold dentries.
> That happens because currently VFS code has no proper support to deal
> with that, giving that it could incorrectly reuse a previous filename
> for a new file that has a casefold match. For instance, this could
> happen:
>
> $ mkdir DIR
> $ rm -r DIR
> $ mkdir dir
> $ ls
> DIR/
>
> And would be perceived as inconsistency from userspace point of view,
> because even that we match files in a case-insensitive manner, we still
> honor whatever is the initial filename.
>
> Along with that, tmpfs stores only the first equivalent name dentry used
> in the dcache, preventing duplications of dentries in the dcache. The
> d_compare() version for casefold files uses a normalized string, so the
> filename under lookup will be compared to another normalized string for
> the existing file, achieving a casefolded lookup.
>
> * Enabling casefold via mount options
>
> Most filesystems have their data stored in disk, so casefold option need
> to be enabled when building a filesystem on a device (via mkfs).
> However, as tmpfs is a RAM backed filesystem, there's no disk
> information and thus no mkfs to store information about casefold.
>
> For tmpfs, create casefold options for mounting. Userspace can then
> enable casefold support for a mount point using:
>
> $ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/
>
> Userspace must set what Unicode standard is aiming to. The available
> options depends on what the kernel Unicode subsystem supports.
>
> And for strict encoding:
>
> $ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/
>
> Strict encoding means that tmpfs will refuse to create invalid UTF-8
> sequences. When this option is not enabled, any invalid sequence will be
> treated as an opaque byte sequence, ignoring the encoding thus not being
> able to be looked up in a case-insensitive way.
>
> Signed-off-by: André Almeida <andrealmeid@...lia.com>
> ---
> mm/shmem.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 111 insertions(+), 4 deletions(-)
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..0f918010bc54 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -40,6 +40,8 @@
> #include <linux/fs_parser.h>
> #include <linux/swapfile.h>
> #include <linux/iversion.h>
> +#include <linux/unicode.h>
> +#include <linux/parser.h>
> #include "swap.h"
>
> static struct vfsmount *shm_mnt __ro_after_init;
> @@ -123,6 +125,8 @@ struct shmem_options {
> bool noswap;
> unsigned short quota_types;
> struct shmem_quota_limits qlimits;
> + struct unicode_map *encoding;
> + bool strict_encoding;
> #define SHMEM_SEEN_BLOCKS 1
> #define SHMEM_SEEN_INODES 2
> #define SHMEM_SEEN_HUGE 4
> @@ -3427,6 +3431,11 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
> if (IS_ERR(inode))
> return PTR_ERR(inode);
>
> +#if IS_ENABLED(CONFIG_UNICODE)
> + if (!utf8_check_strict_name(dir, &dentry->d_name))
> + return -EINVAL;
> +#endif
Please, fold it into the code when possible:
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
> +
> error = simple_acl_create(dir, inode);
> if (error)
> goto out_iput;
> @@ -3442,7 +3451,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
> dir->i_size += BOGO_DIRENT_SIZE;
> inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
> inode_inc_iversion(dir);
> - d_instantiate(dentry, inode);
> +
> + if (IS_CASEFOLDED(dir))
If you have if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
It can be optimized out when CONFIG_UNICODE=n.
> + d_add(dentry, inode);
> + else
> + d_instantiate(dentry, inode);
> dget(dentry); /* Extra count - pin the dentry in core */
> return error;
>
> @@ -3533,7 +3547,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
> inc_nlink(inode);
> ihold(inode); /* New dentry reference */
> dget(dentry); /* Extra pinning count for the created dentry */
> - d_instantiate(dentry, inode);
> + if (IS_CASEFOLDED(dir))
> + d_add(dentry, inode);
> + else
> + d_instantiate(dentry, inode);
> out:
> return ret;
> }
> @@ -3553,6 +3570,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
> inode_inc_iversion(dir);
> drop_nlink(inode);
> dput(dentry); /* Undo the count from "create" - does all the work */
> +
> + /*
> + * For now, VFS can't deal with case-insensitive negative dentries, so
> + * we invalidate them
> + */
> + if (IS_CASEFOLDED(dir))
> + d_invalidate(dentry);
> +
likewise and also below.
> return 0;
> }
>
> @@ -3697,7 +3722,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
> dir->i_size += BOGO_DIRENT_SIZE;
> inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
> inode_inc_iversion(dir);
> - d_instantiate(dentry, inode);
> + if (IS_CASEFOLDED(dir))
> + d_add(dentry, inode);
> + else
> + d_instantiate(dentry, inode);
> dget(dentry);
> return 0;
>
> @@ -4050,6 +4078,8 @@ enum shmem_param {
> Opt_usrquota_inode_hardlimit,
> Opt_grpquota_block_hardlimit,
> Opt_grpquota_inode_hardlimit,
> + Opt_casefold,
> + Opt_strict_encoding,
> };
>
> static const struct constant_table shmem_param_enums_huge[] = {
> @@ -4081,9 +4111,47 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
> fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
> fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
> #endif
> + fsparam_string("casefold", Opt_casefold),
> + fsparam_flag ("strict_encoding", Opt_strict_encoding),
> {}
> };
>
> +#if IS_ENABLED(CONFIG_UNICODE)
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
> +{
> + struct shmem_options *ctx = fc->fs_private;
> + unsigned int maj = 0, min = 0, rev = 0, version_number;
> + char version[10];
> + int ret;
> + struct unicode_map *encoding;
> +
> + if (strncmp(param->string, "utf8-", 5))
> + return invalfc(fc, "Only utf8 encondings are supported");
> + ret = strscpy(version, param->string + 5, sizeof(version));
the extra buffer and the copy seem unnecessary. It won't live past this
function anyway. Can you just pass the offseted param->string to
utf8_parse_version?
> + if (ret < 0)
> + return invalfc(fc, "Invalid enconding argument: %s",
> + param->string);
enconding=>encoding
> +
> + ret = utf8_parse_version(version, &maj, &min, &rev);
> + if (ret)
> + return invalfc(fc, "Invalid utf8 version: %s", version);
> + version_number = UNICODE_AGE(maj, min, rev);
> + encoding = utf8_load(version_number);
utf8_load(UNICODE_AGE(maj, min, rev));
and drop version_number.
> + if (IS_ERR(encoding))
> + return invalfc(fc, "Invalid utf8 version: %s", version);
> + pr_info("tmpfs: Using encoding provided by mount options: %s\n",
> + param->string);
> + ctx->encoding = encoding;
> +
> + return 0;
> +}
> +#else
> +static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
> +{
> + return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
> +}
> +#endif
> +
> static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> {
> struct shmem_options *ctx = fc->fs_private;
> @@ -4242,6 +4310,11 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
> "Group quota inode hardlimit too large.");
> ctx->qlimits.grpquota_ihardlimit = size;
> break;
> + case Opt_casefold:
> + return shmem_parse_opt_casefold(fc, param);
> + case Opt_strict_encoding:
> + ctx->strict_encoding = true;
> + break;
> }
> return 0;
>
> @@ -4471,6 +4544,11 @@ static void shmem_put_super(struct super_block *sb)
> {
> struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
>
> +#if IS_ENABLED(CONFIG_UNICODE)
> + if (sb->s_encoding)
> + utf8_unload(sb->s_encoding);
> +#endif
> +
> #ifdef CONFIG_TMPFS_QUOTA
> shmem_disable_quotas(sb);
> #endif
> @@ -4515,6 +4593,16 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
> }
> sb->s_export_op = &shmem_export_ops;
> sb->s_flags |= SB_NOSEC | SB_I_VERSION;
> +
> +#if IS_ENABLED(CONFIG_UNICODE)
> + if (ctx->encoding) {
> + sb->s_encoding = ctx->encoding;
> + generic_set_sb_d_ops(sb);
> + if (ctx->strict_encoding)
> + sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
> + }
> +#endif
> +
> #else
> sb->s_flags |= SB_NOUSER;
> #endif
> @@ -4704,11 +4792,28 @@ static const struct inode_operations shmem_inode_operations = {
> #endif
> };
>
> +static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
> +{
> + if (dentry->d_name.len > NAME_MAX)
> + return ERR_PTR(-ENAMETOOLONG);
> +
> + /*
> + * For now, VFS can't deal with case-insensitive negative dentries, so
> + * we prevent them from being created
> + */
> + if (IS_CASEFOLDED(dir))
> + return NULL;
> +
> + d_add(dentry, NULL);
> +
> + return NULL;
> +}
> +
> static const struct inode_operations shmem_dir_inode_operations = {
> #ifdef CONFIG_TMPFS
> .getattr = shmem_getattr,
> .create = shmem_create,
> - .lookup = simple_lookup,
> + .lookup = shmem_lookup,
simple_lookup sets the dentry operations to enable a custom d_delete,
but this disables that. Without it, negative dentries will linger after
the filesystem is gone.
We want to preserve the current behavior both for normal and casefolding
directories. You need a new flavor of generic_ci_dentry_ops with that
hook for casefolding shmem, as well as ensure &simple_dentry_operations
is still set for every dentry in non-casefolding volumes.
> .link = shmem_link,
> .unlink = shmem_unlink,
> .symlink = shmem_symlink,
> @@ -4791,6 +4896,8 @@ int shmem_init_fs_context(struct fs_context *fc)
> ctx->uid = current_fsuid();
> ctx->gid = current_fsgid();
>
> + ctx->encoding = NULL;
> +
> fc->fs_private = ctx;
> fc->ops = &shmem_fs_context_ops;
> return 0;
--
Gabriel Krisman Bertazi
Powered by blists - more mailing lists