lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOQ4uxhgz=ZZvfxXcECE-daWmwqOCEMWH5Nh_weOeDciP8xh9w@mail.gmail.com>
Date: Sat, 9 Aug 2025 17:13:29 +0200
From: Amir Goldstein <amir73il@...il.com>
To: André Almeida <andrealmeid@...lia.com>, 
	Miklos Szeredi <miklos@...redi.hu>
Cc: Theodore Tso <tytso@....edu>, Gabriel Krisman Bertazi <krisman@...nel.org>, linux-unionfs@...r.kernel.org, 
	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org, 
	Alexander Viro <viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>, 
	kernel-dev@...lia.com
Subject: Re: [PATCH RFC v3 2/7] ovl: Create ovl_casefold() to support
 casefolded strncmp()

On Fri, Aug 8, 2025 at 10:59 PM André Almeida <andrealmeid@...lia.com> wrote:
>
> To add overlayfs support casefold filesystems, create a new function
> ovl_casefold(), to be able to do case-insensitive strncmp().
>
> ovl_casefold() allocates a new buffer and stores the casefolded version
> of the string on it. If the allocation or the casefold operation fails,
> fallback to use the original string. The caller of the function is
> responsible of freeing the buffer.
>
> The other string to be compared is casefolded in a previous step and
> stored at `struct ovl_cache_entry` member `char *cf_name`.
>
> Finally, set the strncmp() parameters to the casefold versions of the
> names to achieve case-insensitive support.
>
> For the non-casefold names, nothing changes and the rb_tree
> search/insert functions just ignores this change.
>
> Signed-off-by: André Almeida <andrealmeid@...lia.com>
> ---
> Changes from v2:
> - Refactor the patch to do a single kmalloc() per rb_tree operation
> - Instead of casefolding the cache entry name everytime per strncmp(),
>   casefold it once and reuse it for every strncmp().
> ---
>  fs/overlayfs/readdir.c | 92 +++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 80 insertions(+), 12 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 2f42fec97f76c2000f76e15c60975db567b2c6d6..422f991393dfae12bcacf326414b7ee19e486ac8 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -71,20 +71,58 @@ static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
>         return rb_entry(n, struct ovl_cache_entry, node);
>  }
>
> +static int ovl_casefold(struct unicode_map *map, const char *str, int len, char **dst)
> +{
> +#if IS_ENABLED(CONFIG_UNICODE)
> +       const struct qstr qstr = { .name = str, .len = len };
> +       int cf_len;
> +
> +       if (!map || is_dot_dotdot(str, len))

if (!IS_ENABLED(CONFIG_UNICODE) || !map ||...

> +               return -1;

Better return 0 for no casefolding.

> +
> +       *dst = kmalloc(OVL_NAME_LEN, GFP_KERNEL);
> +
> +       if (dst) {
> +               cf_len = utf8_casefold(map, &qstr, *dst, OVL_NAME_LEN);
> +
> +               if (cf_len > 0)
> +                       return cf_len;

need to kfree(*dst) in the error case
caller only responsible of free on success

> +       }
> +#endif
> +
> +       return -1;

Better return 0 for no casefolding.

> +}
> +
>  static bool ovl_cache_entry_find_link(const char *name, int len,
>                                       struct rb_node ***link,
> -                                     struct rb_node **parent)
> +                                     struct rb_node **parent,
> +                                     struct unicode_map *map)
>  {
> +       int ret;
> +       char *dst = NULL;
>         bool found = false;
> +       const char *str = name;
>         struct rb_node **newp = *link;
>
> +       ret = ovl_casefold(map, name, len, &dst);
> +
> +       if (ret > 0) {
> +               str = dst;
> +               len = ret;
> +       }
> +
>         while (!found && *newp) {
>                 int cmp;
> +               char *aux;
>                 struct ovl_cache_entry *tmp;
>
>                 *parent = *newp;
> +
>                 tmp = ovl_cache_entry_from_node(*newp);
> -               cmp = strncmp(name, tmp->name, len);
> +
> +               aux = tmp->cf_name ? tmp->cf_name : tmp->name;
> +
> +               cmp = strncmp(str, aux, len);
>                 if (cmp > 0)
>                         newp = &tmp->node.rb_right;
>                 else if (cmp < 0 || len < tmp->len)
> @@ -94,27 +132,50 @@ static bool ovl_cache_entry_find_link(const char *name, int len,
>         }
>         *link = newp;
>
> +       kfree(dst);
> +
>         return found;
>  }
>
>  static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
> -                                                   const char *name, int len)
> +                                                   const char *name, int len,
> +                                                   struct unicode_map *map)
>  {
>         struct rb_node *node = root->rb_node;
> -       int cmp;
> +       struct ovl_cache_entry *p;
> +       const char *str = name;
> +       bool found = false;
> +       char *dst = NULL;
> +       int cmp, ret;
> +
> +       ret = ovl_casefold(map, name, len, &dst);
> +
> +       if (ret > 0) {
> +               str = dst;
> +               len = ret;
> +       }
> +
> +       while (!found && node) {
> +               char *aux;
>
> -       while (node) {
> -               struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
> +               p = ovl_cache_entry_from_node(node);
>
> -               cmp = strncmp(name, p->name, len);
> +               aux = p->cf_name ? p->cf_name : p->name;
> +
> +               cmp = strncmp(str, aux, len);
>                 if (cmp > 0)
>                         node = p->node.rb_right;
>                 else if (cmp < 0 || len < p->len)
>                         node = p->node.rb_left;
>                 else
> -                       return p;
> +                       found = true;
>         }
>
> +       kfree(dst);
> +
> +       if (found)
> +               return p;
> +
>         return NULL;
>  }
>
> @@ -212,7 +273,7 @@ static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
>         struct rb_node *parent = NULL;
>         struct ovl_cache_entry *p;
>
> -       if (ovl_cache_entry_find_link(name, len, &newp, &parent))
> +       if (ovl_cache_entry_find_link(name, len, &newp, &parent, rdd->map))
>                 return true;
>
>         p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
> @@ -234,7 +295,7 @@ static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
>  {
>         struct ovl_cache_entry *p;
>
> -       p = ovl_cache_entry_find(rdd->root, name, namelen);
> +       p = ovl_cache_entry_find(rdd->root, name, namelen, rdd->map);
>         if (p) {
>                 list_move_tail(&p->l_node, &rdd->middle);
>         } else {

IMO, it is nicer to pass the cf_name to the low level rb tree lookup
helpers and call ovl_casefold() only in high level ovl_fill_merge().
This also deduplicates the code from patch 1 of storing the cf_name in
 ovl_cache_entry_new().
something like this (untested) WDYT?

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index b65cdfce31ce..71efb29ad30f 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -79,10 +79,10 @@ static bool ovl_cache_entry_find_link(const char
*name, int len,

                *parent = *newp;
                tmp = ovl_cache_entry_from_node(*newp);
-               cmp = strncmp(name, tmp->name, len);
+               cmp = strncmp(name, tmp->cf_name, len);
                if (cmp > 0)
                        newp = &tmp->node.rb_right;
-               else if (cmp < 0 || len < tmp->len)
+               else if (cmp < 0 || len < tmp->cf_len)
                        newp = &tmp->node.rb_left;
                else
                        found = true;
@@ -101,10 +101,10 @@ static struct ovl_cache_entry
*ovl_cache_entry_find(struct rb_root *root,
        while (node) {
                struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);

-               cmp = strncmp(name, p->name, len);
+               cmp = strncmp(name, p->cf_name, len);
                if (cmp > 0)
                        node = p->node.rb_right;
-               else if (cmp < 0 || len < p->len)
+               else if (cmp < 0 || len < p->cf_len)
                        node = p->node.rb_left;
                else
                        return p;
@@ -145,6 +145,7 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,

 static struct ovl_cache_entry *ovl_cache_entry_new(struct
ovl_readdir_data *rdd,
                                                   const char *name, int len,
+                                                  const char
*cf_name, int cf_len,
                                                   u64 ino, unsigned int d_type)
 {
        struct ovl_cache_entry *p;
@@ -156,6 +157,13 @@ static struct ovl_cache_entry
*ovl_cache_entry_new(struct ovl_readdir_data *rdd,
        memcpy(p->name, name, len);
        p->name[len] = '\0';
        p->len = len;
+       if (cf_name && cf_name != name) {
+               p->cf_name == cf_name
+               p->cf_len = cf_len;
+       } else {
+               p->cf_name = p->name;
+               p->cf_len = len;
+       }
        p->type = d_type;
        p->real_ino = ino;
        p->ino = ino;
@@ -175,17 +183,18 @@ static struct ovl_cache_entry
*ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 }

 static bool ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
-                                 const char *name, int len, u64 ino,
-                                 unsigned int d_type)
+                                 const char *name, int len,
+                                 const char *cf_name, int cf_len,
+                                 u64 ino, unsigned int d_type)
 {
        struct rb_node **newp = &rdd->root->rb_node;
        struct rb_node *parent = NULL;
        struct ovl_cache_entry *p;

-       if (ovl_cache_entry_find_link(name, len, &newp, &parent))
+       if (ovl_cache_entry_find_link(cf_name, cf_len, &newp, &parent))
                return true;

-       p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
+       p = ovl_cache_entry_new(rdd, name, len, cf_name, cf_len, ino, d_type);
        if (p == NULL) {
                rdd->err = -ENOMEM;
                return false;
@@ -200,15 +209,17 @@ static bool ovl_cache_entry_add_rb(struct
ovl_readdir_data *rdd,

 static bool ovl_fill_lowest(struct ovl_readdir_data *rdd,
                           const char *name, int namelen,
+                          const char *cf_name, int cf_len,
                           loff_t offset, u64 ino, unsigned int d_type)
 {
        struct ovl_cache_entry *p;

-       p = ovl_cache_entry_find(rdd->root, name, namelen);
+       p = ovl_cache_entry_find(rdd->root, cf_name, cf_len);
        if (p) {
                list_move_tail(&p->l_node, &rdd->middle);
        } else {
-               p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+               p = ovl_cache_entry_new(rdd, name, namelen, cf_name, cf_len,
+                                       ino, d_type);
                if (p == NULL)
                        rdd->err = -ENOMEM;
                else
@@ -223,8 +234,11 @@ void ovl_cache_free(struct list_head *list)
        struct ovl_cache_entry *p;
        struct ovl_cache_entry *n;

-       list_for_each_entry_safe(p, n, list, l_node)
+       list_for_each_entry_safe(p, n, list, l_node) {
+               if (p->cf_name != p->name)
+                       kfree(p->cf_name);
                kfree(p);
+       }

        INIT_LIST_HEAD(list);
 }
@@ -260,12 +274,26 @@ static bool ovl_fill_merge(struct dir_context
*ctx, const char *name,
 {
        struct ovl_readdir_data *rdd =
                container_of(ctx, struct ovl_readdir_data, ctx);
+       char *cf_name;
+       int cf_len = 0;
+
+       if (ofs->casefold)
+               cf_len = ovl_casefold(rdd->map, name, namelen, &cf_name);
+
+       /* lookup in rb tree by casefolded name or orig name */
+       if (cf_len <= 0) {
+               cf_name = name;
+               cf_len = namelen;
+       }

        rdd->count++;
-       if (!rdd->is_lowest)
-               return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
-       else
-               return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
+       if (!rdd->is_lowest) {
+               return ovl_cache_entry_add_rb(rdd, name, namelen,
cf_name, cf_len,
+                                             ino, d_type);
+       } else {
+               return ovl_fill_lowest(rdd, name, namelen, cf_name, cf_len,
+                                      offset, ino, d_type);
+       }
 }

 static int ovl_check_whiteouts(const struct path *path, struct
ovl_readdir_data *rdd)
@@ -555,7 +583,7 @@ static bool ovl_fill_plain(struct dir_context
*ctx, const char *name,
                container_of(ctx, struct ovl_readdir_data, ctx);

        rdd->count++;
-       p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
+       p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
        if (p == NULL) {
                rdd->err = -ENOMEM;
                return false;

> @@ -640,7 +701,8 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,

All the changes below this point related to code paths of
ovl_iterate_real(), ovl_dir_read_impure() and struct ovl_readdir_translate
are irrelevant to casefolding and not needed.

This code iterates a single layer "real directory" and "translates" real ino to
ovl ino in some cases, but it does not compare any names between layers.

Thanks,
Amir.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ