[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CALBjLoDHwKLgj3-uF-Tmmy=2MGne5pCPGQFYtUfXvSwa0V+_Nw@mail.gmail.com>
Date: Fri, 20 Dec 2024 09:04:42 -0800
From: Daniel Lee <chullee@...gle.com>
To: Chao Yu <chao@...nel.org>
Cc: Jaegeuk Kim <jaegeuk@...nel.org>, linux-f2fs-devel@...ts.sourceforge.net,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] f2fs: Introduce linear search for dentries
On Fri, Dec 20, 2024 at 5:25 AM Chao Yu <chao@...nel.org> wrote:
>
> On 2024/12/20 12:59, Daniel Lee wrote:
> > On Thu, Dec 19, 2024 at 5:29 AM Chao Yu <chao@...nel.org> wrote:
> >>
> >> Hi Daniel,
> >>
> >> On 2024/12/17 15:55, Daniel Lee wrote:
> >>> This patch addresses an issue where some files in case-insensitive
> >>> directories become inaccessible due to changes in how the kernel function,
> >>> utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5
> >>> ("unicode: Don't special case ignorable code points").
> >>>
> >>> F2FS uses these case-folded names to calculate hash values for locating
> >>> dentries and stores them on disk. Since utf8_casefold() can produce
> >>> different output across kernel versions, stored hash values and newly
> >>> calculated hash values may differ. This results in affected files no
> >>> longer being found via the hash-based lookup.
> >>>
> >>> To resolve this, the patch introduces a linear search fallback.
> >>> If the initial hash-based search fails, F2FS will sequentially scan the
> >>> directory entries.
> >>>
> >>
> >> Need a fixes line?
> >
> > Thanks. I will add the commit 5c26d2f1d3f5 to the Fixes:
> >
> >>
> >>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586
> >>> Signed-off-by: Daniel Lee <chullee@...gle.com>
> >>> ---
> >>> fs/f2fs/dir.c | 38 ++++++++++++++++++++++++++++----------
> >>> fs/f2fs/f2fs.h | 6 ++++--
> >>> fs/f2fs/inline.c | 5 +++--
> >>> 3 files changed, 35 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
> >>> index 47a5c806cf16..a85d19b4e178 100644
> >>> --- a/fs/f2fs/dir.c
> >>> +++ b/fs/f2fs/dir.c
> >>> @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
> >>> static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >>> struct page *dentry_page,
> >>> const struct f2fs_filename *fname,
> >>> - int *max_slots)
> >>> + int *max_slots,
> >>> + bool use_hash)
> >>> {
> >>> struct f2fs_dentry_block *dentry_blk;
> >>> struct f2fs_dentry_ptr d;
> >>> @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
> >>> dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
> >>>
> >>> make_dentry_ptr_block(dir, &d, dentry_blk);
> >>> - return f2fs_find_target_dentry(&d, fname, max_slots);
> >>> + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
> >>> }
> >>>
> >>> static inline int f2fs_match_name(const struct inode *dir,
> >>> @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
> >>> }
> >>>
> >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> - const struct f2fs_filename *fname, int *max_slots)
> >>> + const struct f2fs_filename *fname, int *max_slots,
> >>> + bool use_hash)
> >>> {
> >>> struct f2fs_dir_entry *de;
> >>> unsigned long bit_pos = 0;
> >>> @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> continue;
> >>> }
> >>>
> >>> - if (de->hash_code == fname->hash) {
> >>> + if (!use_hash || de->hash_code == fname->hash) {
> >>> res = f2fs_match_name(d->inode, fname,
> >>> d->filename[bit_pos],
> >>> le16_to_cpu(de->name_len));
> >>> @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>> unsigned int level,
> >>> const struct f2fs_filename *fname,
> >>> - struct page **res_page)
> >>> + struct page **res_page,
> >>> + bool use_hash)
> >>> {
> >>> int s = GET_DENTRY_SLOTS(fname->disk_name.len);
> >>> unsigned int nbucket, nblock;
> >>> - unsigned int bidx, end_block;
> >>> + unsigned int bidx, end_block, bucket_no;
> >>> struct page *dentry_page;
> >>> struct f2fs_dir_entry *de = NULL;
> >>> pgoff_t next_pgofs;
> >>> @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>> nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
> >>> nblock = bucket_blocks(level);
> >>>
> >>> + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
> >>> +
> >>> +start_find_bucket:
> >>> bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
> >>> - le32_to_cpu(fname->hash) % nbucket);
> >>> + bucket_no);
> >>> end_block = bidx + nblock;
> >>>
> >>> while (bidx < end_block) {
> >>> @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>> }
> >>> }
> >>>
> >>> - de = find_in_block(dir, dentry_page, fname, &max_slots);
> >>> + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
> >>> if (IS_ERR(de)) {
> >>> *res_page = ERR_CAST(de);
> >>> de = NULL;
> >>> @@ -307,6 +313,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
> >>> bidx++;
> >>> }
> >>>
> >>> + if (!use_hash && !de && ++bucket_no < nbucket)
> >>> + goto start_find_bucket;
> >>> +
> >>> if (!de && room && F2FS_I(dir)->chash != fname->hash) {
> >>
> >> Do we need to avoid accessing or assigning hash if use_hash is false?
> >>
> >
> > Is it still worth keeping the hash for the creation if both hash-based
> > and linear searches failed?
>
> It needs to be kept for hash-based searching failure? since it was added to
> enhance performance of file creation:
> - open(..., O_CREAT)
> - f2fs_lookup
> : didn't find target dirent, then, record last chash & clevel
> - f2fs_create
> : create dirent in clevel bucket once chash matches
>
I understand your point and I will revise it accordingly.
> Thanks,
>
> >
> >> Thanks,
> >>
> >>> F2FS_I(dir)->chash = fname->hash;
> >>> F2FS_I(dir)->clevel = level;
> >>> @@ -323,11 +332,13 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >>> struct f2fs_dir_entry *de = NULL;
> >>> unsigned int max_depth;
> >>> unsigned int level;
> >>> + bool use_hash = true;
> >>>
> >>> *res_page = NULL;
> >>>
> >>> +start_find_entry:
> >>> if (f2fs_has_inline_dentry(dir)) {
> >>> - de = f2fs_find_in_inline_dir(dir, fname, res_page);
> >>> + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
> >>> goto out;
> >>> }
> >>>
> >>> @@ -343,11 +354,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
> >>> }
> >>>
> >>> for (level = 0; level < max_depth; level++) {
> >>> - de = find_in_level(dir, level, fname, res_page);
> >>> + de = find_in_level(dir, level, fname, res_page, use_hash);
> >>> if (de || IS_ERR(*res_page))
> >>> break;
> >>> }
> >>> +
> >>> out:
> >>> +#if IS_ENABLED(CONFIG_UNICODE)
> >>> + if (IS_CASEFOLDED(dir) && !de && use_hash) {
> >>> + use_hash = false;
> >>> + goto start_find_entry;
> >>> + }
> >>> +#endif
> >>> /* This is to increase the speed of f2fs_create */
> >>> if (!de)
> >>> F2FS_I(dir)->task = current;
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index f523dd302bf6..1afebb9c4061 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
> >>> struct f2fs_filename *fname);
> >>> void f2fs_free_filename(struct f2fs_filename *fname);
> >>> struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
> >>> - const struct f2fs_filename *fname, int *max_slots);
> >>> + const struct f2fs_filename *fname, int *max_slots,
> >>> + bool use_hash);
> >>> int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
> >>> unsigned int start_pos, struct fscrypt_str *fstr);
> >>> void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
> >>> @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
> >>> int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
> >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>> const struct f2fs_filename *fname,
> >>> - struct page **res_page);
> >>> + struct page **res_page,
> >>> + bool use_hash);
> >>> int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
> >>> struct page *ipage);
> >>> int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
> >>> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> >>> index cbd2a0d34804..3e3c35d4c98b 100644
> >>> --- a/fs/f2fs/inline.c
> >>> +++ b/fs/f2fs/inline.c
> >>> @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
> >>>
> >>> struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>> const struct f2fs_filename *fname,
> >>> - struct page **res_page)
> >>> + struct page **res_page,
> >>> + bool use_hash)
> >>> {
> >>> struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
> >>> struct f2fs_dir_entry *de;
> >>> @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
> >>> inline_dentry = inline_data_addr(dir, ipage);
> >>>
> >>> make_dentry_ptr_inline(dir, &d, inline_dentry);
> >>> - de = f2fs_find_target_dentry(&d, fname, NULL);
> >>> + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
> >>> unlock_page(ipage);
> >>> if (IS_ERR(de)) {
> >>> *res_page = ERR_CAST(de);
> >>
>
Powered by blists - more mailing lists