[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2dee7da9-e65d-47c0-a7f5-2e952e4aa784@kernel.org>
Date: Mon, 15 Apr 2024 11:26:25 +0800
From: Chao Yu <chao@...nel.org>
To: Jaegeuk Kim <jaegeuk@...nel.org>
Cc: linux-f2fs-devel@...ts.sourceforge.net, linux-kernel@...r.kernel.org,
Daeho Jeong <daeho43@...il.com>
Subject: Re: [PATCH v3] f2fs: zone: don't block IO if there is remained open
zone
On 2024/4/14 23:19, Jaegeuk Kim wrote:
> It seems this caused kernel hang. Chao, have you tested this patch enough?
Jaegeuk,
Oh, I've checked this patch w/ fsstress before submitting it, but missed
the SPO testcase... do you encounter kernel hang w/ SPO testcase?
Anyway, let me test it more.
Thanks,
>
> On 04/13, Chao Yu wrote:
>> On 2024/4/13 5:11, Jaegeuk Kim wrote:
>>> On 04/07, Chao Yu wrote:
>>>> max open zone may be larger than log header number of f2fs, for
>>>> such case, it doesn't need to wait last IO in previous zone, let's
>>>> introduce available_open_zone semaphore, and reduce it once we
>>>> submit first write IO in a zone, and increase it after completion
>>>> of last IO in the zone.
>>>>
>>>> Cc: Daeho Jeong <daeho43@...il.com>
>>>> Signed-off-by: Chao Yu <chao@...nel.org>
>>>> ---
>>>> v3:
>>>> - avoid race condition in between __submit_merged_bio()
>>>> and __allocate_new_segment().
>>>> fs/f2fs/data.c | 105 ++++++++++++++++++++++++++++++----------------
>>>> fs/f2fs/f2fs.h | 34 ++++++++++++---
>>>> fs/f2fs/iostat.c | 7 ++++
>>>> fs/f2fs/iostat.h | 2 +
>>>> fs/f2fs/segment.c | 43 ++++++++++++++++---
>>>> fs/f2fs/segment.h | 12 +++++-
>>>> fs/f2fs/super.c | 2 +
>>>> 7 files changed, 156 insertions(+), 49 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 0d88649c60a5..18a4ac0a06bc 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -373,11 +373,10 @@ static void f2fs_write_end_io(struct bio *bio)
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> static void f2fs_zone_write_end_io(struct bio *bio)
>>>> {
>>>> - struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
>>>> + struct f2fs_sb_info *sbi = iostat_get_bio_private(bio);
>>>> - bio->bi_private = io->bi_private;
>>>> - complete(&io->zone_wait);
>>>> f2fs_write_end_io(bio);
>>>> + up(&sbi->available_open_zones);
>>>> }
>>>> #endif
>>>> @@ -531,6 +530,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
>>>> if (!io->bio)
>>>> return;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (io->open_zone) {
>>>> + /*
>>>> + * if there is no open zone, it will wait for last IO in
>>>> + * previous zone before submitting new IO.
>>>> + */
>>>> + down(&fio->sbi->available_open_zones);
>>>> + io->open_zone = false;
>>>> + io->zone_openned = true;
>>>> + }
>>>> +
>>>> + if (io->close_zone) {
>>>> + io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> + io->zone_openned = false;
>>>> + io->close_zone = false;
>>>> + }
>>>> +#endif
>>>> +
>>>> if (is_read_io(fio->op)) {
>>>> trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
>>>> f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
>>>> @@ -601,9 +618,9 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
>>>> INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
>>>> init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> - init_completion(&sbi->write_io[i][j].zone_wait);
>>>> - sbi->write_io[i][j].zone_pending_bio = NULL;
>>>> - sbi->write_io[i][j].bi_private = NULL;
>>>> + sbi->write_io[i][j].open_zone = false;
>>>> + sbi->write_io[i][j].zone_openned = false;
>>>> + sbi->write_io[i][j].close_zone = false;
>>>> #endif
>>>> }
>>>> }
>>>> @@ -634,6 +651,31 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
>>>> f2fs_up_write(&io->io_rwsem);
>>>> }
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type)
>>>> +{
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + struct f2fs_bio_info *io;
>>>> +
>>>> + if (!f2fs_sb_has_blkzoned(sbi))
>>>> + return;
>>>> +
>>>> + io = sbi->write_io[PAGE_TYPE(type)] + type_to_temp(type);
>>>> +
>>>> + f2fs_down_write(&io->io_rwsem);
>>>> + if (io->zone_openned) {
>>>> + if (io->bio) {
>>>> + io->close_zone = true;
>>>> + __submit_merged_bio(io);
>>>> + } else if (io->zone_openned) {
>>>> + up(&sbi->available_open_zones);
>>>> + io->zone_openned = false;
>>>> + }
>>>> + }
>>>> + f2fs_up_write(&io->io_rwsem);
>>>> +#endif
>>>> +
>>>> +}
>>>> +
>>>> static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>> struct inode *inode, struct page *page,
>>>> nid_t ino, enum page_type type, bool force)
>>>> @@ -918,22 +960,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
>>>> }
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> -static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
>>>> +static bool is_blkaddr_zone_boundary(struct f2fs_sb_info *sbi,
>>>> + block_t blkaddr, bool start)
>>>> {
>>>> - int devi = 0;
>>>> + if (!f2fs_blkaddr_in_seqzone(sbi, blkaddr))
>>>> + return false;
>>>> +
>>>> + if (start)
>>>> + return (blkaddr % sbi->blocks_per_blkz) == 0;
>>>> + return (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>> - if (f2fs_is_multi_device(sbi)) {
>>>> - devi = f2fs_target_device_index(sbi, blkaddr);
>>>> - if (blkaddr < FDEV(devi).start_blk ||
>>>> - blkaddr > FDEV(devi).end_blk) {
>>>> - f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> - return false;
>>>> - }
>>>> - blkaddr -= FDEV(devi).start_blk;
>>>> - }
>>>> - return bdev_is_zoned(FDEV(devi).bdev) &&
>>>> - f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
>>>> - (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
>>>> }
>>>> #endif
>>>> @@ -944,20 +980,14 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
>>>> struct page *bio_page;
>>>> enum count_type type;
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + bool blkzoned = f2fs_sb_has_blkzoned(sbi) && btype < META;
>>>> +#endif
>>>> f2fs_bug_on(sbi, is_read_io(fio->op));
>>>> f2fs_down_write(&io->io_rwsem);
>>>> next:
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
>>>> - wait_for_completion_io(&io->zone_wait);
>>>> - bio_put(io->zone_pending_bio);
>>>> - io->zone_pending_bio = NULL;
>>>> - io->bi_private = NULL;
>>>> - }
>>>> -#endif
>>>> -
>>>> if (fio->in_list) {
>>>> spin_lock(&io->io_lock);
>>>> if (list_empty(&io->io_list)) {
>>>> @@ -985,6 +1015,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> type = WB_DATA_TYPE(bio_page, fio->compressed_page);
>>>> inc_page_count(sbi, type);
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (blkzoned && is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, true))
>>>> + io->open_zone = true;
>>>> +#endif
>>>> +
>>>> if (io->bio &&
>>>> (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
>>>> fio->new_blkaddr) ||
>>>> @@ -1010,15 +1045,11 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
>>>> io->last_block_in_bio = fio->new_blkaddr;
>>>> trace_f2fs_submit_page_write(fio->page, fio);
>>>> +
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> - if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
>>>> - is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
>>>> - bio_get(io->bio);
>>>> - reinit_completion(&io->zone_wait);
>>>> - io->bi_private = io->bio->bi_private;
>>>> - io->bio->bi_private = io;
>>>> - io->bio->bi_end_io = f2fs_zone_write_end_io;
>>>> - io->zone_pending_bio = io->bio;
>>>> + if (blkzoned &&
>>>> + is_blkaddr_zone_boundary(sbi, fio->new_blkaddr, false)) {
>>>> + io->close_zone = true;
>>>> __submit_merged_bio(io);
>>>> }
>>>> #endif
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 694f8a52cb84..8a26530cf4fb 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -1234,16 +1234,16 @@ struct f2fs_bio_info {
>>>> struct bio *bio; /* bios to merge */
>>>> sector_t last_block_in_bio; /* last block number */
>>>> struct f2fs_io_info fio; /* store buffered io info. */
>>>> -#ifdef CONFIG_BLK_DEV_ZONED
>>>> - struct completion zone_wait; /* condition value for the previous open zone to close */
>>>> - struct bio *zone_pending_bio; /* pending bio for the previous zone */
>>>> - void *bi_private; /* previous bi_private for pending bio */
>>>> -#endif
>>>> struct f2fs_rwsem io_rwsem; /* blocking op for bio */
>>>> spinlock_t io_lock; /* serialize DATA/NODE IOs */
>>>> struct list_head io_list; /* track fios */
>>>> struct list_head bio_list; /* bio entry list head */
>>>> struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + bool open_zone; /* open a zone */
>>>> + bool zone_openned; /* zone has been openned */
>>>> + bool close_zone; /* close a zone */
>>>> +#endif
>>>> };
>>>> #define FDEV(i) (sbi->devs[i])
>>>> @@ -1560,6 +1560,7 @@ struct f2fs_sb_info {
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>> unsigned int blocks_per_blkz; /* F2FS blocks per zone */
>>>> unsigned int max_open_zones; /* max open zone resources of the zoned device */
>>>> + struct semaphore available_open_zones; /* available open zones */
>>>> #endif
>>>> /* for node-related operations */
>>>> @@ -3822,6 +3823,7 @@ void f2fs_destroy_bio_entry_cache(void);
>>>> void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
>>>> enum page_type type);
>>>> int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi);
>>>> +void f2fs_blkzoned_submit_merged_write(struct f2fs_sb_info *sbi, int type);
>>>> void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
>>>> void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
>>>> struct inode *inode, struct page *page,
>>>> @@ -4469,6 +4471,28 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
>>>> return test_bit(zno, FDEV(devi).blkz_seq);
>>>> }
>>>> +
>>>> +static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>>> + block_t blkaddr)
>>>> +{
>>>> + int devi = 0;
>>>> +
>>>> + if (f2fs_is_multi_device(sbi)) {
>>>> + devi = f2fs_target_device_index(sbi, blkaddr);
>>>> + if (blkaddr < FDEV(devi).start_blk ||
>>>> + blkaddr > FDEV(devi).end_blk) {
>>>> + f2fs_err(sbi, "Invalid block %x", blkaddr);
>>>> + return false;
>>>> + }
>>>> + blkaddr -= FDEV(devi).start_blk;
>>>> + }
>>>> +
>>>> + if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>>> + !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>>> + return false;
>>>> +
>>>> + return true;
>>>
>>> Applied as below.
>>>
>>> @@ -4485,11 +4485,8 @@ static inline bool f2fs_blkaddr_in_seqzone(struct f2fs_sb_info *sbi,
>>> blkaddr -= FDEV(devi).start_blk;
>>> }
>>>
>>> - if (!bdev_is_zoned(FDEV(devi).bdev) ||
>>> - !f2fs_blkz_is_seq(sbi, devi, blkaddr))
>>> - return false;
>>> -
>>> - return true;
>>> + return bdev_is_zoned(FDEV(devi).bdev) &&
>>> + f2fs_blkz_is_seq(sbi, devi, blkaddr);
>>
>> Looks good, thank you for cleanup.
>>
>> Thanks,
>>
>>> }
>>> #endif
>>>
>>>> +}
>>>> #endif
>>>> static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi,
>>>> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
>>>> index f8703038e1d8..a8626e297876 100644
>>>> --- a/fs/f2fs/iostat.c
>>>> +++ b/fs/f2fs/iostat.c
>>>> @@ -237,6 +237,13 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
>>>> spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
>>>> }
>>>> +void *iostat_get_bio_private(struct bio *bio)
>>>> +{
>>>> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> +
>>>> + return iostat_ctx->sbi;
>>>> +}
>>>> +
>>>> void iostat_update_and_unbind_ctx(struct bio *bio)
>>>> {
>>>> struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
>>>> diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h
>>>> index eb99d05cf272..9006c3d41590 100644
>>>> --- a/fs/f2fs/iostat.h
>>>> +++ b/fs/f2fs/iostat.h
>>>> @@ -58,6 +58,7 @@ static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
>>>> return iostat_ctx->post_read_ctx;
>>>> }
>>>> +extern void *iostat_get_bio_private(struct bio *bio);
>>>> extern void iostat_update_and_unbind_ctx(struct bio *bio);
>>>> extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>> struct bio *bio, struct bio_post_read_ctx *ctx);
>>>> @@ -68,6 +69,7 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
>>>> #else
>>>> static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode,
>>>> enum iostat_type type, unsigned long long io_bytes) {}
>>>> +static inline void *iostat_get_bio_private(struct bio *bio) { return bio->bi_private; }
>>>> static inline void iostat_update_and_unbind_ctx(struct bio *bio) {}
>>>> static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
>>>> struct bio *bio, struct bio_post_read_ctx *ctx) {}
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 4fd76e867e0a..4a3cf2888faf 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -3140,6 +3140,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
>>>> return err;
>>>> stat_inc_seg_type(sbi, curseg);
>>>> locate_dirty_segment(sbi, old_segno);
>>>> +
>>>> + f2fs_blkzoned_submit_merged_write(sbi, type);
>>>> +
>>>> return 0;
>>>> }
>>>> @@ -3461,12 +3464,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
>>>> f2fs_bug_on(fio->sbi, true);
>>>> }
>>>> - if (IS_HOT(type))
>>>> - fio->temp = HOT;
>>>> - else if (IS_WARM(type))
>>>> - fio->temp = WARM;
>>>> - else
>>>> - fio->temp = COLD;
>>>> + fio->temp = type_to_temp(type);
>>>> return type;
>>>> }
>>>> @@ -4132,6 +4130,27 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
>>>> return -EINVAL;
>>>> }
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (f2fs_sb_has_blkzoned(sbi)) {
>>>> + for (type = 0; type < NR_PERSISTENT_LOG; type++) {
>>>> + struct curseg_info *curseg = CURSEG_I(sbi, type);
>>>> + enum page_type ptype;
>>>> + enum temp_type temp;
>>>> +
>>>> + if (!(curseg->next_blkoff % sbi->blocks_per_blkz))
>>>> + continue;
>>>> +
>>>> + if (!f2fs_blkaddr_in_seqzone(sbi,
>>>> + START_BLOCK(sbi, curseg->segno)))
>>>> + continue;
>>>> +
>>>> + ptype = PAGE_TYPE(type);
>>>> + temp = type_to_temp(type);
>>>> + down(&sbi->available_open_zones);
>>>> + sbi->write_io[ptype][temp].zone_openned = true;
>>>> + }
>>>> + }
>>>> +#endif
>>>> return 0;
>>>> }
>>>> @@ -5451,6 +5470,18 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
>>>> for (i = 0; i < NR_CURSEG_TYPE; i++) {
>>>> kfree(array[i].sum_blk);
>>>> kfree(array[i].journal);
>>>> +
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> + if (f2fs_sb_has_blkzoned(sbi)) {
>>>> + enum page_type ptype = PAGE_TYPE(i);
>>>> + enum temp_type temp = type_to_temp(i);
>>>> +
>>>> + if (sbi->write_io[ptype][temp].zone_openned) {
>>>> + up(&sbi->available_open_zones);
>>>> + sbi->write_io[ptype][temp].zone_openned = false;
>>>> + }
>>>> + }
>>>> +#endif
>>>> }
>>>> kfree(array);
>>>> }
>>>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
>>>> index e1c0f418aa11..855978ca869f 100644
>>>> --- a/fs/f2fs/segment.h
>>>> +++ b/fs/f2fs/segment.h
>>>> @@ -24,7 +24,8 @@
>>>> #define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
>>>> #define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
>>>> -#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
>>>> +#define PAGE_TYPE(t) (IS_NODESEG(t) ? NODE : DATA)
>>>> +#define SE_PAGETYPE(se) (PAGE_TYPE((se)->type))
>>>> static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
>>>> unsigned short seg_type)
>>>> @@ -965,3 +966,12 @@ static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
>>>> return GET_SEGNO(sbi, FDEV(devi).start_blk);
>>>> return 0;
>>>> }
>>>> +
>>>> +static inline enum temp_type type_to_temp(int type)
>>>> +{
>>>> + if (IS_HOT(type))
>>>> + return HOT;
>>>> + else if (IS_WARM(type))
>>>> + return WARM;
>>>> + return COLD;
>>>> +}
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index fdf358c7f808..954baa6c100d 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -3893,6 +3893,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
>>>> sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
>>>> return -EINVAL;
>>>> }
>>>> +
>>>> + sema_init(&sbi->available_open_zones, sbi->max_open_zones);
>>>> }
>>>> zone_sectors = bdev_zone_sectors(bdev);
>>>> --
>>>> 2.40.1
Powered by blists - more mailing lists