[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180428023751.GD10595@jaegeuk-macbookpro.roam.corp.google.com>
Date: Fri, 27 Apr 2018 19:37:51 -0700
From: Jaegeuk Kim <jaegeuk@...nel.org>
To: Chao Yu <yuchao0@...wei.com>
Cc: linux-f2fs-devel@...ts.sourceforge.net,
linux-kernel@...r.kernel.org, chao@...nel.org
Subject: Re: [PATCH v2] f2fs: fix to avoid race during access gc_thread
pointer
On 04/27, Chao Yu wrote:
> On 2018/4/27 10:04, Chao Yu wrote:
> > On 2018/4/27 0:03, Jaegeuk Kim wrote:
> >> On 04/24, Chao Yu wrote:
> >>> Thread A Thread B Thread C
> >>> - f2fs_remount
> >>> - stop_gc_thread
> >>> - f2fs_sbi_store
> >>> - issue_discard_thread
> >>> sbi->gc_thread = NULL;
> >>> sbi->gc_thread->gc_wake = 1
> >>> access sbi->gc_thread->gc_urgent
> >>>
> >>> Previously, we allocate memory for sbi->gc_thread based on background
> >>> gc thread mount option, the memory can be released if we turn off
> >>> that mount option, but still there are several places access gc_thread
> >>> pointer without considering race condition, result in NULL point
> >>> dereference.
> >>
> >> Do we just need to check &sb->s_umount in f2fs_sbi_store() and
> >
> > Better,
> >
> >> issue_discard_thread?
> >
> > There is more cases can be called from different scenarios:
> > - select_policy()
> > - need_SSR()
>
> BTW, do you agree with introducing {init,destroy}_gc_context as the patch does?
I don't think so. The issue only requires mutex() in sysfs/discard_thread.
>
> Thanks,
>
> >
> > Thanks,
> >
> >>
> >>>
> >>> In order to fix this issue, keep gc_thread structure valid in sbi all
> >>> the time instead of alloc/free it dynamically.
> >>>
> >>> In addition, add a rw semaphore to exclude rw operation in fields of
> >>> gc_thread.
> >>>
> >>> Signed-off-by: Chao Yu <yuchao0@...wei.com>
> >>> ---
> >>> v2:
> >>> - add a rw semaphore to exclude rw operation suggested by Jaegeuk.
> >>> fs/f2fs/debug.c | 3 +--
> >>> fs/f2fs/f2fs.h | 9 ++++++-
> >>> fs/f2fs/gc.c | 78 ++++++++++++++++++++++++++++++++++++-------------------
> >>> fs/f2fs/gc.h | 1 +
> >>> fs/f2fs/segment.c | 10 +++++--
> >>> fs/f2fs/super.c | 26 +++++++++++++------
> >>> fs/f2fs/sysfs.c | 26 ++++++++++++++-----
> >>> 7 files changed, 107 insertions(+), 46 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> >>> index a66107b5cfff..0fbd674c66fb 100644
> >>> --- a/fs/f2fs/debug.c
> >>> +++ b/fs/f2fs/debug.c
> >>> @@ -221,8 +221,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
> >>> si->cache_mem = 0;
> >>>
> >>> /* build gc */
> >>> - if (sbi->gc_thread)
> >>> - si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>> + si->cache_mem += sizeof(struct f2fs_gc_kthread);
> >>>
> >>> /* build merge flush thread */
> >>> if (SM_I(sbi)->fcc_info)
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 8f3ad9662d13..75d3b4875429 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -1412,6 +1412,11 @@ static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
> >>> return (struct sit_info *)(SM_I(sbi)->sit_info);
> >>> }
> >>>
> >>> +static inline struct f2fs_gc_kthread *GC_I(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + return (struct f2fs_gc_kthread *)(sbi->gc_thread);
> >>> +}
> >>> +
> >>> static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
> >>> {
> >>> return (struct free_segmap_info *)(SM_I(sbi)->free_info);
> >>> @@ -2954,8 +2959,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> >>> /*
> >>> * gc.c
> >>> */
> >>> +int init_gc_context(struct f2fs_sb_info *sbi);
> >>> +void destroy_gc_context(struct f2fs_sb_info * sbi);
> >>> int start_gc_thread(struct f2fs_sb_info *sbi);
> >>> -void stop_gc_thread(struct f2fs_sb_info *sbi);
> >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi);
> >>> block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
> >>> int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
> >>> unsigned int segno);
> >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> >>> index 70418b34c5f6..2febb84b2fd6 100644
> >>> --- a/fs/f2fs/gc.c
> >>> +++ b/fs/f2fs/gc.c
> >>> @@ -26,8 +26,8 @@
> >>> static int gc_thread_func(void *data)
> >>> {
> >>> struct f2fs_sb_info *sbi = data;
> >>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>> - wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
> >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> + wait_queue_head_t *wq = &gc_th->gc_wait_queue_head;
> >>> unsigned int wait_ms;
> >>>
> >>> wait_ms = gc_th->min_sleep_time;
> >>> @@ -114,17 +114,15 @@ static int gc_thread_func(void *data)
> >>> return 0;
> >>> }
> >>>
> >>> -int start_gc_thread(struct f2fs_sb_info *sbi)
> >>> +int init_gc_context(struct f2fs_sb_info *sbi)
> >>> {
> >>> struct f2fs_gc_kthread *gc_th;
> >>> - dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>> - int err = 0;
> >>>
> >>> gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
> >>> - if (!gc_th) {
> >>> - err = -ENOMEM;
> >>> - goto out;
> >>> - }
> >>> + if (!gc_th)
> >>> + return -ENOMEM;
> >>> +
> >>> + gc_th->f2fs_gc_task = NULL;
> >>>
> >>> gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
> >>> gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
> >>> @@ -135,35 +133,59 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
> >>> gc_th->gc_urgent = 0;
> >>> gc_th->gc_wake= 0;
> >>>
> >>> + init_rwsem(&gc_th->gc_rwsem);
> >>> +
> >>> sbi->gc_thread = gc_th;
> >>> - init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
> >>> - sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> +void destroy_gc_context(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + kfree(GC_I(sbi));
> >>> + sbi->gc_thread = NULL;
> >>> +}
> >>> +
> >>> +int start_gc_thread(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> + dev_t dev = sbi->sb->s_bdev->bd_dev;
> >>> + int err = 0;
> >>> +
> >>> + init_waitqueue_head(&gc_th->gc_wait_queue_head);
> >>> + gc_th->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
> >>> "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
> >>> if (IS_ERR(gc_th->f2fs_gc_task)) {
> >>> err = PTR_ERR(gc_th->f2fs_gc_task);
> >>> - kfree(gc_th);
> >>> - sbi->gc_thread = NULL;
> >>> + gc_th->f2fs_gc_task = NULL;
> >>> }
> >>> -out:
> >>> +
> >>> return err;
> >>> }
> >>>
> >>> -void stop_gc_thread(struct f2fs_sb_info *sbi)
> >>> +bool stop_gc_thread(struct f2fs_sb_info *sbi)
> >>> {
> >>> - struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
> >>> - if (!gc_th)
> >>> - return;
> >>> - kthread_stop(gc_th->f2fs_gc_task);
> >>> - kfree(gc_th);
> >>> - sbi->gc_thread = NULL;
> >>> + struct f2fs_gc_kthread *gc_th = GC_I(sbi);
> >>> + bool stopped = false;
> >>> +
> >>> + down_write(&gc_th->gc_rwsem);
> >>> + if (gc_th->f2fs_gc_task) {
> >>> + kthread_stop(gc_th->f2fs_gc_task);
> >>> + gc_th->f2fs_gc_task = NULL;
> >>> + stopped = true;
> >>> + }
> >>> + up_write(&gc_th->gc_rwsem);
> >>> +
> >>> + return stopped;
> >>> }
> >>>
> >>> static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>> {
> >>> int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
> >>>
> >>> - if (!gc_th)
> >>> - return gc_mode;
> >>> + down_read(&gc_th->gc_rwsem);
> >>> + if (!gc_th->f2fs_gc_task)
> >>> + goto out;
> >>>
> >>> if (gc_th->gc_idle) {
> >>> if (gc_th->gc_idle == 1)
> >>> @@ -173,6 +195,8 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
> >>> }
> >>> if (gc_th->gc_urgent)
> >>> gc_mode = GC_GREEDY;
> >>> +out:
> >>> + up_read(&gc_th->gc_rwsem);
> >>> return gc_mode;
> >>> }
> >>>
> >>> @@ -187,17 +211,19 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
> >>> p->max_search = dirty_i->nr_dirty[type];
> >>> p->ofs_unit = 1;
> >>> } else {
> >>> - p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
> >>> + p->gc_mode = select_gc_type(GC_I(sbi), gc_type);
> >>> p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
> >>> p->max_search = dirty_i->nr_dirty[DIRTY];
> >>> p->ofs_unit = sbi->segs_per_sec;
> >>> }
> >>>
> >>> /* we need to check every dirty segments in the FG_GC case */
> >>> - if (gc_type != FG_GC &&
> >>> - (sbi->gc_thread && !sbi->gc_thread->gc_urgent) &&
> >>> + down_read(&GC_I(sbi)->gc_rwsem);
> >>> + if (gc_type != FG_GC && GC_I(sbi)->f2fs_gc_task &&
> >>> + !GC_I(sbi)->gc_urgent &&
> >>> p->max_search > sbi->max_victim_search)
> >>> p->max_search = sbi->max_victim_search;
> >>> + up_read(&GC_I(sbi)->gc_rwsem);
> >>>
> >>> /* let's select beginning hot/small space first in no_heap mode*/
> >>> if (test_opt(sbi, NOHEAP) &&
> >>> diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
> >>> index b0045d4c8d1e..9a5b273328c2 100644
> >>> --- a/fs/f2fs/gc.h
> >>> +++ b/fs/f2fs/gc.h
> >>> @@ -26,6 +26,7 @@
> >>> #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
> >>>
> >>> struct f2fs_gc_kthread {
> >>> + struct rw_semaphore gc_rwsem; /* semaphore for f2fs_gc_task */
> >>> struct task_struct *f2fs_gc_task;
> >>> wait_queue_head_t gc_wait_queue_head;
> >>>
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 5960768d26df..f787eea1b2f6 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -177,8 +177,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
> >>>
> >>> if (test_opt(sbi, LFS))
> >>> return false;
> >>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>> + down_read(&GC_I(sbi)->gc_rwsem);
> >>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent) {
> >>> + down_read(&GC_I(sbi)->gc_rwsem);
> >>> return true;
> >>> + }
> >>> + up_read(&GC_I(sbi)->gc_rwsem);
> >>>
> >>> return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
> >>> SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
> >>> @@ -1425,8 +1429,10 @@ static int issue_discard_thread(void *data)
> >>> if (dcc->discard_wake)
> >>> dcc->discard_wake = 0;
> >>>
> >>> - if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
> >>> + down_read(&GC_I(sbi)->gc_rwsem);
> >>> + if (GC_I(sbi)->f2fs_gc_task && GC_I(sbi)->gc_urgent)
> >>> init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
> >>> + up_read(&GC_I(sbi)->gc_rwsem);
> >>>
> >>> sb_start_intwrite(sbi->sb);
> >>>
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index dedb8e50b440..199f29dce86d 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -1044,6 +1044,7 @@ static void f2fs_put_super(struct super_block *sb)
> >>> kfree(sbi->raw_super);
> >>>
> >>> destroy_device_list(sbi);
> >>> + destroy_gc_context(sbi);
> >>> mempool_destroy(sbi->write_io_dummy);
> >>> #ifdef CONFIG_QUOTA
> >>> for (i = 0; i < MAXQUOTAS; i++)
> >>> @@ -1476,15 +1477,18 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >>> * option. Also sync the filesystem.
> >>> */
> >>> if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) {
> >>> - if (sbi->gc_thread) {
> >>> - stop_gc_thread(sbi);
> >>> - need_restart_gc = true;
> >>> + need_restart_gc = stop_gc_thread(sbi);
> >>> + } else {
> >>> + down_write(&GC_I(sbi)->gc_rwsem);
> >>> + if (!GC_I(sbi)->f2fs_gc_task) {
> >>> + err = start_gc_thread(sbi);
> >>> + if (err) {
> >>> + up_write(&GC_I(sbi)->gc_rwsem);
> >>> + goto restore_opts;
> >>> + }
> >>> + need_stop_gc = true;
> >>> }
> >>> - } else if (!sbi->gc_thread) {
> >>> - err = start_gc_thread(sbi);
> >>> - if (err)
> >>> - goto restore_opts;
> >>> - need_stop_gc = true;
> >>> + up_write(&GC_I(sbi)->gc_rwsem);
> >>> }
> >>>
> >>> if (*flags & SB_RDONLY ||
> >>> @@ -2771,6 +2775,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>> goto free_meta_inode;
> >>> }
> >>>
> >>> + err = init_gc_context(sbi);
> >>> + if (err)
> >>> + goto free_checkpoint;
> >>> +
> >>> /* Initialize device list */
> >>> err = f2fs_scan_devices(sbi);
> >>> if (err) {
> >>> @@ -2981,6 +2989,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> >>> destroy_segment_manager(sbi);
> >>> free_devices:
> >>> destroy_device_list(sbi);
> >>> + destroy_gc_context(sbi);
> >>> +free_checkpoint:
> >>> kfree(sbi->ckpt);
> >>> free_meta_inode:
> >>> make_bad_inode(sbi->meta_inode);
> >>> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> >>> index 2c53de9251be..b8d09935e43f 100644
> >>> --- a/fs/f2fs/sysfs.c
> >>> +++ b/fs/f2fs/sysfs.c
> >>> @@ -46,7 +46,7 @@ struct f2fs_attr {
> >>> static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
> >>> {
> >>> if (struct_type == GC_THREAD)
> >>> - return (unsigned char *)sbi->gc_thread;
> >>> + return (unsigned char *)GC_I(sbi);
> >>> else if (struct_type == SM_INFO)
> >>> return (unsigned char *)SM_I(sbi);
> >>> else if (struct_type == DCC_INFO)
> >>> @@ -248,16 +248,28 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
> >>> if (!strcmp(a->attr.name, "trim_sections"))
> >>> return -EINVAL;
> >>>
> >>> - *ui = t;
> >>> -
> >>> - if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
> >>> + if (!strcmp(a->attr.name, "iostat_enable") && t == 0)
> >>> f2fs_reset_iostat(sbi);
> >>> - if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
> >>> - sbi->gc_thread->gc_wake = 1;
> >>> - wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
> >>> +
> >>> + if (a->struct_type == GC_THREAD) {
> >>> + down_write(&GC_I(sbi)->gc_rwsem);
> >>> + if (!GC_I(sbi)->f2fs_gc_task) {
> >>> + up_write(&GC_I(sbi)->gc_rwsem);
> >>> + return -EINVAL;
> >>> + }
> >>> + }
> >>> +
> >>> + if (!strcmp(a->attr.name, "gc_urgent") && t == 1) {
> >>> + GC_I(sbi)->gc_wake = 1;
> >>> + wake_up_interruptible_all(&GC_I(sbi)->gc_wait_queue_head);
> >>> wake_up_discard_thread(sbi, true);
> >>> }
> >>>
> >>> + *ui = t;
> >>> +
> >>> + if (a->struct_type == GC_THREAD)
> >>> + up_write(&GC_I(sbi)->gc_rwsem);
> >>> +
> >>> return count;
> >>> }
> >>>
> >>> --
> >>> 2.15.0.55.gc2ece9dc4de6
> >>
> >> .
> >>
> >
> >
> > .
> >
Powered by blists - more mailing lists