[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d233fc29-e3ab-4761-9368-c203efc0466e@redhat.com>
Date: Wed, 28 Feb 2024 20:07:34 +0800
From: Xiao Ni <xni@...hat.com>
To: Yu Kuai <yukuai1@...weicloud.com>, mpatocka@...hat.com,
heinzm@...hat.com, blazej.kucman@...ux.intel.com, agk@...hat.com,
snitzer@...nel.org, dm-devel@...ts.linux.dev, song@...nel.org,
yukuai3@...wei.com, neilb@...e.de, shli@...com, akpm@...l.org
Cc: linux-kernel@...r.kernel.org, linux-raid@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH v5 04/14] md: don't register sync_thread for reshape
directly
在 2024/2/1 下午5:25, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@...wei.com>
>
> Currently, if reshape is interrupted, then reassemble the array will
> register sync_thread directly from pers->run(), in this case
> 'MD_RECOVERY_RUNNING' is set directly, however, there is no guarantee
> that md_do_sync() will be executed, hence stop_sync_thread() will hang
> because 'MD_RECOVERY_RUNNING' can't be cleared.
Hi Kuai
I have a question here. Is it the reason sync_thread can't run
md_do_sync because kthread_should_stop, so it doesn't have the chance to
set MD_RECOVERY_DONE? Why creating sync thread in md_check_recovery
doesn't have this problem? Could you explain more about this?
Best Regards
Xiao
>
> Last patch make sure that md_do_sync() will set MD_RECOVERY_DONE,
> however, following hang can still be triggered by dm-raid test
> shell/lvconvert-raid-reshape.sh occasionally:
>
> [root@...ora ~]# cat /proc/1982/stack
> [<0>] stop_sync_thread+0x1ab/0x270 [md_mod]
> [<0>] md_frozen_sync_thread+0x5c/0xa0 [md_mod]
> [<0>] raid_presuspend+0x1e/0x70 [dm_raid]
> [<0>] dm_table_presuspend_targets+0x40/0xb0 [dm_mod]
> [<0>] __dm_destroy+0x2a5/0x310 [dm_mod]
> [<0>] dm_destroy+0x16/0x30 [dm_mod]
> [<0>] dev_remove+0x165/0x290 [dm_mod]
> [<0>] ctl_ioctl+0x4bb/0x7b0 [dm_mod]
> [<0>] dm_ctl_ioctl+0x11/0x20 [dm_mod]
> [<0>] vfs_ioctl+0x21/0x60
> [<0>] __x64_sys_ioctl+0xb9/0xe0
> [<0>] do_syscall_64+0xc6/0x230
> [<0>] entry_SYSCALL_64_after_hwframe+0x6c/0x74
>
> Meanwhile mddev->recovery is:
> MD_RECOVERY_RUNNING |
> MD_RECOVERY_INTR |
> MD_RECOVERY_RESHAPE |
> MD_RECOVERY_FROZEN
>
> Fix this problem by remove the code to register sync_thread directly
> from raid10 and raid5. And let md_check_recovery() to register
> sync_thread.
>
> Fixes: f67055780caa ("[PATCH] md: Checkpoint and allow restart of raid5 reshape")
> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
> drivers/md/md.c | 5 ++++-
> drivers/md/raid10.c | 16 ++--------------
> drivers/md/raid5.c | 29 ++---------------------------
> 3 files changed, 8 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index c65dfd156090..6c5d0a372927 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9372,6 +9372,7 @@ static void md_start_sync(struct work_struct *ws)
> struct mddev *mddev = container_of(ws, struct mddev, sync_work);
> int spares = 0;
> bool suspend = false;
> + char *name;
>
> if (md_spares_need_change(mddev))
> suspend = true;
> @@ -9404,8 +9405,10 @@ static void md_start_sync(struct work_struct *ws)
> if (spares)
> md_bitmap_write_all(mddev->bitmap);
>
> + name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
> + "reshape" : "resync";
> rcu_assign_pointer(mddev->sync_thread,
> - md_register_thread(md_do_sync, mddev, "resync"));
> + md_register_thread(md_do_sync, mddev, name));
> if (!mddev->sync_thread) {
> pr_warn("%s: could not start resync thread...\n",
> mdname(mddev));
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 7412066ea22c..a5f8419e2df1 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev)
> clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
> clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> - rcu_assign_pointer(mddev->sync_thread,
> - md_register_thread(md_do_sync, mddev, "reshape"));
> - if (!mddev->sync_thread)
> - goto out_free_conf;
> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> }
>
> return 0;
> @@ -4573,16 +4569,8 @@ static int raid10_start_reshape(struct mddev *mddev)
> clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
> set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> -
> - rcu_assign_pointer(mddev->sync_thread,
> - md_register_thread(md_do_sync, mddev, "reshape"));
> - if (!mddev->sync_thread) {
> - ret = -EAGAIN;
> - goto abort;
> - }
> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> conf->reshape_checkpoint = jiffies;
> - md_wakeup_thread(mddev->sync_thread);
> md_new_event();
> return 0;
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 8497880135ee..6a7a32f7fb91 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev)
> clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
> clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> - rcu_assign_pointer(mddev->sync_thread,
> - md_register_thread(md_do_sync, mddev, "reshape"));
> - if (!mddev->sync_thread)
> - goto abort;
> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> }
>
> /* Ok, everything is just fine now */
> @@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev)
> clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
> clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
> set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
> - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
> - rcu_assign_pointer(mddev->sync_thread,
> - md_register_thread(md_do_sync, mddev, "reshape"));
> - if (!mddev->sync_thread) {
> - mddev->recovery = 0;
> - spin_lock_irq(&conf->device_lock);
> - write_seqcount_begin(&conf->gen_lock);
> - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
> - mddev->new_chunk_sectors =
> - conf->chunk_sectors = conf->prev_chunk_sectors;
> - mddev->new_layout = conf->algorithm = conf->prev_algo;
> - rdev_for_each(rdev, mddev)
> - rdev->new_data_offset = rdev->data_offset;
> - smp_wmb();
> - conf->generation --;
> - conf->reshape_progress = MaxSector;
> - mddev->reshape_position = MaxSector;
> - write_seqcount_end(&conf->gen_lock);
> - spin_unlock_irq(&conf->device_lock);
> - return -EAGAIN;
> - }
> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> conf->reshape_checkpoint = jiffies;
> - md_wakeup_thread(mddev->sync_thread);
> md_new_event();
> return 0;
> }
Powered by blists - more mailing lists