diff mbox series

[v5,09/14] dm-raid: really frozen sync_thread during suspend

Message ID 20240201092559.910982-10-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series dm-raid/md/raid: fix v6.7 regressions | expand

Commit Message

Yu Kuai Feb. 1, 2024, 9:25 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

1) The flag MD_RECOVERY_FROZEN doesn't mean that the sync thread is frozen;
   it only prevents a new sync_thread from starting, and it can't stop a
   running sync thread;
2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, so
   using it as the condition for md_stop_writes() in raid_postsuspend()
   doesn't look correct.
3) raid_message() can set/clear the flag MD_RECOVERY_FROZEN at any time,
   and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
   a new sync_thread can start unexpectedly.

Fix above problems by using the new helper to suspend the array during
suspend, also disallow raid_message() to change sync_thread status
during suspend.

Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
test shell/lvconvert-raid-reshape.sh started to hang in stop_sync_thread().
With the previous fixes, the test no longer hangs there; however, it
still fails and complains that ext4 is corrupted. With this patch, the
test no longer hangs in stop_sync_thread() or fails due to ext4
corruption. However, there is still a deadlock related to
dm-raid456 that will be fixed in the following patches.

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

Comments

Xiao Ni Feb. 18, 2024, 4:53 a.m. UTC | #1
Hi Kuai

On Thu, Feb 1, 2024 at 5:30 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> 1) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
>    it only prevent new sync_thread to start, and it can't stop the
>    running sync thread;

Agree with this

> 2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
>    it as condition for md_stop_writes() in raid_postsuspend() doesn't
>    look correct.

I don't agree with it. __md_stop_writes stops the sync thread, so it needs
to check this flag. And it looks like the name __md_stop_writes is not
right. Does it really stop write io? mddev_suspend should be the
function that stops write requests. From my understanding,
raid_postsuspend does two jobs. One is stopping the sync thread. Two is
suspending the array.

> 3) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
>    and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
>    new sync_thread can start unexpected.

md_action_store doesn't check this either. If the array is suspended
and MD_RECOVERY_FROZEN is cleared, before patch01, sync thread can't
happen. So it looks like patch01 breaks the logic.

Regards
Xiao


>
> Fix above problems by using the new helper to suspend the array during
> suspend, also disallow raid_message() to change sync_thread status
> during suspend.
>
> Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
> test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
> and with previous fixes, the test won't hang there anymore, however, the
> test will still fail and complain that ext4 is corrupted. And with this
> patch, the test won't hang due to stop_sync_thread() or fail due to ext4
> is corrupted anymore. However, there is still a deadlock related to
> dm-raid456 that will be fixed in following patches.
>
> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
> Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
> Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
> Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> index eb009d6bb03a..5ce3c6020b1b 100644
> --- a/drivers/md/dm-raid.c
> +++ b/drivers/md/dm-raid.c
> @@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>         rs->md.ro = 1;
>         rs->md.in_sync = 1;
>
> -       /* Keep array frozen until resume. */
> -       set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
> -
>         /* Has to be held on running the array */
>         mddev_suspend_and_lock_nointr(&rs->md);
> +
> +       /* Keep array frozen until resume. */
> +       md_frozen_sync_thread(&rs->md);
> +
>         r = md_run(&rs->md);
>         rs->md.in_sync = 0; /* Assume already marked dirty */
>         if (r) {
> @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
>         if (!mddev->pers || !mddev->pers->sync_request)
>                 return -EINVAL;
>
> +       if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
> +               return -EBUSY;
> +
>         if (!strcasecmp(argv[0], "frozen"))
>                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         else
> @@ -3791,15 +3795,31 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
>         blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
>  }
>
> +static void raid_presuspend(struct dm_target *ti)
> +{
> +       struct raid_set *rs = ti->private;
> +
> +       mddev_lock_nointr(&rs->md);
> +       md_frozen_sync_thread(&rs->md);
> +       mddev_unlock(&rs->md);
> +}
> +
> +static void raid_presuspend_undo(struct dm_target *ti)
> +{
> +       struct raid_set *rs = ti->private;
> +
> +       mddev_lock_nointr(&rs->md);
> +       md_unfrozen_sync_thread(&rs->md);
> +       mddev_unlock(&rs->md);
> +}
> +
>  static void raid_postsuspend(struct dm_target *ti)
>  {
>         struct raid_set *rs = ti->private;
>
>         if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
>                 /* Writes have to be stopped before suspending to avoid deadlocks. */
> -               if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
> -                       md_stop_writes(&rs->md);
> -
> +               md_stop_writes(&rs->md);
>                 mddev_suspend(&rs->md, false);
>         }
>  }
> @@ -4012,8 +4032,6 @@ static int raid_preresume(struct dm_target *ti)
>         }
>
>         /* Check for any resize/reshape on @rs and adjust/initiate */
> -       /* Be prepared for mddev_resume() in raid_resume() */
> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
>                 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
>                 mddev->resync_min = mddev->recovery_cp;
> @@ -4056,9 +4074,9 @@ static void raid_resume(struct dm_target *ti)
>                         rs_set_capacity(rs);
>
>                 mddev_lock_nointr(mddev);
> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>                 mddev->ro = 0;
>                 mddev->in_sync = 0;
> +               md_unfrozen_sync_thread(mddev);
>                 mddev_unlock_and_resume(mddev);
>         }
>  }
> @@ -4074,6 +4092,8 @@ static struct target_type raid_target = {
>         .message = raid_message,
>         .iterate_devices = raid_iterate_devices,
>         .io_hints = raid_io_hints,
> +       .presuspend = raid_presuspend,
> +       .presuspend_undo = raid_presuspend_undo,
>         .postsuspend = raid_postsuspend,
>         .preresume = raid_preresume,
>         .resume = raid_resume,
> --
> 2.39.2
>
Yu Kuai Feb. 18, 2024, 6:34 a.m. UTC | #2
Hi,

在 2024/02/18 12:53, Xiao Ni 写道:
> Hi Kuai
> 
> On Thu, Feb 1, 2024 at 5:30 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> 1) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
>>     it only prevent new sync_thread to start, and it can't stop the
>>     running sync thread;
> 
> Agree with this
> 
>> 2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
>>     it as condition for md_stop_writes() in raid_postsuspend() doesn't
>>     look correct.
> 
> I don't agree with it. __md_stop_writes stops sync thread, so it needs
> to check this flag. And It looks like the name __md_stop_writes is not
> right. Does it really stop write io? mddev_suspend should be the
> function that stop write request. From my understanding,
> raid_postsuspend does two jobs. One is stopping sync thread. Two is
> suspending array.

MD_RECOVERY_FROZEN is not just used in __md_stop_writes(), so I think
it's not correct to check this. For example, if MD_RECOVERY_FROZEN is
set by raid_message(), then __md_stop_writes() will be skipped.

> 
>> 3) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
>>     and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
>>     new sync_thread can start unexpected.
> 
> md_action_store doesn't check this either. If the array is suspended
> and MD_RECOVERY_FROZEN is cleared, before patch01, sync thread can't
> happen. So it looks like patch01 breaks the logic.

The difference is that md/raid doesn't need to freeze the sync_thread while
suspending the array for now. And I don't understand at all why the sync
thread can't start before patch01.

Thanks,
Kuai

> 
> Regards
> Xiao
> 
> 
>>
>> Fix above problems by using the new helper to suspend the array during
>> suspend, also disallow raid_message() to change sync_thread status
>> during suspend.
>>
>> Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
>> test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
>> and with previous fixes, the test won't hang there anymore, however, the
>> test will still fail and complain that ext4 is corrupted. And with this
>> patch, the test won't hang due to stop_sync_thread() or fail due to ext4
>> is corrupted anymore. However, there is still a deadlock related to
>> dm-raid456 that will be fixed in following patches.
>>
>> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
>> Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
>> Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
>> Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
>> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
>>   1 file changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
>> index eb009d6bb03a..5ce3c6020b1b 100644
>> --- a/drivers/md/dm-raid.c
>> +++ b/drivers/md/dm-raid.c
>> @@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>          rs->md.ro = 1;
>>          rs->md.in_sync = 1;
>>
>> -       /* Keep array frozen until resume. */
>> -       set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
>> -
>>          /* Has to be held on running the array */
>>          mddev_suspend_and_lock_nointr(&rs->md);
>> +
>> +       /* Keep array frozen until resume. */
>> +       md_frozen_sync_thread(&rs->md);
>> +
>>          r = md_run(&rs->md);
>>          rs->md.in_sync = 0; /* Assume already marked dirty */
>>          if (r) {
>> @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
>>          if (!mddev->pers || !mddev->pers->sync_request)
>>                  return -EINVAL;
>>
>> +       if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
>> +               return -EBUSY;
>> +
>>          if (!strcasecmp(argv[0], "frozen"))
>>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          else
>> @@ -3791,15 +3795,31 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
>>          blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
>>   }
>>
>> +static void raid_presuspend(struct dm_target *ti)
>> +{
>> +       struct raid_set *rs = ti->private;
>> +
>> +       mddev_lock_nointr(&rs->md);
>> +       md_frozen_sync_thread(&rs->md);
>> +       mddev_unlock(&rs->md);
>> +}
>> +
>> +static void raid_presuspend_undo(struct dm_target *ti)
>> +{
>> +       struct raid_set *rs = ti->private;
>> +
>> +       mddev_lock_nointr(&rs->md);
>> +       md_unfrozen_sync_thread(&rs->md);
>> +       mddev_unlock(&rs->md);
>> +}
>> +
>>   static void raid_postsuspend(struct dm_target *ti)
>>   {
>>          struct raid_set *rs = ti->private;
>>
>>          if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
>>                  /* Writes have to be stopped before suspending to avoid deadlocks. */
>> -               if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
>> -                       md_stop_writes(&rs->md);
>> -
>> +               md_stop_writes(&rs->md);
>>                  mddev_suspend(&rs->md, false);
>>          }
>>   }
>> @@ -4012,8 +4032,6 @@ static int raid_preresume(struct dm_target *ti)
>>          }
>>
>>          /* Check for any resize/reshape on @rs and adjust/initiate */
>> -       /* Be prepared for mddev_resume() in raid_resume() */
>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
>>                  set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
>>                  mddev->resync_min = mddev->recovery_cp;
>> @@ -4056,9 +4074,9 @@ static void raid_resume(struct dm_target *ti)
>>                          rs_set_capacity(rs);
>>
>>                  mddev_lock_nointr(mddev);
>> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>                  mddev->ro = 0;
>>                  mddev->in_sync = 0;
>> +               md_unfrozen_sync_thread(mddev);
>>                  mddev_unlock_and_resume(mddev);
>>          }
>>   }
>> @@ -4074,6 +4092,8 @@ static struct target_type raid_target = {
>>          .message = raid_message,
>>          .iterate_devices = raid_iterate_devices,
>>          .io_hints = raid_io_hints,
>> +       .presuspend = raid_presuspend,
>> +       .presuspend_undo = raid_presuspend_undo,
>>          .postsuspend = raid_postsuspend,
>>          .preresume = raid_preresume,
>>          .resume = raid_resume,
>> --
>> 2.39.2
>>
> 
> .
>
Xiao Ni Feb. 19, 2024, 7:27 a.m. UTC | #3
On Sun, Feb 18, 2024 at 2:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> Hi,
>
> 在 2024/02/18 12:53, Xiao Ni 写道:
> > Hi Kuai
> >
> > On Thu, Feb 1, 2024 at 5:30 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
> >>
> >> From: Yu Kuai <yukuai3@huawei.com>
> >>
> >> 1) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
> >>     it only prevent new sync_thread to start, and it can't stop the
> >>     running sync thread;
> >
> > Agree with this
> >
> >> 2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
> >>     it as condition for md_stop_writes() in raid_postsuspend() doesn't
> >>     look correct.
> >
> > I don't agree with it. __md_stop_writes stops sync thread, so it needs
> > to check this flag. And It looks like the name __md_stop_writes is not
> > right. Does it really stop write io? mddev_suspend should be the
> > function that stop write request. From my understanding,
> > raid_postsuspend does two jobs. One is stopping sync thread. Two is
> > suspending array.
>
> MD_RECOVERY_FROZEN is not just used in __md_stop_writes(), so I think
> it's not correct to to check this. For example, if MD_RECOVERY_FROZEN is
> set by raid_message(), then __md_stop_writes() will be skipped.

Hi Kuai

raid_message sets MD_RECOVERY_FROZEN and it stops the sync thread
synchronously. So it doesn't need __md_stop_writes. So from md and
dmraid, it has a rule. If you set MD_RECOVERY_FROZEN, you're in the
process of stopping sync thread.

>
> >
> >> 3) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
> >>     and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
> >>     new sync_thread can start unexpected.
> >
> > md_action_store doesn't check this either. If the array is suspended
> > and MD_RECOVERY_FROZEN is cleared, before patch01, sync thread can't
> > happen. So it looks like patch01 breaks the logic.
>
> The difference is that md/raid doen't need to frozen sync_thread while
> suspending the array for now. And I don't understand at all why sync
> thread can't happed before patch01.

There is a condition you mentioned above -- the array is suspended.
Before patch01, if an array is suspended, the sync thread can't
start. Even if raid_message clears MD_RECOVERY_FROZEN, the sync thread
can't start. After resuming the array, the sync thread can start again.

Regards
Xiao
>
> Thanks,
> Kuai
>
> >
> > Regards
> > Xiao
> >
> >
> >>
> >> Fix above problems by using the new helper to suspend the array during
> >> suspend, also disallow raid_message() to change sync_thread status
> >> during suspend.
> >>
> >> Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
> >> test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
> >> and with previous fixes, the test won't hang there anymore, however, the
> >> test will still fail and complain that ext4 is corrupted. And with this
> >> patch, the test won't hang due to stop_sync_thread() or fail due to ext4
> >> is corrupted anymore. However, there is still a deadlock related to
> >> dm-raid456 that will be fixed in following patches.
> >>
> >> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
> >> Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
> >> Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
> >> Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
> >> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> >> ---
> >>   drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
> >>   1 file changed, 29 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> >> index eb009d6bb03a..5ce3c6020b1b 100644
> >> --- a/drivers/md/dm-raid.c
> >> +++ b/drivers/md/dm-raid.c
> >> @@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>          rs->md.ro = 1;
> >>          rs->md.in_sync = 1;
> >>
> >> -       /* Keep array frozen until resume. */
> >> -       set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
> >> -
> >>          /* Has to be held on running the array */
> >>          mddev_suspend_and_lock_nointr(&rs->md);
> >> +
> >> +       /* Keep array frozen until resume. */
> >> +       md_frozen_sync_thread(&rs->md);
> >> +
> >>          r = md_run(&rs->md);
> >>          rs->md.in_sync = 0; /* Assume already marked dirty */
> >>          if (r) {
> >> @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
> >>          if (!mddev->pers || !mddev->pers->sync_request)
> >>                  return -EINVAL;
> >>
> >> +       if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
> >> +               return -EBUSY;
> >> +
> >>          if (!strcasecmp(argv[0], "frozen"))
> >>                  set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>          else
> >> @@ -3791,15 +3795,31 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
> >>          blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
> >>   }
> >>
> >> +static void raid_presuspend(struct dm_target *ti)
> >> +{
> >> +       struct raid_set *rs = ti->private;
> >> +
> >> +       mddev_lock_nointr(&rs->md);
> >> +       md_frozen_sync_thread(&rs->md);
> >> +       mddev_unlock(&rs->md);
> >> +}
> >> +
> >> +static void raid_presuspend_undo(struct dm_target *ti)
> >> +{
> >> +       struct raid_set *rs = ti->private;
> >> +
> >> +       mddev_lock_nointr(&rs->md);
> >> +       md_unfrozen_sync_thread(&rs->md);
> >> +       mddev_unlock(&rs->md);
> >> +}
> >> +
> >>   static void raid_postsuspend(struct dm_target *ti)
> >>   {
> >>          struct raid_set *rs = ti->private;
> >>
> >>          if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
> >>                  /* Writes have to be stopped before suspending to avoid deadlocks. */
> >> -               if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
> >> -                       md_stop_writes(&rs->md);
> >> -
> >> +               md_stop_writes(&rs->md);
> >>                  mddev_suspend(&rs->md, false);
> >>          }
> >>   }
> >> @@ -4012,8 +4032,6 @@ static int raid_preresume(struct dm_target *ti)
> >>          }
> >>
> >>          /* Check for any resize/reshape on @rs and adjust/initiate */
> >> -       /* Be prepared for mddev_resume() in raid_resume() */
> >> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>          if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
> >>                  set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
> >>                  mddev->resync_min = mddev->recovery_cp;
> >> @@ -4056,9 +4074,9 @@ static void raid_resume(struct dm_target *ti)
> >>                          rs_set_capacity(rs);
> >>
> >>                  mddev_lock_nointr(mddev);
> >> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>                  mddev->ro = 0;
> >>                  mddev->in_sync = 0;
> >> +               md_unfrozen_sync_thread(mddev);
> >>                  mddev_unlock_and_resume(mddev);
> >>          }
> >>   }
> >> @@ -4074,6 +4092,8 @@ static struct target_type raid_target = {
> >>          .message = raid_message,
> >>          .iterate_devices = raid_iterate_devices,
> >>          .io_hints = raid_io_hints,
> >> +       .presuspend = raid_presuspend,
> >> +       .presuspend_undo = raid_presuspend_undo,
> >>          .postsuspend = raid_postsuspend,
> >>          .preresume = raid_preresume,
> >>          .resume = raid_resume,
> >> --
> >> 2.39.2
> >>
> >
> > .
> >
>
Yu Kuai Feb. 19, 2024, 7:53 a.m. UTC | #4
Hi,

在 2024/02/19 15:27, Xiao Ni 写道:
> On Sun, Feb 18, 2024 at 2:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> Hi,
>>
>> 在 2024/02/18 12:53, Xiao Ni 写道:
>>> Hi Kuai
>>>
>>> On Thu, Feb 1, 2024 at 5:30 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>>
>>>> From: Yu Kuai <yukuai3@huawei.com>
>>>>
>>>> 1) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
>>>>      it only prevent new sync_thread to start, and it can't stop the
>>>>      running sync thread;
>>>
>>> Agree with this
>>>
>>>> 2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
>>>>      it as condition for md_stop_writes() in raid_postsuspend() doesn't
>>>>      look correct.
>>>
>>> I don't agree with it. __md_stop_writes stops sync thread, so it needs
>>> to check this flag. And It looks like the name __md_stop_writes is not
>>> right. Does it really stop write io? mddev_suspend should be the
>>> function that stop write request. From my understanding,
>>> raid_postsuspend does two jobs. One is stopping sync thread. Two is
>>> suspending array.
>>
>> MD_RECOVERY_FROZEN is not just used in __md_stop_writes(), so I think
>> it's not correct to to check this. For example, if MD_RECOVERY_FROZEN is
>> set by raid_message(), then __md_stop_writes() will be skipped.
> 
> Hi Kuai
> 
> raid_message sets MD_RECOVERY_FROZEN and it stops the sync thread
> synchronously. So it doesn't need __md_stop_writes. So from md and
> dmraid, it has a rule. If you set MD_RECOVERY_FROZEN, you're in the
> process of stopping sync thread.

There are so many problems here; I'm not sure if you have really walked
through all the patches here.

1) stopping the sync_thread synchronously is problematic, and raid_message()
doesn't even hold 'reconfig_mutex' for md_reap_sync_thread();
2) skipping __md_stop_writes() because the sync_thread is stopped is wrong;
__md_stop_writes() does more work.
> 
>>
>>>
>>>> 3) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
>>>>      and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
>>>>      new sync_thread can start unexpected.
>>>
>>> md_action_store doesn't check this either. If the array is suspended
>>> and MD_RECOVERY_FROZEN is cleared, before patch01, sync thread can't
>>> happen. So it looks like patch01 breaks the logic.
>>
>> The difference is that md/raid doen't need to frozen sync_thread while
>> suspending the array for now. And I don't understand at all why sync
>> thread can't happed before patch01.
> 
> There is a condition you mentioned above -- the array is suspended.
> Before patch01, if one array is suspended, the sync thread can't
3) before patch 1, the sync_thread can still be running even if the array is
suspended;
And even without patch 1, raid_message() can still start a new
sync_thread:

// assume sync_thread is not registered
raid_postsuspend	raid_message
  md_stop_writes
			 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery)
			 if (!mddev->suspended)
			  md_wakeup_thread
			  // new sync_thread is registered
  mddev_suspend

> happen. Even raid_messages clears MD_RECOVERY_FROZEN, the sync thread
> can't start. After resume the array, the sync thread can start again.

4) I think I don't need to explain again why suspended should not be
used to prevent starting new sync_thread;

Thanks,
Kuai

> 
> Regards
> Xiao
>>
>> Thanks,
>> Kuai
>>
>>>
>>> Regards
>>> Xiao
>>>
>>>
>>>>
>>>> Fix above problems by using the new helper to suspend the array during
>>>> suspend, also disallow raid_message() to change sync_thread status
>>>> during suspend.
>>>>
>>>> Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
>>>> test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
>>>> and with previous fixes, the test won't hang there anymore, however, the
>>>> test will still fail and complain that ext4 is corrupted. And with this
>>>> patch, the test won't hang due to stop_sync_thread() or fail due to ext4
>>>> is corrupted anymore. However, there is still a deadlock related to
>>>> dm-raid456 that will be fixed in following patches.
>>>>
>>>> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
>>>> Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
>>>> Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
>>>> Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
>>>> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>>    drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
>>>>    1 file changed, 29 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
>>>> index eb009d6bb03a..5ce3c6020b1b 100644
>>>> --- a/drivers/md/dm-raid.c
>>>> +++ b/drivers/md/dm-raid.c
>>>> @@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>>>           rs->md.ro = 1;
>>>>           rs->md.in_sync = 1;
>>>>
>>>> -       /* Keep array frozen until resume. */
>>>> -       set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
>>>> -
>>>>           /* Has to be held on running the array */
>>>>           mddev_suspend_and_lock_nointr(&rs->md);
>>>> +
>>>> +       /* Keep array frozen until resume. */
>>>> +       md_frozen_sync_thread(&rs->md);
>>>> +
>>>>           r = md_run(&rs->md);
>>>>           rs->md.in_sync = 0; /* Assume already marked dirty */
>>>>           if (r) {
>>>> @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
>>>>           if (!mddev->pers || !mddev->pers->sync_request)
>>>>                   return -EINVAL;
>>>>
>>>> +       if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
>>>> +               return -EBUSY;
>>>> +
>>>>           if (!strcasecmp(argv[0], "frozen"))
>>>>                   set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>>           else
>>>> @@ -3791,15 +3795,31 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
>>>>           blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
>>>>    }
>>>>
>>>> +static void raid_presuspend(struct dm_target *ti)
>>>> +{
>>>> +       struct raid_set *rs = ti->private;
>>>> +
>>>> +       mddev_lock_nointr(&rs->md);
>>>> +       md_frozen_sync_thread(&rs->md);
>>>> +       mddev_unlock(&rs->md);
>>>> +}
>>>> +
>>>> +static void raid_presuspend_undo(struct dm_target *ti)
>>>> +{
>>>> +       struct raid_set *rs = ti->private;
>>>> +
>>>> +       mddev_lock_nointr(&rs->md);
>>>> +       md_unfrozen_sync_thread(&rs->md);
>>>> +       mddev_unlock(&rs->md);
>>>> +}
>>>> +
>>>>    static void raid_postsuspend(struct dm_target *ti)
>>>>    {
>>>>           struct raid_set *rs = ti->private;
>>>>
>>>>           if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
>>>>                   /* Writes have to be stopped before suspending to avoid deadlocks. */
>>>> -               if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
>>>> -                       md_stop_writes(&rs->md);
>>>> -
>>>> +               md_stop_writes(&rs->md);
>>>>                   mddev_suspend(&rs->md, false);
>>>>           }
>>>>    }
>>>> @@ -4012,8 +4032,6 @@ static int raid_preresume(struct dm_target *ti)
>>>>           }
>>>>
>>>>           /* Check for any resize/reshape on @rs and adjust/initiate */
>>>> -       /* Be prepared for mddev_resume() in raid_resume() */
>>>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>>           if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
>>>>                   set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
>>>>                   mddev->resync_min = mddev->recovery_cp;
>>>> @@ -4056,9 +4074,9 @@ static void raid_resume(struct dm_target *ti)
>>>>                           rs_set_capacity(rs);
>>>>
>>>>                   mddev_lock_nointr(mddev);
>>>> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>>                   mddev->ro = 0;
>>>>                   mddev->in_sync = 0;
>>>> +               md_unfrozen_sync_thread(mddev);
>>>>                   mddev_unlock_and_resume(mddev);
>>>>           }
>>>>    }
>>>> @@ -4074,6 +4092,8 @@ static struct target_type raid_target = {
>>>>           .message = raid_message,
>>>>           .iterate_devices = raid_iterate_devices,
>>>>           .io_hints = raid_io_hints,
>>>> +       .presuspend = raid_presuspend,
>>>> +       .presuspend_undo = raid_presuspend_undo,
>>>>           .postsuspend = raid_postsuspend,
>>>>           .preresume = raid_preresume,
>>>>           .resume = raid_resume,
>>>> --
>>>> 2.39.2
>>>>
>>>
>>> .
>>>
>>
> 
> 
> .
>
Xiao Ni Feb. 19, 2024, 8:45 a.m. UTC | #5
On Mon, Feb 19, 2024 at 3:53 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> Hi,
>
> 在 2024/02/19 15:27, Xiao Ni 写道:
> > On Sun, Feb 18, 2024 at 2:34 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
> >>
> >> Hi,
> >>
> >> 在 2024/02/18 12:53, Xiao Ni 写道:
> >>> Hi Kuai
> >>>
> >>> On Thu, Feb 1, 2024 at 5:30 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
> >>>>
> >>>> From: Yu Kuai <yukuai3@huawei.com>
> >>>>
> >>>> 1) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
> >>>>      it only prevent new sync_thread to start, and it can't stop the
> >>>>      running sync thread;
> >>>
> >>> Agree with this
> >>>
> >>>> 2) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
> >>>>      it as condition for md_stop_writes() in raid_postsuspend() doesn't
> >>>>      look correct.
> >>>
> >>> I don't agree with it. __md_stop_writes stops sync thread, so it needs
> >>> to check this flag. And It looks like the name __md_stop_writes is not
> >>> right. Does it really stop write io? mddev_suspend should be the
> >>> function that stop write request. From my understanding,
> >>> raid_postsuspend does two jobs. One is stopping sync thread. Two is
> >>> suspending array.
> >>
> >> MD_RECOVERY_FROZEN is not just used in __md_stop_writes(), so I think
> >> it's not correct to check this. For example, if MD_RECOVERY_FROZEN is
> >> set by raid_message(), then __md_stop_writes() will be skipped.
> >
> > Hi Kuai
> >
> > raid_message sets MD_RECOVERY_FROZEN and it stops the sync thread
> > synchronously. So it doesn't need __md_stop_writes. So from md and
> > dmraid, it has a rule. If you set MD_RECOVERY_FROZEN, you're in the
> > process of stopping sync thread.
>
> There are so many problems here, I'm not sure if you really walked through
> all the patches here.

I haven't read all of them. But as you mentioned, the following
patches are based on patch01. They work together. I want to narrow
the change to fix these regression problems. But it depends on
Song's decision.

>
> 1) stop the sync_thread synchronously is problematic, and raid_message()
> doesn't even hold 'reconfig_mutex' for md_reap_sync_thread();
> 2) skipping __md_stop_writes() because sync_thread is stopped is wrong,
> __md_stop_writes() does more work.

Agree with this. We can do it the same way as action_store does. But we
can do this later, not in this patch set.

> >
> >>
> >>>
> >>>> 3) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
> >>>>      and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
> >>>>      new sync_thread can start unexpected.
> >>>
> >>> md_action_store doesn't check this either. If the array is suspended
> >>> and MD_RECOVERY_FROZEN is cleared, before patch01, sync thread can't
> >>> happen. So it looks like patch01 breaks the logic.
> >>
> >> The difference is that md/raid doesn't need to freeze sync_thread while
> >> suspending the array for now. And I don't understand at all why sync
> >> thread can't happen before patch01.
> >
> > There is a condition you mentioned above -- the array is suspended.
> > Before patch01, if one array is suspended, the sync thread can't
> 3) before patch 1, sync_thread can still be running even if the array is
> suspended;
> And even without patch 1, raid_message() can still start new
> sync_thread:
>
> // assume sync_thread is not register
> raid_postsuspend        raid_message
>   md_stop_writes
>                          set_bit(MD_RECOVERY_NEEDED, &mddev->recovery)
>                          if (!mddev->suspended)
>                           md_wakeup_thread
>                           // new sync_thread is registered
>   mddev_suspend

The array is not suspended in the above case. Before patch01, after
mddev_suspend, sync thread can't start.

But this looks like a problem. I'm not sure if dm has a way to handle
the concurrency. In md, we have a new lock sync_mutex to protect this,
right? If dm doesn't do this, dm-raid can do the same thing as md
does.

>
> > happen. Even if raid_message clears MD_RECOVERY_FROZEN, the sync thread
> > can't start. After resuming the array, the sync thread can start again.
>
> 4) I think I don't need to explain again why suspended should not be
> used to prevent starting new sync_thread;

Yes. I understand you. But I am only following the existing logic. It has
been there for many years. Especially for dmraid/lvmraid, maybe there
is some code that depends on this logic. For such a change, I don't
reject it 100%. I just want to say we need to be more careful.

Best Regards
Xiao
>
> Thanks,
> Kuai
>
> >
> > Regards
> > Xiao
> >>
> >> Thanks,
> >> Kuai
> >>
> >>>
> >>> Regards
> >>> Xiao
> >>>
> >>>
> >>>>
> >>>> Fix above problems by using the new helper to suspend the array during
> >>>> suspend, also disallow raid_message() to change sync_thread status
> >>>> during suspend.
> >>>>
> >>>> Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
> >>>> test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
> >>>> and with previous fixes, the test won't hang there anymore, however, the
> >>>> test will still fail and complain that ext4 is corrupted. And with this
> >>>> patch, the test won't hang due to stop_sync_thread() or fail due to ext4
> >>>> is corrupted anymore. However, there is still a deadlock related to
> >>>> dm-raid456 that will be fixed in following patches.
> >>>>
> >>>> Reported-by: Mikulas Patocka <mpatocka@redhat.com>
> >>>> Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
> >>>> Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
> >>>> Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
> >>>> Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
> >>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> >>>> ---
> >>>>    drivers/md/dm-raid.c | 38 +++++++++++++++++++++++++++++---------
> >>>>    1 file changed, 29 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> >>>> index eb009d6bb03a..5ce3c6020b1b 100644
> >>>> --- a/drivers/md/dm-raid.c
> >>>> +++ b/drivers/md/dm-raid.c
> >>>> @@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >>>>           rs->md.ro = 1;
> >>>>           rs->md.in_sync = 1;
> >>>>
> >>>> -       /* Keep array frozen until resume. */
> >>>> -       set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
> >>>> -
> >>>>           /* Has to be held on running the array */
> >>>>           mddev_suspend_and_lock_nointr(&rs->md);
> >>>> +
> >>>> +       /* Keep array frozen until resume. */
> >>>> +       md_frozen_sync_thread(&rs->md);
> >>>> +
> >>>>           r = md_run(&rs->md);
> >>>>           rs->md.in_sync = 0; /* Assume already marked dirty */
> >>>>           if (r) {
> >>>> @@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
> >>>>           if (!mddev->pers || !mddev->pers->sync_request)
> >>>>                   return -EINVAL;
> >>>>
> >>>> +       if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
> >>>> +               return -EBUSY;
> >>>> +
> >>>>           if (!strcasecmp(argv[0], "frozen"))
> >>>>                   set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>>>           else
> >>>> @@ -3791,15 +3795,31 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
> >>>>           blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
> >>>>    }
> >>>>
> >>>> +static void raid_presuspend(struct dm_target *ti)
> >>>> +{
> >>>> +       struct raid_set *rs = ti->private;
> >>>> +
> >>>> +       mddev_lock_nointr(&rs->md);
> >>>> +       md_frozen_sync_thread(&rs->md);
> >>>> +       mddev_unlock(&rs->md);
> >>>> +}
> >>>> +
> >>>> +static void raid_presuspend_undo(struct dm_target *ti)
> >>>> +{
> >>>> +       struct raid_set *rs = ti->private;
> >>>> +
> >>>> +       mddev_lock_nointr(&rs->md);
> >>>> +       md_unfrozen_sync_thread(&rs->md);
> >>>> +       mddev_unlock(&rs->md);
> >>>> +}
> >>>> +
> >>>>    static void raid_postsuspend(struct dm_target *ti)
> >>>>    {
> >>>>           struct raid_set *rs = ti->private;
> >>>>
> >>>>           if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
> >>>>                   /* Writes have to be stopped before suspending to avoid deadlocks. */
> >>>> -               if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
> >>>> -                       md_stop_writes(&rs->md);
> >>>> -
> >>>> +               md_stop_writes(&rs->md);
> >>>>                   mddev_suspend(&rs->md, false);
> >>>>           }
> >>>>    }
> >>>> @@ -4012,8 +4032,6 @@ static int raid_preresume(struct dm_target *ti)
> >>>>           }
> >>>>
> >>>>           /* Check for any resize/reshape on @rs and adjust/initiate */
> >>>> -       /* Be prepared for mddev_resume() in raid_resume() */
> >>>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>>>           if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
> >>>>                   set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
> >>>>                   mddev->resync_min = mddev->recovery_cp;
> >>>> @@ -4056,9 +4074,9 @@ static void raid_resume(struct dm_target *ti)
> >>>>                           rs_set_capacity(rs);
> >>>>
> >>>>                   mddev_lock_nointr(mddev);
> >>>> -               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>>>                   mddev->ro = 0;
> >>>>                   mddev->in_sync = 0;
> >>>> +               md_unfrozen_sync_thread(mddev);
> >>>>                   mddev_unlock_and_resume(mddev);
> >>>>           }
> >>>>    }
> >>>> @@ -4074,6 +4092,8 @@ static struct target_type raid_target = {
> >>>>           .message = raid_message,
> >>>>           .iterate_devices = raid_iterate_devices,
> >>>>           .io_hints = raid_io_hints,
> >>>> +       .presuspend = raid_presuspend,
> >>>> +       .presuspend_undo = raid_presuspend_undo,
> >>>>           .postsuspend = raid_postsuspend,
> >>>>           .preresume = raid_preresume,
> >>>>           .resume = raid_resume,
> >>>> --
> >>>> 2.39.2
> >>>>
> >>>
> >>> .
> >>>
> >>
> >
> >
> > .
> >
>
diff mbox series

Patch

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index eb009d6bb03a..5ce3c6020b1b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3240,11 +3240,12 @@  static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	rs->md.ro = 1;
 	rs->md.in_sync = 1;
 
-	/* Keep array frozen until resume. */
-	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
-
 	/* Has to be held on running the array */
 	mddev_suspend_and_lock_nointr(&rs->md);
+
+	/* Keep array frozen until resume. */
+	md_frozen_sync_thread(&rs->md);
+
 	r = md_run(&rs->md);
 	rs->md.in_sync = 0; /* Assume already marked dirty */
 	if (r) {
@@ -3722,6 +3723,9 @@  static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
 	if (!mddev->pers || !mddev->pers->sync_request)
 		return -EINVAL;
 
+	if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+		return -EBUSY;
+
 	if (!strcasecmp(argv[0], "frozen"))
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	else
@@ -3791,15 +3795,31 @@  static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
 }
 
+static void raid_presuspend(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+
+	mddev_lock_nointr(&rs->md);
+	md_frozen_sync_thread(&rs->md);
+	mddev_unlock(&rs->md);
+}
+
+static void raid_presuspend_undo(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+
+	mddev_lock_nointr(&rs->md);
+	md_unfrozen_sync_thread(&rs->md);
+	mddev_unlock(&rs->md);
+}
+
 static void raid_postsuspend(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
 	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
 		/* Writes have to be stopped before suspending to avoid deadlocks. */
-		if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
-			md_stop_writes(&rs->md);
-
+		md_stop_writes(&rs->md);
 		mddev_suspend(&rs->md, false);
 	}
 }
@@ -4012,8 +4032,6 @@  static int raid_preresume(struct dm_target *ti)
 	}
 
 	/* Check for any resize/reshape on @rs and adjust/initiate */
-	/* Be prepared for mddev_resume() in raid_resume() */
-	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
 		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 		mddev->resync_min = mddev->recovery_cp;
@@ -4056,9 +4074,9 @@  static void raid_resume(struct dm_target *ti)
 			rs_set_capacity(rs);
 
 		mddev_lock_nointr(mddev);
-		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		mddev->ro = 0;
 		mddev->in_sync = 0;
+		md_unfrozen_sync_thread(mddev);
 		mddev_unlock_and_resume(mddev);
 	}
 }
@@ -4074,6 +4092,8 @@  static struct target_type raid_target = {
 	.message = raid_message,
 	.iterate_devices = raid_iterate_devices,
 	.io_hints = raid_io_hints,
+	.presuspend = raid_presuspend,
+	.presuspend_undo = raid_presuspend_undo,
 	.postsuspend = raid_postsuspend,
 	.preresume = raid_preresume,
 	.resume = raid_resume,