diff mbox

[v8,03/10] md: Neither resync nor reshape while the system is frozen

Message ID 20171010210346.14574-4-bart.vanassche@wdc.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bart Van Assche Oct. 10, 2017, 9:03 p.m. UTC
Some people use the md driver on laptops and use the suspend and
resume functionality. Since it is essential that submitting of
new I/O requests stops before a hibernation image is created,
interrupt the md resync and reshape actions if the system is
being frozen. Note: the resync and reshape will restart after
the system is resumed and a message similar to the following
will appear in the system log:

md: md0: data-check interrupted.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: linux-raid@vger.kernel.org
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
---
 drivers/md/md.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

Comments

Shaohua Li Oct. 10, 2017, 10:30 p.m. UTC | #1
On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:
> Some people use the md driver on laptops and use the suspend and
> resume functionality. Since it is essential that submitting of
> new I/O requests stops before a hibernation image is created,
> interrupt the md resync and reshape actions if the system is
> being frozen. Note: the resync and reshape will restart after
> the system is resumed and a message similar to the following
> will appear in the system log:
> 
> md: md0: data-check interrupted.

Where do we restart resync and reshape?
 
> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
> Cc: Shaohua Li <shli@kernel.org>
> Cc: linux-raid@vger.kernel.org
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Johannes Thumshirn <jthumshirn@suse.de>
> ---
>  drivers/md/md.c | 30 +++++++++++++++++++++++++++++-
>  1 file changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b99584e5d6b1..d712d3320c1d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -66,6 +66,8 @@
>  #include <linux/raid/md_u.h>
>  #include <linux/slab.h>
>  #include <linux/percpu-refcount.h>
> +#include <linux/freezer.h>
> +#include <linux/suspend.h>
>  
>  #include <trace/events/block.h>
>  #include "md.h"
> @@ -7439,6 +7441,7 @@ static int md_thread(void *arg)
>  	 */
>  
>  	allow_signal(SIGKILL);
> +	set_freezable();
>  	while (!kthread_should_stop()) {
>  
>  		/* We need to wait INTERRUPTIBLE so that
> @@ -7449,7 +7452,7 @@ static int md_thread(void *arg)
>  		if (signal_pending(current))
>  			flush_signals(current);
>  
> -		wait_event_interruptible_timeout
> +		wait_event_freezable_timeout
>  			(thread->wqueue,
>  			 test_bit(THREAD_WAKEUP, &thread->flags)
>  			 || kthread_should_stop() || kthread_should_park(),
> @@ -8963,6 +8966,29 @@ static void md_stop_all_writes(void)
>  		mdelay(1000*1);
>  }
>  
> +/*
> + * Ensure that neither resyncing nor reshaping occurs while the system is
> + * frozen.
> + */
> +static int md_notify_pm(struct notifier_block *bl, unsigned long state,
> +			void *unused)
> +{
> +	pr_debug("%s: state = %ld\n", __func__, state);
> +
> +	switch (state) {
> +	case PM_HIBERNATION_PREPARE:
> +	case PM_SUSPEND_PREPARE:
> +	case PM_RESTORE_PREPARE:
> +		md_stop_all_writes();
> +		break;
> +	}
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block md_pm_notifier = {
> +	.notifier_call	= md_notify_pm,
> +};
> +
>  static int md_notify_reboot(struct notifier_block *this,
>  			    unsigned long code, void *x)
>  {
> @@ -9009,6 +9035,7 @@ static int __init md_init(void)
>  			    md_probe, NULL, NULL);
>  
>  	register_reboot_notifier(&md_reboot_notifier);
> +	register_pm_notifier(&md_pm_notifier);
>  	raid_table_header = register_sysctl_table(raid_root_table);
>  
>  	md_geninit();
> @@ -9248,6 +9275,7 @@ static __exit void md_exit(void)
>  
>  	unregister_blkdev(MD_MAJOR,"md");
>  	unregister_blkdev(mdp_major, "mdp");
> +	unregister_pm_notifier(&md_pm_notifier);
>  	unregister_reboot_notifier(&md_reboot_notifier);
>  	unregister_sysctl_table(raid_table_header);
>  
> -- 
> 2.14.2
>
Bart Van Assche Oct. 10, 2017, 11:33 p.m. UTC | #2
On Tue, 2017-10-10 at 15:30 -0700, Shaohua Li wrote:
> On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:

> > Some people use the md driver on laptops and use the suspend and

> > resume functionality. Since it is essential that submitting of

> > new I/O requests stops before a hibernation image is created,

> > interrupt the md resync and reshape actions if the system is

> > being frozen. Note: the resync and reshape will restart after

> > the system is resumed and a message similar to the following

> > will appear in the system log:

> > 

> > md: md0: data-check interrupted.

> 

> Where do we restart resync and reshape?


Hello Shaohua,

My understanding of the md driver is as follows:
- Before any write occurs, md_write_start() is called. That function clears
  mddev->safemode and checks mddev->in_sync. If that variable is set, it is
  cleared and md_write_start() wakes up mddev->thread and waits until the
  superblock has been written to disk.
- All mddev->thread implementations call md_check_recovery(). That function
  updates the superblock by calling md_update_sb() if needed.
  md_check_recovery() also starts a resync thread if the array is not in
  sync. See also the comment block above md_check_recovery().
- md_do_sync() performs the resync. If it completes successfully the
  "in_sync" state is set (set_in_sync()). If it is interrupted the "in_sync"
  state is not modified.
- super_90_sync() sets the MD_SB_CLEAN flag in the on-disk superblock if the
  "in_sync" flag is set.

In other words: if the array is in sync the MD_SB_CLEAN flag is set in the
on-disk superblock. If a resync is needed that flag is not set. The
MD_SB_CLEAN flag is checked when the superblock is loaded. If that flag is
not set a resync is started.

Bart.
Shaohua Li Oct. 11, 2017, 1:48 a.m. UTC | #3
On Tue, Oct 10, 2017 at 11:33:06PM +0000, Bart Van Assche wrote:
> On Tue, 2017-10-10 at 15:30 -0700, Shaohua Li wrote:
> > On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:
> > > Some people use the md driver on laptops and use the suspend and
> > > resume functionality. Since it is essential that submitting of
> > > new I/O requests stops before a hibernation image is created,
> > > interrupt the md resync and reshape actions if the system is
> > > being frozen. Note: the resync and reshape will restart after
> > > the system is resumed and a message similar to the following
> > > will appear in the system log:
> > > 
> > > md: md0: data-check interrupted.
> > 
> > Where do we restart resync and reshape?
> 
> Hello Shaohua,
> 
> My understanding of the md driver is as follows:
> - Before any write occurs, md_write_start() is called. That function clears
>   mddev->safemode and checks mddev->in_sync. If that variable is set, it is
>   cleared and md_write_start() wakes up mddev->thread and waits until the
>   superblock has been written to disk.
> - All mddev->thread implementations call md_check_recovery(). That function
>   updates the superblock by calling md_update_sb() if needed.
>   md_check_recovery() also starts a resync thread if the array is not in
>   sync. See also the comment block above md_check_recovery().
> - md_do_sync() performs the resync. If it completes successfully the
>   "in_sync" state is set (set_in_sync()). If it is interrupted the "in_sync"
>   state is not modified.
> - super_90_sync() sets the MD_SB_CLEAN flag in the on-disk superblock if the
>   "in_sync" flag is set.
> 
> In other words: if the array is in sync the MD_SB_CLEAN flag is set in the
> on-disk superblock. If a resync is needed that flag is not set. The
> MD_SB_CLEAN flag is checked when the superblock is loaded. If that flag is
> not set a resync is started.

The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which
will prevent md_check_recovery restarting resync/reshape. I think we need
preserve mddev->recovery in suspend prepare and restore after resume

Thanks,
Shaohua
Bart Van Assche Oct. 11, 2017, 5:17 p.m. UTC | #4
On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which

> will prevent md_check_recovery restarting resync/reshape. I think we need

> preserve mddev->recovery in suspend prepare and restore after resume


Hello Shaohua,

As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I think
it should be sufficient to save and restore the state of the
MD_RECOVERY_FROZEN flag. However, when I ran the following test:
* echo frozen > /sys/block/md0/md/sync_action
* cat /sys/block/md0/md/sync_action
* systemctl hibernate
* (power on system again)
* cat /sys/block/md0/md/sync_action

the output that appeared was as follows:

frozen
idle

Does that mean that a hibernate or suspend followed by a resume is sufficient
to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
patch at the start of this e-mail thread does not need any further
modifications?

Thanks,

Bart.
Shaohua Li Oct. 11, 2017, 7:32 p.m. UTC | #5
On Wed, Oct 11, 2017 at 05:17:56PM +0000, Bart Van Assche wrote:
> On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> > The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which
> > will prevent md_check_recovery restarting resync/reshape. I think we need
> > preserve mddev->recovery in suspend prepare and restore after resume
> 
> Hello Shaohua,
> 
> As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
> MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I think
> it should be sufficient to save and restore the state of the
> MD_RECOVERY_FROZEN flag. However, when I ran the following test:
> * echo frozen > /sys/block/md0/md/sync_action
> * cat /sys/block/md0/md/sync_action
> * systemctl hibernate
> * (power on system again)
> * cat /sys/block/md0/md/sync_action
> 
> the output that appeared was as follows:
> 
> frozen
> idle 
> Does that mean that a hibernate or suspend followed by a resume is sufficient
> to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
> patch at the start of this e-mail thread does not need any further
> modifications?

Have no idea why it shows 'idle'. From my understanding, after resume, previous
memory is stored and MD_RECOVERY_FROZEN bit should be set. Was trying to
understand what happens.

Thanks,
Shaohua
diff mbox

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b99584e5d6b1..d712d3320c1d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -66,6 +66,8 @@ 
 #include <linux/raid/md_u.h>
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
+#include <linux/freezer.h>
+#include <linux/suspend.h>
 
 #include <trace/events/block.h>
 #include "md.h"
@@ -7439,6 +7441,7 @@  static int md_thread(void *arg)
 	 */
 
 	allow_signal(SIGKILL);
+	set_freezable();
 	while (!kthread_should_stop()) {
 
 		/* We need to wait INTERRUPTIBLE so that
@@ -7449,7 +7452,7 @@  static int md_thread(void *arg)
 		if (signal_pending(current))
 			flush_signals(current);
 
-		wait_event_interruptible_timeout
+		wait_event_freezable_timeout
 			(thread->wqueue,
 			 test_bit(THREAD_WAKEUP, &thread->flags)
 			 || kthread_should_stop() || kthread_should_park(),
@@ -8963,6 +8966,29 @@  static void md_stop_all_writes(void)
 		mdelay(1000*1);
 }
 
+/*
+ * Ensure that neither resyncing nor reshaping occurs while the system is
+ * frozen.
+ */
+static int md_notify_pm(struct notifier_block *bl, unsigned long state,
+			void *unused)
+{
+	pr_debug("%s: state = %ld\n", __func__, state);
+
+	switch (state) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+	case PM_RESTORE_PREPARE:
+		md_stop_all_writes();
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block md_pm_notifier = {
+	.notifier_call	= md_notify_pm,
+};
+
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
@@ -9009,6 +9035,7 @@  static int __init md_init(void)
 			    md_probe, NULL, NULL);
 
 	register_reboot_notifier(&md_reboot_notifier);
+	register_pm_notifier(&md_pm_notifier);
 	raid_table_header = register_sysctl_table(raid_root_table);
 
 	md_geninit();
@@ -9248,6 +9275,7 @@  static __exit void md_exit(void)
 
 	unregister_blkdev(MD_MAJOR,"md");
 	unregister_blkdev(mdp_major, "mdp");
+	unregister_pm_notifier(&md_pm_notifier);
 	unregister_reboot_notifier(&md_reboot_notifier);
 	unregister_sysctl_table(raid_table_header);