diff mbox series

dm-integrity: Fix flush with external metadata device

Message ID 20201220140222.2f341344@gecko.fritz.box (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series dm-integrity: Fix flush with external metadata device | expand

Commit Message

Lukas Straub Dec. 20, 2020, 1:02 p.m. UTC
With an external metadata device, flush requests aren't passed down
to the data device.

Fix this by issuing flush in the right places: In integrity_commit
when not in journal mode, in do_journal_write after writing the
contents of the journal to the disk and in dm_integrity_postsuspend.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 drivers/md/dm-integrity.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Lukas Straub Dec. 29, 2020, 9:47 a.m. UTC | #1
On Sun, 20 Dec 2020 14:02:22 +0100
Lukas Straub <lukasstraub2@web.de> wrote:

> With an external metadata device, flush requests aren't passed down
> to the data device.
> 
> Fix this by issuing flush in the right places: In integrity_commit
> when not in journal mode, in do_journal_write after writing the
> contents of the journal to the disk and in dm_integrity_postsuspend.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  drivers/md/dm-integrity.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index 5a7a1b90e671..a26ed65869f6 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
>  	if (unlikely(ic->mode != 'J')) {
>  		spin_unlock_irq(&ic->endio_wait.lock);
>  		dm_integrity_flush_buffers(ic);
> +		if (ic->meta_dev)
> +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
>  		goto release_flush_bios;
>  	}
>  
> @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
>  	wait_for_completion_io(&comp.comp);
>  
>  	dm_integrity_flush_buffers(ic);
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  }
>  
>  static void integrity_writer(struct work_struct *w)
> @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
>  #endif
>  	}
>  
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
>  
>  	ic->journal_uptodate = true;

Ping...

--
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mike Snitzer Jan. 4, 2021, 8:30 p.m. UTC | #2
On Sun, Dec 20 2020 at  8:02am -0500,
Lukas Straub <lukasstraub2@web.de> wrote:

> With an external metadata device, flush requests aren't passed down
> to the data device.
> 
> Fix this by issuing flush in the right places: In integrity_commit
> when not in journal mode, in do_journal_write after writing the
> contents of the journal to the disk and in dm_integrity_postsuspend.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  drivers/md/dm-integrity.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index 5a7a1b90e671..a26ed65869f6 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
>  	if (unlikely(ic->mode != 'J')) {
>  		spin_unlock_irq(&ic->endio_wait.lock);
>  		dm_integrity_flush_buffers(ic);
> +		if (ic->meta_dev)
> +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
>  		goto release_flush_bios;
>  	}
>  
> @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
>  	wait_for_completion_io(&comp.comp);
>  
>  	dm_integrity_flush_buffers(ic);
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  }
>  
>  static void integrity_writer(struct work_struct *w)
> @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
>  #endif
>  	}
>  
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
>  
>  	ic->journal_uptodate = true;
> -- 
> 2.20.1


Seems like a pretty bad oversight... but shouldn't you also make sure to
flush the data device _before_ the metadata is flushed?

Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mikulas Patocka Jan. 8, 2021, 4:12 p.m. UTC | #3
On Mon, 4 Jan 2021, Mike Snitzer wrote:

> On Sun, Dec 20 2020 at  8:02am -0500,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > With an external metadata device, flush requests aren't passed down
> > to the data device.
> > 
> > Fix this by issuing flush in the right places: In integrity_commit
> > when not in journal mode, in do_journal_write after writing the
> > contents of the journal to the disk and in dm_integrity_postsuspend.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >  drivers/md/dm-integrity.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index 5a7a1b90e671..a26ed65869f6 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
> >  	if (unlikely(ic->mode != 'J')) {
> >  		spin_unlock_irq(&ic->endio_wait.lock);
> >  		dm_integrity_flush_buffers(ic);
> > +		if (ic->meta_dev)
> > +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> >  		goto release_flush_bios;
> >  	}
> >  
> > @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
> >  	wait_for_completion_io(&comp.comp);
> >  
> >  	dm_integrity_flush_buffers(ic);
> > +	if (ic->meta_dev)
> > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > +
> >  }
> >  
> >  static void integrity_writer(struct work_struct *w)
> > @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
> >  #endif
> >  	}
> >  
> > +	if (ic->meta_dev)
> > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > +
> >  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
> >  
> >  	ic->journal_uptodate = true;
> > -- 
> > 2.20.1
> 
> 
> Seems like a pretty bad oversight... but shouldn't you also make sure to
> flush the data device _before_ the metadata is flushed?
> 
> Mike

I think, ordering is not a problem.

A disk may flush its cache spontaneously anytime, so it doesn't matter in 
which order do we flush them. Similarly a dm-bufio buffer may be flushed 
anytime - if the machine is running out of memory and a dm-bufio shrinker 
is called.

I'll send another patch for this - I've created a patch that flushes the 
metadata device cache and data device cache in parallel, so that 
performance degradation is reduced.

My patch also doesn't use GFP_NOIO allocation - which can in theory 
deadlock if we are swapping on dm-integrity device.

Mikulas

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mike Snitzer Jan. 8, 2021, 6:38 p.m. UTC | #4
On Fri, Jan 08 2021 at 11:12am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> 
> 
> On Mon, 4 Jan 2021, Mike Snitzer wrote:
> 
> > On Sun, Dec 20 2020 at  8:02am -0500,
> > Lukas Straub <lukasstraub2@web.de> wrote:
> > 
> > > With an external metadata device, flush requests aren't passed down
> > > to the data device.
> > > 
> > > Fix this by issuing flush in the right places: In integrity_commit
> > > when not in journal mode, in do_journal_write after writing the
> > > contents of the journal to the disk and in dm_integrity_postsuspend.
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > >  drivers/md/dm-integrity.c | 8 ++++++++
> > >  1 file changed, 8 insertions(+)
> > > 
> > > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > > index 5a7a1b90e671..a26ed65869f6 100644
> > > --- a/drivers/md/dm-integrity.c
> > > +++ b/drivers/md/dm-integrity.c
> > > @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
> > >  	if (unlikely(ic->mode != 'J')) {
> > >  		spin_unlock_irq(&ic->endio_wait.lock);
> > >  		dm_integrity_flush_buffers(ic);
> > > +		if (ic->meta_dev)
> > > +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > >  		goto release_flush_bios;
> > >  	}
> > >  
> > > @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
> > >  	wait_for_completion_io(&comp.comp);
> > >  
> > >  	dm_integrity_flush_buffers(ic);
> > > +	if (ic->meta_dev)
> > > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > > +
> > >  }
> > >  
> > >  static void integrity_writer(struct work_struct *w)
> > > @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
> > >  #endif
> > >  	}
> > >  
> > > +	if (ic->meta_dev)
> > > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > > +
> > >  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
> > >  
> > >  	ic->journal_uptodate = true;
> > > -- 
> > > 2.20.1
> > 
> > 
> > Seems like a pretty bad oversight... but shouldn't you also make sure to
> > flush the data device _before_ the metadata is flushed?
> > 
> > Mike
> 
> I think, ordering is not a problem.
> 
> A disk may flush its cache spontaneously anytime, so it doesn't matter in 
> which order do we flush them. Similarly a dm-bufio buffer may be flushed 
> anytime - if the machine is running out of memory and a dm-bufio shrinker 
> is called.
> 
> I'll send another patch for this - I've created a patch that flushes the 
> metadata device cache and data device cache in parallel, so that 
> performance degradation is reduced.
> 
> My patch also doesn't use GFP_NOIO allocation - which can in theory 
> deadlock if we are swapping on dm-integrity device.

OK, I see your patch, but my concern about ordering was more to do with
crash consistency.  What if metadata is updated but data isn't?

On the surface, your approach of issuing the flushes in parallel seems
to expose us to inconsistency upon recovery from a crash.
If that isn't the case please explain why not.

Thanks,
Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mikulas Patocka Jan. 8, 2021, 7 p.m. UTC | #5
On Fri, 8 Jan 2021, Mike Snitzer wrote:

> > > Seems like a pretty bad oversight... but shouldn't you also make sure to
> > > flush the data device _before_ the metadata is flushed?
> > > 
> > > Mike
> > 
> > I think, ordering is not a problem.
> > 
> > A disk may flush its cache spontaneously anytime, so it doesn't matter in 
> > which order do we flush them. Similarly a dm-bufio buffer may be flushed 
> > anytime - if the machine is running out of memory and a dm-bufio shrinker 
> > is called.
> > 
> > I'll send another patch for this - I've created a patch that flushes the 
> > metadata device cache and data device cache in parallel, so that 
> > performance degradation is reduced.
> > 
> > My patch also doesn't use GFP_NOIO allocation - which can in theory 
> > deadlock if we are swapping on dm-integrity device.
> 
> OK, I see your patch, but my concern about ordering was more to do with
> crash consistency.  What if metadata is updated but data isn't?

In journal mode: do_journal_write will copy data from the journal to 
appropriate places and then flushes both data and metadata device. If a 
crash happens during flush, the journal will be replayed on next reboot - 
the replay operation will overwrite both data and metadata from the 
journal.

In bitmap mode: bitmap_flush_work will first issue flush on both data and 
metadata device, and then clear the dirty bits in a bitmap. If a crash 
happens during the flush, the bits in the bitmap are still set and the 
checksums will be recalculated on next reboot.

> On the surface, your approach of issuing the flushes in parallel seems
> to expose us to inconsistency upon recovery from a crash.
> If that isn't the case please explain why not.

The disk may flush its internal cache anytime. So, if you do 
"blkdev_issue_flush(disk1); blkdev_issue_flush(disk2);" there is no 
ordering guarantee at all. The firmware in disk2 may flush the cache 
spontaneously, before you called blkdev_issue_flush(disk1).

> Thanks,
> Mike

Mikulas

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 5a7a1b90e671..a26ed65869f6 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2196,6 +2196,8 @@  static void integrity_commit(struct work_struct *w)
 	if (unlikely(ic->mode != 'J')) {
 		spin_unlock_irq(&ic->endio_wait.lock);
 		dm_integrity_flush_buffers(ic);
+		if (ic->meta_dev)
+			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
 		goto release_flush_bios;
 	}
 
@@ -2410,6 +2412,9 @@  static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
 	wait_for_completion_io(&comp.comp);
 
 	dm_integrity_flush_buffers(ic);
+	if (ic->meta_dev)
+		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+
 }
 
 static void integrity_writer(struct work_struct *w)
@@ -2949,6 +2954,9 @@  static void dm_integrity_postsuspend(struct dm_target *ti)
 #endif
 	}
 
+	if (ic->meta_dev)
+		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+
 	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
 
 	ic->journal_uptodate = true;