diff mbox series

[v2] dm-integrity: Prevent RMW for full metadata buffer writes

Message ID 20200227142601.61f3cd54@luklap (mailing list archive)
State Rejected, archived
Delegated to: Mike Snitzer
Headers show
Series [v2] dm-integrity: Prevent RMW for full metadata buffer writes | expand

Commit Message

Lukas Straub Feb. 27, 2020, 1:26 p.m. UTC
If a full metadata buffer is being written, don't read it first. This
prevents a read-modify-write cycle and increases performance on HDDs
considerably.

To do this we now calculate the checksums for all sectors in the bio in one
go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
which now checks if we overwrite the whole buffer.

Benchmarking with a 5400RPM HDD with bitmap mode:
integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress

Without patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s

With patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
Hello Everyone,
So here is v2, now checking if we overwrite a whole metadata buffer instead
of the (buggy) check if we overwrite a whole tag area before.
Performance stayed the same (with --buffer-sectors=1).

The only quirk now is that it advertises a very big optimal io size in the
standard configuration (where buffer_sectors=128). Is this a Problem?

v2:
 -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
 -fix optimal io size to check if we overwrite a whole metadata buffer

Regards,
Lukas Straub

 drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 34 deletions(-)

--
2.20.1


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

Mike Snitzer March 24, 2020, 5:18 p.m. UTC | #1
On Thu, Feb 27 2020 at  8:26am -0500,
Lukas Straub <lukasstraub2@web.de> wrote:

> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
> 
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
> 
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> 
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> 
> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
> 
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
> 
> v2:
>  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
>  -fix optimal io size to check if we overwrite a whole metadata buffer
> 
> Regards,
> Lukas Straub


Not sure why you didn't cc Mikulas but I just checked with him and he
thinks:

You're only seeing a boost because you're using 512-byte sector and
512-byte buffer. Patch helps that case but hurts in most other cases
(due to small buffers).

Mike


>  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
>  		if (unlikely(r))
>  			return r;
> 
> -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> -		if (IS_ERR(data))
> -			return PTR_ERR(data);
> +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> +		if (op == TAG_WRITE && *metadata_offset == 0 \
> +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		} else {
> +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		}
> 
>  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
>  		dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
>  {
>  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
>  	struct dm_integrity_c *ic = dio->ic;
> +	unsigned sectors_to_process = dio->range.n_sectors;
> +	sector_t sector = dio->range.logical_sector;
> 
>  	int r;
> 
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
>  		struct bio_vec bv;
>  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
>  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> -		char *checksums;
> +		char *checksums, *checksums_ptr;
>  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
>  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> -		unsigned sectors_to_process = dio->range.n_sectors;
> -		sector_t sector = dio->range.logical_sector;
> 
>  		if (unlikely(ic->mode == 'R'))
>  			goto skip_io;
> 
> -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
>  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
>  		if (!checksums) {
>  			checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
>  			}
>  		}
> 
> +		checksums_ptr = checksums;
>  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
>  			unsigned pos;
> -			char *mem, *checksums_ptr;
> -
> -again:
> +			char *mem;
>  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
>  			pos = 0;
> -			checksums_ptr = checksums;
>  			do {
>  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> -				checksums_ptr += ic->tag_size;
> -				sectors_to_process -= ic->sectors_per_block;
> +
> +				if (likely(checksums != checksums_onstack)) {
> +					checksums_ptr += ic->tag_size;
> +				} else {
> +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> +					if (unlikely(r))
> +						goto internal_hash_error;
> +				}
> +
>  				pos += ic->sectors_per_block << SECTOR_SHIFT;
>  				sector += ic->sectors_per_block;
> -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> +				sectors_to_process -= ic->sectors_per_block;
> +			} while (pos < bv.bv_len && sectors_to_process);
>  			kunmap_atomic(mem);
> 
> -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> -			if (unlikely(r)) {
> -				if (r > 0) {
> -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> -					r = -EILSEQ;
> -					atomic64_inc(&ic->number_of_mismatches);
> -				}
> -				if (likely(checksums != checksums_onstack))
> -					kfree(checksums);
> -				goto error;
> -			}
> -
>  			if (!sectors_to_process)
>  				break;
> +		}
> 
> -			if (unlikely(pos < bv.bv_len)) {
> -				bv.bv_offset += pos;
> -				bv.bv_len -= pos;
> -				goto again;
> +		if (likely(checksums != checksums_onstack)) {
> +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> +						!dio->write ? TAG_CMP : TAG_WRITE);
> +			if (unlikely(r)) {
> +				kfree(checksums);
> +				goto internal_hash_error;
>  			}
> +			kfree(checksums);
>  		}
> 
> -		if (likely(checksums != checksums_onstack))
> -			kfree(checksums);
>  	} else {
>  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> 
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
>  skip_io:
>  	dec_in_flight(dio);
>  	return;
> +internal_hash_error:
> +	if (r > 0) {
> +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> +		r = -EILSEQ;
> +		atomic64_inc(&ic->number_of_mismatches);
> +	}
>  error:
>  	dio->bi_status = errno_to_blk_status(r);
>  	dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
>  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
>  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
>  	}
> +
> +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
>  }
> 
>  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Lukas Straub March 24, 2020, 6:59 p.m. UTC | #2
On Tue, 24 Mar 2020 13:18:22 -0400
Mike Snitzer <snitzer@redhat.com> wrote:

> On Thu, Feb 27 2020 at  8:26am -0500,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub  
> 
> 
> Not sure why you didn't cc Mikulas but I just checked with him and he
> thinks:
> 
> You're only seeing a boost because you're using 512-byte sector and
> 512-byte buffer. Patch helps that case but hurts in most other cases
> (due to small buffers).

Hmm, buffer-sectors is still user configurable. If the user wants fast
write performance on slow HDDs he can set a small buffer-sector and
benefit from this patch. With the default buffer-sectors=128 it
shouldn't have any impact.

Regards,
Lukas Straub


> Mike
> 
> 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> >   
>
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mike Snitzer March 24, 2020, 7:11 p.m. UTC | #3
On Tue, Mar 24 2020 at  2:59pm -0400,
Lukas Straub <lukasstraub2@web.de> wrote:

> On Tue, 24 Mar 2020 13:18:22 -0400
> Mike Snitzer <snitzer@redhat.com> wrote:
> 
> > On Thu, Feb 27 2020 at  8:26am -0500,
> > Lukas Straub <lukasstraub2@web.de> wrote:
> > 
> > > If a full metadata buffer is being written, don't read it first. This
> > > prevents a read-modify-write cycle and increases performance on HDDs
> > > considerably.
> > > 
> > > To do this we now calculate the checksums for all sectors in the bio in one
> > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > which now checks if we overwrite the whole buffer.
> > > 
> > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > > 
> > > Without patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > > 
> > > With patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > > Hello Everyone,
> > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > of the (buggy) check if we overwrite a whole tag area before.
> > > Performance stayed the same (with --buffer-sectors=1).
> > > 
> > > The only quirk now is that it advertises a very big optimal io size in the
> > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > > 
> > > v2:
> > >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > > 
> > > Regards,
> > > Lukas Straub  
> > 
> > 
> > Not sure why you didn't cc Mikulas but I just checked with him and he
> > thinks:
> > 
> > You're only seeing a boost because you're using 512-byte sector and
> > 512-byte buffer. Patch helps that case but hurts in most other cases
> > (due to small buffers).
> 
> Hmm, buffer-sectors is still user configurable. If the user wants fast
> write performance on slow HDDs he can set a small buffer-sector and
> benefit from this patch. With the default buffer-sectors=128 it
> shouldn't have any impact.

OK, if you'd be willing to conduct tests to prove there is no impact
with larger buffers that'd certainly help reinforce your position and
make it easier for me to take your patch.

But if you're just testing against slow HDD testbeds then the test
coverage isn't wide enough.

Thanks,
Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Lukas Straub March 26, 2020, 11:24 a.m. UTC | #4
On Tue, 24 Mar 2020 15:11:49 -0400
Mike Snitzer <snitzer@redhat.com> wrote:

> On Tue, Mar 24 2020 at  2:59pm -0400,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > On Tue, 24 Mar 2020 13:18:22 -0400
> > Mike Snitzer <snitzer@redhat.com> wrote:
> >   
> > > On Thu, Feb 27 2020 at  8:26am -0500,
> > > Lukas Straub <lukasstraub2@web.de> wrote:
> > >   
> > > > If a full metadata buffer is being written, don't read it first. This
> > > > prevents a read-modify-write cycle and increases performance on HDDs
> > > > considerably.
> > > > 
> > > > To do this we now calculate the checksums for all sectors in the bio in one
> > > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > > which now checks if we overwrite the whole buffer.
> > > > 
> > > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > > > 
> > > > Without patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > > > 
> > > > With patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > > > 
> > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > ---
> > > > Hello Everyone,
> > > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > > of the (buggy) check if we overwrite a whole tag area before.
> > > > Performance stayed the same (with --buffer-sectors=1).
> > > > 
> > > > The only quirk now is that it advertises a very big optimal io size in the
> > > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > > > 
> > > > v2:
> > > >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > > >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > > > 
> > > > Regards,
> > > > Lukas Straub    
> > > 
> > > 
> > > Not sure why you didn't cc Mikulas but I just checked with him and he
> > > thinks:
> > > 
> > > You're only seeing a boost because you're using 512-byte sector and
> > > 512-byte buffer. Patch helps that case but hurts in most other cases
> > > (due to small buffers).  
> > 
> > Hmm, buffer-sectors is still user configurable. If the user wants fast
> > write performance on slow HDDs he can set a small buffer-sector and
> > benefit from this patch. With the default buffer-sectors=128 it
> > shouldn't have any impact.  
> 
> OK, if you'd be willing to conduct tests to prove there is no impact
> with larger buffers that'd certainly help reinforce your position and
> make it easier for me to take your patch.
> 
> But if you're just testing against slow HDD testbeds then the test
> coverage isn't wide enough.
> 
> Thanks,
> Mike
> 

Sure,
This time testing with an Samsung 850 EVO SSD:

without patch:

root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4177264640 bytes (4.2 GB, 3.9 GiB) copied, 28 s, 149 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 28.8946 s, 149 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4211081216 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 191 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.6355 s, 190 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 679c4808-d549-4576-a8ef-4c46c78c6070
Superblock backups stored on blocks:
        32768, 98304, 163840, 229376, 294912, 819200, 884736

Allocating group tables: done
Writing inode tables: done
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done


real    0m0.318s
user    0m0.016s
sys     0m0.001s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz

real    0m18.708s
user    0m14.351s
sys     0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/

real    0m3.226s
user    0m0.117s
sys     0m2.958s


with patch:

root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4169007104 bytes (4.2 GB, 3.9 GiB) copied, 27 s, 154 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 27.9165 s, 154 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4169138176 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 189 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.9181 s, 187 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 9a9decad-19e2-46a4-8cc5-ce27238829f2
Superblock backups stored on blocks: 
        32768, 98304, 163840, 229376, 294912, 819200, 884736

Allocating group tables: done                            
Writing inode tables: done                            
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done 


real    0m0.341s
user    0m0.000s
sys     0m0.016s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz

real    0m18.409s
user    0m14.191s
sys     0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/

real    0m3.200s
user    0m0.133s
sys     0m2.979s


Regards,
Lukas Straub
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mikulas Patocka March 30, 2020, 4:34 p.m. UTC | #5
Hi

I tested it on my rotational disk:


On Thu, 27 Feb 2020, Lukas Straub wrote:

> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
> 
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
> 
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> 
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s

I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).

> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s

I get 110 MB/s.

> Signed-off-by: Lukas Straub <lukasstraub2@web.de>

BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
these numbers:
without the patch: 101 MB/s
with the patch: 101 MB/s

So, you crippled performance with "--buffer-sectors=1" and then made a 
patch to restore it.

The patch only helps 10%.

Mikulas


> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
> 
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
> 
> v2:
>  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
>  -fix optimal io size to check if we overwrite a whole metadata buffer
> 
> Regards,
> Lukas Straub
> 
>  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
>  		if (unlikely(r))
>  			return r;
> 
> -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> -		if (IS_ERR(data))
> -			return PTR_ERR(data);
> +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> +		if (op == TAG_WRITE && *metadata_offset == 0 \
> +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		} else {
> +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		}
> 
>  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
>  		dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
>  {
>  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
>  	struct dm_integrity_c *ic = dio->ic;
> +	unsigned sectors_to_process = dio->range.n_sectors;
> +	sector_t sector = dio->range.logical_sector;
> 
>  	int r;
> 
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
>  		struct bio_vec bv;
>  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
>  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> -		char *checksums;
> +		char *checksums, *checksums_ptr;
>  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
>  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> -		unsigned sectors_to_process = dio->range.n_sectors;
> -		sector_t sector = dio->range.logical_sector;
> 
>  		if (unlikely(ic->mode == 'R'))
>  			goto skip_io;
> 
> -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
>  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
>  		if (!checksums) {
>  			checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
>  			}
>  		}
> 
> +		checksums_ptr = checksums;
>  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
>  			unsigned pos;
> -			char *mem, *checksums_ptr;
> -
> -again:
> +			char *mem;
>  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
>  			pos = 0;
> -			checksums_ptr = checksums;
>  			do {
>  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> -				checksums_ptr += ic->tag_size;
> -				sectors_to_process -= ic->sectors_per_block;
> +
> +				if (likely(checksums != checksums_onstack)) {
> +					checksums_ptr += ic->tag_size;
> +				} else {
> +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> +					if (unlikely(r))
> +						goto internal_hash_error;
> +				}
> +
>  				pos += ic->sectors_per_block << SECTOR_SHIFT;
>  				sector += ic->sectors_per_block;
> -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> +				sectors_to_process -= ic->sectors_per_block;
> +			} while (pos < bv.bv_len && sectors_to_process);
>  			kunmap_atomic(mem);
> 
> -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> -			if (unlikely(r)) {
> -				if (r > 0) {
> -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> -					r = -EILSEQ;
> -					atomic64_inc(&ic->number_of_mismatches);
> -				}
> -				if (likely(checksums != checksums_onstack))
> -					kfree(checksums);
> -				goto error;
> -			}
> -
>  			if (!sectors_to_process)
>  				break;
> +		}
> 
> -			if (unlikely(pos < bv.bv_len)) {
> -				bv.bv_offset += pos;
> -				bv.bv_len -= pos;
> -				goto again;
> +		if (likely(checksums != checksums_onstack)) {
> +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> +						!dio->write ? TAG_CMP : TAG_WRITE);
> +			if (unlikely(r)) {
> +				kfree(checksums);
> +				goto internal_hash_error;
>  			}
> +			kfree(checksums);
>  		}
> 
> -		if (likely(checksums != checksums_onstack))
> -			kfree(checksums);
>  	} else {
>  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> 
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
>  skip_io:
>  	dec_in_flight(dio);
>  	return;
> +internal_hash_error:
> +	if (r > 0) {
> +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> +		r = -EILSEQ;
> +		atomic64_inc(&ic->number_of_mismatches);
> +	}
>  error:
>  	dio->bi_status = errno_to_blk_status(r);
>  	dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
>  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
>  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
>  	}
> +
> +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
>  }
> 
>  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
> 
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Lukas Straub March 31, 2020, 12:06 p.m. UTC | #6
On Mon, 30 Mar 2020 12:34:04 -0400 (EDT)
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I tested it on my rotational disk:
> 
> 
> On Thu, 27 Feb 2020, Lukas Straub wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s  
> 
> I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).
> 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s  
> 
> I get 110 MB/s.
> 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
> these numbers:
> without the patch: 101 MB/s

I get 86.5 MB/s.

So in my case it's worth it and as you saw it doesn't (negatively) affect other cases.

Regards,
Lukas Straub

> with the patch: 101 MB/s
> 
> So, you crippled performance with "--buffer-sectors=1" and then made a 
> patch to restore it.
> 
> The patch only helps 10%.
>
> Mikulas
> 
> 
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub
> > 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> > 
> > 
> > --
> > dm-devel mailing list
> > dm-devel@redhat.com
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >   
>
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Lukas Straub April 3, 2020, 11:58 a.m. UTC | #7
On Mon, 30 Mar 2020 12:34:04 -0400 (EDT)
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I tested it on my rotational disk:
> 
> 
> On Thu, 27 Feb 2020, Lukas Straub wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s  
> 
> I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).
> 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s  
> 
> I get 110 MB/s.
> 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
> these numbers:
> without the patch: 101 MB/s

I get 86.5 MB/s.

So in my case it's worth it and as you saw it doesn't (negatively) affect other cases.

Regards,
Lukas Straub

> with the patch: 101 MB/s
> 
> So, you crippled performance with "--buffer-sectors=1" and then made a 
> patch to restore it.
> 
> The patch only helps 10%.
>
> Mikulas
> 
> 
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub
> > 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> > 
> > 
> > --
> > dm-devel mailing list
> > dm-devel@redhat.com
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >   
>
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index b225b3e445fa..a6d3cf1406df 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1309,9 +1309,17 @@  static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
 		if (unlikely(r))
 			return r;

-		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
-		if (IS_ERR(data))
-			return PTR_ERR(data);
+		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
+		if (op == TAG_WRITE && *metadata_offset == 0 \
+			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
+			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		} else {
+			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}

 		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
 		dp = data + *metadata_offset;
@@ -1514,6 +1522,8 @@  static void integrity_metadata(struct work_struct *w)
 {
 	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
 	struct dm_integrity_c *ic = dio->ic;
+	unsigned sectors_to_process = dio->range.n_sectors;
+	sector_t sector = dio->range.logical_sector;

 	int r;

@@ -1522,16 +1532,14 @@  static void integrity_metadata(struct work_struct *w)
 		struct bio_vec bv;
 		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
 		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
-		char *checksums;
+		char *checksums, *checksums_ptr;
 		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
 		char checksums_onstack[HASH_MAX_DIGESTSIZE];
-		unsigned sectors_to_process = dio->range.n_sectors;
-		sector_t sector = dio->range.logical_sector;

 		if (unlikely(ic->mode == 'R'))
 			goto skip_io;

-		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
+		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
 				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
 		if (!checksums) {
 			checksums = checksums_onstack;
@@ -1542,49 +1550,45 @@  static void integrity_metadata(struct work_struct *w)
 			}
 		}

+		checksums_ptr = checksums;
 		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
 			unsigned pos;
-			char *mem, *checksums_ptr;
-
-again:
+			char *mem;
 			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
 			pos = 0;
-			checksums_ptr = checksums;
 			do {
 				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
-				checksums_ptr += ic->tag_size;
-				sectors_to_process -= ic->sectors_per_block;
+
+				if (likely(checksums != checksums_onstack)) {
+					checksums_ptr += ic->tag_size;
+				} else {
+					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
+					if (unlikely(r))
+						goto internal_hash_error;
+				}
+
 				pos += ic->sectors_per_block << SECTOR_SHIFT;
 				sector += ic->sectors_per_block;
-			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
+				sectors_to_process -= ic->sectors_per_block;
+			} while (pos < bv.bv_len && sectors_to_process);
 			kunmap_atomic(mem);

-			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
-						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
-			if (unlikely(r)) {
-				if (r > 0) {
-					DMERR_LIMIT("Checksum failed at sector 0x%llx",
-						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
-					r = -EILSEQ;
-					atomic64_inc(&ic->number_of_mismatches);
-				}
-				if (likely(checksums != checksums_onstack))
-					kfree(checksums);
-				goto error;
-			}
-
 			if (!sectors_to_process)
 				break;
+		}

-			if (unlikely(pos < bv.bv_len)) {
-				bv.bv_offset += pos;
-				bv.bv_len -= pos;
-				goto again;
+		if (likely(checksums != checksums_onstack)) {
+			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
+						!dio->write ? TAG_CMP : TAG_WRITE);
+			if (unlikely(r)) {
+				kfree(checksums);
+				goto internal_hash_error;
 			}
+			kfree(checksums);
 		}

-		if (likely(checksums != checksums_onstack))
-			kfree(checksums);
 	} else {
 		struct bio_integrity_payload *bip = dio->orig_bi_integrity;

@@ -1615,6 +1619,13 @@  static void integrity_metadata(struct work_struct *w)
 skip_io:
 	dec_in_flight(dio);
 	return;
+internal_hash_error:
+	if (r > 0) {
+		DMERR_LIMIT("Checksum failed at sector 0x%llx",
+				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
+		r = -EILSEQ;
+		atomic64_inc(&ic->number_of_mismatches);
+	}
 error:
 	dio->bi_status = errno_to_blk_status(r);
 	dec_in_flight(dio);
@@ -3019,6 +3030,8 @@  static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
 		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
 		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
 	}
+
+	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
 }

 static void calculate_journal_section_size(struct dm_integrity_c *ic)