
[RFC,3/3] md: dm-crypt: Introduce the bulk mode method when sending request

Message ID 70f051223cf882b03b373369b6b65b4a7ebf07b6.1464144791.git.baolin.wang@linaro.org (mailing list archive)
State New, archived

Commit Message

Baolin Wang May 25, 2016, 6:12 a.m. UTC
In the current dm-crypt code, mapping one segment (always one sector) of a
bio with just a single scatterlist at a time is inefficient for a hardware
crypto engine. This is especially true for encryption modes (such as ecb or
xts) that only need one initial IV, or a null IV, rather than a different IV
for each sector. In that case we can map the whole bio with multiple
scatterlists and send all of them to the crypto engine in one request, which
improves the hardware engine's efficiency.

With this optimization, on my test setup (BeagleBone Black board) using 64KB
I/Os on an eMMC storage device, I saw about a 60% improvement in throughput
for encrypted writes and about a 100% improvement for encrypted reads. This
optimization is not suitable for modes that need a different IV for each
sector.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
---
 drivers/md/dm-crypt.c |  188 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 176 insertions(+), 12 deletions(-)

Comments

Mike Snitzer May 26, 2016, 2:04 p.m. UTC | #1
Comments inlined.

In general the most concerning bit is the need for memory allocation in
the IO path (see comment/question below near call to sg_alloc_table).
In DM targets we make heavy use of .ctr preallocated memory and/or
per-bio-data to avoid memory allocations in the IO path.
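
For reference, the per-bio-data pattern looks roughly like this in existing
DM targets (simplified sketch; sizing details and error handling are elided,
and scratch_size stands in for whatever extra per-IO space is needed):

        /* .ctr: size the per-bio area once, at table load time */
        cc->per_bio_data_size = ti->per_io_data_size =
                ALIGN(sizeof(struct dm_crypt_io) + scratch_size,
                      ARCH_KMALLOC_MINALIGN);

        /* .map: no allocation in the IO path, the space travels with the bio */
        struct dm_crypt_io *io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));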

On Wed, May 25 2016 at  2:12am -0400,
Baolin Wang <baolin.wang@linaro.org> wrote:

> In the current dm-crypt code, mapping one segment (always one sector) of a
> bio with just a single scatterlist at a time is inefficient for a hardware
> crypto engine. This is especially true for encryption modes (such as ecb or
> xts) that only need one initial IV, or a null IV, rather than a different IV
> for each sector. In that case we can map the whole bio with multiple
> scatterlists and send all of them to the crypto engine in one request, which
> improves the hardware engine's efficiency.
> 
> With this optimization, on my test setup (BeagleBone Black board) using 64KB
> I/Os on an eMMC storage device, I saw about a 60% improvement in throughput
> for encrypted writes and about a 100% improvement for encrypted reads. This
> optimization is not suitable for modes that need a different IV for each
> sector.
> 
> Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
> ---
>  drivers/md/dm-crypt.c |  188 +++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 176 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> index 4f3cb35..1c86ea7 100644
> --- a/drivers/md/dm-crypt.c
> +++ b/drivers/md/dm-crypt.c
> @@ -33,6 +33,7 @@
>  #include <linux/device-mapper.h>
>  
>  #define DM_MSG_PREFIX "crypt"
> +#define DM_MAX_SG_LIST	1024
>  
>  /*
>   * context holding the current state of a multi-part conversion
> @@ -46,6 +47,8 @@ struct convert_context {
>  	sector_t cc_sector;
>  	atomic_t cc_pending;
>  	struct skcipher_request *req;
> +	struct sg_table sgt_in;
> +	struct sg_table sgt_out;
>  };
>  
>  /*
> @@ -803,6 +806,108 @@ static struct crypt_iv_operations crypt_iv_tcw_ops = {
>  	.post	   = crypt_iv_tcw_post
>  };
>  
> +/*
> + * Check how many sg entry numbers are needed when map one bio
> + * with scatterlists in advance.
> + */
> +static unsigned int crypt_sg_entry(struct bio *bio_t)
> +{
> +	struct request_queue *q = bdev_get_queue(bio_t->bi_bdev);
> +	int cluster = blk_queue_cluster(q);
> +	struct bio_vec bvec, bvprv = { NULL };
> +	struct bvec_iter biter;
> +	unsigned long nbytes = 0, sg_length = 0;
> +	unsigned int sg_cnt = 0, first_bvec = 0;
> +
> +	if (bio_t->bi_rw & REQ_DISCARD) {
> +		if (bio_t->bi_vcnt)
> +			return 1;
> +		return 0;
> +	}
> +
> +	if (bio_t->bi_rw & REQ_WRITE_SAME)
> +		return 1;
> +
> +	bio_for_each_segment(bvec, bio_t, biter) {
> +		nbytes = bvec.bv_len;
> +
> +		if (!cluster) {
> +			sg_cnt++;
> +			continue;
> +		}
> +
> +		if (!first_bvec) {
> +			first_bvec = 1;
> +			goto new_segment;
> +		}
> +
> +		if (sg_length + nbytes > queue_max_segment_size(q))
> +			goto new_segment;
> +
> +		if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec))
> +			goto new_segment;
> +
> +		if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bvec))
> +			goto new_segment;
> +
> +		sg_length += nbytes;
> +		continue;
> +
> +new_segment:
> +		memcpy(&bvprv, &bvec, sizeof(struct bio_vec));
> +		sg_length = nbytes;
> +		sg_cnt++;
> +	}
> +
> +	return sg_cnt;
> +}
> +
> +static int crypt_convert_alloc_table(struct crypt_config *cc,
> +				     struct convert_context *ctx)
> +{
> +	struct bio *bio_in = ctx->bio_in;
> +	struct bio *bio_out = ctx->bio_out;
> +	unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));

please use: bool bulk_mode = ...

> +	unsigned int sg_in_max, sg_out_max;
> +	int ret = 0;
> +
> +	if (!mode)
> +		goto out2;

Please use more descriptive label names than out[1-3]

> +
> +	/*
> +	 * Need to calculate how many sg entry need to be used
> +	 * for this bio.
> +	 */
> +	sg_in_max = crypt_sg_entry(bio_in) + 1;

The return from crypt_sg_entry() is pretty awkward, given that you just go
on to add 1, as is the bounds checking. The magic value of 2 needs to be
made clearer.
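
One way to make the intent explicit is a named threshold along these lines
(illustrative only; the constant and label names are made up):

        /* bulk mapping only pays off when the bio spans more than one segment */
        #define DM_BULK_MIN_SG_ENTRIES 2

        sg_in_max = crypt_sg_entry(bio_in) + 1; /* +1 for a terminating entry */
        if (sg_in_max > DM_MAX_SG_LIST || sg_in_max <= DM_BULK_MIN_SG_ENTRIES)
                goto out_skip_alloc;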

> +	if (sg_in_max > DM_MAX_SG_LIST || sg_in_max <= 2)
> +		goto out2;
> +
> +	ret = sg_alloc_table(&ctx->sgt_in, sg_in_max, GFP_KERNEL);

Is it safe to be using GFP_KERNEL here?  AFAIK this is in the IO mapping
path and we try to avoid memory allocations at all costs -- due to the
risk of deadlock when issuing IO to stacked block devices (dm-crypt
could be part of a much more elaborate IO stack).
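
If an allocation in this path cannot be avoided at all, GFP_NOIO would at
least keep memory reclaim triggered by the allocation from issuing new IO,
e.g. (sketch):

        ret = sg_alloc_table(&ctx->sgt_in, sg_in_max, GFP_NOIO);

But preallocating from .ctr, or carrying the space in per-bio-data, avoids
the allocation in the IO path entirely.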

> +	if (ret)
> +		goto out2;
> +
> +	if (bio_data_dir(bio_in) == READ)
> +		goto out1;
> +
> +	sg_out_max = crypt_sg_entry(bio_out) + 1;
> +	if (sg_out_max > DM_MAX_SG_LIST || sg_out_max <= 2)
> +		goto out3;
> +
> +	ret = sg_alloc_table(&ctx->sgt_out, sg_out_max, GFP_KERNEL);
> +	if (ret)
> +		goto out3;
> +
> +	return 0;
> +
> +out3:

out_free_table?

> +	sg_free_table(&ctx->sgt_in);
> +out2:

out_skip_alloc?

> +	ctx->sgt_in.orig_nents = 0;
> +out1:

out_skip_write?

> +	ctx->sgt_out.orig_nents = 0;
> +	return ret;
> +}
> +
>  static void crypt_convert_init(struct crypt_config *cc,
>  			       struct convert_context *ctx,
>  			       struct bio *bio_out, struct bio *bio_in,
> @@ -843,7 +948,13 @@ static int crypt_convert_block(struct crypt_config *cc,
>  {
>  	struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
>  	struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
> +	unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));

again please use: bool bulk_mode = ...

> +	struct bio *bio_in = ctx->bio_in;
> +	struct bio *bio_out = ctx->bio_out;
> +	unsigned int total_bytes = bio_in->bi_iter.bi_size;
>  	struct dm_crypt_request *dmreq;
> +	struct scatterlist *sg_in;
> +	struct scatterlist *sg_out;
>  	u8 *iv;
>  	int r;
>  
> @@ -852,16 +963,6 @@ static int crypt_convert_block(struct crypt_config *cc,
>  
>  	dmreq->iv_sector = ctx->cc_sector;
>  	dmreq->ctx = ctx;
> -	sg_init_table(&dmreq->sg_in, 1);
> -	sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
> -		    bv_in.bv_offset);
> -
> -	sg_init_table(&dmreq->sg_out, 1);
> -	sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
> -		    bv_out.bv_offset);
> -
> -	bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
> -	bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
>  
>  	if (cc->iv_gen_ops) {
>  		r = cc->iv_gen_ops->generator(cc, iv, dmreq);
> @@ -869,8 +970,63 @@ static int crypt_convert_block(struct crypt_config *cc,
>  			return r;
>  	}
>  
> -	skcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
> -				   1 << SECTOR_SHIFT, iv);
> +	if (mode && ctx->sgt_in.orig_nents > 0) {
> +		struct scatterlist *sg = NULL;
> +		unsigned int total_sg_in, total_sg_out;
> +
> +		total_sg_in = blk_bio_map_sg(bdev_get_queue(bio_in->bi_bdev),
> +					     bio_in, ctx->sgt_in.sgl, &sg);
> +		if ((total_sg_in <= 0) ||
> +		    (total_sg_in > ctx->sgt_in.orig_nents)) {
> +			DMERR("%s in sg map error %d, sg table nents[%d]\n",
> +			      __func__, total_sg_in, ctx->sgt_in.orig_nents);
> +			return -EINVAL;
> +		}
> +
> +		if (sg)
> +			sg_mark_end(sg);
> +
> +		ctx->iter_in.bi_size -= total_bytes;
> +		sg_in = ctx->sgt_in.sgl;
> +		sg_out = ctx->sgt_in.sgl;
> +
> +		if (bio_data_dir(bio_in) == READ)
> +			goto set_crypt;
> +
> +		sg = NULL;
> +		total_sg_out = blk_bio_map_sg(bdev_get_queue(bio_out->bi_bdev),
> +					      bio_out, ctx->sgt_out.sgl, &sg);
> +		if ((total_sg_out <= 0) ||
> +		    (total_sg_out > ctx->sgt_out.orig_nents)) {
> +			DMERR("%s out sg map error %d, sg table nents[%d]\n",
> +			      __func__, total_sg_out, ctx->sgt_out.orig_nents);
> +			return -EINVAL;
> +		}
> +
> +		if (sg)
> +			sg_mark_end(sg);
> +
> +		ctx->iter_out.bi_size -= total_bytes;
> +		sg_out = ctx->sgt_out.sgl;
> +	} else  {
> +		sg_init_table(&dmreq->sg_in, 1);
> +		sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
> +			    bv_in.bv_offset);
> +
> +		sg_init_table(&dmreq->sg_out, 1);
> +		sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
> +			    bv_out.bv_offset);
> +
> +		bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
> +		bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
> +
> +		sg_in = &dmreq->sg_in;
> +		sg_out = &dmreq->sg_out;
> +		total_bytes = 1 << SECTOR_SHIFT;
> +	}
> +
> +set_crypt:
> +	skcipher_request_set_crypt(req, sg_in, sg_out, total_bytes, iv);

Given how long this code has gotten I'd prefer to see this factored out
to a new setup method.
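
One possible shape for that, with the bulk path pulled into its own helper
(sketch only; the helper name and signature are illustrative):

        static int crypt_convert_bulk_setup(struct convert_context *ctx,
                                            struct scatterlist **sg_in,
                                            struct scatterlist **sg_out,
                                            unsigned int total_bytes)
        {
                struct scatterlist *sg = NULL;
                int nents;

                /* map the whole input bio onto the preallocated sg table */
                nents = blk_bio_map_sg(bdev_get_queue(ctx->bio_in->bi_bdev),
                                       ctx->bio_in, ctx->sgt_in.sgl, &sg);
                if (nents <= 0 || nents > ctx->sgt_in.orig_nents)
                        return -EINVAL;
                if (sg)
                        sg_mark_end(sg);

                ctx->iter_in.bi_size -= total_bytes;
                *sg_in = *sg_out = ctx->sgt_in.sgl;

                /* reads are decrypted in place, no separate output mapping */
                if (bio_data_dir(ctx->bio_in) == READ)
                        return 0;

                sg = NULL;
                nents = blk_bio_map_sg(bdev_get_queue(ctx->bio_out->bi_bdev),
                                       ctx->bio_out, ctx->sgt_out.sgl, &sg);
                if (nents <= 0 || nents > ctx->sgt_out.orig_nents)
                        return -EINVAL;
                if (sg)
                        sg_mark_end(sg);

                ctx->iter_out.bi_size -= total_bytes;
                *sg_out = ctx->sgt_out.sgl;
                return 0;
        }

crypt_convert_block() would then just pick between the bulk scatterlists and
the existing per-sector ones before calling skcipher_request_set_crypt().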

>  	if (bio_data_dir(ctx->bio_in) == WRITE)
>  		r = crypto_skcipher_encrypt(req);
> @@ -1081,6 +1237,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
>  	if (io->ctx.req)
>  		crypt_free_req(cc, io->ctx.req, base_bio);
>  
> +	sg_free_table(&io->ctx.sgt_in);
> +	sg_free_table(&io->ctx.sgt_out);
>  	base_bio->bi_error = error;
>  	bio_endio(base_bio);
>  }
> @@ -1312,6 +1470,9 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
>  	io->ctx.iter_out = clone->bi_iter;
>  
>  	sector += bio_sectors(clone);
> +	r = crypt_convert_alloc_table(cc, &io->ctx);
> +	if (r < 0)
> +		io->error = -EIO;
>  
>  	crypt_inc_pending(io);
>  	r = crypt_convert(cc, &io->ctx);
> @@ -1343,6 +1504,9 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
>  
>  	crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
>  			   io->sector);
> +	r = crypt_convert_alloc_table(cc, &io->ctx);
> +	if (r < 0)
> +		io->error = -EIO;
>  
>  	r = crypt_convert(cc, &io->ctx);
>  	if (r < 0)
> -- 
> 1.7.9.5
> 
Baolin Wang May 27, 2016, 6:03 a.m. UTC | #2
On 26 May 2016 at 22:04, Mike Snitzer <snitzer@redhat.com> wrote:
> Comments inlined.
>
> In general the most concerning bit is the need for memory allocation in
> the IO path (see comment/question below near call to sg_alloc_table).
> In DM targets we make heavy use of .ctr preallocated memory and/or
> per-bio-data to avoid memory allocations in the IO path.

Makes sense.

>
> On Wed, May 25 2016 at  2:12am -0400,
> Baolin Wang <baolin.wang@linaro.org> wrote:
>
>> In the current dm-crypt code, mapping one segment (always one sector) of a
>> bio with just a single scatterlist at a time is inefficient for a hardware
>> crypto engine. This is especially true for encryption modes (such as ecb or
>> xts) that only need one initial IV, or a null IV, rather than a different IV
>> for each sector. In that case we can map the whole bio with multiple
>> scatterlists and send all of them to the crypto engine in one request, which
>> improves the hardware engine's efficiency.
>>
>> With this optimization, on my test setup (BeagleBone Black board) using 64KB
>> I/Os on an eMMC storage device, I saw about a 60% improvement in throughput
>> for encrypted writes and about a 100% improvement for encrypted reads. This
>> optimization is not suitable for modes that need a different IV for each
>> sector.
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
>> ---
>>  drivers/md/dm-crypt.c |  188 +++++++++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 176 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
>> index 4f3cb35..1c86ea7 100644
>> --- a/drivers/md/dm-crypt.c
>> +++ b/drivers/md/dm-crypt.c
>> @@ -33,6 +33,7 @@
>>  #include <linux/device-mapper.h>
>>
>>  #define DM_MSG_PREFIX "crypt"
>> +#define DM_MAX_SG_LIST       1024
>>
>>  /*
>>   * context holding the current state of a multi-part conversion
>> @@ -46,6 +47,8 @@ struct convert_context {
>>       sector_t cc_sector;
>>       atomic_t cc_pending;
>>       struct skcipher_request *req;
>> +     struct sg_table sgt_in;
>> +     struct sg_table sgt_out;
>>  };
>>
>>  /*
>> @@ -803,6 +806,108 @@ static struct crypt_iv_operations crypt_iv_tcw_ops = {
>>       .post      = crypt_iv_tcw_post
>>  };
>>
>> +/*
>> + * Check how many sg entry numbers are needed when map one bio
>> + * with scatterlists in advance.
>> + */
>> +static unsigned int crypt_sg_entry(struct bio *bio_t)
>> +{
>> +     struct request_queue *q = bdev_get_queue(bio_t->bi_bdev);
>> +     int cluster = blk_queue_cluster(q);
>> +     struct bio_vec bvec, bvprv = { NULL };
>> +     struct bvec_iter biter;
>> +     unsigned long nbytes = 0, sg_length = 0;
>> +     unsigned int sg_cnt = 0, first_bvec = 0;
>> +
>> +     if (bio_t->bi_rw & REQ_DISCARD) {
>> +             if (bio_t->bi_vcnt)
>> +                     return 1;
>> +             return 0;
>> +     }
>> +
>> +     if (bio_t->bi_rw & REQ_WRITE_SAME)
>> +             return 1;
>> +
>> +     bio_for_each_segment(bvec, bio_t, biter) {
>> +             nbytes = bvec.bv_len;
>> +
>> +             if (!cluster) {
>> +                     sg_cnt++;
>> +                     continue;
>> +             }
>> +
>> +             if (!first_bvec) {
>> +                     first_bvec = 1;
>> +                     goto new_segment;
>> +             }
>> +
>> +             if (sg_length + nbytes > queue_max_segment_size(q))
>> +                     goto new_segment;
>> +
>> +             if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec))
>> +                     goto new_segment;
>> +
>> +             if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bvec))
>> +                     goto new_segment;
>> +
>> +             sg_length += nbytes;
>> +             continue;
>> +
>> +new_segment:
>> +             memcpy(&bvprv, &bvec, sizeof(struct bio_vec));
>> +             sg_length = nbytes;
>> +             sg_cnt++;
>> +     }
>> +
>> +     return sg_cnt;
>> +}
>> +
>> +static int crypt_convert_alloc_table(struct crypt_config *cc,
>> +                                  struct convert_context *ctx)
>> +{
>> +     struct bio *bio_in = ctx->bio_in;
>> +     struct bio *bio_out = ctx->bio_out;
>> +     unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));
>
> please use: bool bulk_mode = ...

OK.

>
>> +     unsigned int sg_in_max, sg_out_max;
>> +     int ret = 0;
>> +
>> +     if (!mode)
>> +             goto out2;
>
> Please use more descriptive label names than out[1-3]

OK.

>
>> +
>> +     /*
>> +      * Need to calculate how many sg entry need to be used
>> +      * for this bio.
>> +      */
>> +     sg_in_max = crypt_sg_entry(bio_in) + 1;
>
> The return from crypt_sg_entry() is pretty awkward, given that you just go
> on to add 1, as is the bounds checking. The magic value of 2 needs to be
> made clearer.

I'll remove the crypt_sg_entry() function.

>
>> +     if (sg_in_max > DM_MAX_SG_LIST || sg_in_max <= 2)
>> +             goto out2;
>> +
>> +     ret = sg_alloc_table(&ctx->sgt_in, sg_in_max, GFP_KERNEL);
>
> Is it safe to be using GFP_KERNEL here?  AFAIK this is in the IO mapping
> path and we try to avoid memory allocations at all costs -- due to the
> risk of deadlock when issuing IO to stacked block devices (dm-crypt
> could be part of a much more elaborate IO stack).

OK. I'll preallocate the sg tables in the .ctr function instead.
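
Something along these lines, perhaps (sketch only; cc->sg_pool would be a
new field, MIN_IOS is the floor dm-crypt already uses for its other mempools,
and returning the arrays to the pool instead of calling sg_free_table() is
not shown):

        /* .ctr: reserve worst-case scatterlist arrays up front */
        cc->sg_pool = mempool_create_kmalloc_pool(MIN_IOS,
                        DM_MAX_SG_LIST * sizeof(struct scatterlist));
        if (!cc->sg_pool)
                goto bad;

        /* IO path: take an array from the pool instead of sg_alloc_table() */
        ctx->sgt_in.sgl = mempool_alloc(cc->sg_pool, GFP_NOIO);
        sg_init_table(ctx->sgt_in.sgl, sg_in_max);
        ctx->sgt_in.nents = ctx->sgt_in.orig_nents = sg_in_max;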

>
>> +     if (ret)
>> +             goto out2;
>> +
>> +     if (bio_data_dir(bio_in) == READ)
>> +             goto out1;
>> +
>> +     sg_out_max = crypt_sg_entry(bio_out) + 1;
>> +     if (sg_out_max > DM_MAX_SG_LIST || sg_out_max <= 2)
>> +             goto out3;
>> +
>> +     ret = sg_alloc_table(&ctx->sgt_out, sg_out_max, GFP_KERNEL);
>> +     if (ret)
>> +             goto out3;
>> +
>> +     return 0;
>> +
>> +out3:
>
> out_free_table?
>
>> +     sg_free_table(&ctx->sgt_in);
>> +out2:
>
> out_skip_alloc?
>
>> +     ctx->sgt_in.orig_nents = 0;
>> +out1:
>
> out_skip_write?
>
>> +     ctx->sgt_out.orig_nents = 0;
>> +     return ret;
>> +}
>> +
>>  static void crypt_convert_init(struct crypt_config *cc,
>>                              struct convert_context *ctx,
>>                              struct bio *bio_out, struct bio *bio_in,
>> @@ -843,7 +948,13 @@ static int crypt_convert_block(struct crypt_config *cc,
>>  {
>>       struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
>>       struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
>> +     unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));
>
> again please use: bool bulk_mode = ...

OK.

>
>> +     struct bio *bio_in = ctx->bio_in;
>> +     struct bio *bio_out = ctx->bio_out;
>> +     unsigned int total_bytes = bio_in->bi_iter.bi_size;
>>       struct dm_crypt_request *dmreq;
>> +     struct scatterlist *sg_in;
>> +     struct scatterlist *sg_out;
>>       u8 *iv;
>>       int r;
>>
>> @@ -852,16 +963,6 @@ static int crypt_convert_block(struct crypt_config *cc,
>>
>>       dmreq->iv_sector = ctx->cc_sector;
>>       dmreq->ctx = ctx;
>> -     sg_init_table(&dmreq->sg_in, 1);
>> -     sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
>> -                 bv_in.bv_offset);
>> -
>> -     sg_init_table(&dmreq->sg_out, 1);
>> -     sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
>> -                 bv_out.bv_offset);
>> -
>> -     bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
>> -     bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
>>
>>       if (cc->iv_gen_ops) {
>>               r = cc->iv_gen_ops->generator(cc, iv, dmreq);
>> @@ -869,8 +970,63 @@ static int crypt_convert_block(struct crypt_config *cc,
>>                       return r;
>>       }
>>
>> -     skcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
>> -                                1 << SECTOR_SHIFT, iv);
>> +     if (mode && ctx->sgt_in.orig_nents > 0) {
>> +             struct scatterlist *sg = NULL;
>> +             unsigned int total_sg_in, total_sg_out;
>> +
>> +             total_sg_in = blk_bio_map_sg(bdev_get_queue(bio_in->bi_bdev),
>> +                                          bio_in, ctx->sgt_in.sgl, &sg);
>> +             if ((total_sg_in <= 0) ||
>> +                 (total_sg_in > ctx->sgt_in.orig_nents)) {
>> +                     DMERR("%s in sg map error %d, sg table nents[%d]\n",
>> +                           __func__, total_sg_in, ctx->sgt_in.orig_nents);
>> +                     return -EINVAL;
>> +             }
>> +
>> +             if (sg)
>> +                     sg_mark_end(sg);
>> +
>> +             ctx->iter_in.bi_size -= total_bytes;
>> +             sg_in = ctx->sgt_in.sgl;
>> +             sg_out = ctx->sgt_in.sgl;
>> +
>> +             if (bio_data_dir(bio_in) == READ)
>> +                     goto set_crypt;
>> +
>> +             sg = NULL;
>> +             total_sg_out = blk_bio_map_sg(bdev_get_queue(bio_out->bi_bdev),
>> +                                           bio_out, ctx->sgt_out.sgl, &sg);
>> +             if ((total_sg_out <= 0) ||
>> +                 (total_sg_out > ctx->sgt_out.orig_nents)) {
>> +                     DMERR("%s out sg map error %d, sg table nents[%d]\n",
>> +                           __func__, total_sg_out, ctx->sgt_out.orig_nents);
>> +                     return -EINVAL;
>> +             }
>> +
>> +             if (sg)
>> +                     sg_mark_end(sg);
>> +
>> +             ctx->iter_out.bi_size -= total_bytes;
>> +             sg_out = ctx->sgt_out.sgl;
>> +     } else  {
>> +             sg_init_table(&dmreq->sg_in, 1);
>> +             sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
>> +                         bv_in.bv_offset);
>> +
>> +             sg_init_table(&dmreq->sg_out, 1);
>> +             sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
>> +                         bv_out.bv_offset);
>> +
>> +             bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
>> +             bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
>> +
>> +             sg_in = &dmreq->sg_in;
>> +             sg_out = &dmreq->sg_out;
>> +             total_bytes = 1 << SECTOR_SHIFT;
>> +     }
>> +
>> +set_crypt:
>> +     skcipher_request_set_crypt(req, sg_in, sg_out, total_bytes, iv);
>
> Given how long this code has gotten I'd prefer to see this factored out
> to a new setup method.

I'll refactor this long function. Thanks for your comments.

>
>>       if (bio_data_dir(ctx->bio_in) == WRITE)
>>               r = crypto_skcipher_encrypt(req);
>> @@ -1081,6 +1237,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
>>       if (io->ctx.req)
>>               crypt_free_req(cc, io->ctx.req, base_bio);
>>
>> +     sg_free_table(&io->ctx.sgt_in);
>> +     sg_free_table(&io->ctx.sgt_out);
>>       base_bio->bi_error = error;
>>       bio_endio(base_bio);
>>  }
>> @@ -1312,6 +1470,9 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
>>       io->ctx.iter_out = clone->bi_iter;
>>
>>       sector += bio_sectors(clone);
>> +     r = crypt_convert_alloc_table(cc, &io->ctx);
>> +     if (r < 0)
>> +             io->error = -EIO;
>>
>>       crypt_inc_pending(io);
>>       r = crypt_convert(cc, &io->ctx);
>> @@ -1343,6 +1504,9 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
>>
>>       crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
>>                          io->sector);
>> +     r = crypt_convert_alloc_table(cc, &io->ctx);
>> +     if (r < 0)
>> +             io->error = -EIO;
>>
>>       r = crypt_convert(cc, &io->ctx);
>>       if (r < 0)
>> --
>> 1.7.9.5
>>

Patch

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4f3cb35..1c86ea7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -33,6 +33,7 @@ 
 #include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
+#define DM_MAX_SG_LIST	1024
 
 /*
  * context holding the current state of a multi-part conversion
@@ -46,6 +47,8 @@  struct convert_context {
 	sector_t cc_sector;
 	atomic_t cc_pending;
 	struct skcipher_request *req;
+	struct sg_table sgt_in;
+	struct sg_table sgt_out;
 };
 
 /*
@@ -803,6 +806,108 @@  static struct crypt_iv_operations crypt_iv_tcw_ops = {
 	.post	   = crypt_iv_tcw_post
 };
 
+/*
+ * Check how many sg entry numbers are needed when map one bio
+ * with scatterlists in advance.
+ */
+static unsigned int crypt_sg_entry(struct bio *bio_t)
+{
+	struct request_queue *q = bdev_get_queue(bio_t->bi_bdev);
+	int cluster = blk_queue_cluster(q);
+	struct bio_vec bvec, bvprv = { NULL };
+	struct bvec_iter biter;
+	unsigned long nbytes = 0, sg_length = 0;
+	unsigned int sg_cnt = 0, first_bvec = 0;
+
+	if (bio_t->bi_rw & REQ_DISCARD) {
+		if (bio_t->bi_vcnt)
+			return 1;
+		return 0;
+	}
+
+	if (bio_t->bi_rw & REQ_WRITE_SAME)
+		return 1;
+
+	bio_for_each_segment(bvec, bio_t, biter) {
+		nbytes = bvec.bv_len;
+
+		if (!cluster) {
+			sg_cnt++;
+			continue;
+		}
+
+		if (!first_bvec) {
+			first_bvec = 1;
+			goto new_segment;
+		}
+
+		if (sg_length + nbytes > queue_max_segment_size(q))
+			goto new_segment;
+
+		if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec))
+			goto new_segment;
+
+		if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bvec))
+			goto new_segment;
+
+		sg_length += nbytes;
+		continue;
+
+new_segment:
+		memcpy(&bvprv, &bvec, sizeof(struct bio_vec));
+		sg_length = nbytes;
+		sg_cnt++;
+	}
+
+	return sg_cnt;
+}
+
+static int crypt_convert_alloc_table(struct crypt_config *cc,
+				     struct convert_context *ctx)
+{
+	struct bio *bio_in = ctx->bio_in;
+	struct bio *bio_out = ctx->bio_out;
+	unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));
+	unsigned int sg_in_max, sg_out_max;
+	int ret = 0;
+
+	if (!mode)
+		goto out2;
+
+	/*
+	 * Need to calculate how many sg entry need to be used
+	 * for this bio.
+	 */
+	sg_in_max = crypt_sg_entry(bio_in) + 1;
+	if (sg_in_max > DM_MAX_SG_LIST || sg_in_max <= 2)
+		goto out2;
+
+	ret = sg_alloc_table(&ctx->sgt_in, sg_in_max, GFP_KERNEL);
+	if (ret)
+		goto out2;
+
+	if (bio_data_dir(bio_in) == READ)
+		goto out1;
+
+	sg_out_max = crypt_sg_entry(bio_out) + 1;
+	if (sg_out_max > DM_MAX_SG_LIST || sg_out_max <= 2)
+		goto out3;
+
+	ret = sg_alloc_table(&ctx->sgt_out, sg_out_max, GFP_KERNEL);
+	if (ret)
+		goto out3;
+
+	return 0;
+
+out3:
+	sg_free_table(&ctx->sgt_in);
+out2:
+	ctx->sgt_in.orig_nents = 0;
+out1:
+	ctx->sgt_out.orig_nents = 0;
+	return ret;
+}
+
 static void crypt_convert_init(struct crypt_config *cc,
 			       struct convert_context *ctx,
 			       struct bio *bio_out, struct bio *bio_in,
@@ -843,7 +948,13 @@  static int crypt_convert_block(struct crypt_config *cc,
 {
 	struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
 	struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
+	unsigned int mode = skcipher_is_bulk_mode(any_tfm(cc));
+	struct bio *bio_in = ctx->bio_in;
+	struct bio *bio_out = ctx->bio_out;
+	unsigned int total_bytes = bio_in->bi_iter.bi_size;
 	struct dm_crypt_request *dmreq;
+	struct scatterlist *sg_in;
+	struct scatterlist *sg_out;
 	u8 *iv;
 	int r;
 
@@ -852,16 +963,6 @@  static int crypt_convert_block(struct crypt_config *cc,
 
 	dmreq->iv_sector = ctx->cc_sector;
 	dmreq->ctx = ctx;
-	sg_init_table(&dmreq->sg_in, 1);
-	sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
-		    bv_in.bv_offset);
-
-	sg_init_table(&dmreq->sg_out, 1);
-	sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
-		    bv_out.bv_offset);
-
-	bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
-	bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
 
 	if (cc->iv_gen_ops) {
 		r = cc->iv_gen_ops->generator(cc, iv, dmreq);
@@ -869,8 +970,63 @@  static int crypt_convert_block(struct crypt_config *cc,
 			return r;
 	}
 
-	skcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
-				   1 << SECTOR_SHIFT, iv);
+	if (mode && ctx->sgt_in.orig_nents > 0) {
+		struct scatterlist *sg = NULL;
+		unsigned int total_sg_in, total_sg_out;
+
+		total_sg_in = blk_bio_map_sg(bdev_get_queue(bio_in->bi_bdev),
+					     bio_in, ctx->sgt_in.sgl, &sg);
+		if ((total_sg_in <= 0) ||
+		    (total_sg_in > ctx->sgt_in.orig_nents)) {
+			DMERR("%s in sg map error %d, sg table nents[%d]\n",
+			      __func__, total_sg_in, ctx->sgt_in.orig_nents);
+			return -EINVAL;
+		}
+
+		if (sg)
+			sg_mark_end(sg);
+
+		ctx->iter_in.bi_size -= total_bytes;
+		sg_in = ctx->sgt_in.sgl;
+		sg_out = ctx->sgt_in.sgl;
+
+		if (bio_data_dir(bio_in) == READ)
+			goto set_crypt;
+
+		sg = NULL;
+		total_sg_out = blk_bio_map_sg(bdev_get_queue(bio_out->bi_bdev),
+					      bio_out, ctx->sgt_out.sgl, &sg);
+		if ((total_sg_out <= 0) ||
+		    (total_sg_out > ctx->sgt_out.orig_nents)) {
+			DMERR("%s out sg map error %d, sg table nents[%d]\n",
+			      __func__, total_sg_out, ctx->sgt_out.orig_nents);
+			return -EINVAL;
+		}
+
+		if (sg)
+			sg_mark_end(sg);
+
+		ctx->iter_out.bi_size -= total_bytes;
+		sg_out = ctx->sgt_out.sgl;
+	} else  {
+		sg_init_table(&dmreq->sg_in, 1);
+		sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
+			    bv_in.bv_offset);
+
+		sg_init_table(&dmreq->sg_out, 1);
+		sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
+			    bv_out.bv_offset);
+
+		bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
+		bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
+
+		sg_in = &dmreq->sg_in;
+		sg_out = &dmreq->sg_out;
+		total_bytes = 1 << SECTOR_SHIFT;
+	}
+
+set_crypt:
+	skcipher_request_set_crypt(req, sg_in, sg_out, total_bytes, iv);
 
 	if (bio_data_dir(ctx->bio_in) == WRITE)
 		r = crypto_skcipher_encrypt(req);
@@ -1081,6 +1237,8 @@  static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (io->ctx.req)
 		crypt_free_req(cc, io->ctx.req, base_bio);
 
+	sg_free_table(&io->ctx.sgt_in);
+	sg_free_table(&io->ctx.sgt_out);
 	base_bio->bi_error = error;
 	bio_endio(base_bio);
 }
@@ -1312,6 +1470,9 @@  static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	io->ctx.iter_out = clone->bi_iter;
 
 	sector += bio_sectors(clone);
+	r = crypt_convert_alloc_table(cc, &io->ctx);
+	if (r < 0)
+		io->error = -EIO;
 
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, &io->ctx);
@@ -1343,6 +1504,9 @@  static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 
 	crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
 			   io->sector);
+	r = crypt_convert_alloc_table(cc, &io->ctx);
+	if (r < 0)
+		io->error = -EIO;
 
 	r = crypt_convert(cc, &io->ctx);
 	if (r < 0)