Message ID | 20220915164826.1396245-3-sarthakkukreti@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce provisioning primitives for thinly provisioned storage | expand |
On Thu, Sep 15 2022 at 12:48P -0400, Sarthak Kukreti <sarthakkukreti@chromium.org> wrote: > From: Sarthak Kukreti <sarthakkukreti@chromium.org> > > Add support to dm devices for REQ_OP_PROVISION. The default mode > is to pass through the request and dm-thin will utilize it to provision > blocks. > > Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org> > --- > drivers/md/dm-crypt.c | 4 +- > drivers/md/dm-linear.c | 1 + > drivers/md/dm-table.c | 17 +++++++ > drivers/md/dm-thin.c | 86 +++++++++++++++++++++++++++++++++-- > drivers/md/dm.c | 4 ++ > include/linux/device-mapper.h | 6 +++ > 6 files changed, 113 insertions(+), 5 deletions(-) > > diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c > index 159c6806c19b..357f0899cfb6 100644 > --- a/drivers/md/dm-crypt.c > +++ b/drivers/md/dm-crypt.c > @@ -3081,6 +3081,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar > if (ret) > return ret; > > + ti->num_provision_bios = 1; > + > while (opt_params--) { > opt_string = dm_shift_arg(&as); > if (!opt_string) { > @@ -3384,7 +3386,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) > * - for REQ_OP_DISCARD caller must use flush if IO ordering matters > */ > if (unlikely(bio->bi_opf & REQ_PREFLUSH || > - bio_op(bio) == REQ_OP_DISCARD)) { > + bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_PROVISION)) { > bio_set_dev(bio, cc->dev->bdev); > if (bio_sectors(bio)) > bio->bi_iter.bi_sector = cc->start + > diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c > index 3212ef6aa81b..1aa782149428 100644 > --- a/drivers/md/dm-linear.c > +++ b/drivers/md/dm-linear.c > @@ -61,6 +61,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) > ti->num_discard_bios = 1; > ti->num_secure_erase_bios = 1; > ti->num_write_zeroes_bios = 1; > + ti->num_provision_bios = 1; > ti->private = lc; > return 0; > > diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c > index 332f96b58252..b7f9cb66b7ba 
100644 > --- a/drivers/md/dm-table.c > +++ b/drivers/md/dm-table.c > @@ -1853,6 +1853,18 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t) > return true; > } > > +static bool dm_table_supports_provision(struct dm_table *t) > +{ > + for (unsigned int i = 0; i < t->num_targets; i++) { > + struct dm_target *ti = dm_table_get_target(t, i); > + > + if (ti->num_provision_bios) > + return true; > + } > + > + return false; > +} > + This needs to go a step further and verify a device in the stack actually services REQ_OP_PROVISION. Please see dm_table_supports_discards(): it iterates all devices in the table and checks that support is advertised. For discard, DM requires that _all_ devices in a table advertise support (that is pretty strict and likely could be relaxed to _any_). You'll need ti->provision_supported (like ->discards_supported) to advertise that actual support is provided by dm-thinp (even if underlying devices don't support it). And yeah, dm-thinp passdown support for REQ_OP_PROVISION can follow later as needed (if there is actual HW that would benefit from REQ_OP_PROVISION). Mike
On Fri, Sep 23, 2022 at 7:23 AM Mike Snitzer <snitzer@redhat.com> wrote: > > On Thu, Sep 15 2022 at 12:48P -0400, > Sarthak Kukreti <sarthakkukreti@chromium.org> wrote: > > > From: Sarthak Kukreti <sarthakkukreti@chromium.org> > > > > Add support to dm devices for REQ_OP_PROVISION. The default mode > > is to pass through the request and dm-thin will utilize it to provision > > blocks. > > > > Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org> > > --- > > drivers/md/dm-crypt.c | 4 +- > > drivers/md/dm-linear.c | 1 + > > drivers/md/dm-table.c | 17 +++++++ > > drivers/md/dm-thin.c | 86 +++++++++++++++++++++++++++++++++-- > > drivers/md/dm.c | 4 ++ > > include/linux/device-mapper.h | 6 +++ > > 6 files changed, 113 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c > > index 159c6806c19b..357f0899cfb6 100644 > > --- a/drivers/md/dm-crypt.c > > +++ b/drivers/md/dm-crypt.c > > @@ -3081,6 +3081,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar > > if (ret) > > return ret; > > > > + ti->num_provision_bios = 1; > > + > > while (opt_params--) { > > opt_string = dm_shift_arg(&as); > > if (!opt_string) { > > @@ -3384,7 +3386,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) > > * - for REQ_OP_DISCARD caller must use flush if IO ordering matters > > */ > > if (unlikely(bio->bi_opf & REQ_PREFLUSH || > > - bio_op(bio) == REQ_OP_DISCARD)) { > > + bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_PROVISION)) { > > bio_set_dev(bio, cc->dev->bdev); > > if (bio_sectors(bio)) > > bio->bi_iter.bi_sector = cc->start + > > diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c > > index 3212ef6aa81b..1aa782149428 100644 > > --- a/drivers/md/dm-linear.c > > +++ b/drivers/md/dm-linear.c > > @@ -61,6 +61,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) > > ti->num_discard_bios = 1; > > ti->num_secure_erase_bios = 1; > > ti->num_write_zeroes_bios 
= 1; > > + ti->num_provision_bios = 1; > > ti->private = lc; > > return 0; > > > > diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c > > index 332f96b58252..b7f9cb66b7ba 100644 > > --- a/drivers/md/dm-table.c > > +++ b/drivers/md/dm-table.c > > @@ -1853,6 +1853,18 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t) > > return true; > > } > > > > +static bool dm_table_supports_provision(struct dm_table *t) > > +{ > > + for (unsigned int i = 0; i < t->num_targets; i++) { > > + struct dm_target *ti = dm_table_get_target(t, i); > > + > > + if (ti->num_provision_bios) > > + return true; > > + } > > + > > + return false; > > +} > > + > > This needs to go a step further and verify a device in the stack > actually services REQ_OP_PROVISION. > > Please see dm_table_supports_discards(): it iterates all devices in > the table and checks that support is advertised. > > For discard, DM requires that _all_ devices in a table advertise > support (that is pretty strict and likely could be relaxed to _any_). > > You'll need ti->provision_supported (like ->discards_supported) to > advertise that actual support is provided by dm-thinp (even if underlying > devices don't support it). > > And yeah, dm-thinp passdown support for REQ_OP_PROVISION can follow > later as needed (if there is actual HW that would benefit from > REQ_OP_PROVISION). > Done, thanks (the provision support, not the passdown)! I think the one case where passdown might help is to build images with dm-thinp already set up on one of the partitions (I have something in the works for ChromiumOS images to do VM tests with preset state :)). That would allow us to preallocate space for thin logical volumes inside the image file. > Mike >
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 159c6806c19b..357f0899cfb6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3081,6 +3081,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar if (ret) return ret; + ti->num_provision_bios = 1; + while (opt_params--) { opt_string = dm_shift_arg(&as); if (!opt_string) { @@ -3384,7 +3386,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) * - for REQ_OP_DISCARD caller must use flush if IO ordering matters */ if (unlikely(bio->bi_opf & REQ_PREFLUSH || - bio_op(bio) == REQ_OP_DISCARD)) { + bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_PROVISION)) { bio_set_dev(bio, cc->dev->bdev); if (bio_sectors(bio)) bio->bi_iter.bi_sector = cc->start + diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3212ef6aa81b..1aa782149428 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -61,6 +61,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_bios = 1; ti->num_secure_erase_bios = 1; ti->num_write_zeroes_bios = 1; + ti->num_provision_bios = 1; ti->private = lc; return 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 332f96b58252..b7f9cb66b7ba 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1853,6 +1853,18 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t) return true; } +static bool dm_table_supports_provision(struct dm_table *t) +{ + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (ti->num_provision_bios) + return true; + } + + return false; +} + static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1989,6 +2001,11 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_write_zeroes(t)) q->limits.max_write_zeroes_sectors = 0; + if 
(dm_table_supports_provision(t)) + blk_queue_max_provision_sectors(q, UINT_MAX >> 9); + else + q->limits.max_provision_sectors = 0; + dm_table_verify_integrity(t); /* diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e76c96c760a9..fd3eb306c823 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -908,7 +908,8 @@ static void __inc_remap_and_issue_cell(void *context, struct bio *bio; while ((bio = bio_list_pop(&cell->bios))) { - if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) + if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_PROVISION) bio_list_add(&info->defer_bios, bio); else { inc_all_io_entry(info->tc->pool, bio); @@ -1012,6 +1013,9 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) goto out; } + if (bio && bio_op(bio) == REQ_OP_PROVISION) + return; + /* * Release any bios held while the block was being provisioned. * If we are processing a write bio that completely covers the block, @@ -1388,6 +1392,9 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, m->data_block = data_block; m->cell = cell; + if (bio && bio_op(bio) == REQ_OP_PROVISION) + m->bio = bio; + /* * If the whole block of data is being overwritten or we are not * zeroing pre-existing data, we can issue the bio immediately. @@ -1897,7 +1904,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block /* * Fill read bios with zeroes and complete them immediately. 
*/ - if (bio_data_dir(bio) == READ) { + if (bio_data_dir(bio) == READ && bio_op(bio) != REQ_OP_PROVISION) { zero_fill_bio(bio); cell_defer_no_holder(tc, cell); bio_endio(bio); @@ -1980,6 +1987,69 @@ static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) } } +static void process_provision_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) +{ + int r; + struct pool *pool = tc->pool; + struct bio *bio = cell->holder; + dm_block_t begin, end; + struct dm_thin_lookup_result lookup_result; + + if (tc->requeue_mode) { + cell_requeue(pool, cell); + return; + } + + get_bio_block_range(tc, bio, &begin, &end); + + while (begin != end) { + r = ensure_next_mapping(pool); + if (r) + /* we did our best */ + return; + + r = dm_thin_find_block(tc->td, begin, 1, &lookup_result); + switch (r) { + case 0: + begin++; + break; + case -ENODATA: + provision_block(tc, bio, begin, cell); + begin++; + break; + default: + DMERR_LIMIT( + "%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); + bio_io_error(bio); + begin++; + break; + } + } + bio_endio(bio); + cell_defer_no_holder(tc, cell); +} + +static void process_provision_bio(struct thin_c *tc, struct bio *bio) +{ + dm_block_t begin, end; + struct dm_cell_key virt_key; + struct dm_bio_prison_cell *virt_cell; + + get_bio_block_range(tc, bio, &begin, &end); + if (begin == end) { + bio_endio(bio); + return; + } + + build_key(tc->td, VIRTUAL, begin, end, &virt_key); + if (bio_detain(tc->pool, &virt_key, bio, &virt_cell)) + return; + + process_provision_cell(tc, virt_cell); +} + static void process_bio(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -2024,7 +2094,7 @@ static void __process_bio_read_only(struct thin_c *tc, struct bio *bio, case -ENODATA: if (cell) cell_defer_no_holder(tc, cell); - if (rw != READ) { + if (rw != READ || bio_op(bio) == REQ_OP_PROVISION) { handle_unserviceable_bio(tc->pool, bio); break; } @@ -2200,6 +2270,8 @@ static void 
process_thin_deferred_bios(struct thin_c *tc) if (bio_op(bio) == REQ_OP_DISCARD) pool->process_discard(tc, bio); + else if (bio_op(bio) == REQ_OP_PROVISION) + process_provision_bio(tc, bio); else pool->process_bio(tc, bio); @@ -2716,7 +2788,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } - if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) { + if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_PROVISION) { thin_defer_bio_with_throttle(tc, bio); return DM_MAPIO_SUBMITTED; } @@ -3353,6 +3426,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_bios = 1; + ti->num_provision_bios = 1; /* * Only need to enable discards if the pool should pass @@ -4043,6 +4117,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); } + /* * pt->adjusted_pf is a staging area for the actual features to use. 
* They get transferred to the live pool in bind_control_target() @@ -4233,6 +4308,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_discard_bios = 1; } + ti->num_provision_bios = 1; + mutex_unlock(&dm_thin_pool_table.mutex); spin_lock_irq(&tc->pool->lock); @@ -4447,6 +4524,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */ + limits->max_provision_sectors = 2048 * 1024 * 16; /* 16G */ } static struct target_type thin_target = { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 60549b65c799..3fe524800f5a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1600,6 +1600,7 @@ static bool is_abnormal_io(struct bio *bio) case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: + case REQ_OP_PROVISION: return true; default: break; @@ -1624,6 +1625,9 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; break; + case REQ_OP_PROVISION: + num_bios = ti->num_provision_bios; + break; default: break; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 04c6acf7faaa..edeb47195b6f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -333,6 +333,12 @@ struct dm_target { */ unsigned num_write_zeroes_bios; + /* + * The number of PROVISION bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. + */ + unsigned num_provision_bios; + /* * The minimum number of extra bytes allocated in each io for the * target to use.