@@ -1115,9 +1115,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
struct bfq_group *bfqg;
u64 v;
- ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
if (ret)
- return ret;
+ goto out;
if (sscanf(ctx.body, "%llu", &v) == 1) {
/* require "default" on dfl */
@@ -1139,7 +1141,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
ret = 0;
}
out:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
@@ -626,69 +626,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
/**
- * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
- * @inputp: input string pointer
+ * blkg_conf_init - initialize a blkg_conf_ctx
+ * @ctx: blkg_conf_ctx to initialize
+ * @input: input string
*
- * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
- * from @input and get and return the matching bdev. *@inputp is
- * updated to point past the device node prefix. Returns an ERR_PTR()
- * value on error.
+ * Initialize @ctx which can be used to parse blkg config input string @input.
+ * Once initialized, @ctx can be used with blkg_conf_open_bdev() and
+ * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
+ */
+void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
+{
+ *ctx = (struct blkg_conf_ctx){ .input = input };
+}
+EXPORT_SYMBOL_GPL(blkg_conf_init);
+
+/**
+ * blkg_conf_open_bdev - parse and open bdev for per-blkg config update
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
- * Use this function iff blkg_conf_prep() can't be used for some reason.
+ * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
+ * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
+ * set to point past the device node prefix.
+ *
+ * This function may be called multiple times on @ctx and the extra calls become
+ * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
+ * explicitly if bdev access is needed without resolving the blkcg / policy part
+ * of @ctx->input. Returns -errno on error.
*/
-struct block_device *blkcg_conf_open_bdev(char **inputp)
+int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
{
- char *input = *inputp;
+ char *input = ctx->input;
unsigned int major, minor;
struct block_device *bdev;
int key_len;
+ if (ctx->bdev)
+ return 0;
+
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
input += key_len;
if (!isspace(*input))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
input = skip_spaces(input);
bdev = blkdev_get_no_open(MKDEV(major, minor));
if (!bdev)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
if (bdev_is_partition(bdev)) {
blkdev_put_no_open(bdev);
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}
- *inputp = input;
- return bdev;
+ ctx->body = input;
+ ctx->bdev = bdev;
+ return 0;
}
/**
* blkg_conf_prep - parse and prepare for per-blkg config update
* @blkcg: target block cgroup
* @pol: target policy
- * @input: input string
- * @ctx: blkg_conf_ctx to be filled
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
+ *
+ * Parse per-blkg config update from @ctx->input and initialize @ctx
+ * accordingly. On success, @ctx->body points to the part of @ctx->input
+ * following MAJ:MIN, @ctx->bdev points to the target block device and
+ * @ctx->blkg to the blkg being configured.
*
- * Parse per-blkg config update from @input and initialize @ctx with the
- * result. @ctx->blkg points to the blkg to be updated and @ctx->body the
- * part of @input following MAJ:MIN. This function returns with queue lock
- * held and must be paired with blkg_conf_finish().
+ * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
+ * function returns with queue lock held and must be followed by
+ * blkg_conf_exit().
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- char *input, struct blkg_conf_ctx *ctx)
+ struct blkg_conf_ctx *ctx)
__acquires(&bdev->bd_queue->queue_lock)
{
- struct block_device *bdev;
struct gendisk *disk;
struct request_queue *q;
struct blkcg_gq *blkg;
int ret;
- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- disk = bdev->bd_disk;
+ ret = blkg_conf_open_bdev(ctx);
+ if (ret)
+ return ret;
+
+ disk = ctx->bdev->bd_disk;
q = disk->queue;
/*
@@ -766,9 +790,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
success:
blk_queue_exit(q);
- ctx->bdev = bdev;
ctx->blkg = blkg;
- ctx->body = input;
return 0;
fail_preloaded:
@@ -778,7 +800,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
fail_exit_queue:
blk_queue_exit(q);
fail:
- blkdev_put_no_open(bdev);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
@@ -794,19 +815,27 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
- * blkg_conf_finish - finish up per-blkg config update
- * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
+ * blkg_conf_exit - clean up per-blkg config update
+ * @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
- * Finish up after per-blkg config update. This function must be paired
- * with blkg_conf_prep().
+ * Clean up after per-blkg config update. This function must be called on all
+ * blkg_conf_ctx's initialized with blkg_conf_init().
*/
-void blkg_conf_finish(struct blkg_conf_ctx *ctx)
+void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
{
- spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
- blkdev_put_no_open(ctx->bdev);
+ if (ctx->blkg) {
+ spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
+ ctx->blkg = NULL;
+ }
+
+ if (ctx->bdev) {
+ blkdev_put_no_open(ctx->bdev);
+ ctx->body = NULL;
+ ctx->bdev = NULL;
+ }
}
-EXPORT_SYMBOL_GPL(blkg_conf_finish);
+EXPORT_SYMBOL_GPL(blkg_conf_exit);
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
@@ -208,15 +208,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
struct blkg_conf_ctx {
+ char *input;
+ char *body;
struct block_device *bdev;
struct blkcg_gq *blkg;
- char *body;
};
-struct block_device *blkcg_conf_open_bdev(char **inputp);
+void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
+int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
- char *input, struct blkg_conf_ctx *ctx);
-void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+ struct blkg_conf_ctx *ctx);
+void blkg_conf_exit(struct blkg_conf_ctx *ctx);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
@@ -3091,9 +3091,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
return nbytes;
}
- ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
if (ret)
- return ret;
+ goto err;
iocg = blkg_to_iocg(ctx.blkg);
@@ -3112,12 +3114,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
weight_updated(iocg, &now);
spin_unlock(&iocg->ioc->lock);
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
- blkg_conf_finish(&ctx);
- return -EINVAL;
+ ret = -EINVAL;
+err:
+ blkg_conf_exit(&ctx);
+ return ret;
}
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@ -3172,19 +3176,22 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
struct gendisk *disk;
struct ioc *ioc;
u32 qos[NR_QOS_PARAMS];
bool enable, user;
- char *p;
+ char *body, *p;
int ret;
- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);
- disk = bdev->bd_disk;
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto err;
+
+ body = ctx.body;
+ disk = ctx.bdev->bd_disk;
ioc = q_to_ioc(disk->queue);
if (!ioc) {
ret = blk_iocost_init(disk);
@@ -3201,7 +3208,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
enable = ioc->enabled;
user = ioc->user_qos_params;
- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3290,7 +3297,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);
@@ -3300,7 +3307,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return ret;
}
@@ -3351,22 +3358,25 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
struct request_queue *q;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
- char *p;
+ char *body, *p;
int ret;
- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);
+
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto err;
- q = bdev_get_queue(bdev);
+ body = ctx.body;
+ q = bdev_get_queue(ctx.bdev);
ioc = q_to_ioc(q);
if (!ioc) {
- ret = blk_iocost_init(bdev->bd_disk);
+ ret = blk_iocost_init(ctx.bdev->bd_disk);
if (ret)
goto err;
ioc = q_to_ioc(q);
@@ -3379,7 +3389,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;
- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3426,7 +3436,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
@@ -3437,7 +3447,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return ret;
}
@@ -842,9 +842,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 oldval;
int ret;
- ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
if (ret)
- return ret;
+ goto out;
iolat = blkg_to_lat(ctx.blkg);
p = ctx.body;
@@ -880,7 +882,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
iolatency_clear_scaling(blkg);
ret = 0;
out:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
@@ -1369,9 +1369,11 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
int ret;
u64 v;
- ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
- return ret;
+ goto out_finish;
ret = -EINVAL;
if (sscanf(ctx.body, "%llu", &v) != 1)
@@ -1390,7 +1392,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
tg_conf_updated(tg, false);
ret = 0;
out_finish:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
@@ -1562,9 +1564,11 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
int ret;
int index = of_cft(of)->private;
- ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx);
if (ret)
- return ret;
+ goto out_finish;
tg = blkg_to_tg(ctx.blkg);
tg_update_carryover(tg);
@@ -1663,7 +1667,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->td->limit_valid[LIMIT_LOW]);
ret = 0;
out_finish:
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
We want to support lazy init of rq-qos policies so that iolatency is enabled lazily on configuration instead of gendisk initialization. The way blkg config helpers are structured now is a bit awkward for that. Let's restructure: * blkcg_conf_open_bdev() is renamed to blkg_conf_open_bdev(). The blkcg_ prefix was used because the bdev opening step is blkg-independent. However, the distinction is too subtle and confuses more than helps. Let's switch to blkg prefix so that it's consistent with the type and other helper names. * struct blkg_conf_ctx now remembers the original input string and is always initialized by the new blkg_conf_init(). * blkg_conf_open_bdev() is updated to take a pointer to blkg_conf_ctx like blkg_conf_prep() and can be called multiple times safely. Instead of modifying the double pointer to input string directly, blkg_conf_open_bdev() now sets blkg_conf_ctx->body. * blkg_conf_finish() is renamed to blkg_conf_exit() for symmetry and now must be called on all blkg_conf_ctx's which were initialized with blkg_conf_init(). Combined, this allows the users to either open the bdev first or do it altogether with blkg_conf_prep() which will help implementing lazy init of rq-qos policies. blkg_conf_init/exit() will also be used implement synchronization against device removal. This is necessary because iolat / iocost are configured through cgroupfs instead of one of the files under /sys/block/DEVICE. As cgroupfs operations aren't synchronized with block layer, the lazy init and other configuration operations may race against device removal. This patch makes blkg_conf_init/exit() used consistently for all cgroup-orginating configurations making them a good place to implement explicit synchronization. Users are updated accordingly. No behavior change is intended by this patch. v2: bfq wasn't updated in v1 causing a build error. Fixed. v3: Update the description to include future use of blkg_conf_init/exit() as synchronization points. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Josef Bacik <josef@toxicpanda.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Yu Kuai <yukuai1@huaweicloud.com> --- block/bfq-cgroup.c | 8 ++-- block/blk-cgroup.c | 105 +++++++++++++++++++++++++++--------------- block/blk-cgroup.h | 10 ++-- block/blk-iocost.c | 58 +++++++++++++---------- block/blk-iolatency.c | 8 ++-- block/blk-throttle.c | 16 ++++--- 6 files changed, 127 insertions(+), 78 deletions(-)