Message ID | 20230621162333.30027-1-jack@suse.cz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | bcache: Fix block device claiming | expand |
On Wed, Jun 21, 2023 at 06:23:26PM +0200, Jan Kara wrote: > Allocate holder object (cache or cached_dev) before offloading the > rest of the startup to async work. This will allow us to open the block > block device with proper holder. This is a pretty big change for this fix, and we'd want to retest the error paths - that's hard to do, because the fault injection framework I was using for that never made it upstream. What about just exposing a proper API for changing the holder? Wouldn't that be simpler? > > Signed-off-by: Jan Kara <jack@suse.cz> > --- > drivers/md/bcache/super.c | 66 +++++++++++++++------------------------ > 1 file changed, 25 insertions(+), 41 deletions(-) > > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > index e2a803683105..913dd94353b6 100644 > --- a/drivers/md/bcache/super.c > +++ b/drivers/md/bcache/super.c > @@ -2448,6 +2448,7 @@ struct async_reg_args { > struct cache_sb *sb; > struct cache_sb_disk *sb_disk; > struct block_device *bdev; > + void *holder; > }; > > static void register_bdev_worker(struct work_struct *work) > @@ -2455,22 +2456,13 @@ static void register_bdev_worker(struct work_struct *work) > int fail = false; > struct async_reg_args *args = > container_of(work, struct async_reg_args, reg_work.work); > - struct cached_dev *dc; > - > - dc = kzalloc(sizeof(*dc), GFP_KERNEL); > - if (!dc) { > - fail = true; > - put_page(virt_to_page(args->sb_disk)); > - blkdev_put(args->bdev, bcache_kobj); > - goto out; > - } > > mutex_lock(&bch_register_lock); > - if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) > + if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) > + < 0) > fail = true; > mutex_unlock(&bch_register_lock); > > -out: > if (fail) > pr_info("error %s: fail to register backing device\n", > args->path); > @@ -2485,21 +2477,11 @@ static void register_cache_worker(struct work_struct *work) > int fail = false; > struct async_reg_args *args = > container_of(work, struct 
async_reg_args, reg_work.work); > - struct cache *ca; > - > - ca = kzalloc(sizeof(*ca), GFP_KERNEL); > - if (!ca) { > - fail = true; > - put_page(virt_to_page(args->sb_disk)); > - blkdev_put(args->bdev, bcache_kobj); > - goto out; > - } > > /* blkdev_put() will be called in bch_cache_release() */ > - if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) > + if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) > fail = true; > > -out: > if (fail) > pr_info("error %s: fail to register cache device\n", > args->path); > @@ -2520,6 +2502,13 @@ static void register_device_async(struct async_reg_args *args) > queue_delayed_work(system_wq, &args->reg_work, 10); > } > > +static void *alloc_holder_object(struct cache_sb *sb) > +{ > + if (SB_IS_BDEV(sb)) > + return kzalloc(sizeof(struct cached_dev), GFP_KERNEL); > + return kzalloc(sizeof(struct cache), GFP_KERNEL); > +} > + > static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, > const char *buffer, size_t size) > { > @@ -2528,6 +2517,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, > struct cache_sb *sb; > struct cache_sb_disk *sb_disk; > struct block_device *bdev; > + void *holder; > ssize_t ret; > bool async_registration = false; > > @@ -2585,6 +2575,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, > if (err) > goto out_blkdev_put; > > + holder = alloc_holder_object(sb); > + if (!holder) { > + ret = -ENOMEM; > + err = "cannot allocate memory"; > + goto out_put_sb_page; > + } > + > err = "failed to register device"; > > if (async_registration) { > @@ -2595,44 +2592,29 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, > if (!args) { > ret = -ENOMEM; > err = "cannot allocate memory"; > - goto out_put_sb_page; > + goto out_free_holder; > } > > args->path = path; > args->sb = sb; > args->sb_disk = sb_disk; > args->bdev = bdev; > + args->holder = holder; > 
register_device_async(args); > /* No wait and returns to user space */ > goto async_done; > } > > if (SB_IS_BDEV(sb)) { > - struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); > - > - if (!dc) { > - ret = -ENOMEM; > - err = "cannot allocate memory"; > - goto out_put_sb_page; > - } > - > mutex_lock(&bch_register_lock); > - ret = register_bdev(sb, sb_disk, bdev, dc); > + ret = register_bdev(sb, sb_disk, bdev, holder); > mutex_unlock(&bch_register_lock); > /* blkdev_put() will be called in cached_dev_free() */ > if (ret < 0) > goto out_free_sb; > } else { > - struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); > - > - if (!ca) { > - ret = -ENOMEM; > - err = "cannot allocate memory"; > - goto out_put_sb_page; > - } > - > /* blkdev_put() will be called in bch_cache_release() */ > - ret = register_cache(sb, sb_disk, bdev, ca); > + ret = register_cache(sb, sb_disk, bdev, holder); > if (ret) > goto out_free_sb; > } > @@ -2644,6 +2626,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, > async_done: > return size; > > +out_free_holder: > + kfree(holder); > out_put_sb_page: > put_page(virt_to_page(sb_disk)); > out_blkdev_put: > -- > 2.35.3 >
On Wed 21-06-23 13:56:59, Kent Overstreet wrote: > On Wed, Jun 21, 2023 at 06:23:26PM +0200, Jan Kara wrote: > > Allocate holder object (cache or cached_dev) before offloading the > > rest of the startup to async work. This will allow us to open the block > > block device with proper holder. > > This is a pretty big change for this fix, and we'd want to retest the > error paths - that's hard to do, because the fault injection framework I > was using for that never made it upstream. I agree those are somewhat difficult to test. Although with memory allocation error injection, we can easily simulate failures in alloc_holder_object() or say later in bcache_device_init() if that's what you're after to give at least some testing to the error paths. Admittedly, I've just tested that registering and unregistering bcache devices works without giving warnings. Or are you more worried about the "reopen the block device" logic (and error handling) in the second patch? > What about just exposing a proper API for changing the holder? Wouldn't > that be simpler? It would be doable but frankly I'd prefer to avoid implementing the API for changing the holder just for bcache. For all I care bcache can also just generate random cookie (or an id from IDA) and pass it as a holder to blkdev_get_by_dev(). It would do the job as well and we then don't have to play games with changing the holder. It would just need to be propagated to the places doing blkdev_put(). Do you find that better? I'm now working on changes that will make blkdev_get_by_dev() return bdev_handle (which contains bdev pointer but also other stuff we need to propagate to blkdev_put()) so when that is done, the cookie propagation will happen automatically. Honza
On Thu, Jun 22, 2023 at 12:09:54PM +0200, Jan Kara wrote: > On Wed 21-06-23 13:56:59, Kent Overstreet wrote: > > On Wed, Jun 21, 2023 at 06:23:26PM +0200, Jan Kara wrote: > > > Allocate holder object (cache or cached_dev) before offloading the > > > rest of the startup to async work. This will allow us to open the block > > > block device with proper holder. > > > > This is a pretty big change for this fix, and we'd want to retest the > > error paths - that's hard to do, because the fault injection framework I > > was using for that never made it upstream. > > I agree those are somewhat difficult to test. Although with memory > allocation error injection, we can easily simulate failures in > alloc_holder_object() or say later in bcache_device_init() if that's what > you're after to give at least some testing to the error paths. Admittedly, > I've just tested that registering and unregistering bcache devices works > without giving warnings. Or are you more worried about the "reopen the > block device" logic (and error handling) in the second patch? All error paths need to be tested, yeah. > > What about just exposing a proper API for changing the holder? Wouldn't > > that be simpler? > > It would be doable but frankly I'd prefer to avoid implementing the API for > changing the holder just for bcache. For all I care bcache can also just > generate random cookie (or an id from IDA) and pass it as a holder to > blkdev_get_by_dev(). It would do the job as well and we then don't have to > play games with changing the holder. It would just need to be propagated to > the places doing blkdev_put(). Do you find that better? Well, looking over the code it doesn't seem like it would really simplify the fix, unfortunately. bcachefs has the same issue, but in bcachefs we've already got a handle object where we can allocate and stash a holder - analogous to the bdev_handle you're working on. 
> I'm now working on a changes that will make blkdev_get_by_dev() return > bdev_handle (which contains bdev pointer but also other stuff we need to > propagate to blkdev_put()) so when that is done, the cookie propagation > will happen automatically. bdev_handle definitely sounds like the right direction. Just changing the holder really seems like it should be easiest to me, and it can get deleted after bdev_handle lands, but maybe there's a new wrinkle I'm not aware of? Anyways, as the error paths get tested I'm ok with this patchset - it's fine if it's done completely by hand (you can just add a goto fail in the appropriate place and make sure nothing explodes). Coli - we should talk about testing at some point. I've been working on test infrastructure; it would be great to get the bcache tests in ktest updated, I've now got a CI we could point at those tests. Christoph - you really gotta start CCing people properly, the patch that broke this didn't even hit linux-bcache. No bueno.
On Thu, Jun 22, 2023 at 08:05:48AM -0400, Kent Overstreet wrote: > Christoph - you really gotta start CCing people properly, the patch that > broke this didn't even it linux-bcache. No bueno. The series did absolutely go out to linux-bcache. See here for v2: https://lore.kernel.org/linux-bcache/20230608110258.189493-13-hch@lst.de/T/#u
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e2a803683105..913dd94353b6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2448,6 +2448,7 @@ struct async_reg_args { struct cache_sb *sb; struct cache_sb_disk *sb_disk; struct block_device *bdev; + void *holder; }; static void register_bdev_worker(struct work_struct *work) @@ -2455,22 +2456,13 @@ static void register_bdev_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cached_dev *dc; - - dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) + < 0) fail = true; mutex_unlock(&bch_register_lock); -out: if (fail) pr_info("error %s: fail to register backing device\n", args->path); @@ -2485,21 +2477,11 @@ static void register_cache_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cache *ca; - - ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) fail = true; -out: if (fail) pr_info("error %s: fail to register cache device\n", args->path); @@ -2520,6 +2502,13 @@ static void register_device_async(struct async_reg_args *args) queue_delayed_work(system_wq, &args->reg_work, 10); } +static void *alloc_holder_object(struct cache_sb *sb) +{ + if (SB_IS_BDEV(sb)) + return kzalloc(sizeof(struct cached_dev), 
GFP_KERNEL); + return kzalloc(sizeof(struct cache), GFP_KERNEL); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2528,6 +2517,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, struct cache_sb *sb; struct cache_sb_disk *sb_disk; struct block_device *bdev; + void *holder; ssize_t ret; bool async_registration = false; @@ -2585,6 +2575,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (err) goto out_blkdev_put; + holder = alloc_holder_object(sb); + if (!holder) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + err = "failed to register device"; if (async_registration) { @@ -2595,44 +2592,29 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!args) { ret = -ENOMEM; err = "cannot allocate memory"; - goto out_put_sb_page; + goto out_free_holder; } args->path = path; args->sb = sb; args->sb_disk = sb_disk; args->bdev = bdev; + args->holder = holder; register_device_async(args); /* No wait and returns to user space */ goto async_done; } if (SB_IS_BDEV(sb)) { - struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - - if (!dc) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev, dc); + ret = register_bdev(sb, sb_disk, bdev, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { - struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev, ca); + ret = register_cache(sb, sb_disk, bdev, holder); if (ret) goto out_free_sb; } @@ -2644,6 +2626,8 @@ static ssize_t register_bcache(struct kobject *k, 
struct kobj_attribute *attr, async_done: return size; +out_free_holder: + kfree(holder); out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put:
Allocate holder object (cache or cached_dev) before offloading the rest of the startup to async work. This will allow us to open the block device with the proper holder. Signed-off-by: Jan Kara <jack@suse.cz> --- drivers/md/bcache/super.c | 66 +++++++++++++++------------------------ 1 file changed, 25 insertions(+), 41 deletions(-)