Message ID | 2642808b-a7d0-28ff-f288-0f4eabc562f7@i-love.sakura.ne.jp (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | loop: replace loop_ctl_mutex with loop_idr_spinlock | expand |
On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote: > > loop_unregister_transfer() which is called from cleanup_cryptoloop() > currently lacks serialization between kfree() from loop_remove() from > loop_control_remove() and mutex_lock() from unregister_transfer_cb(). > We can use refcount and loop_idr_spinlock for serialization between > these functions. So before we start complicating things for loop_release_xfer - how do you actually reproduce loop_unregister_transfer finding a loop device with a transfer set? AFAICS loop_unregister_transfer is only called from exit_cryptoloop, which can only be called when cryptoloop has a zero reference count. But as long as a transfer is registered an extra refcount is held on its owner. > @@ -2313,20 +2320,20 @@ static int loop_add(int i) > goto out; > lo->lo_state = Lo_unbound; > > - err = mutex_lock_killable(&loop_ctl_mutex); > - if (err) > - goto out_free_dev; > - > /* allocate id, if @id >= 0, we're requesting that specific id */ > + idr_preload(GFP_KERNEL); > + spin_lock(&loop_idr_spinlock); > if (i >= 0) { > - err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); > + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC); > if (err == -ENOSPC) > err = -EEXIST; > } else { > - err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); > + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC); > } > + spin_unlock(&loop_idr_spinlock); > + idr_preload_end(); Can you explain why the mutex is switched to a spinlock? I could not find any caller that can't block, so there doesn't seem to be a real need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC allocations here. Dropping the _killable probably makes some sense, but seems like a separate cleanup. 
> + if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) { > + spin_unlock(&loop_idr_spinlock); > + return -ENODEV; > } > + spin_unlock(&loop_idr_spinlock); > + refcount_dec(&lo->idr_visible); > + /* > + * Try to wait for concurrent callers (they should complete shortly due to > + * lo->lo_state == Lo_deleting) operating on this loop device, in order to > + * help that subsequent loop_add() will not to fail with -EEXIST. > + * Note that this is best effort. > + */ > + for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++) > + schedule_timeout_killable(1); > + ret = 0; This dance looks pretty strange to me. I think just making idr_visible an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions will make this much simpler, as it avoids the need to deal with a > 1 count, and it also serializes multiple removal calls. I quickly hacked this up as a slight variant of your patch, and it's been running the syzbot reproducer you pointed me to for quite while now: diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f0cdff0c5fbf4..69ced1feb18d5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2113,28 +2113,29 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } -static int unregister_transfer_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - struct loop_func_table *xfer = data; - - mutex_lock(&lo->lo_mutex); - if (lo->lo_encryption == xfer) - loop_release_xfer(lo); - mutex_unlock(&lo->lo_mutex); - return 0; -} - int loop_unregister_transfer(int number) { unsigned int n = number; struct loop_func_table *xfer; + struct loop_device *lo; + int id; if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) return -EINVAL; xfer_funcs[n] = NULL; - idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); + + /* + * loop_unregister_transfer is only called from cryptoloop module + * unload. 
Given that each loop device that has a transfer enabled + * hold a reference to the module implementing it we should never + * get here with a transfer that is set. + */ + mutex_lock(&loop_ctl_mutex); + idr_for_each_entry(&loop_index_idr, lo, id) + WARN_ON_ONCE(lo->lo_encryption == xfer); + mutex_unlock(&loop_ctl_mutex); + return 0; } @@ -2325,8 +2326,9 @@ static int loop_add(int i) } else { err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } + mutex_unlock(&loop_ctl_mutex); if (err < 0) - goto out_unlock; + goto out_free_dev; i = err; err = -ENOMEM; @@ -2392,15 +2394,17 @@ static int loop_add(int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); + /* Make this loop device reachable from pathname. */ add_disk(disk); - mutex_unlock(&loop_ctl_mutex); + /* Show this loop device. */ + atomic_set(&lo->idr_visible, 1); return i; out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: + mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, i); -out_unlock: mutex_unlock(&loop_ctl_mutex); out_free_dev: kfree(lo); @@ -2410,9 +2414,14 @@ static int loop_add(int i) static void loop_remove(struct loop_device *lo) { + /* Make this loop device unreachable from pathname. */ del_gendisk(lo->lo_disk); blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); + mutex_lock(&loop_ctl_mutex); + idr_remove(&loop_index_idr, lo->lo_number); + mutex_unlock(&loop_ctl_mutex); + /* There is no route which can find this loop device. */ mutex_destroy(&lo->lo_mutex); kfree(lo); } @@ -2440,29 +2449,40 @@ static int loop_control_remove(int idx) if (ret) return ret; + /* + * Identify the loop device to remove. Skip the device if it is owned by + * loop_remove()/loop_add() where it is not safe to access lo_mutex. 
+ * The loop device is marked invisible even if we bail out of the + * removal, but the only other place checking the visibility is the + * LOOP_CTL_GET_FREE ioctl, which checks the same flags as we do below, + * and which is fundamentally racy anyway. + */ lo = idr_find(&loop_index_idr, idx); - if (!lo) { - ret = -ENODEV; - goto out_unlock_ctrl; + if (!lo || atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0) { + mutex_unlock(&loop_ctl_mutex); + return -ENODEV; } + mutex_unlock(&loop_ctl_mutex); ret = mutex_lock_killable(&lo->lo_mutex); if (ret) - goto out_unlock_ctrl; + goto mark_visible; if (lo->lo_state != Lo_unbound || atomic_read(&lo->lo_refcnt) > 0) { mutex_unlock(&lo->lo_mutex); ret = -EBUSY; - goto out_unlock_ctrl; + goto mark_visible; } + /* Mark this loop device no longer open()-able. */ lo->lo_state = Lo_deleting; mutex_unlock(&lo->lo_mutex); - idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); -out_unlock_ctrl: - mutex_unlock(&loop_ctl_mutex); - return ret; + return 0; + +mark_visible: + atomic_inc(&lo->idr_visible); + return -EBUSY; } static int loop_control_get_free(int idx) @@ -2474,7 +2494,8 @@ static int loop_control_get_free(int idx) if (ret) return ret; idr_for_each_entry(&loop_index_idr, lo, id) { - if (lo->lo_state == Lo_unbound) + if (atomic_read(&lo->idr_visible) && + lo->lo_state == Lo_unbound) goto found; } mutex_unlock(&loop_ctl_mutex); @@ -2590,10 +2611,12 @@ static void __exit loop_exit(void) unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); - mutex_lock(&loop_ctl_mutex); + /* + * There is no need to use loop_ctl_mutex here, for nobody else can + * access loop_index_idr when this module is unloading. 
+ */ idr_for_each_entry(&loop_index_idr, lo, id) loop_remove(lo); - mutex_unlock(&loop_ctl_mutex); idr_destroy(&loop_index_idr); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 1988899db63ac..1ec5135da54a7 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -68,6 +68,7 @@ struct loop_device { struct blk_mq_tag_set tag_set; struct gendisk *lo_disk; struct mutex lo_mutex; + atomic_t idr_visible; /* a bool in reality */ }; struct loop_cmd {
On 2021/08/28 3:43, Christoph Hellwig wrote: > On Fri, Aug 27, 2021 at 01:03:45AM +0900, Tetsuo Handa wrote: >> >> loop_unregister_transfer() which is called from cleanup_cryptoloop() >> currently lacks serialization between kfree() from loop_remove() from >> loop_control_remove() and mutex_lock() from unregister_transfer_cb(). >> We can use refcount and loop_idr_spinlock for serialization between >> these functions. > > > So before we start complicating things for loop_release_xfer - how > do you actually reproduce loop_unregister_transfer finding a loop > device with a transfer set? AFAICS loop_unregister_transfer is only > called from exit_cryptoloop, which can only be called when > cryptoloop has a zero reference count. But as long as a transfer > is registered an extra refcount is held on its owner. Indeed, lo->lo_encryption is set to non-NULL by loop_init_xfer() after a refcount is taken and lo->lo_encryption is reset to NULL by loop_release_xfer() before that refcount is dropped, and these operations are serialized by lo->lo_mutex. Then, lo->lo_encryption == xfer can't happen unless forced module unload is requested. That is, it seems that unregister_transfer_cb() is there in case forced module unload of cryptoloop module was requested. And in that case, there is no point in crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk() will be sufficient. Removing unregister_transfer_cb() (if we ignore forced module unload) will be a separate patch. 
> >> @@ -2313,20 +2320,20 @@ static int loop_add(int i) >> goto out; >> lo->lo_state = Lo_unbound; >> >> - err = mutex_lock_killable(&loop_ctl_mutex); >> - if (err) >> - goto out_free_dev; >> - >> /* allocate id, if @id >= 0, we're requesting that specific id */ >> + idr_preload(GFP_KERNEL); >> + spin_lock(&loop_idr_spinlock); >> if (i >= 0) { >> - err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); >> + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC); >> if (err == -ENOSPC) >> err = -EEXIST; >> } else { >> - err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); >> + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC); >> } >> + spin_unlock(&loop_idr_spinlock); >> + idr_preload_end(); > > Can you explain why the mutex is switched to a spinlock? I could not > find any caller that can't block, so there doesn't seem to be a real > need for a spinlock, while a spinlock requires extra work and GFP_ATOMIC > allocations here. Dropping the _killable probably makes some sense, > but seems like a separate cleanup. In order to annotate that extra operations that might sleep should not be added inside this section. Use of sleepable locks tends to get extra operations (e.g. wait for a different mutex / completion) and makes it unclear what the lock is protecting. I can imagine a future that someone adds an unwanted dependency inside this section if we use mutex here. Technically, we can add preempt_disable() after mutex_lock() and preempt_enable() before mutex_unlock() in order to annotate that extra operations that might sleep should be avoided. But idr_alloc(GFP_ATOMIC)/idr_find()/idr_for_each_entry() etc. will be fast enough. 
> >> + if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) { >> + spin_unlock(&loop_idr_spinlock); >> + return -ENODEV; >> } >> + spin_unlock(&loop_idr_spinlock); > >> + refcount_dec(&lo->idr_visible); >> + /* >> + * Try to wait for concurrent callers (they should complete shortly due to >> + * lo->lo_state == Lo_deleting) operating on this loop device, in order to >> + * help that subsequent loop_add() will not to fail with -EEXIST. >> + * Note that this is best effort. >> + */ >> + for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++) >> + schedule_timeout_killable(1); >> + ret = 0; > > This dance looks pretty strange to me. I think just making idr_visible > an atomic_t and using atomic_cmpxchg with just 0 and 1 as valid versions > will make this much simpler, as it avoids the need to deal with a > 1 > count, and it also serializes multiple removal calls. Yes if we ignore forced module unload (which needs to synchronously check lo->lo_encryption) of cryptoloop module. If we don't ignore forced module unload, we could update my patch to keep only mutex_destroy() and kfree() deferred by a refcount, for only lo->lo_state, lo->lo_refcnt and lo->lo_encryption would be accessed under lo->lo_mutex serialization. There is no need to defer "del_gendisk() + idr_remove()" sequence for concurrent callers.
On 2021/08/28 10:10, Tetsuo Handa wrote: > If we don't ignore forced module unload, we could update my patch to keep only > mutex_destroy() and kfree() deferred by a refcount, for only lo->lo_state, > lo->lo_refcnt and lo->lo_encryption would be accessed under lo->lo_mutex > serialization. There is no need to defer "del_gendisk() + idr_remove()" > sequence for concurrent callers. > OK, here is a delta patch to make it no longer best effort. We can consider removal of cryptoloop module after this patch, starting from a printk() for deprecated message. drivers/block/loop.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2113,7 +2113,11 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } -static void loop_remove(struct loop_device *lo); +static void loop_destroy(struct loop_device *lo) +{ + mutex_destroy(&lo->lo_mutex); + kfree(lo); +} int loop_unregister_transfer(int number) { @@ -2137,7 +2141,7 @@ int loop_unregister_transfer(int number) loop_release_xfer(lo); mutex_unlock(&lo->lo_mutex); if (refcount_dec_and_test(&lo->idr_visible)) - loop_remove(lo); + loop_destroy(lo); spin_lock(&loop_idr_spinlock); } spin_unlock(&loop_idr_spinlock); @@ -2426,9 +2430,6 @@ static void loop_remove(struct loop_device *lo) spin_lock(&loop_idr_spinlock); idr_remove(&loop_index_idr, lo->lo_number); spin_unlock(&loop_idr_spinlock); - /* There is no route which can find this loop device. */ - mutex_destroy(&lo->lo_mutex); - kfree(lo); } static void loop_probe(dev_t dev) @@ -2452,7 +2453,7 @@ static int loop_control_remove(int idx) /* * Identify the loop device to remove. Skip the device if it is owned by - * loop_remove()/loop_add() where it is not safe to access lo_mutex. + * loop_add() where it is not safe to access lo_mutex. 
*/ spin_lock(&loop_idr_spinlock); lo = idr_find(&loop_index_idr, idx); @@ -2479,19 +2480,11 @@ static int loop_control_remove(int idx) mutex_unlock(&lo->lo_mutex); /* Hide this loop device. */ refcount_dec(&lo->idr_visible); - /* - * Try to wait for concurrent callers (they should complete shortly due to - * lo->lo_state == Lo_deleting) operating on this loop device, in order to - * help that subsequent loop_add() will not to fail with -EEXIST. - * Note that this is best effort. - */ - for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++) - schedule_timeout_killable(1); - ret = 0; + /* Remove this loop device, but wait concurrent callers before destroy. */ + loop_remove(lo); out: - /* Remove this loop device. */ if (refcount_dec_and_test(&lo->idr_visible)) - loop_remove(lo); + loop_destroy(lo); return ret; } @@ -2623,8 +2616,10 @@ static void __exit loop_exit(void) * There is no need to use loop_idr_spinlock here, for nobody else can * access loop_index_idr when this module is unloading. */ - idr_for_each_entry(&loop_index_idr, lo, id) + idr_for_each_entry(&loop_index_idr, lo, id) { loop_remove(lo); + loop_destroy(lo); + } idr_destroy(&loop_index_idr); }
On Sat, Aug 28, 2021 at 10:10:36AM +0900, Tetsuo Handa wrote: > That is, it seems that unregister_transfer_cb() is there in case forced module > unload of cryptoloop module was requested. And in that case, there is no point > in crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk() > will be sufficient. If we have that case for forced module unload a WARN_ON is the right thing. That being said we can simply do the cmpxchg based protection for that case as well if you want to keep it. That will lead to a spurious loop remove failure with -EBUSY when a concurrent force module removal for cryptoloop is happening, but if you do something like that you get to keep the pieces. > In order to annotate that extra operations that might sleep should not be > added inside this section. Use of sleepable locks tends to get extra > operations (e.g. wait for a different mutex / completion) and makes it unclear > what the lock is protecting. I can imagine a future that someone adds an > unwanted dependency inside this section if we use mutex here. > > Technically, we can add preempt_disable() after mutex_lock() and > preempt_enable() before mutex_unlock() in order to annotate that > extra operations that might sleep should be avoided. > But idr_alloc(GFP_ATOMIC)/idr_find()/idr_for_each_entry() etc. will be > fast enough. Well, split that into a cleanup patch if you think it is worth the effort, with a good changelog. Not really part of the bug fix.
On 2021/08/28 16:18, Christoph Hellwig wrote: > On Sat, Aug 28, 2021 at 10:10:36AM +0900, Tetsuo Handa wrote: >> That is, it seems that unregister_transfer_cb() is there in case forced module >> unload of cryptoloop module was requested. And in that case, there is no point >> in crashing the kernel via panic_on_warn == 1 && WARN_ON_ONCE(). Simple printk() >> will be sufficient. > > If we have that case for forced module unload a WARN_ON is the right thing. > That being said we can simply do the cmpxchg based protection for that > case as well if you want to keep it. That will lead to a spurious > loop remove failure with -EBUSY when a concurrent force module removal > for cryptoloop is happening, but if you do something like that you get > to keep the pieces. Oh, given that commit 222013f9ac30b9ce ("cryptoloop: add a deprecation warning") was already merged into linux.git, there is no point in worrying about forced module unloading. Then, I would warn like +#ifdef CONFIG_MODULE_UNLOAD + if (module_refcount(xfer->owner) != -1) + pr_err("Unregistering a transfer function in use. Expect kernel crashes.\n"); +#endif rather than + idr_for_each_entry(&loop_index_idr, lo, id) + WARN_ON_ONCE(lo->lo_encryption == xfer); in your patch. (Actually, nobody calls loop_unregister_transfer() if CONFIG_MODULE_UNLOAD=n ...) Then, your atomic_cmpxchg(&lo->idr_visible, 1, 0) == 0 approach will be OK (I would use + atomic_set(&lo->idr_visible, 1); rather than + atomic_inc(&lo->idr_visible); because it is "Show this loop device again.").
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f0cdff0c5fbf..783b3d2ed277 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -87,7 +87,7 @@ #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) static DEFINE_IDR(loop_index_idr); -static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_SPINLOCK(loop_idr_spinlock); static DEFINE_MUTEX(loop_validate_mutex); /** @@ -2113,28 +2113,35 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } -static int unregister_transfer_cb(int id, void *ptr, void *data) -{ - struct loop_device *lo = ptr; - struct loop_func_table *xfer = data; - - mutex_lock(&lo->lo_mutex); - if (lo->lo_encryption == xfer) - loop_release_xfer(lo); - mutex_unlock(&lo->lo_mutex); - return 0; -} +static void loop_remove(struct loop_device *lo); int loop_unregister_transfer(int number) { unsigned int n = number; struct loop_func_table *xfer; + struct loop_device *lo; + int id; if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) return -EINVAL; xfer_funcs[n] = NULL; - idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); + + spin_lock(&loop_idr_spinlock); + idr_for_each_entry(&loop_index_idr, lo, id) { + if (!refcount_inc_not_zero(&lo->idr_visible)) + continue; + spin_unlock(&loop_idr_spinlock); + mutex_lock(&lo->lo_mutex); + if (lo->lo_encryption == xfer) + loop_release_xfer(lo); + mutex_unlock(&lo->lo_mutex); + if (refcount_dec_and_test(&lo->idr_visible)) + loop_remove(lo); + spin_lock(&loop_idr_spinlock); + } + spin_unlock(&loop_idr_spinlock); + return 0; } @@ -2313,20 +2320,20 @@ static int loop_add(int i) goto out; lo->lo_state = Lo_unbound; - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - goto out_free_dev; - /* allocate id, if @id >= 0, we're requesting that specific id */ + idr_preload(GFP_KERNEL); + spin_lock(&loop_idr_spinlock); if (i >= 0) { - err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_ATOMIC); if (err == -ENOSPC) 
err = -EEXIST; } else { - err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_ATOMIC); } + spin_unlock(&loop_idr_spinlock); + idr_preload_end(); if (err < 0) - goto out_unlock; + goto out_free_dev; i = err; err = -ENOMEM; @@ -2392,16 +2399,18 @@ static int loop_add(int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); + /* Make this loop device reachable from pathname. */ add_disk(disk); - mutex_unlock(&loop_ctl_mutex); + /* Show this loop device. */ + refcount_set(&lo->idr_visible, 1); return i; out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: + spin_lock(&loop_idr_spinlock); idr_remove(&loop_index_idr, i); -out_unlock: - mutex_unlock(&loop_ctl_mutex); + spin_unlock(&loop_idr_spinlock); out_free_dev: kfree(lo); out: @@ -2410,9 +2419,14 @@ static int loop_add(int i) static void loop_remove(struct loop_device *lo) { + /* Make this loop device unreachable from pathname. */ del_gendisk(lo->lo_disk); blk_cleanup_disk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); + spin_lock(&loop_idr_spinlock); + idr_remove(&loop_index_idr, lo->lo_number); + spin_unlock(&loop_idr_spinlock); + /* There is no route which can find this loop device. */ mutex_destroy(&lo->lo_mutex); kfree(lo); } @@ -2435,52 +2449,67 @@ static int loop_control_remove(int idx) pr_warn("deleting an unspecified loop device is not supported.\n"); return -EINVAL; } - - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; + /* + * Identify the loop device to remove. Skip the device if it is owned by + * loop_remove()/loop_add() where it is not safe to access lo_mutex. 
+ */ + spin_lock(&loop_idr_spinlock); lo = idr_find(&loop_index_idr, idx); - if (!lo) { - ret = -ENODEV; - goto out_unlock_ctrl; + if (!lo || !refcount_inc_not_zero(&lo->idr_visible)) { + spin_unlock(&loop_idr_spinlock); + return -ENODEV; } + spin_unlock(&loop_idr_spinlock); ret = mutex_lock_killable(&lo->lo_mutex); if (ret) - goto out_unlock_ctrl; + goto out; if (lo->lo_state != Lo_unbound || atomic_read(&lo->lo_refcnt) > 0) { mutex_unlock(&lo->lo_mutex); - ret = -EBUSY; - goto out_unlock_ctrl; + if (lo->lo_state == Lo_deleting) + ret = -ENODEV; + else + ret = -EBUSY; + goto out; } + /* Mark this loop device no longer open()-able. */ lo->lo_state = Lo_deleting; mutex_unlock(&lo->lo_mutex); - - idr_remove(&loop_index_idr, lo->lo_number); - loop_remove(lo); -out_unlock_ctrl: - mutex_unlock(&loop_ctl_mutex); + /* Hide this loop device. */ + refcount_dec(&lo->idr_visible); + /* + * Try to wait for concurrent callers (they should complete shortly due to + * lo->lo_state == Lo_deleting) operating on this loop device, in order to + * help that subsequent loop_add() will not to fail with -EEXIST. + * Note that this is best effort. + */ + for (ret = 0; refcount_read(&lo->idr_visible) != 1 && ret < HZ; ret++) + schedule_timeout_killable(1); + ret = 0; +out: + /* Remove this loop device. 
*/ + if (refcount_dec_and_test(&lo->idr_visible)) + loop_remove(lo); return ret; } static int loop_control_get_free(int idx) { struct loop_device *lo; - int id, ret; + int id; - ret = mutex_lock_killable(&loop_ctl_mutex); - if (ret) - return ret; + spin_lock(&loop_idr_spinlock); idr_for_each_entry(&loop_index_idr, lo, id) { - if (lo->lo_state == Lo_unbound) + if (refcount_read(&lo->idr_visible) && + lo->lo_state == Lo_unbound) goto found; } - mutex_unlock(&loop_ctl_mutex); + spin_unlock(&loop_idr_spinlock); return loop_add(-1); found: - mutex_unlock(&loop_ctl_mutex); + spin_unlock(&loop_idr_spinlock); return id; } @@ -2590,10 +2619,12 @@ static void __exit loop_exit(void) unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); - mutex_lock(&loop_ctl_mutex); + /* + * There is no need to use loop_idr_spinlock here, for nobody else can + * access loop_index_idr when this module is unloading. + */ idr_for_each_entry(&loop_index_idr, lo, id) loop_remove(lo); - mutex_unlock(&loop_ctl_mutex); idr_destroy(&loop_index_idr); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 1988899db63a..bed350d8722f 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -68,6 +68,7 @@ struct loop_device { struct blk_mq_tag_set tag_set; struct gendisk *lo_disk; struct mutex lo_mutex; + refcount_t idr_visible; }; struct loop_cmd {