Message ID | 521eb103-db46-3f34-e878-0cdd585ee8bd@i-love.sakura.ne.jp (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] loop: reintroduce global lock for safe loop_validate_file() traversal | expand |
Jens, any questions? I'm testing this patch in linux-next.git since next-20210714, and got no regression reports. I think this patch is ready to be merged. On 2021/07/06 23:40, Tetsuo Handa wrote: > Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per > device lock") re-opened a race window for NULL pointer dereference at > loop_validate_file() where commit 310ca162d779efee ("block/loop: Use > global lock for ioctl() operation.") has closed. > > Although we need to guarantee that other loop devices will not change > during traversal, we can't take remote "struct loop_device"->lo_mutex > inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore, > introduce a global lock dedicated for loop_validate_file() which is > conditionally taken before local "struct loop_device"->lo_mutex is taken. > > Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") > --- > Changes in v3: > Add memory barrier between loop_configure() and loop_validate_file(). > Flush at loop_change_fd() to avoid possible use-after-free. > > Changes in v2: > Minimize lock duration in __loop_clr_fd(). > Add kerneldoc to lock/unlock helper functions. > Reformat local variables declaration. > > drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++----------- > 1 file changed, 97 insertions(+), 31 deletions(-) > > diff --git a/drivers/block/loop.c b/drivers/block/loop.c > index cc0e8c39a48b..2a5b3d365250 100644 > --- a/drivers/block/loop.c > +++ b/drivers/block/loop.c > @@ -88,6 +88,47 @@ > > static DEFINE_IDR(loop_index_idr); > static DEFINE_MUTEX(loop_ctl_mutex); > +static DEFINE_MUTEX(loop_validate_mutex); > + > +/** > + * loop_global_lock_killable() - take locks for safe loop_validate_file() test > + * > + * @lo: struct loop_device > + * @global: true if @lo is about to bind another "struct loop_device", false otherwise > + * > + * Returns 0 on success, -EINTR otherwise. > + * > + * Since loop_validate_file() traverses on other "struct loop_device" if > + * is_loop_device() is true, we need a global lock for serializing concurrent > + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. > + */ > +static int loop_global_lock_killable(struct loop_device *lo, bool global) > +{ > + int err; > + > + if (global) { > + err = mutex_lock_killable(&loop_validate_mutex); > + if (err) > + return err; > + } > + err = mutex_lock_killable(&lo->lo_mutex); > + if (err && global) > + mutex_unlock(&loop_validate_mutex); > + return err; > +} > + > +/** > + * loop_global_unlock() - release locks taken by loop_global_lock_killable() > + * > + * @lo: struct loop_device > + * @global: true if @lo was about to bind another "struct loop_device", false otherwise > + */ > +static void loop_global_unlock(struct loop_device *lo, bool global) > +{ > + mutex_unlock(&lo->lo_mutex); > + if (global) > + mutex_unlock(&loop_validate_mutex); > +} > > static int max_part; > static int part_shift; > @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) > while (is_loop_device(f)) { > struct loop_device *l; > > + lockdep_assert_held(&loop_validate_mutex); > if (f->f_mapping->host->i_rdev == bdev->bd_dev) > return -EBADF; > > l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; > - if (l->lo_state != Lo_bound) { > + if (l->lo_state != Lo_bound) > return -EINVAL; > - } > + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ > + rmb(); > f = l->lo_backing_file; > } > if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) > @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) > static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, > unsigned int arg) > { > - struct file *file = NULL, *old_file; > - int error; > - bool partscan; > + struct file *file = fget(arg); > + struct file *old_file; > + int error; > + bool partscan; > + bool is_loop; > > - error = mutex_lock_killable(&lo->lo_mutex); > + if (!file) > + return -EBADF; > + is_loop = is_loop_device(file); > + error = loop_global_lock_killable(lo, is_loop); > if (error) > - return error; > + goto out_putf; > error = -ENXIO; > if (lo->lo_state != Lo_bound) > goto out_err; > @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, > if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) > goto out_err; > > - error = -EBADF; > - file = fget(arg); > - if (!file) > - goto out_err; > - > error = loop_validate_file(file, bdev); > if (error) > goto out_err; > @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, > loop_update_dio(lo); > blk_mq_unfreeze_queue(lo->lo_queue); > partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; > - mutex_unlock(&lo->lo_mutex); > + loop_global_unlock(lo, is_loop); > + > + /* > + * Flush loop_validate_file() before fput(), for l->lo_backing_file > + * might be pointing at old_file which might be the last reference. > + */ > + if (!is_loop) { > + mutex_lock(&loop_validate_mutex); > + mutex_unlock(&loop_validate_mutex); > + } > /* > * We must drop file reference outside of lo_mutex as dropping > * the file ref can take open_mutex which creates circular locking > @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, > return 0; > > out_err: > - mutex_unlock(&lo->lo_mutex); > - if (file) > - fput(file); > + loop_global_unlock(lo, is_loop); > +out_putf: > + fput(file); > return error; > } > > @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, > struct block_device *bdev, > const struct loop_config *config) > { > - struct file *file; > - struct inode *inode; > + struct file *file = fget(config->fd); > + struct inode *inode; > struct address_space *mapping; > - int error; > - loff_t size; > - bool partscan; > - unsigned short bsize; > + int error; > + loff_t size; > + bool partscan; > + unsigned short bsize; > + bool is_loop; > + > + if (!file) > + return -EBADF; > + is_loop = is_loop_device(file); > > /* This is safe, since we have a reference from open(). */ > __module_get(THIS_MODULE); > > - error = -EBADF; > - file = fget(config->fd); > - if (!file) > - goto out; > - > /* > * If we don't hold exclusive handle for the device, upgrade to it > * here to avoid changing device under exclusive owner. > @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, > goto out_putf; > } > > - error = mutex_lock_killable(&lo->lo_mutex); > + error = loop_global_lock_killable(lo, is_loop); > if (error) > goto out_bdev; > > @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, > size = get_loop_size(lo, file); > loop_set_size(lo, size); > > + /* Order wrt reading lo_state in loop_validate_file(). */ > + wmb(); > + > lo->lo_state = Lo_bound; > if (part_shift) > lo->lo_flags |= LO_FLAGS_PARTSCAN; > @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, > * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). > */ > bdgrab(bdev); > - mutex_unlock(&lo->lo_mutex); > + loop_global_unlock(lo, is_loop); > if (partscan) > loop_reread_partitions(lo); > if (!(mode & FMODE_EXCL)) > @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, > return 0; > > out_unlock: > - mutex_unlock(&lo->lo_mutex); > + loop_global_unlock(lo, is_loop); > out_bdev: > if (!(mode & FMODE_EXCL)) > bd_abort_claiming(bdev, loop_configure); > out_putf: > fput(file); > -out: > /* This is safe: open() is still holding a reference. */ > module_put(THIS_MODULE); > return error; > @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) > int lo_number; > struct loop_worker *pos, *worker; > > + /* > + * Flush loop_configure() and loop_change_fd(). It is acceptable for > + * loop_validate_file() to succeed, for actual clear operation has not > + * started yet. > + */ > + mutex_lock(&loop_validate_mutex); > + mutex_unlock(&loop_validate_mutex); > + /* > + * loop_validate_file() now fails because l->lo_state != Lo_bound > + * became visible. > + */ > + > mutex_lock(&lo->lo_mutex); > if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { > err = -ENXIO; >
On 7/6/21 8:40 AM, Tetsuo Handa wrote: > Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per > device lock") re-opened a race window for NULL pointer dereference at > loop_validate_file() where commit 310ca162d779efee ("block/loop: Use > global lock for ioctl() operation.") has closed. > > Although we need to guarantee that other loop devices will not change > during traversal, we can't take remote "struct loop_device"->lo_mutex > inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore, > introduce a global lock dedicated for loop_validate_file() which is > conditionally taken before local "struct loop_device"->lo_mutex is taken. I'll queue this up for next weeks merging. Christoph, are you happy with it at this point? Can't say it's a thing of beauty, but the problem does seem real.
On Fri, Jul 23, 2021 at 10:19:39AM -0600, Jens Axboe wrote: > I'll queue this up for next weeks merging. Christoph, are you happy with > it at this point? Can't say it's a thing of beauty, but the problem does > seem real. Exactly my feelings.
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cc0e8c39a48b..2a5b3d365250 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO;
Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") re-opened a race window for NULL pointer dereference at loop_validate_file() where commit 310ca162d779efee ("block/loop: Use global lock for ioctl() operation.") has closed. Although we need to guarantee that other loop devices will not change during traversal, we can't take remote "struct loop_device"->lo_mutex inside loop_validate_file() in order to avoid AB-BA deadlock. Therefore, introduce a global lock dedicated for loop_validate_file() which is conditionally taken before local "struct loop_device"->lo_mutex is taken. Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") --- Changes in v3: Add memory barrier between loop_configure() and loop_validate_file(). Flush at loop_change_fd() to avoid possible use-after-free. Changes in v2: Minimize lock duration in __loop_clr_fd(). Add kerneldoc to lock/unlock helper functions. Reformat local variables declaration. drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 31 deletions(-)