Message ID | 20200130114258.8482-1-hare@suse.de (mailing list archive)
---|---
State | New, archived
Series | rbd: lock object request list
On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
> The object request list can be accessed from various contexts
> so we need to lock it to avoid concurrent modifications and
> random crashes.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
>
> [...]

Looks good to me. Just wonder how we escaped this for so long.

Reviewed-by: Laurence Oberman <loberman@redhat.com>
On Thu, Jan 30, 2020 at 12:43 PM Hannes Reinecke <hare@suse.de> wrote:
>
> The object request list can be accessed from various contexts
> so we need to lock it to avoid concurrent modifications and
> random crashes.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
>
> [...]

Hi Hannes,

I'm afraid I don't immediately see the issue and the commit message is
very light on details.  Can you elaborate on what concurrent
modifications are possible?  An example of a crash?

Thanks,

                Ilya
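For context on the question above: the race the commit message alludes to is the classic one of a linked list being walked in one context while entries are removed in another; without serialization the walker can follow a stale ->next pointer into freed memory. What follows is a minimal, self-contained userspace sketch of the pattern the patch adopts, a mutex guarding every traversal and removal. It is an analogue with invented names (node, walker, deleter), not rbd or kernel code, and it compiles with "gcc -pthread".

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *prev, *next;
        int id;
};

/* Empty circular list, mirroring the kernel's list_head idiom. */
static struct node head = { &head, &head, -1 };
static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;

static void list_add_tail(struct node *n)
{
        n->prev = head.prev;
        n->next = &head;
        head.prev->next = n;
        head.prev = n;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

/* Walks the list; without list_mutex it could chase a freed ->next. */
static void *walker(void *arg)
{
        struct node *n;

        (void)arg;
        pthread_mutex_lock(&list_mutex);
        for (n = head.next; n != &head; n = n->next)
                printf("visiting node %d\n", n->id);
        pthread_mutex_unlock(&list_mutex);
        return NULL;
}

/* Tears the list down, as a destroy path would. */
static void *deleter(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&list_mutex);
        while (head.next != &head) {
                struct node *n = head.next;

                list_del(n);
                free(n);
        }
        pthread_mutex_unlock(&list_mutex);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;
        int i;

        for (i = 0; i < 16; i++) {
                struct node *n = malloc(sizeof(*n));

                if (!n)
                        abort();
                n->id = i;
                list_add_tail(n);
        }
        pthread_create(&t1, NULL, walker, NULL);
        pthread_create(&t2, NULL, deleter, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}

Whether the two contexts can actually overlap like this in rbd is exactly what the thread goes on to debate.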
On 1/30/20 3:26 PM, Laurence Oberman wrote:
> On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
>> The object request list can be accessed from various contexts
>> so we need to lock it to avoid concurrent modifications and
>> random crashes.
>>
>> [...]
>
> Looks good to me. Just wonder how we escaped this for so long.
>
> Reviewed-by: Laurence Oberman <loberman@redhat.com>
>
The whole state machine is utterly fragile.
I'll be posting a patchset to clean stuff up somewhat, but it's still
a beast.
I'm rather surprised that it doesn't break more often ...

Cheers,

Hannes
On Thu, Jan 30, 2020 at 4:39 PM Hannes Reinecke <hare@suse.de> wrote:
>
> On 1/30/20 3:26 PM, Laurence Oberman wrote:
> > On Thu, 2020-01-30 at 12:42 +0100, Hannes Reinecke wrote:
> > > The object request list can be accessed from various contexts
> > > so we need to lock it to avoid concurrent modifications and
> > > random crashes.
> > >
> > > [...]
> >
> > Looks good to me. Just wonder how we escaped this for so long.
> >
> > Reviewed-by: Laurence Oberman <loberman@redhat.com>
> >
> The whole state machine is utterly fragile.
> I'll be posting a patchset to clean stuff up somewhat,
> but it's still a beast.

What do you want me to do about this patch then?

> I'm rather surprised that it doesn't break more often ...

If you or Laurence saw it break, I would appreciate the details.

Thanks,

                Ilya
On Fri, 2020-01-31 at 10:50 +0100, Ilya Dryomov wrote:
> On Thu, Jan 30, 2020 at 4:39 PM Hannes Reinecke <hare@suse.de> wrote:
> >
> > [...]
> >
> > The whole state machine is utterly fragile.
> > I'll be posting a patchset to clean stuff up somewhat,
> > but it's still a beast.
>
> What do you want me to do about this patch then?
>
> > I'm rather surprised that it doesn't break more often ...
>
> If you or Laurence saw it break, I would appreciate the details.
>
> Thanks,
>
>                 Ilya

That is what I mentioned when I reviewed it. While I understand
Hannes's patch and it looked right to me, here in support I have not
seen any reported cases of panics, so I was wondering how it escaped
me so far.

Regards,
Laurence
The object request list can be accessed from various contexts
so we need to lock it to avoid concurrent modifications and
random crashes.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/rbd.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5710b2a8609c..ddc170661607 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -344,6 +344,7 @@ struct rbd_img_request {
 
         struct list_head        lock_item;
         struct list_head        object_extents; /* obj_req.ex structs */
+        struct mutex            object_mutex;
 
         struct mutex            state_mutex;
         struct pending_result   pending;
@@ -1664,6 +1665,7 @@ static struct rbd_img_request *rbd_img_request_create(
         INIT_LIST_HEAD(&img_request->lock_item);
         INIT_LIST_HEAD(&img_request->object_extents);
         mutex_init(&img_request->state_mutex);
+        mutex_init(&img_request->object_mutex);
         kref_init(&img_request->kref);
 
         return img_request;
@@ -1680,8 +1682,10 @@ static void rbd_img_request_destroy(struct kref *kref)
         dout("%s: img %p\n", __func__, img_request);
 
         WARN_ON(!list_empty(&img_request->lock_item));
+        mutex_lock(&img_request->object_mutex);
         for_each_obj_request_safe(img_request, obj_request, next_obj_request)
                 rbd_img_obj_request_del(img_request, obj_request);
+        mutex_unlock(&img_request->object_mutex);
 
         if (img_request_layered_test(img_request)) {
                 img_request_layered_clear(img_request);
@@ -2486,6 +2490,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
         struct rbd_obj_request *obj_req, *next_obj_req;
         int ret;
 
+        mutex_lock(&img_req->object_mutex);
         for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
                 switch (img_req->op_type) {
                 case OBJ_OP_READ:
@@ -2510,7 +2515,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
                         continue;
                 }
         }
-
+        mutex_unlock(&img_req->object_mutex);
         img_req->state = RBD_IMG_START;
         return 0;
 }
@@ -2569,6 +2574,7 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
          * position in the provided bio (list) or bio_vec array.
          */
         fctx->iter = *fctx->pos;
+        mutex_lock(&img_req->object_mutex);
         for (i = 0; i < num_img_extents; i++) {
                 ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
                                            img_extents[i].fe_off,
@@ -2576,10 +2582,12 @@ static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
                                            &img_req->object_extents,
                                            alloc_object_extent, img_req,
                                            fctx->set_pos_fn, &fctx->iter);
-                if (ret)
+                if (ret) {
+                        mutex_unlock(&img_req->object_mutex);
                         return ret;
+                }
         }
-
+        mutex_unlock(&img_req->object_mutex);
         return __rbd_img_fill_request(img_req);
 }
 
@@ -2620,6 +2628,7 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
          * or bio_vec array because when mapped, those bio_vecs can straddle
          * stripe unit boundaries.
          */
+        mutex_lock(&img_req->object_mutex);
         fctx->iter = *fctx->pos;
         for (i = 0; i < num_img_extents; i++) {
                 ret = ceph_file_to_extents(&rbd_dev->layout,
@@ -2629,15 +2638,17 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
                                            alloc_object_extent, img_req,
                                            fctx->count_fn, &fctx->iter);
                 if (ret)
-                        return ret;
+                        goto out_unlock;
         }
 
         for_each_obj_request(img_req, obj_req) {
                 obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
                                               sizeof(*obj_req->bvec_pos.bvecs),
                                               GFP_NOIO);
-                if (!obj_req->bvec_pos.bvecs)
-                        return -ENOMEM;
+                if (!obj_req->bvec_pos.bvecs) {
+                        ret = -ENOMEM;
+                        goto out_unlock;
+                }
         }
 
         /*
@@ -2652,10 +2663,14 @@ static int rbd_img_fill_request(struct rbd_img_request *img_req,
                                            &img_req->object_extents,
                                            fctx->copy_fn, &fctx->iter);
                 if (ret)
-                        return ret;
+                        goto out_unlock;
         }
+        mutex_unlock(&img_req->object_mutex);
 
         return __rbd_img_fill_request(img_req);
+out_unlock:
+        mutex_unlock(&img_req->object_mutex);
+        return ret;
 }
 
 static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
@@ -3552,6 +3567,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
 
         rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
 
+        mutex_lock(&img_req->object_mutex);
         for_each_obj_request(img_req, obj_req) {
                 int result = 0;
 
@@ -3564,6 +3580,7 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
                         img_req->pending.num_pending++;
                 }
         }
+        mutex_unlock(&img_req->object_mutex);
 }
 
 static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
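A side note on the error handling in rbd_img_fill_request() above: the patch funnels every failure through a single out_unlock label so that no return path can leave object_mutex held. Below is a minimal sketch of that single-exit unlock idiom, using a userspace mutex and invented names (struct request, fill_request, step_one, step_two); it is an illustration of the pattern, not the actual rbd code.

#include <pthread.h>

struct request {
        pthread_mutex_t object_mutex;
        /* ... object list, extent state, etc. ... */
};

/* Stand-in steps that may fail; both invented for the sketch. */
static int step_one(struct request *req) { (void)req; return 0; }
static int step_two(struct request *req) { (void)req; return 0; }

static int fill_request(struct request *req)
{
        int ret;

        pthread_mutex_lock(&req->object_mutex);

        ret = step_one(req);
        if (ret)
                goto out_unlock;        /* never "return" with the lock held */

        ret = step_two(req);
        if (ret)
                goto out_unlock;

        pthread_mutex_unlock(&req->object_mutex);
        return 0;                       /* success path drops the lock first */

out_unlock:
        pthread_mutex_unlock(&req->object_mutex);
        return ret;
}

int main(void)
{
        struct request req;

        pthread_mutex_init(&req.object_mutex, NULL);
        return fill_request(&req);
}

The payoff is that adding a new failure point later only requires another "goto out_unlock", rather than remembering to unlock at every early return.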