@@ -1961,7 +1961,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
static struct rbd_img_request *rbd_img_request_create(
struct rbd_device *rbd_dev,
u64 offset, u64 length,
- bool write_request)
+ bool write_request,
+ struct ceph_snap_context *snapc)
{
struct rbd_img_request *img_request;
@@ -1969,12 +1970,6 @@ static struct rbd_img_request *rbd_img_request_create(
if (!img_request)
return NULL;
- if (write_request) {
- down_read(&rbd_dev->header_rwsem);
- ceph_get_snap_context(rbd_dev->header.snapc);
- up_read(&rbd_dev->header_rwsem);
- }
-
img_request->rq = NULL;
img_request->rbd_dev = rbd_dev;
img_request->offset = offset;
@@ -1982,7 +1977,7 @@ static struct rbd_img_request *rbd_img_request_create(
img_request->flags = 0;
if (write_request) {
img_request_write_set(img_request);
- img_request->snapc = rbd_dev->header.snapc;
+ img_request->snapc = snapc;
} else {
img_request->snap_id = rbd_dev->spec->snap_id;
}
@@ -2038,8 +2033,8 @@ static struct rbd_img_request *rbd_parent_request_create(
rbd_assert(obj_request->img_request);
rbd_dev = obj_request->img_request->rbd_dev;
- parent_request = rbd_img_request_create(rbd_dev->parent,
- img_offset, length, false);
+ parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
+ length, false, NULL);
if (!parent_request)
return NULL;
@@ -3065,8 +3060,10 @@ static void rbd_request_fn(struct request_queue *q)
while ((rq = blk_fetch_request(q))) {
bool write_request = rq_data_dir(rq) == WRITE;
struct rbd_img_request *img_request;
+ struct ceph_snap_context *snapc = NULL;
u64 offset;
u64 length;
+ u64 mapping_size;
/* Ignore any non-FS requests that filter through. */
@@ -3120,8 +3117,16 @@ static void rbd_request_fn(struct request_queue *q)
goto end_request; /* Shouldn't happen */
}
+ down_read(&rbd_dev->header_rwsem);
+ mapping_size = rbd_dev->mapping.size;
+ if (op_type != OBJ_OP_READ) {
+ snapc = rbd_dev->header.snapc;
+ ceph_get_snap_context(snapc);
+ }
+ up_read(&rbd_dev->header_rwsem);
+
result = -EIO;
- if (offset + length > rbd_dev->mapping.size) {
+ if (offset + length > mapping_size) {
rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
offset, length, rbd_dev->mapping.size);
goto end_request;
@@ -3129,7 +3134,7 @@ static void rbd_request_fn(struct request_queue *q)
result = -ENOMEM;
img_request = rbd_img_request_create(rbd_dev, offset, length,
- write_request);
+ write_request, snapc);
if (!img_request)
goto end_request;
@@ -3147,6 +3152,8 @@ end_request:
rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
write_request ? "write" : "read",
length, offset, result);
+ if (snapc)
+ ceph_put_snap_context(snapc);
__blk_end_request_all(rq, result);
}
These fields may both change while the image is mapped if a snapshot is created or deleted or the image is resized. They are guarded by rbd_dev->header_rwsem, so hold that while reading them, and store a local copy to refer to outside of the critical section. The local copy will stay consistent since the snapshot context is reference counted, and the mapping size is just a u64. This prevents torn loads from giving us inconsistent values. Move reading header.snapc into the caller of rbd_img_request_create() so that we only need to take the semaphore once. The read-only caller, rbd_parent_request_create() can just pass NULL for snapc, since the snapshot context is only relevant for writes. Signed-off-by: Josh Durgin <josh.durgin@inktank.com> --- drivers/block/rbd.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-)