@@ -386,6 +386,9 @@ struct rbd_device {
struct list_head node;
+ atomic_t inflight_ios;
+ struct completion inflight_wait;
+
/* sysfs related */
struct device dev;
unsigned long open_count; /* protected by lock */
@@ -1654,6 +1657,7 @@ static struct rbd_img_request *rbd_img_request_create(
spin_lock_init(&img_request->completion_lock);
INIT_LIST_HEAD(&img_request->object_extents);
kref_init(&img_request->kref);
+ atomic_inc(&rbd_dev->inflight_ios);
dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
obj_op_name(op_type), img_request);
@@ -1670,6 +1674,8 @@ static void rbd_img_request_destroy(struct kref *kref)
dout("%s: img %p\n", __func__, img_request);
+ atomic_dec(&img_request->rbd_dev->inflight_ios);
+ complete_all(&img_request->rbd_dev->inflight_wait);
for_each_obj_request_safe(img_request, obj_request, next_obj_request)
rbd_img_obj_request_del(img_request, obj_request);
rbd_assert(img_request->obj_request_count == 0);
@@ -3070,26 +3076,39 @@ static void rbd_acquire_lock(struct work_struct *work)
}
}
+/*
+ * Wait until no image requests are in flight on @rbd_dev.
+ *
+ * Sleeps interruptibly until rbd_dev->inflight_ios reads zero.
+ *
+ * Returns 0 once the counter is zero, or the negative error code from
+ * the interruptible wait if it was interrupted by a signal first.
+ */
+static int rbd_inflight_wait(struct rbd_device *rbd_dev)
+{
+	long left;
+
+	while (atomic_read(&rbd_dev->inflight_ios)) {
+		/*
+		 * inflight_wait is signalled with complete_all(), which
+		 * leaves the completion permanently done.  Re-arm it before
+		 * every sleep; otherwise each wait after the first signal
+		 * returns immediately and this loop busy-spins.
+		 */
+		reinit_completion(&rbd_dev->inflight_wait);
+
+		/* The last request may have finished just before the re-arm. */
+		if (!atomic_read(&rbd_dev->inflight_ios))
+			break;
+
+		/*
+		 * Bounded sleep: a wakeup that raced with the re-arm above
+		 * costs at most one timeout period before the counter is
+		 * rechecked, instead of hanging forever.
+		 */
+		left = wait_for_completion_interruptible_timeout(
+				&rbd_dev->inflight_wait, HZ);
+		if (left < 0)
+			return (int)left;
+	}
+
+	return 0;
+}
+
/*
* lock_rwsem must be held for write
*/
static bool rbd_release_lock(struct rbd_device *rbd_dev)
{
+ int ret = 0;
+
dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
rbd_dev->lock_state);
if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
return false;
rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
- downgrade_write(&rbd_dev->lock_rwsem);
- /*
- * Ensure that all in-flight IO is flushed.
- *
- * FIXME: ceph_osdc_sync() flushes the entire OSD client, which
- * may be shared with other devices.
- */
- ceph_osdc_sync(&rbd_dev->rbd_client->client->osdc);
- up_read(&rbd_dev->lock_rwsem);
+ up_write(&rbd_dev->lock_rwsem);
+
+ ret = rbd_inflight_wait(rbd_dev);
+ if (ret) {
+ down_write(&rbd_dev->lock_rwsem);
+ return false;
+ }
down_write(&rbd_dev->lock_rwsem);
dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
@@ -4387,6 +4406,8 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
INIT_LIST_HEAD(&rbd_dev->node);
init_rwsem(&rbd_dev->header_rwsem);
+ atomic_set(&rbd_dev->inflight_ios, 0);
+ init_completion(&rbd_dev->inflight_wait);
rbd_dev->header.data_pool_id = CEPH_NOPOOL;
ceph_oid_init(&rbd_dev->header_oid);
rbd_dev->header_oloc.pool = spec->pool_id;
Currently, we only sync all osdc requests when releasing the lock, but if we are going to support journaling, we need to wait for all img_requests to complete, not only the low-level requests in osd_client. Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn> --- drivers/block/rbd.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-)