@@ -5832,6 +5832,190 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
return ret;
}
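+
+/*
+ * Journal event types.  The numeric values follow the on-disk encoding
+ * used by librbd's image journaling feature.
+ */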
+enum rbd_journal_event_type {
+	EVENT_TYPE_AIO_DISCARD = 0,
+	EVENT_TYPE_AIO_WRITE = 1,
+	EVENT_TYPE_AIO_FLUSH = 2,
+	EVENT_TYPE_OP_FINISH = 3,
+	EVENT_TYPE_SNAP_CREATE = 4,
+	EVENT_TYPE_SNAP_REMOVE = 5,
+	EVENT_TYPE_SNAP_RENAME = 6,
+	EVENT_TYPE_SNAP_PROTECT = 7,
+	EVENT_TYPE_SNAP_UNPROTECT = 8,
+	EVENT_TYPE_SNAP_ROLLBACK = 9,
+	EVENT_TYPE_RENAME = 10,
+	EVENT_TYPE_RESIZE = 11,
+	EVENT_TYPE_FLATTEN = 12,
+	EVENT_TYPE_DEMOTE_PROMOTE = 13,
+	EVENT_TYPE_SNAP_LIMIT = 14,
+	EVENT_TYPE_UPDATE_FEATURES = 15,
+	EVENT_TYPE_METADATA_SET = 16,
+	EVENT_TYPE_METADATA_REMOVE = 17,
+	EVENT_TYPE_AIO_WRITESAME = 18,
+	EVENT_TYPE_AIO_COMPARE_AND_WRITE = 19,
+};
+
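+/*
+ * Copy @length bytes from @buf into a newly allocated array of bio_vecs,
+ * one page per entry, preserving the in-page offset of @offset in the
+ * first entry.  Returns NULL on allocation failure.
+ */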
+static struct bio_vec *setup_write_bvecs(void *buf, u64 offset, u64 length)
+{
+	u32 bvec_count = calc_pages_for(offset, length);
+	struct bio_vec *bvecs;
+	u32 i;
+
+	bvecs = kcalloc(bvec_count, sizeof(*bvecs), GFP_NOIO);
+	if (!bvecs)
+		return NULL;
+
+	offset %= PAGE_SIZE;
+	for (i = 0; i < bvec_count; i++) {
+		unsigned int len = min(length, (u64)PAGE_SIZE - offset);
+
+		bvecs[i].bv_page = alloc_page(GFP_NOIO);
+		if (!bvecs[i].bv_page)
+			goto free_bvecs;
+
+		bvecs[i].bv_offset = offset;
+		bvecs[i].bv_len = len;
+		memcpy(page_address(bvecs[i].bv_page) + bvecs[i].bv_offset,
+		       buf, bvecs[i].bv_len);
+		length -= len;
+		buf += len;
+		offset = 0;
+	}
+
+	rbd_assert(!length);
+
+	return bvecs;
+
+free_bvecs:
+	/* free any pages already allocated before the failure */
+	while (i--)
+		__free_page(bvecs[i].bv_page);
+	kfree(bvecs);
+	return NULL;
+}
+
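+/*
+ * Replay an AIO_DISCARD journal event: decode the offset/length pair and
+ * synchronously submit a discard img_request for that range.
+ */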
+static int rbd_journal_handle_aio_discard(struct rbd_device *rbd_dev, void **p,
+					   void *end, u8 struct_v,
+					   uint64_t commit_tid)
+{
+	uint64_t offset;
+	uint64_t length;
+	int result = 0;
+	enum obj_operation_type op_type;
+	struct rbd_img_request *img_request;
+	struct ceph_snap_context *snapc = NULL;
+
+	offset = ceph_decode_64(p);
+	length = ceph_decode_64(p);
+
+	snapc = rbd_dev->header.snapc;
+	ceph_get_snap_context(snapc);
+	op_type = OBJ_OP_DISCARD;
+
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+	if (!img_request) {
+		result = -ENOMEM;
+		goto err;
+	}
+	img_request->journaler_commit_tid = commit_tid;
+	snapc = NULL;	/* img_request consumes a ref */
+
+	result = rbd_img_fill_nodata(img_request, offset, length);
+	if (result)
+		goto err;
+
+	rbd_img_request_submit(img_request);
+	result = wait_for_completion_interruptible(&img_request->completion);
+err:
+	ceph_put_snap_context(snapc);
+	return result;
+}
+
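+/*
+ * Replay an AIO_WRITE journal event: decode offset, length and the inline
+ * data payload, copy the payload into bio_vecs and synchronously submit a
+ * write img_request for that range.
+ */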
+static int rbd_journal_handle_aio_write(struct rbd_device *rbd_dev, void **p,
+					 void *end, u8 struct_v,
+					 uint64_t commit_tid)
+{
+	uint64_t offset;
+	uint64_t length;
+	char *data;
+	ssize_t data_len;
+	int result = 0;
+	enum obj_operation_type op_type;
+	struct ceph_snap_context *snapc = NULL;
+	struct rbd_img_request *img_request;
+	struct ceph_file_extent ex;
+	struct bio_vec *bvecs = NULL;
+
+	offset = ceph_decode_64(p);
+	length = ceph_decode_64(p);
+
+	data_len = ceph_decode_32(p);
+	if (!ceph_has_room(p, end, data_len)) {
+		rbd_warn(rbd_dev, "aio_write data out of range");
+		return -ERANGE;
+	}
+
+	data = *p;
+	*p = (char *)*p + data_len;
+
+	snapc = rbd_dev->header.snapc;
+	ceph_get_snap_context(snapc);
+	op_type = OBJ_OP_WRITE;
+
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
+	if (!img_request) {
+		result = -ENOMEM;
+		goto err;
+	}
+	img_request->journaler_commit_tid = commit_tid;
+	snapc = NULL;	/* img_request consumes a ref */
+
+	ex.fe_off = offset;
+	ex.fe_len = length;
+
+	bvecs = setup_write_bvecs(data, offset, length);
+	if (!bvecs) {
+		rbd_warn(rbd_dev, "failed to allocate bvecs");
+		result = -ENOMEM;
+		goto err;
+	}
+
+	result = rbd_img_fill_from_bvecs(img_request, &ex, 1, bvecs);
+	if (result)
+		goto err;
+
+	rbd_img_request_submit(img_request);
+	result = wait_for_completion_interruptible(&img_request->completion);
+err:
+	ceph_put_snap_context(snapc);
+	kfree(bvecs);
+	return result;
+}
+
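+/*
+ * Replay a single journal entry: decode the event type and dispatch to the
+ * matching handler.  Only AIO_WRITE, AIO_DISCARD and AIO_FLUSH are handled
+ * so far.
+ */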
+static int rbd_journal_replay(void *entry_handler,
+			      struct ceph_journaler_entry *entry,
+			      uint64_t commit_tid)
+{
+	struct rbd_device *rbd_dev = entry_handler;
+	void *data = entry->data;
+	void **p = &data;
+	void *end = *p + entry->data_len;
+	uint32_t event_type;
+	u8 struct_v;
+	u32 struct_len;
+	int ret = 0;
+
+	ret = ceph_start_decoding(p, end, 1, "rbd_decode_entry",
+				  &struct_v, &struct_len);
+	if (ret)
+		return ret;
+
+	event_type = ceph_decode_32(p);
+
+	switch (event_type) {
+	case EVENT_TYPE_AIO_WRITE:
+		ret = rbd_journal_handle_aio_write(rbd_dev, p, end, struct_v,
+						   commit_tid);
+		break;
+	case EVENT_TYPE_AIO_DISCARD:
+		ret = rbd_journal_handle_aio_discard(rbd_dev, p, end, struct_v,
+						     commit_tid);
+		break;
+	case EVENT_TYPE_AIO_FLUSH:
+		/*
+		 * Events are replayed synchronously, so everything before
+		 * this flush has already completed; nothing to do.
+		 */
+		break;
+	default:
+		rbd_warn(rbd_dev, "unknown event_type: %u", event_type);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
static int rbd_journal_allocate_tag(struct rbd_journal *journal);
static int rbd_journal_open(struct rbd_journal *journal)
{
When we find uncommitted events in the journal, we need to replay them.
This commit only implements the replaying of three kinds of events:

EVENT_TYPE_AIO_DISCARD: send an img_request to the image with
OBJ_OP_DISCARD and wait for it to complete.

EVENT_TYPE_AIO_WRITE: send an img_request to the image with OBJ_OP_WRITE
and wait for it to complete.

EVENT_TYPE_AIO_FLUSH: as all other events are replayed synchronously,
every event before the flush has already been flushed, so nothing needs
to be done for this event.

Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
---
 drivers/block/rbd.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
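For reference, this is the entry layout the decoder above expects for an
EVENT_TYPE_AIO_WRITE event, reconstructed from the decode calls; the field
names below are illustrative only and are not part of the patch:

	<versioned header>	/* consumed by ceph_start_decoding() */
	__le32 event_type;	/* EVENT_TYPE_AIO_WRITE == 1 */
	__le64 offset;		/* image offset of the write */
	__le64 length;		/* number of bytes written */
	__le32 data_len;	/* length of the inline payload */
	u8     data[];		/* payload, copied into bio_vecs for replay */

An AIO_DISCARD event carries only the offset/length pair after the header,
and an AIO_FLUSH event carries no additional fields that the replay code
consumes.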