From patchwork Fri Jan 18 14:56:07 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Roman Penyaev X-Patchwork-Id: 10770573 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9952691E for ; Fri, 18 Jan 2019 14:56:18 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 870FC29EA8 for ; Fri, 18 Jan 2019 14:56:18 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 7AF012E983; Fri, 18 Jan 2019 14:56:18 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id F054A29EA8 for ; Fri, 18 Jan 2019 14:56:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727716AbfARO4Q (ORCPT ); Fri, 18 Jan 2019 09:56:16 -0500 Received: from mx2.suse.de ([195.135.220.15]:58962 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727622AbfARO4P (ORCPT ); Fri, 18 Jan 2019 09:56:15 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay1.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 8EB9AADC8; Fri, 18 Jan 2019 14:56:13 +0000 (UTC) From: Roman Penyaev Cc: David Disseldorp , Roman Penyaev , Ilya Dryomov , Sage Weil , Alex Elder , "Yan, Zheng" , ceph-devel@vger.kernel.org Subject: [RFC PATCH 2/2] libceph, rbd: respect REQ_NOUNMAP by setting new nounmap flag for CEPH_OSD_OP_ZERO Date: Fri, 18 Jan 2019 15:56:07 +0100 Message-Id: <20190118145607.30018-3-rpenyaev@suse.de> X-Mailer: 
git-send-email 2.19.1 In-Reply-To: <20190118145607.30018-1-rpenyaev@suse.de> References: <20190118145607.30018-1-rpenyaev@suse.de> MIME-Version: 1.0 To: unlisted-recipients:; (no To-header on input) Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This one introduces the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag for CEPH_OSD_OP_ZERO in order to mark zero requests not to do discards on the OSD side, but zero blocks instead. Old OSD versions simply ignore the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag and discard blocks as before. Signed-off-by: Roman Penyaev Cc: Ilya Dryomov Cc: Sage Weil Cc: Alex Elder Cc: "Yan, Zheng" Cc: ceph-devel@vger.kernel.org --- drivers/block/rbd.c | 42 ++++++++++++++++++++++++++++++++---------- include/linux/ceph/rados.h | 1 + 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f45490134880..6dceb2f2cf51 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -213,6 +213,7 @@ enum obj_request_type { enum obj_operation_type { OBJ_OP_READ = 1, OBJ_OP_WRITE, + OBJ_OP_WRITE_ZEROES, OBJ_OP_DISCARD, }; @@ -856,6 +857,8 @@ static char* obj_op_name(enum obj_operation_type op_type) return "read"; case OBJ_OP_WRITE: return "write"; + case OBJ_OP_WRITE_ZEROES: + return "write_zeroes"; case OBJ_OP_DISCARD: return "discard"; default: @@ -1422,6 +1425,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req) case OBJ_OP_READ: return false; case OBJ_OP_WRITE: + case OBJ_OP_WRITE_ZEROES: case OBJ_OP_DISCARD: return true; default: @@ -1846,13 +1850,14 @@ static int rbd_obj_setup_write(struct rbd_obj_request *obj_req) return 0; } -static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, - unsigned int which) +static void +__rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req, + unsigned int which, bool nounmap) { u32 flags = 0; u16 opcode; - if (rbd_obj_is_entire(obj_req)) { + if 
(!nounmap && rbd_obj_is_entire(obj_req)) { if (obj_req->num_img_extents) { osd_req_op_init(obj_req->osd_req, which++, CEPH_OSD_OP_CREATE, 0); @@ -1862,10 +1867,11 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, CEPH_OSD_OP_DELETE, 0); opcode = 0; } - } else if (rbd_obj_is_tail(obj_req)) { + } else if (!nounmap && rbd_obj_is_tail(obj_req)) { opcode = CEPH_OSD_OP_TRUNCATE; } else { opcode = CEPH_OSD_OP_ZERO; + flags = (nounmap ? CEPH_OSD_OP_FLAG_ZERO_NOUNMAP : 0); } if (opcode) @@ -1877,7 +1883,8 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, rbd_osd_req_format_write(obj_req); } -static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) +static int rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req, + bool nounmap) { unsigned int num_osd_ops, which = 0; int ret; @@ -1913,7 +1920,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) return ret; } - __rbd_obj_setup_discard(obj_req, which); + __rbd_obj_setup_discard_write_zeroes(obj_req, which, nounmap); return 0; } @@ -1925,6 +1932,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) static int __rbd_img_fill_request(struct rbd_img_request *img_req) { struct rbd_obj_request *obj_req; + bool nounmap = false; int ret; for_each_obj_request(img_req, obj_req) { @@ -1935,8 +1943,12 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req) case OBJ_OP_WRITE: ret = rbd_obj_setup_write(obj_req); break; + case OBJ_OP_WRITE_ZEROES: + nounmap = true; + /* fall through */ case OBJ_OP_DISCARD: - ret = rbd_obj_setup_discard(obj_req); + ret = rbd_obj_setup_discard_write_zeroes(obj_req, + nounmap); break; default: rbd_assert(0); @@ -2361,6 +2373,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) { unsigned int num_osd_ops = obj_req->osd_req->r_num_ops; + bool nounmap = false; int ret; dout("%s obj_req %p bytes %u\n", __func__, 
obj_req, bytes); @@ -2398,9 +2411,12 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) case OBJ_OP_WRITE: __rbd_obj_setup_write(obj_req, 1); break; + case OBJ_OP_WRITE_ZEROES: + nounmap = true; + /* fall through */ case OBJ_OP_DISCARD: rbd_assert(!rbd_obj_is_entire(obj_req)); - __rbd_obj_setup_discard(obj_req, 1); + __rbd_obj_setup_discard_write_zeroes(obj_req, 1, nounmap); break; default: rbd_assert(0); @@ -2529,6 +2545,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) return rbd_obj_handle_read(obj_req); case OBJ_OP_WRITE: return rbd_obj_handle_write(obj_req); + case OBJ_OP_WRITE_ZEROES: case OBJ_OP_DISCARD: if (rbd_obj_handle_write(obj_req)) { /* @@ -3641,8 +3658,13 @@ static void rbd_queue_workfn(struct work_struct *work) int result; switch (req_op(rq)) { - case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: + if (rq->cmd_flags & REQ_NOUNMAP) { + op_type = OBJ_OP_WRITE_ZEROES; + break; + } + /* fall through */ + case REQ_OP_DISCARD: op_type = OBJ_OP_DISCARD; break; case REQ_OP_WRITE: @@ -3724,7 +3746,7 @@ static void rbd_queue_workfn(struct work_struct *work) img_request->rq = rq; snapc = NULL; /* img_request consumes a ref */ - if (op_type == OBJ_OP_DISCARD) + if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE_ZEROES) result = rbd_img_fill_nodata(img_request, offset, length); else result = rbd_img_fill_from_bio(img_request, offset, length, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 3eb0e55665b4..e19fc5e541c3 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -418,6 +418,7 @@ enum { in the near future */ CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only once by this client */ + CEPH_OSD_OP_FLAG_ZERO_NOUNMAP = 0x200,/* do not discard on zeroing */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/