From patchwork Mon Jan 27 16:43:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Luis Henriques X-Patchwork-Id: 11352895 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C2C731398 for ; Mon, 27 Jan 2020 16:43:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id AA759214D8 for ; Mon, 27 Jan 2020 16:43:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726442AbgA0QnW (ORCPT ); Mon, 27 Jan 2020 11:43:22 -0500 Received: from mx2.suse.de ([195.135.220.15]:40646 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726036AbgA0QnV (ORCPT ); Mon, 27 Jan 2020 11:43:21 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id DF117B135; Mon, 27 Jan 2020 16:43:18 +0000 (UTC) From: Luis Henriques To: Jeff Layton , Sage Weil , Ilya Dryomov , "Yan, Zheng" , Gregory Farnum Cc: ceph-devel@vger.kernel.org, linux-kernel@vger.kernel.org, Luis Henriques Subject: [RFC PATCH 1/3] libceph: add non-blocking version of ceph_osdc_copy_from() Date: Mon, 27 Jan 2020 16:43:19 +0000 Message-Id: <20200127164321.17468-2-lhenriques@suse.com> In-Reply-To: <20200127164321.17468-1-lhenriques@suse.com> References: <20200127164321.17468-1-lhenriques@suse.com> MIME-Version: 1.0 Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org A non-blocking version of ceph_osdc_copy_from() will allow callers to send 'copy-from' requests in bulk and wait for all of them to complete at the end. 
Signed-off-by: Luis Henriques --- include/linux/ceph/osd_client.h | 12 ++++++++ net/ceph/osd_client.c | 54 +++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5a62dbd3f4c2..7916a178d137 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -537,6 +537,18 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, u8 copy_from_flags); +struct ceph_osd_request *ceph_osdc_copy_from_nowait( + struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags); + /* watch/notify */ struct ceph_osd_linger_request * ceph_osdc_watch(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b68b376d8c2f..7f984532f37c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5346,23 +5346,24 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, return 0; } -int ceph_osdc_copy_from(struct ceph_osd_client *osdc, - u64 src_snapid, u64 src_version, - struct ceph_object_id *src_oid, - struct ceph_object_locator *src_oloc, - u32 src_fadvise_flags, - struct ceph_object_id *dst_oid, - struct ceph_object_locator *dst_oloc, - u32 dst_fadvise_flags, - u32 truncate_seq, u64 truncate_size, - u8 copy_from_flags) +struct ceph_osd_request *ceph_osdc_copy_from_nowait( + struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags) { struct 
ceph_osd_request *req; int ret; req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); if (!req) - return -ENOMEM; + return ERR_PTR(-ENOMEM); req->r_flags = CEPH_OSD_FLAG_WRITE; @@ -5381,11 +5382,38 @@ int ceph_osdc_copy_from(struct ceph_osd_client *osdc, goto out; ceph_osdc_start_request(osdc, req, false); - ret = ceph_osdc_wait_request(osdc, req); + return req; out: ceph_osdc_put_request(req); - return ret; + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ceph_osdc_copy_from_nowait); + +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u32 truncate_seq, u64 truncate_size, + u8 copy_from_flags) +{ + struct ceph_osd_request *req; + + req = ceph_osdc_copy_from_nowait(osdc, + src_snapid, src_version, + src_oid, src_oloc, + src_fadvise_flags, + dst_oid, dst_oloc, + dst_fadvise_flags, + truncate_seq, truncate_size, + copy_from_flags); + if (IS_ERR(req)) + return PTR_ERR(req); + return ceph_osdc_wait_request(osdc, req); } EXPORT_SYMBOL(ceph_osdc_copy_from); From patchwork Mon Jan 27 16:43:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Luis Henriques X-Patchwork-Id: 11352897 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 216C81395 for ; Mon, 27 Jan 2020 16:43:33 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 093CF214D8 for ; Mon, 27 Jan 2020 16:43:33 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726772AbgA0Qnb (ORCPT ); Mon, 27 Jan 2020 11:43:31 -0500 Received: from mx2.suse.de ([195.135.220.15]:40684 "EHLO mx2.suse.de" 
rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726191AbgA0QnW (ORCPT ); Mon, 27 Jan 2020 11:43:22 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 94B9FB146; Mon, 27 Jan 2020 16:43:19 +0000 (UTC) From: Luis Henriques To: Jeff Layton , Sage Weil , Ilya Dryomov , "Yan, Zheng" , Gregory Farnum Cc: ceph-devel@vger.kernel.org, linux-kernel@vger.kernel.org, Luis Henriques Subject: [RFC PATCH 2/3] ceph: parallelize all copy-from requests in copy_file_range Date: Mon, 27 Jan 2020 16:43:20 +0000 Message-Id: <20200127164321.17468-3-lhenriques@suse.com> In-Reply-To: <20200127164321.17468-1-lhenriques@suse.com> References: <20200127164321.17468-1-lhenriques@suse.com> MIME-Version: 1.0 Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org Right now the copy_file_range syscall serializes all the OSDs 'copy-from' operations, waiting for each request to complete before sending the next one. This patch modifies copy_file_range so that all the 'copy-from' operations are sent in bulk, and it then waits for their completion at the end. This will allow significant speed-ups, especially when sending requests to different target OSDs. 
Signed-off-by: Luis Henriques --- fs/ceph/file.c | 38 +++++++++++++++++++++++++++++++-- include/linux/ceph/osd_client.h | 2 ++ net/ceph/osd_client.c | 1 + 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 1e6cdf2dfe90..5d8f0ba11719 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1931,6 +1931,28 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, return 0; } +static int wait_copy_from_reqs(struct list_head *osd_reqs) +{ + struct ceph_osd_request *req; + int ret = 0, err; + + while (!list_empty(osd_reqs)) { + req = list_first_entry(osd_reqs, + struct ceph_osd_request, + r_copy_item); + list_del_init(&req->r_copy_item); + err = ceph_osdc_wait_request(req->r_osdc, req); + if (err) { + if (!ret) + ret = err; + dout("copy request failed (err=%d)\n", err); + } + ceph_osdc_put_request(req); + } + + return ret; +} + static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags) @@ -1943,12 +1965,14 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode); struct ceph_object_locator src_oloc, dst_oloc; struct ceph_object_id src_oid, dst_oid; + struct ceph_osd_request *req; loff_t endoff = 0, size; ssize_t ret = -EIO; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; u32 src_objlen, dst_objlen, object_size; int src_got = 0, dst_got = 0, err, dirty; bool do_final_copy = false; + LIST_HEAD(osd_reqs); if (src_inode->i_sb != dst_inode->i_sb) { struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); @@ -2097,7 +2121,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ceph_oid_printf(&dst_oid, "%llx.%08llx", dst_ci->i_vino.ino, dst_objnum); /* Do an object remote copy */ - err = ceph_osdc_copy_from( + req = ceph_osdc_copy_from_nowait( &src_fsc->client->osdc, src_ci->i_vino.snap, 0, 
&src_oid, &src_oloc, @@ -2108,7 +2132,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, CEPH_OSD_OP_FLAG_FADVISE_DONTNEED, dst_ci->i_truncate_seq, dst_ci->i_truncate_size, CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ); - if (err) { + if (IS_ERR(req)) { + err = PTR_ERR(req); if (err == -EOPNOTSUPP) { src_fsc->have_copy_from2 = false; pr_notice("OSDs don't support 'copy-from2'; " @@ -2117,14 +2142,23 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, dout("ceph_osdc_copy_from returned %d\n", err); if (!ret) ret = err; + /* wait for all queued requests */ + wait_copy_from_reqs(&osd_reqs); goto out_caps; } + list_add(&req->r_copy_item, &osd_reqs); len -= object_size; src_off += object_size; dst_off += object_size; ret += object_size; } + err = wait_copy_from_reqs(&osd_reqs); + if (err) { + if (!ret) + ret = err; + goto out_caps; + } if (len) /* We still need one final local copy */ do_final_copy = true; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7916a178d137..2b4a14bc6154 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -210,6 +210,8 @@ struct ceph_osd_request { u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ + struct list_head r_copy_item; /* used for copy-from operations */ + /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7f984532f37c..16f38c3d606e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -531,6 +531,7 @@ static void request_init(struct ceph_osd_request *req) RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_mc_node); INIT_LIST_HEAD(&req->r_private_item); + INIT_LIST_HEAD(&req->r_copy_item); target_init(&req->r_t); } From patchwork Mon Jan 27 16:43:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: Luis Henriques X-Patchwork-Id: 11352899 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DE8B21395 for ; Mon, 27 Jan 2020 16:43:39 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id C795F214D8 for ; Mon, 27 Jan 2020 16:43:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726703AbgA0Qnb (ORCPT ); Mon, 27 Jan 2020 11:43:31 -0500 Received: from mx2.suse.de ([195.135.220.15]:40704 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726205AbgA0QnW (ORCPT ); Mon, 27 Jan 2020 11:43:22 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id 56CA2B15B; Mon, 27 Jan 2020 16:43:20 +0000 (UTC) From: Luis Henriques To: Jeff Layton , Sage Weil , Ilya Dryomov , "Yan, Zheng" , Gregory Farnum Cc: ceph-devel@vger.kernel.org, linux-kernel@vger.kernel.org, Luis Henriques Subject: [RFC PATCH 3/3] ceph: add module param to throttle 'copy-from2' operations Date: Mon, 27 Jan 2020 16:43:21 +0000 Message-Id: <20200127164321.17468-4-lhenriques@suse.com> In-Reply-To: <20200127164321.17468-1-lhenriques@suse.com> References: <20200127164321.17468-1-lhenriques@suse.com> MIME-Version: 1.0 Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org This patch adds a ceph kernel module parameter that allows throttling the number of parallel requests that can be sent to the OSDs before waiting for their completion. This prevents DoS'ing the OSDs with too many requests at once when copying a big file. 
Signed-off-by: Luis Henriques --- fs/ceph/file.c | 10 ++++++++++ fs/ceph/super.c | 4 ++++ fs/ceph/super.h | 2 ++ 3 files changed, 16 insertions(+) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5d8f0ba11719..bf18712f3bd3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1973,6 +1973,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, int src_got = 0, dst_got = 0, err, dirty; bool do_final_copy = false; LIST_HEAD(osd_reqs); + unsigned int ncopies = cfr_throttle; if (src_inode->i_sb != dst_inode->i_sb) { struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); @@ -2151,6 +2152,15 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, src_off += object_size; dst_off += object_size; ret += object_size; + if (cfr_throttle && (--ncopies == 0)) { + err = wait_copy_from_reqs(&osd_reqs); + if (err) { + if (!ret) + ret = err; + goto out_caps; + } + ncopies = cfr_throttle; + } } err = wait_copy_from_reqs(&osd_reqs); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b62c487a53af..02e8b6f93d50 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1238,6 +1238,10 @@ static void __exit exit_ceph(void) destroy_caches(); } +unsigned int cfr_throttle = 0; +module_param(cfr_throttle, uint, 0644); +MODULE_PARM_DESC(cfr_throttle, "copy_file_range throttle value."); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b2f86bed5c2c..fb98b4b1ec72 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -72,6 +72,8 @@ #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ +extern unsigned int cfr_throttle; + struct ceph_mount_options { unsigned int flags;