[35/42] lustre: ldlm: pool recalc forceful call

Message ID	1601942781-24950-36-git-send-email-jsimmons@infradead.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=bqMh=DN=lists.lustre.org=lustre-devel-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B24D8206F4 From: James Simmons <jsimmons@infradead.org> To: Andreas Dilger <adilger@whamcloud.com>, Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.com> Date: Mon, 5 Oct 2020 20:06:14 -0400 Message-Id: <1601942781-24950-36-git-send-email-jsimmons@infradead.org> In-Reply-To: <1601942781-24950-1-git-send-email-jsimmons@infradead.org> References: <1601942781-24950-1-git-send-email-jsimmons@infradead.org> Subject: [lustre-devel] [PATCH 35/42] lustre: ldlm: pool recalc forceful call Precedence: list Cc: Vitaly Fertman <c17818@cray.com>, Lustre Development List <lustre-devel@lists.lustre.org> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: lustre-devel-bounces@lists.lustre.org Sender: "lustre-devel" <lustre-devel-bounces@lists.lustre.org>
Series	lustre: OpenSFS backport for Oct 4 2020 \| expand [00/42] lustre: OpenSFS backport for Oct 4 2020 [01/42] lustre: ptlrpc: don't require CONFIG_CRYPTO_CRC32 [02/42] lustre: dom: lock cancel to drop pages [03/42] lustre: sec: use memchr_inv() to check if page is zero. [04/42] lustre: mdc: fix lovea for replay [05/42] lustre: llite: add test to check client deadlock selinux [06/42] lnet: use init_wait(), not init_waitqueue_entry() [07/42] lustre: lov: make various lov_object.c function static. [08/42] lustre: llite: return -ENODATA if no default layout [09/42] lnet: libcfs: don't save journal_info in dumplog thread. [10/42] lustre: ldlm: lru code cleanup [11/42] lustre: ldlm: cancel LRU improvement [12/42] lnet: Do not set preferred NI for MR peer [13/42] lustre: ptlrpc: prefer crc32_le() over CryptoAPI [14/42] lnet: call event handlers without res_lock [15/42] lnet: Conditionally attach rspt in LNetPut & LNetGet [16/42] lustre: llite: reuse same cl_dio_aio for one IO [17/42] lustre: llite: move iov iter forward by ourself [18/42] lustre: llite: report client stats sumsq [19/42] lnet: Support checking for MD leaks. [20/42] lnet: don't read debugfs lnet stats when shutting down [21/42] lnet: Loosen restrictions on LNet Health params [22/42] lnet: Fix reference leak in lnet_select_pathway [23/42] lustre: llite: prune invalid dentries [24/42] lnet: Do not overwrite destination when routing [25/42] lustre: lov: don't use inline for operations functions. [26/42] lustre: osc: don't allow negative grants [27/42] lustre: mgc: Use IR for client->MDS/OST connections [28/42] lustre: ldlm: don't use a locks without l_ast_data [29/42] lustre: lov: discard unused lov_dump_lmm* functions [30/42] lustre: lov: guard against class_exp2obd() returning NULL. 
[31/42] lustre: clio: don't call aio_complete() in lustre upon errors [32/42] lustre: llite: it_lock_bits should be bit-wise tested [33/42] lustre: ldlm: control lru_size for extent lock [34/42] lustre: ldlm: pool fixes [35/42] lustre: ldlm: pool recalc forceful call [36/42] lustre: don't take spinlock to read a 'long'. [37/42] lustre: osc: Do ELC on locks with no OSC object [38/42] lnet: deadlock on LNet shutdown [39/42] lustre: update version to 2.13.56 [40/42] lustre: llite: increase readahead default values [41/42] lustre: obdclass: don't initialize obj for zero FID [42/42] lustre: obdclass: fixes and improvements for jobid.

diff --git a/fs/lustre/include/lustre_dlm.h b/fs/lustre/include/lustre_dlm.h index bc6785f..f056c2d 100644 --- a/fs/lustre/include/lustre_dlm.h +++ b/fs/lustre/include/lustre_dlm.h @@ -66,6 +66,7 @@ #define LDLM_DIRTY_AGE_LIMIT (10) #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 #define LDLM_DEFAULT_LRU_SHRINK_BATCH (16) +#define LDLM_DEFAULT_SLV_RECALC_PCT (10) /** * LDLM non-error return states @@ -193,6 +194,19 @@ static inline int lockmode_compat(enum ldlm_mode exist_mode, * */ +/* Cancel lru flag, it indicates we cancel aged locks. */ +enum ldlm_lru_flags { + LDLM_LRU_FLAG_NO_WAIT = 0x1, /* Cancel locks w/o blocking (neither + * sending nor waiting for any RPCs) + */ + LDLM_LRU_FLAG_CLEANUP = 0x2, /* Used when clearing lru, tells + * prepare_lru_list to set discard flag + * on PR extent locks so we don't waste + * time saving pages that will be + * discarded momentarily + */ +}; + struct ldlm_pool; struct ldlm_lock; struct ldlm_resource; @@ -208,7 +222,7 @@ static inline int lockmode_compat(enum ldlm_mode exist_mode, */ struct ldlm_pool_ops { /** Recalculate pool @pl usage */ - int (*po_recalc)(struct ldlm_pool *pl); + int (*po_recalc)(struct ldlm_pool *pl, bool force); /** Cancel at least @nr locks from pool @pl */ int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask); @@ -430,6 +444,12 @@ struct ldlm_namespace { */ unsigned int ns_cancel_batch; + /** + * How much the SLV should decrease in %% to trigger LRU cancel + * urgently. + */ + unsigned int ns_recalc_pct; + /** Maximum allowed age (last used time) for locks in the LRU. Set in * seconds from userspace, but stored in ns to avoid repeat conversions. */ @@ -487,7 +507,13 @@ struct ldlm_namespace { * Flag to indicate namespace is being freed. Used to determine if * recalculation of LDLM pool statistics should be skipped. */ - unsigned ns_stopping:1; + unsigned int ns_stopping:1, + + /** + * Flag to indicate the LRU recalc on RPC reply is in progress. + * Used to limit the process by 1 thread only. 
+ */ + ns_rpc_recalc:1; struct kobject ns_kobj; /* sysfs object */ struct completion ns_kobj_unregister; @@ -1404,6 +1430,7 @@ static inline void check_res_locked(struct ldlm_resource *res) int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, int idx, enum ldlm_side client); void ldlm_pool_fini(struct ldlm_pool *pl); +timeout_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force); void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); /** @} */ diff --git a/fs/lustre/ldlm/ldlm_internal.h b/fs/lustre/ldlm/ldlm_internal.h index 788983f..9dc0561 100644 --- a/fs/lustre/ldlm/ldlm_internal.h +++ b/fs/lustre/ldlm/ldlm_internal.h @@ -86,19 +86,6 @@ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client); /* ldlm_request.c */ -/* Cancel lru flag, it indicates we cancel aged locks. */ -enum ldlm_lru_flags { - LDLM_LRU_FLAG_NO_WAIT = BIT(1), /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ - LDLM_LRU_FLAG_CLEANUP = BIT(2), /* Used when clearing lru, tells - * prepare_lru_list to set discard - * flag on PR extent locks so we - * don't waste time saving pages - * that will be discarded momentarily - */ -}; - int ldlm_cancel_lru(struct ldlm_namespace *ns, int min, enum ldlm_cancel_flags cancel_flags, enum ldlm_lru_flags lru_flags); @@ -163,6 +150,7 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, struct list_head *cancels, int count, enum ldlm_cancel_flags cancel_flags); +int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns); int ldlm_bl_thread_wakeup(void); void ldlm_handle_bl_callback(struct ldlm_namespace *ns, diff --git a/fs/lustre/ldlm/ldlm_lock.c b/fs/lustre/ldlm/ldlm_lock.c index 2931873..0dbd4f3 100644 --- a/fs/lustre/ldlm/ldlm_lock.c +++ b/fs/lustre/ldlm/ldlm_lock.c @@ -808,7 +808,7 @@ void ldlm_lock_decref_internal(struct 
ldlm_lock *lock, enum ldlm_mode mode) if (ldlm_is_fail_loc(lock)) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0); + ldlm_pool_recalc(&ns->ns_pool, true); } else { LDLM_DEBUG(lock, "do not add lock into lru list"); unlock_res_and_lock(lock); diff --git a/fs/lustre/ldlm/ldlm_lockd.c b/fs/lustre/ldlm/ldlm_lockd.c index 7df7af2..4a91a7f 100644 --- a/fs/lustre/ldlm/ldlm_lockd.c +++ b/fs/lustre/ldlm/ldlm_lockd.c @@ -504,6 +504,11 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags); } +int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns) +{ + return ldlm_bl_to_thread(ns, NULL, NULL, NULL, 0, LCF_ASYNC); +} + int ldlm_bl_thread_wakeup(void) { wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq); @@ -856,9 +861,15 @@ static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp, LCF_BL_AST); ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, blwi->blwi_flags); - } else { + } else if (blwi->blwi_lock) { ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld, blwi->blwi_lock); + } else { + ldlm_pool_recalc(&blwi->blwi_ns->ns_pool, true); + spin_lock(&blwi->blwi_ns->ns_lock); + blwi->blwi_ns->ns_rpc_recalc = 0; + spin_unlock(&blwi->blwi_ns->ns_lock); + ldlm_namespace_put(blwi->blwi_ns); } if (blwi->blwi_mem_pressure) memalloc_noreclaim_restore(flags); diff --git a/fs/lustre/ldlm/ldlm_pool.c b/fs/lustre/ldlm/ldlm_pool.c index c37948a..9cee24b 100644 --- a/fs/lustre/ldlm/ldlm_pool.c +++ b/fs/lustre/ldlm/ldlm_pool.c @@ -252,13 +252,13 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl) /** * Recalculates client size pool @pl according to current SLV and Limit. 
*/ -static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) +static int ldlm_cli_pool_recalc(struct ldlm_pool *pl, bool force) { timeout_t recalc_interval_sec; int ret; recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; - if (recalc_interval_sec < pl->pl_recalc_period) + if (!force && recalc_interval_sec < pl->pl_recalc_period) return 0; spin_lock(&pl->pl_lock); @@ -266,7 +266,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) * Check if we need to recalc lists now. */ recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time; - if (recalc_interval_sec < pl->pl_recalc_period) { + if (!force && recalc_interval_sec < pl->pl_recalc_period) { spin_unlock(&pl->pl_lock); return 0; } @@ -346,7 +346,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, * * Returns time in seconds for the next recalc of this pool */ -static timeout_t ldlm_pool_recalc(struct ldlm_pool *pl) +timeout_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force) { timeout_t recalc_interval_sec; int count; @@ -373,7 +373,7 @@ static timeout_t ldlm_pool_recalc(struct ldlm_pool *pl) } if (pl->pl_ops->po_recalc) { - count = pl->pl_ops->po_recalc(pl); + count = pl->pl_ops->po_recalc(pl, force); lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, count); } @@ -976,7 +976,7 @@ static void ldlm_pools_recalc(struct work_struct *ws) */ if (!skip) { delay = min(delay, - ldlm_pool_recalc(&ns->ns_pool)); + ldlm_pool_recalc(&ns->ns_pool, false)); ldlm_namespace_put(ns); } } diff --git a/fs/lustre/ldlm/ldlm_request.c b/fs/lustre/ldlm/ldlm_request.c index a8d6df1..dd897ec 100644 --- a/fs/lustre/ldlm/ldlm_request.c +++ b/fs/lustre/ldlm/ldlm_request.c @@ -1129,8 +1129,9 @@ static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp) */ int ldlm_cli_update_pool(struct ptlrpc_request *req) { + struct ldlm_namespace *ns; struct obd_device *obd; - u64 new_slv; + u64 new_slv, ratio; u32 new_limit; if (unlikely(!req->rq_import || !req->rq_import->imp_obd || @@ -1170,17 +1171,39 @@ int 
ldlm_cli_update_pool(struct ptlrpc_request *req) read_unlock(&obd->obd_pool_lock); /* - * Set new SLV and limit in OBD fields to make them accessible - * to the pool thread. We do not access obd_namespace and pool - * directly here as there is no reliable way to make sure that - * they are still alive at cleanup time. Evil races are possible - * which may cause Oops at that time. + * OBD device keeps the new pool attributes before they are handled by + * the pool. */ write_lock(&obd->obd_pool_lock); obd->obd_pool_slv = new_slv; obd->obd_pool_limit = new_limit; write_unlock(&obd->obd_pool_lock); + /* + * Check if an urgent pool recalc is needed, i.e. the SLV has decreased by + * ns_recalc_pct percent (10% by default). Applies only if LRU resize is on. + */ + ns = obd->obd_namespace; + if (!ns_connect_lru_resize(ns) || + ldlm_pool_get_slv(&ns->ns_pool) < new_slv) + return 0; + + ratio = 100 * new_slv / ldlm_pool_get_slv(&ns->ns_pool); + if (100 - ratio >= ns->ns_recalc_pct && + !ns->ns_stopping && !ns->ns_rpc_recalc) { + bool recalc = false; + + spin_lock(&ns->ns_lock); + if (!ns->ns_stopping && !ns->ns_rpc_recalc) { + ldlm_namespace_get(ns); + recalc = true; + ns->ns_rpc_recalc = 1; + } + spin_unlock(&ns->ns_lock); + if (recalc) + ldlm_bl_to_thread_ns(ns); + } + return 0; } diff --git a/fs/lustre/ldlm/ldlm_resource.c b/fs/lustre/ldlm/ldlm_resource.c index 3527e15..dab837d 100644 --- a/fs/lustre/ldlm/ldlm_resource.c +++ b/fs/lustre/ldlm/ldlm_resource.c @@ -273,6 +273,35 @@ static ssize_t lru_cancel_batch_store(struct kobject *kobj, } LUSTRE_RW_ATTR(lru_cancel_batch); +static ssize_t ns_recalc_pct_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, + ns_kobj); + + return scnprintf(buf, sizeof(buf) - 1, "%u\n", ns->ns_recalc_pct); +} + +static ssize_t ns_recalc_pct_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct ldlm_namespace *ns = 
container_of(kobj, struct ldlm_namespace, + ns_kobj); + unsigned long tmp; + + if (kstrtoul(buffer, 10, &tmp)) + return -EINVAL; + + if (tmp > 100) + return -ERANGE; + + ns->ns_recalc_pct = (unsigned int)tmp; + + return count; +} +LUSTRE_RW_ATTR(ns_recalc_pct); + static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -375,6 +404,7 @@ static ssize_t dirty_age_limit_store(struct kobject *kobj, &lustre_attr_resource_count.attr, &lustre_attr_lock_count.attr, &lustre_attr_lock_unused_count.attr, + &lustre_attr_ns_recalc_pct.attr, &lustre_attr_lru_size.attr, &lustre_attr_lru_cancel_batch.attr, &lustre_attr_lru_max_age.attr, @@ -663,6 +693,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name, ns->ns_nr_unused = 0; ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; ns->ns_cancel_batch = LDLM_DEFAULT_LRU_SHRINK_BATCH; + ns->ns_recalc_pct = LDLM_DEFAULT_SLV_RECALC_PCT; ns->ns_max_age = ktime_set(LDLM_DEFAULT_MAX_ALIVE, 0); ns->ns_orig_connect_flags = 0; ns->ns_connect_flags = 0;

[35/42] lustre: ldlm: pool recalc forceful call

Commit Message

Patch