From patchwork Sat Nov 26 09:03:36 2022
X-Patchwork-Submitter: "D. Wythe"
X-Patchwork-Id: 13056398
From: "D.Wythe"
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com
Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH net-next v6 1/7] net/smc: remove locks smc_client_lgr_pending and smc_server_lgr_pending
Date: Sat, 26 Nov 2022 17:03:36 +0800
Message-Id: <1669453422-38152-2-git-send-email-alibuda@linux.alibaba.com>
In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>
References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>
X-Mailing-List: linux-rdma@vger.kernel.org

From: "D. Wythe"

This patch removes the locks smc_client_lgr_pending and smc_server_lgr_pending, which serialize the creation of link groups. Once a link group already exists, these locks are meaningless; worse still, they force incoming connections to be queued one after the other.

Now the creation of a link group is no longer decided by racing, but allocated through the following strategy:

1. Try to find a suitable link group; if successful, the current connection is considered a non-first-contact connection. Done.

2. Check the number of connections currently waiting for a suitable link group to be created; if it is not less than the number of link groups to be created multiplied by (SMC_RMBS_PER_LGR_MAX - 1), increase the number of link groups to be created. The current connection is considered a first-contact connection. Done.

3. Increase the number of connections currently waiting, and wait to be woken up.

4. Decrease the number of connections currently waiting, goto 1.

We wake up the connections that were put to sleep in step 3 through the SMC link state change event. Once a link moves out of the SMC_LNK_ACTIVATING state, decrease the number of link groups to be created, and then wake up at most (SMC_RMBS_PER_LGR_MAX - 1) connections.
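For illustration only, the strategy above can be modeled in userspace C roughly as follows. This is a minimal sketch with pthreads, not the kernel implementation; the names (ldm, pending_capability, conns_pending) merely mirror the description, and the SMC_RMBS_PER_LGR_MAX value is an assumption of the sketch:

#include <pthread.h>
#include <stdbool.h>

#define SMC_RMBS_PER_LGR_MAX 255	/* assumed value, for the sketch only */

struct ldm {
	pthread_mutex_t lock;
	pthread_cond_t wq;
	unsigned long pending_capability;	/* slots offered by lgrs being created */
	unsigned long conns_pending;		/* connections currently waiting */
};

static struct ldm g_ldm = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.wq = PTHREAD_COND_INITIALIZER,
};

/* returns true if the caller must perform first contact (create the lgr) */
static bool ldm_decide(struct ldm *ldm, bool (*find_lgr)(void))
{
	bool first_contact = false;

	pthread_mutex_lock(&ldm->lock);
	for (;;) {
		if (find_lgr())			/* step 1: reuse an existing lgr */
			break;
		if (ldm->conns_pending >= ldm->pending_capability) {
			/* step 2: pending lgrs cannot absorb all waiters,
			 * so this connection creates one more lgr.
			 */
			ldm->pending_capability += SMC_RMBS_PER_LGR_MAX - 1;
			first_contact = true;
			break;
		}
		ldm->conns_pending++;		/* step 3: wait to be woken up */
		pthread_cond_wait(&ldm->wq, &ldm->lock);
		ldm->conns_pending--;		/* step 4: goto step 1 */
	}
	pthread_mutex_unlock(&ldm->lock);
	return first_contact;
}

/* called once first contact finished, i.e. the link left SMC_LNK_ACTIVATING */
static void ldm_first_contact_done(struct ldm *ldm)
{
	pthread_mutex_lock(&ldm->lock);
	ldm->pending_capability -= SMC_RMBS_PER_LGR_MAX - 1;
	pthread_mutex_unlock(&ldm->lock);
	/* the kernel wakes at most SMC_RMBS_PER_LGR_MAX - 1 waiters (and on a
	 * server-side failure only one); a broadcast over-approximates that here.
	 */
	pthread_cond_broadcast(&ldm->wq);
}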
In the implementation, we introduce the concept of a lnk cluster, which is a collection of links with the same characteristics (see smcr_lgr_decision_maker_cmpfn() for details); this makes it possible to wake up efficiently in the N-to-1 scenario.

Signed-off-by: D. Wythe --- net/smc/af_smc.c | 50 +++--- net/smc/smc_core.c | 450 ++++++++++++++++++++++++++++++++++++++++++++++++++++- net/smc/smc_core.h | 46 ++++++ 3 files changed, 508 insertions(+), 38 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e12d4fa..52287ee 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -53,12 +53,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" -static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group - * creation on server - */ -static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group - * creation on client - */ +static DEFINE_MUTEX(smcd_buf_pending); /* serialize SMC-D buf creation */ static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */ struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ @@ -1197,10 +1192,8 @@ static int smc_connect_rdma(struct smc_sock *smc, if (reason_code) return reason_code; - mutex_lock(&smc_client_lgr_pending); reason_code = smc_conn_create(smc, ini); if (reason_code) { - mutex_unlock(&smc_client_lgr_pending); return reason_code; } @@ -1292,7 +1285,6 @@ static int smc_connect_rdma(struct smc_sock *smc, if (reason_code) goto connect_abort; } - mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; @@ -1302,7 +1294,6 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; connect_abort: smc_conn_abort(smc, ini->first_contact_local); - mutex_unlock(&smc_client_lgr_pending); smc->connect_nonblock = 0; return reason_code; @@ -1348,16 +1339,15 @@ static int smc_connect_ism(struct smc_sock *smc, } ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; - /* there is only one lgr role for SMC-D; use server lock */ - mutex_lock(&smc_server_lgr_pending); rc = smc_conn_create(smc, ini); if (rc) { - mutex_unlock(&smc_server_lgr_pending); return rc; } + mutex_lock(&smcd_buf_pending); /* Create send and receive buffers */ rc = smc_buf_create(smc, true); + mutex_unlock(&smcd_buf_pending); if (rc) { rc = (rc == -ENOSPC) ?
SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM; goto connect_abort; @@ -1379,7 +1369,6 @@ static int smc_connect_ism(struct smc_sock *smc, aclc->hdr.version, eid, NULL); if (rc) goto connect_abort; - mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; @@ -1389,7 +1378,6 @@ static int smc_connect_ism(struct smc_sock *smc, return 0; connect_abort: smc_conn_abort(smc, ini->first_contact_local); - mutex_unlock(&smc_server_lgr_pending); smc->connect_nonblock = 0; return rc; @@ -1505,6 +1493,9 @@ static int __smc_connect(struct smc_sock *smc) SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc); smc_connect_ism_vlan_cleanup(smc, ini); + if (ini->first_contact_local) + smc_lgr_decision_maker_on_first_contact_success(smc, ini); + kfree(buf); kfree(ini); return 0; @@ -1513,6 +1504,8 @@ static int __smc_connect(struct smc_sock *smc) smc_connect_ism_vlan_cleanup(smc, ini); kfree(buf); fallback: + if (ini->first_contact_local) + smc_lgr_decision_maker_on_first_contact_fail(ini); kfree(ini); return smc_connect_decline_fallback(smc, rc, version); } @@ -2001,8 +1994,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, if (rc) return rc; + mutex_lock(&smcd_buf_pending); /* Create send and receive buffers */ rc = smc_buf_create(new_smc, true); + mutex_unlock(&smcd_buf_pending); if (rc) { smc_conn_abort(new_smc, ini->first_contact_local); return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : @@ -2379,7 +2374,6 @@ static void smc_listen_work(struct work_struct *work) if (rc) goto out_decl; - mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -2387,46 +2381,42 @@ static void smc_listen_work(struct work_struct *work) /* determine ISM or RoCE device used for connection */ rc = smc_listen_find_device(new_smc, pclc, ini); if (rc) - goto out_unlock; + goto out_decl; /* send SMC Accept CLC message */ accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version; rc = smc_clc_send_accept(new_smc, ini->first_contact_local, accept_version, ini->negotiated_eid); if (rc) - goto out_unlock; - - /* SMC-D does not need this lock any more */ - if (ini->is_smcd) - mutex_unlock(&smc_server_lgr_pending); + goto out_decl; /* receive SMC Confirm CLC message */ memset(buf, 0, sizeof(*buf)); cclc = (struct smc_clc_msg_accept_confirm *)buf; rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf), SMC_CLC_CONFIRM, CLC_WAIT_TIME); - if (rc) { - if (!ini->is_smcd) - goto out_unlock; + if (rc) goto out_decl; - } /* finish worker */ if (!ini->is_smcd) { rc = smc_listen_rdma_finish(new_smc, cclc, ini->first_contact_local, ini); if (rc) - goto out_unlock; - mutex_unlock(&smc_server_lgr_pending); + goto out_decl; } + smc_conn_leave_rtoken_pending(new_smc, ini); smc_conn_save_peer_info(new_smc, cclc); smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); + if (ini->first_contact_local) + smc_lgr_decision_maker_on_first_contact_success(new_smc, ini); goto out_free; -out_unlock: - mutex_unlock(&smc_server_lgr_pending); out_decl: + smc_conn_leave_rtoken_pending(new_smc, ini); + if (ini && ini->first_contact_local) + smc_lgr_decision_maker_on_first_contact_fail(ini); smc_listen_decline(new_smc, rc, ini ? 
ini->first_contact_local : 0, proposal_version); out_free: diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c305d8d..1c4d669 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -40,12 +40,289 @@ #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) +#define LDM_TBD 0 +#define LDM_RETRY 1 +#define LDM_FIRST_CONTACT 2 + struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), .list = LIST_HEAD_INIT(smc_lgr_list.list), .num = 0, }; +struct smc_lgr_decision_maker { + struct rhash_head rnode; /* node for rhashtable */ + struct wait_queue_head wq; /* queue for connection that have been + * decided to wait + */ + spinlock_t lock; /* protection for decision maker */ + refcount_t ref; /* refcount for decision maker */ + int type; /* smc type */ + int role; /* smc role */ + unsigned long pending_capability; + /* maximum pending number of connections that + * need to wait. + */ + unsigned long conns_pending; + /* the number of pending connections */ +}; + +struct smcr_lgr_decision_maker { + struct smc_lgr_decision_maker ldm; + u8 peer_systemid[SMC_SYSTEMID_LEN]; + u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ + u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ + int clcqpn; +}; + +struct smcd_lgr_decision_maker { + struct smc_lgr_decision_maker ldm; + u64 peer_gid; + struct smcd_dev *dev; +}; + +static int smcr_lgr_decision_maker_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct smcr_lgr_decision_maker *rldm = obj; + const struct smc_init_info *ini = arg->key; + + if (ini->role != rldm->ldm.role) + return 1; + + if (memcmp(ini->peer_systemid, rldm->peer_systemid, SMC_SYSTEMID_LEN)) + return 1; + + if (memcmp(ini->peer_gid, rldm->peer_gid, SMC_GID_SIZE)) + return 1; + + if ((ini->role == SMC_SERV || ini->ib_clcqpn == rldm->clcqpn) && + (ini->smcr_version == SMC_V2 || + !memcmp(ini->peer_mac, rldm->peer_mac, ETH_ALEN))) + return 0; + + return 1; +} + +static u32 smcr_lgr_decision_maker_hashfn(const void *data, u32 len, u32 seed) +{ + const struct smcr_lgr_decision_maker *rldm = data; + + return jhash2((u32 *)rldm->peer_systemid, SMC_SYSTEMID_LEN / sizeof(u32), seed) + + ((rldm->ldm.role == SMC_SERV) ? 0 : rldm->clcqpn); +} + +static u32 smcr_lgr_decision_maker_arg_hashfn(const void *data, u32 len, u32 seed) +{ + const struct smc_init_info *ini = data; + + return jhash2((u32 *)ini->peer_systemid, SMC_SYSTEMID_LEN / sizeof(u32), seed) + + ((ini->role == SMC_SERV) ? 
0 : ini->ib_clcqpn); +} + +static void smcr_lgr_decision_maker_init(struct smc_lgr_decision_maker *ldm, + struct smc_init_info *ini) +{ + struct smcr_lgr_decision_maker *rldm = (struct smcr_lgr_decision_maker *)ldm; + + memcpy(rldm->peer_systemid, ini->peer_systemid, SMC_SYSTEMID_LEN); + memcpy(rldm->peer_gid, ini->peer_gid, SMC_GID_SIZE); + memcpy(rldm->peer_mac, ini->peer_mac, ETH_ALEN); + rldm->clcqpn = ini->ib_clcqpn; +} + +static int smcd_lgr_decision_maker_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct smcd_lgr_decision_maker *dldm = obj; + const struct smc_init_info *ini = arg->key; + + if (ini->role != dldm->ldm.role) + return 1; + + if (ini->ism_peer_gid[ini->ism_selected] != dldm->peer_gid) + return 1; + + if (ini->ism_dev[ini->ism_selected] != dldm->dev) + return 1; + + return 0; +} + +static u32 smcd_lgr_decision_maker_hashfn(const void *data, u32 len, u32 seed) +{ + const struct smcd_lgr_decision_maker *dlcm = data; + + return jhash2((u32 *)&dlcm->peer_gid, sizeof(dlcm->peer_gid) / sizeof(u32), seed); +} + +static u32 smcd_lgr_decision_maker_arg_hashfn(const void *data, u32 len, u32 seed) +{ + const struct smc_init_info *ini = data; + u64 select_gid; + + select_gid = ini->ism_peer_gid[ini->ism_selected]; + return jhash2((u32 *)&select_gid, sizeof(select_gid) / sizeof(u32), seed); +} + +static void smcd_lgr_decision_maker_init(struct smc_lgr_decision_maker *ldm, + struct smc_init_info *ini) +{ + struct smcd_lgr_decision_maker *dldm = (struct smcd_lgr_decision_maker *)ldm; + + dldm->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + dldm->dev = ini->ism_dev[ini->ism_selected]; +} + +struct smc_lgr_decision_builder { + struct rhashtable map; + spinlock_t map_lock; /* protect map */ + struct rhashtable_params default_params; + /* how to serach and insert decision maker by ini */ + void (*init)(struct smc_lgr_decision_maker *ldm, struct smc_init_info *ini); + /* init maker by ini */ + u32 sz; /* size */ +}; + +static struct smc_lgr_decision_builder smc_lgr_decision_set[SMC_TYPE_D + 1] = { + /* SMC_TYPE_R = 0 */ + { + .sz = sizeof(struct smcr_lgr_decision_maker), + .init = smcr_lgr_decision_maker_init, + .map_lock = __SPIN_LOCK_UNLOCKED(smc_lgr_decision_set[SMC_TYPE_R].map_lock), + .default_params = { + .head_offset = offsetof(struct smc_lgr_decision_maker, rnode), + .key_len = sizeof(struct smc_init_info), + .obj_cmpfn = smcr_lgr_decision_maker_cmpfn, + .obj_hashfn = smcr_lgr_decision_maker_hashfn, + .hashfn = smcr_lgr_decision_maker_arg_hashfn, + .automatic_shrinking = true, + }, + }, + /* SMC_TYPE_D = 1 */ + { + .sz = sizeof(struct smcd_lgr_decision_maker), + .init = smcd_lgr_decision_maker_init, + .map_lock = __SPIN_LOCK_UNLOCKED(smc_lgr_decision_set[SMC_TYPE_D].map_lock), + .default_params = { + .head_offset = offsetof(struct smc_lgr_decision_maker, rnode), + .key_len = sizeof(struct smc_init_info), + .obj_cmpfn = smcd_lgr_decision_maker_cmpfn, + .obj_hashfn = smcd_lgr_decision_maker_hashfn, + .hashfn = smcd_lgr_decision_maker_arg_hashfn, + .automatic_shrinking = true, + }, + }, +}; + +/* hold a reference for smc_lgr_decision_maker */ +static inline void smc_lgr_decision_maker_hold(struct smc_lgr_decision_maker *ldm) +{ + if (likely(ldm)) + refcount_inc(&ldm->ref); +} + +/* release a reference for smc_lgr_decision_maker */ +static inline void smc_lgr_decision_maker_put(struct smc_lgr_decision_maker *ldm) +{ + bool do_free = false; + int type; + + if (unlikely(!ldm)) + return; + + if (refcount_dec_not_one(&ldm->ref)) + return; + + type = ldm->type; 
+ + spin_lock_bh(&smc_lgr_decision_set[type].map_lock); + /* last ref */ + if (refcount_dec_and_test(&ldm->ref)) { + do_free = true; + rhashtable_remove_fast(&smc_lgr_decision_set[type].map, &ldm->rnode, + smc_lgr_decision_set[type].default_params); + } + spin_unlock_bh(&smc_lgr_decision_set[type].map_lock); + if (do_free) + kfree(ldm); +} + +static struct smc_lgr_decision_maker * +smc_get_or_create_lgr_decision_maker(struct smc_init_info *ini) +{ + struct smc_lgr_decision_maker *ldm; + int err, type; + + type = ini->is_smcd ? SMC_TYPE_D : SMC_TYPE_R; + + spin_lock_bh(&smc_lgr_decision_set[type].map_lock); + ldm = rhashtable_lookup_fast(&smc_lgr_decision_set[type].map, ini, + smc_lgr_decision_set[type].default_params); + if (!ldm) { + ldm = kzalloc(smc_lgr_decision_set[type].sz, GFP_ATOMIC); + if (unlikely(!ldm)) + goto fail; + + /* common init */ + spin_lock_init(&ldm->lock); + init_waitqueue_head(&ldm->wq); + refcount_set(&ldm->ref, 1); + ldm->type = type; + ldm->role = ini->role; + + /* init */ + if (smc_lgr_decision_set[type].init) + smc_lgr_decision_set[type].init(ldm, ini); + + err = rhashtable_insert_fast(&smc_lgr_decision_set[type].map, + &ldm->rnode, + smc_lgr_decision_set[type].default_params); + if (unlikely(err)) { + pr_warn_ratelimited("smc: rhashtable_insert_fast failed (%d)", err); + kfree(ldm); + ldm = NULL; + } + } else { + smc_lgr_decision_maker_hold(ldm); + } +fail: + spin_unlock_bh(&smc_lgr_decision_set[type].map_lock); + return ldm; +} + +void smc_lgr_decision_maker_on_first_contact_done(struct smc_init_info *ini, bool success) +{ + struct smc_lgr_decision_maker *ldm; + int nr; + + if (unlikely(!ini->first_contact_local)) + return; + + /* get lgr decision maker */ + ldm = ini->ldm; + + if (unlikely(!ldm)) + return; + + spin_lock_bh(&ldm->lock); + ldm->pending_capability -= (SMC_RMBS_PER_LGR_MAX - 1); + nr = SMC_RMBS_PER_LGR_MAX - 1; + if (unlikely(!success) && ldm->role == SMC_SERV) { + /* only to wake up one connection to perfrom + * first contact in server side, client MUST wake up + * all to decline. + */ + nr = min(1, nr); + } + if (nr) + __wake_up(&ldm->wq, TASK_NORMAL, nr, + success ? 
(void *)LDM_RETRY : (void *)LDM_FIRST_CONTACT); + + spin_unlock_bh(&ldm->lock); + + /* hold in smc_lgr_create */ + smc_lgr_decision_maker_put(ldm); +} + static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); @@ -756,6 +1033,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ + rwlock_init(&lnk->rtokens_lock); lnk->link_idx = link_idx; lnk->wr_rx_id_compl = 0; smc_ibdev_cnt_inc(lnk); @@ -915,6 +1193,11 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) atomic_inc(&lgr_cnt); } smc->conn.lgr = lgr; + + lgr->ldm = ini->ldm; + /* smc_lgr_decision_maker_put in __smc_lgr_free() */ + smc_lgr_decision_maker_hold(lgr->ldm); + spin_lock_bh(lgr_lock); list_add_tail(&lgr->list, lgr_list); spin_unlock_bh(lgr_lock); @@ -1364,6 +1647,9 @@ static void __smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } + /* smc_lgr_decision_maker_hold in smc_lgr_create() */ + if (lgr->ldm) + smc_lgr_decision_maker_put(lgr->ldm); kfree(lgr); } @@ -1824,6 +2110,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, lgr->role != role) return false; + if (!READ_ONCE(lgr->first_contact_done)) + return false; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { lnk = &lgr->lnk[i]; @@ -1844,16 +2133,39 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, u64 peer_gid) { - return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; + return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev && + READ_ONCE(lgr->first_contact_done); +} + +static int smc_ldm_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, + void *key) +{ + struct smc_init_info *ini = wq_entry->private; + int ret; + + wq_entry->private = ini->private; + ini->advise = (u8)(uintptr_t)key; + + ret = woken_wake_function(wq_entry, mode, sync, NULL); + if (ret) { + /* only wake up once */ + list_del_init_careful(&wq_entry->entry); + if (likely(ini->ldm)) + ini->ldm->conns_pending--; + } + return ret; } /* create a new SMC connection (and a new link group if necessary) */ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) { + struct smc_lgr_decision_maker *ldm = NULL; struct smc_connection *conn = &smc->conn; struct net *net = sock_net(&smc->sk); + struct wait_queue_entry wait; struct list_head *lgr_list; struct smc_link_group *lgr; + int timeo = CLC_WAIT_TIME; enum smc_lgr_role role; spinlock_t *lgr_lock; int rc = 0; @@ -1864,12 +2176,40 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) &smc_lgr_list.lock; ini->first_contact_local = 1; role = smc->listen_smc ? SMC_SERV : SMC_CLNT; - if (role == SMC_CLNT && ini->first_contact_peer) + ini->role = role; + + ldm = smc_get_or_create_lgr_decision_maker(ini); + if (unlikely(!ldm)) + return SMC_CLC_DECL_INTERR; + + /* Considering a scenario, after find out the SMCDv2 device, a potential failures + * occur in smc_find_rdma_v2_device_serv, for example smc_buf_create failed. And then, + * it will continue to search for SMCDv1 devices or SMCR devices. 
Hence, + * smc_conn_create will called again with the same ini, if the ini performs the first + * contact logic, which means that the lgr that should have been created by it must be + * failed, and we need to actively trigger the failed logic here. + */ + if (unlikely(ini->ldm)) { + smc_lgr_decision_maker_on_first_contact_fail(ini); + ini->ldm = NULL; + } + + if (role == SMC_CLNT && ini->first_contact_peer) { + spin_lock_bh(&ldm->lock); + ldm->pending_capability += (SMC_RMBS_PER_LGR_MAX - 1); + spin_unlock_bh(&ldm->lock); /* create new link group as well */ goto create; + } /* determine if an existing link group can be reused */ spin_lock_bh(lgr_lock); + spin_lock(&ldm->lock); + +again: + /* init advise */ + ini->advise = LDM_TBD; + list_for_each_entry(lgr, lgr_list, list) { write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? @@ -1884,21 +2224,82 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && - !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { + (SMC_RMBS_PER_LGR_MAX - + bitmap_weight(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) + > atomic_read(&lgr->rtoken_pendings))))) { /* link group found */ ini->first_contact_local = 0; conn->lgr = lgr; rc = smc_lgr_register_conn(conn, false); write_unlock_bh(&lgr->conns_lock); - if (!rc && delayed_work_pending(&lgr->free_work)) - cancel_delayed_work(&lgr->free_work); + if (!rc) { + smc_conn_enter_rtoken_pending(smc, ini); + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); + } break; } write_unlock_bh(&lgr->conns_lock); } + + /* not found */ + if (!rc && ini->first_contact_local) { + if (timeo && ldm->pending_capability > ldm->conns_pending) { + /* record pending connection, release + * in smc_lgr_decision_maker_on_first_contact_done() or after timeout + */ + ldm->conns_pending++; + + /* used in ldm_wakeup, clear after remove from queue */ + ini->ldm = ldm; + + /* init wait entry */ + init_wait_entry(&wait, 0); + /* replace wait with new private & func */ + ini->private = wait.private; + wait.private = ini; + wait.func = smc_ldm_wake_function; + + /* add to wq */ + add_wait_queue_exclusive(&ldm->wq, &wait); + + spin_unlock(&ldm->lock); + spin_unlock_bh(lgr_lock); + + /* wait woken */ + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + + spin_lock_bh(lgr_lock); + spin_lock(&ldm->lock); + + ini->ldm = NULL; + + /* remove from wq */ + remove_wait_queue(&ldm->wq, &wait); + + /* timeout */ + if (unlikely(!timeo || ini->advise == LDM_TBD)) { + if (ini->advise == LDM_TBD) { + ldm->conns_pending--; + ini->advise = LDM_RETRY; + } + } + } else { + ini->advise = LDM_FIRST_CONTACT; + } + + if (ini->advise == LDM_RETRY) + goto again; + + /* do first contact */ + ldm->pending_capability += (SMC_RMBS_PER_LGR_MAX - 1); + } + + spin_unlock(&ldm->lock); spin_unlock_bh(lgr_lock); + if (rc) - return rc; + goto out; if (role == SMC_CLNT && !ini->first_contact_peer && ini->first_contact_local) { @@ -1906,11 +2307,15 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) * a new one * send out_of_sync decline, reason synchr. 
error */ - return SMC_CLC_DECL_SYNCERR; + rc = SMC_CLC_DECL_SYNCERR; + goto out; } create: if (ini->first_contact_local) { + ini->ldm = ldm; + /* smc_lgr_decision_maker_put in first_contact_done() */ + smc_lgr_decision_maker_hold(ldm); rc = smc_lgr_create(smc, ini); if (rc) goto out; @@ -1943,6 +2348,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) #endif out: + /* smc_lgr_decision_maker_hold in smc_get_or_create_lgr_decision_make() */ + smc_lgr_decision_maker_put(ldm); return rc; } @@ -2505,19 +2912,24 @@ int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) u32 rkey = ntohl(nw_rkey); int i; + write_lock_bh(&lnk->rtokens_lock); for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && test_bit(i, lgr->rtokens_used_mask)) { /* already in list */ + write_unlock_bh(&lnk->rtokens_lock); return i; } } i = smc_rmb_reserve_rtoken_idx(lgr); - if (i < 0) + if (i < 0) { + write_unlock_bh(&lnk->rtokens_lock); return i; + } lgr->rtokens[i][lnk->link_idx].rkey = rkey; lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; + write_unlock_bh(&lnk->rtokens_lock); return i; } @@ -2528,6 +2940,7 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) u32 rkey = ntohl(nw_rkey); int i, j; + write_lock_bh(&lnk->rtokens_lock); for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && test_bit(i, lgr->rtokens_used_mask)) { @@ -2536,9 +2949,11 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) lgr->rtokens[i][j].dma_addr = 0; } clear_bit(i, lgr->rtokens_used_mask); + write_unlock_bh(&lnk->rtokens_lock); return 0; } } + write_unlock_bh(&lnk->rtokens_lock); return -ENOENT; } @@ -2604,12 +3019,31 @@ static int smc_core_reboot_event(struct notifier_block *this, int __init smc_core_init(void) { + int i; + + /* init smc lgr decision maker builder */ + for (i = 0; i <= SMC_TYPE_D; i++) + rhashtable_init(&smc_lgr_decision_set[i].map, + &smc_lgr_decision_set[i].default_params); + return register_reboot_notifier(&smc_reboot_notifier); } +static void smc_lgr_decision_maker_free_cb(void *ptr, void *arg) +{ + kfree(ptr); +} + /* Called (from smc_exit) when module is removed */ void smc_core_exit(void) { + int i; + unregister_reboot_notifier(&smc_reboot_notifier); smc_lgrs_shutdown(); + + /* destroy smc lgr decision maker builder */ + for (i = 0; i <= SMC_TYPE_D; i++) + rhashtable_free_and_destroy(&smc_lgr_decision_set[i].map, + smc_lgr_decision_maker_free_cb, NULL); } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 285f9bd..e9adf4f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ struct smc_link { u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ atomic_t wr_tx_refcnt; /* tx refs to link */ + rwlock_t rtokens_lock; struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -244,17 +246,23 @@ struct smc_llc_flow { struct smc_llc_qentry *qentry; }; +struct smc_lgr_decision_maker; + struct smc_link_group { struct list_head list; struct rb_root conns_all; /* connection tree */ rwlock_t conns_lock; /* protects conns_all */ unsigned int conns_num; /* current # of connections */ + atomic_t rtoken_pendings;/* number of connection that + * lgr assigned but no rtoken got yet + */ unsigned short vlan_id; /* vlan id of 
link group */ struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */ struct mutex sndbufs_lock; /* protects tx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ struct mutex rmbs_lock; /* protects rx buffers */ + u8 first_contact_done; /* if first contact succeed */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ @@ -335,6 +343,8 @@ struct smc_link_group { /* peer triggered shutdownn */ }; }; + struct smc_lgr_decision_maker *ldm; + /* who decides to create this lgr */ }; struct smc_clc_msg_local; @@ -373,6 +383,9 @@ struct smc_init_info { unsigned short vlan_id; u32 rc; u8 negotiated_eid[SMC_MAX_EID_LEN]; + struct smc_lgr_decision_maker *ldm; + u8 advise; + void *private; /* SMC-R */ u8 smcr_version; u8 check_smcrv2; @@ -391,6 +404,7 @@ struct smc_init_info { u8 ism_offered_cnt; /* # of ISM devices offered */ u8 ism_selected; /* index of selected ISM dev*/ u8 smcd_version; + u8 role; }; /* Find the connection associated with the given alert token in the link group. @@ -559,6 +573,38 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb); int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb); +void smc_lgr_decision_maker_on_first_contact_done(struct smc_init_info *ini, bool success); + +static inline void smc_lgr_decision_maker_on_first_contact_success(struct smc_sock *smc, + struct smc_init_info *ini) +{ + smc->conn.lgr->first_contact_done = 1; + /* make sure first_contact_done can be seen after wakeup */ + smp_mb(); + smc_lgr_decision_maker_on_first_contact_done(ini, 1 /* success */); +} + +static inline void smc_lgr_decision_maker_on_first_contact_fail(struct smc_init_info *ini) +{ + smc_lgr_decision_maker_on_first_contact_done(ini, 0 /* failed */); +} + +static inline void smc_conn_enter_rtoken_pending(struct smc_sock *smc, struct smc_init_info *ini) +{ + struct smc_link_group *lgr = smc->conn.lgr; + + if (lgr && !ini->first_contact_local) + atomic_inc(&lgr->rtoken_pendings); +} + +static inline void smc_conn_leave_rtoken_pending(struct smc_sock *smc, struct smc_init_info *ini) +{ + struct smc_link_group *lgr = smc->conn.lgr; + + if (lgr && !ini->first_contact_local) + atomic_dec(&lgr->rtoken_pendings); +} + static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { return link->lgr; From patchwork Sat Nov 26 09:03:37 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "D. 
Wythe" X-Patchwork-Id: 13056396 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 83B4BC46467 for ; Sat, 26 Nov 2022 09:04:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229599AbiKZJD5 (ORCPT ); Sat, 26 Nov 2022 04:03:57 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33578 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229496AbiKZJDz (ORCPT ); Sat, 26 Nov 2022 04:03:55 -0500 Received: from out30-42.freemail.mail.aliyun.com (out30-42.freemail.mail.aliyun.com [115.124.30.42]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B96932A707; Sat, 26 Nov 2022 01:03:53 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R121e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018046049;MF=alibuda@linux.alibaba.com;NM=1;PH=DS;RN=8;SR=0;TI=SMTPD_---0VViBjT9_1669453429; Received: from j66a10360.sqa.eu95.tbsite.net(mailfrom:alibuda@linux.alibaba.com fp:SMTPD_---0VViBjT9_1669453429) by smtp.aliyun-inc.com; Sat, 26 Nov 2022 17:03:50 +0800 From: "D.Wythe" To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH net-next v6 2/7] net/smc: allow confirm/delete rkey response deliver multiplex Date: Sat, 26 Nov 2022 17:03:37 +0800 Message-Id: <1669453422-38152-3-git-send-email-alibuda@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: "D. Wythe" We know that all flows except confirm_rkey and delete_rkey are exclusive, confirm/delete rkey flows can run concurrently (local and remote). Although the protocol allows, all flows are actually mutually exclusive in implementation, dues to waiting for LLC messages is in serial. This aggravates the time for establishing or destroying a SMC-R connections, connections have to be queued in smc_llc_wait. We use rtokens or rkey to correlate a confirm/delete rkey message with its response. Before sending a message, we put context with rtokens or rkey into wait queue. When a response message received, we wakeup the context which with the same rtokens or rkey against the response message. Signed-off-by: D. 
Signed-off-by: D. Wythe --- net/smc/smc_llc.c | 174 +++++++++++++++++++++++++++++++++++++++++------------- net/smc/smc_wr.c | 10 ---- net/smc/smc_wr.h | 10 ++++ 3 files changed, 143 insertions(+), 51 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 524649d..bdd1e77 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -200,6 +200,7 @@ struct smc_llc_msg_delete_rkey_v2 { /* type 0x29 */ struct smc_llc_qentry { struct list_head list; struct smc_link *link; + void *private; union smc_llc_msg msg; }; @@ -479,19 +480,17 @@ int smc_llc_send_confirm_link(struct smc_link *link, return rc; } -/* send LLC confirm rkey request */ -static int smc_llc_send_confirm_rkey(struct smc_link *send_link, - struct smc_buf_desc *rmb_desc) +/* build LLC confirm rkey request */ +static int smc_llc_build_confirm_rkey_request(struct smc_link *send_link, + struct smc_buf_desc *rmb_desc, + struct smc_wr_tx_pend_priv **priv) { struct smc_llc_msg_confirm_rkey *rkeyllc; - struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; struct smc_link *link; int i, rc, rtok_ix; - if (!smc_wr_tx_link_hold(send_link)) - return -ENOLINK; - rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend); + rc = smc_llc_add_pending_send(send_link, &wr_buf, priv); if (rc) goto put_out; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; @@ -521,25 +520,20 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, cpu_to_be64((uintptr_t)rmb_desc->cpu_addr) : cpu_to_be64((u64)sg_dma_address (rmb_desc->sgt[send_link->link_idx].sgl)); - /* send llc message */ - rc = smc_wr_tx_send(send_link, pend); put_out: - smc_wr_tx_link_put(send_link); return rc; } -/* send LLC delete rkey request */ -static int smc_llc_send_delete_rkey(struct smc_link *link, - struct smc_buf_desc *rmb_desc) +/* build LLC delete rkey request */ +static int smc_llc_build_delete_rkey_request(struct smc_link *link, + struct smc_buf_desc *rmb_desc, + struct smc_wr_tx_pend_priv **priv) { struct smc_llc_msg_delete_rkey *rkeyllc; - struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; int rc; - if (!smc_wr_tx_link_hold(link)) - return -ENOLINK; - rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + rc = smc_llc_add_pending_send(link, &wr_buf, priv); if (rc) goto put_out; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; @@ -548,10 +542,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, smc_llc_init_msg_hdr(&rkeyllc->hd, link->lgr, sizeof(*rkeyllc)); rkeyllc->num_rkeys = 1; rkeyllc->rkey[0] = htonl(rmb_desc->mr[link->link_idx]->rkey); - /* send llc message */ - rc = smc_wr_tx_send(link, pend); put_out: - smc_wr_tx_link_put(link); return rc; } @@ -2017,7 +2008,8 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_DELETE_RKEY: if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry) break; /* drop out-of-flow response */ - goto assign; + __wake_up(&link->lgr->llc_msg_waiter, TASK_NORMAL, 1, qentry); + goto free; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; @@ -2026,6 +2018,7 @@ static void smc_llc_rx_response(struct smc_link *link, qentry->msg.raw.hdr.common.type); break; } +free: kfree(qentry); return; assign: @@ -2184,25 +2177,98 @@ void smc_llc_link_clear(struct smc_link *link, bool log) cancel_delayed_work_sync(&link->llc_testlink_wrk); } +static int smc_llc_rkey_response_wake_function(struct wait_queue_entry *wq_entry, + unsigned int mode, int sync, void *key) +{ + struct smc_llc_qentry *except, *incoming; + u8 except_llc_type; + + /* not a rkey response */ + if (!key) + return 0; + + except
= wq_entry->private; + incoming = key; + + except_llc_type = except->msg.raw.hdr.common.llc_type; + + /* except LLC MSG TYPE mismatch */ + if (except_llc_type != incoming->msg.raw.hdr.common.llc_type) + return 0; + + switch (except_llc_type) { + case SMC_LLC_CONFIRM_RKEY: + if (memcmp(except->msg.confirm_rkey.rtoken, incoming->msg.confirm_rkey.rtoken, + sizeof(struct smc_rmb_rtoken) * + except->msg.confirm_rkey.rtoken[0].num_rkeys)) + return 0; + break; + case SMC_LLC_DELETE_RKEY: + if (memcmp(except->msg.delete_rkey.rkey, incoming->msg.delete_rkey.rkey, + sizeof(__be32) * except->msg.delete_rkey.num_rkeys)) + return 0; + break; + default: + pr_warn("smc: invalid except llc msg %d.\n", except_llc_type); + return 0; + } + + /* match, save hdr */ + memcpy(&except->msg.raw.hdr, &incoming->msg.raw.hdr, sizeof(except->msg.raw.hdr)); + + wq_entry->private = except->private; + return woken_wake_function(wq_entry, mode, sync, NULL); +} + /* register a new rtoken at the remote peer (for all links) */ int smc_llc_do_confirm_rkey(struct smc_link *send_link, struct smc_buf_desc *rmb_desc) { + DEFINE_WAIT_FUNC(wait, smc_llc_rkey_response_wake_function); struct smc_link_group *lgr = send_link->lgr; - struct smc_llc_qentry *qentry = NULL; - int rc = 0; + long timeout = SMC_LLC_WAIT_TIME; + struct smc_wr_tx_pend_priv *priv; + struct smc_llc_qentry qentry; + struct smc_wr_tx_pend *pend; + int rc = 0, flags; - rc = smc_llc_send_confirm_rkey(send_link, rmb_desc); + if (!smc_wr_tx_link_hold(send_link)) + return -ENOLINK; + + rc = smc_llc_build_confirm_rkey_request(send_link, rmb_desc, &priv); if (rc) goto out; - /* receive CONFIRM RKEY response from server over RoCE fabric */ - qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME, - SMC_LLC_CONFIRM_RKEY); - if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG)) + + pend = container_of(priv, struct smc_wr_tx_pend, priv); + /* make a copy of send msg */ + memcpy(&qentry.msg.confirm_rkey, send_link->wr_tx_bufs[pend->idx].raw, + sizeof(qentry.msg.confirm_rkey)); + + qentry.private = wait.private; + wait.private = &qentry; + + add_wait_queue(&lgr->llc_msg_waiter, &wait); + + /* send llc message */ + rc = smc_wr_tx_send(send_link, priv); + smc_wr_tx_link_put(send_link); + if (rc) { + remove_wait_queue(&lgr->llc_msg_waiter, &wait); + goto out; + } + + while (!signal_pending(current) && timeout) { + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); + if (qentry.msg.raw.hdr.flags & SMC_LLC_FLAG_RESP) + break; + } + + remove_wait_queue(&lgr->llc_msg_waiter, &wait); + flags = qentry.msg.raw.hdr.flags; + + if (!(flags & SMC_LLC_FLAG_RESP) || flags & SMC_LLC_FLAG_RKEY_NEG) rc = -EFAULT; out: - if (qentry) - smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return rc; } @@ -2210,26 +2276,52 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link, int smc_llc_do_delete_rkey(struct smc_link_group *lgr, struct smc_buf_desc *rmb_desc) { - struct smc_llc_qentry *qentry = NULL; + DEFINE_WAIT_FUNC(wait, smc_llc_rkey_response_wake_function); + long timeout = SMC_LLC_WAIT_TIME; + struct smc_wr_tx_pend_priv *priv; + struct smc_llc_qentry qentry; + struct smc_wr_tx_pend *pend; struct smc_link *send_link; - int rc = 0; + int rc = 0, flags; send_link = smc_llc_usable_link(lgr); - if (!send_link) + if (!send_link || !smc_wr_tx_link_hold(send_link)) return -ENOLINK; - /* protected by llc_flow control */ - rc = smc_llc_send_delete_rkey(send_link, rmb_desc); + rc = smc_llc_build_delete_rkey_request(send_link, rmb_desc, &priv); if (rc) goto out; - /* receive DELETE RKEY 
response from server over RoCE fabric */ - qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME, - SMC_LLC_DELETE_RKEY); - if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG)) + + pend = container_of(priv, struct smc_wr_tx_pend, priv); + /* make a copy of send msg */ + memcpy(&qentry.msg.delete_link, send_link->wr_tx_bufs[pend->idx].raw, + sizeof(qentry.msg.delete_link)); + + qentry.private = wait.private; + wait.private = &qentry; + + add_wait_queue(&lgr->llc_msg_waiter, &wait); + + /* send llc message */ + rc = smc_wr_tx_send(send_link, priv); + smc_wr_tx_link_put(send_link); + if (rc) { + remove_wait_queue(&lgr->llc_msg_waiter, &wait); + goto out; + } + + while (!signal_pending(current) && timeout) { + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); + if (qentry.msg.raw.hdr.flags & SMC_LLC_FLAG_RESP) + break; + } + + remove_wait_queue(&lgr->llc_msg_waiter, &wait); + flags = qentry.msg.raw.hdr.flags; + + if (!(flags & SMC_LLC_FLAG_RESP) || flags & SMC_LLC_FLAG_RKEY_NEG) rc = -EFAULT; out: - if (qentry) - smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return rc; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index b0678a4..797dffa 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -37,16 +37,6 @@ static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS); static DEFINE_SPINLOCK(smc_wr_rx_hash_lock); -struct smc_wr_tx_pend { /* control data for a pending send request */ - u64 wr_id; /* work request id sent */ - smc_wr_tx_handler handler; - enum ib_wc_status wc_status; /* CQE status */ - struct smc_link *link; - u32 idx; - struct smc_wr_tx_pend_priv priv; - u8 compl_requested; -}; - /******************************** send queue *********************************/ /*------------------------------- completion --------------------------------*/ diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 45e9b89..a4ea215 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -46,6 +46,16 @@ struct smc_wr_rx_handler { u8 type; }; +struct smc_wr_tx_pend { /* control data for a pending send request */ + u64 wr_id; /* work request id sent */ + smc_wr_tx_handler handler; + enum ib_wc_status wc_status; /* CQE status */ + struct smc_link *link; + u32 idx; + struct smc_wr_tx_pend_priv priv; + u8 compl_requested; +}; + /* Only used by RDMA write WRs. * All other WRs (CDC/LLC) use smc_wr_tx_send handling WR_ID implicitly */ From patchwork Sat Nov 26 09:03:38 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "D. 
Wythe" X-Patchwork-Id: 13056400 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 104A7C47090 for ; Sat, 26 Nov 2022 09:04:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229615AbiKZJEB (ORCPT ); Sat, 26 Nov 2022 04:04:01 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33592 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229563AbiKZJD4 (ORCPT ); Sat, 26 Nov 2022 04:03:56 -0500 Received: from out30-54.freemail.mail.aliyun.com (out30-54.freemail.mail.aliyun.com [115.124.30.54]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A8A7A2A437; Sat, 26 Nov 2022 01:03:53 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R261e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045170;MF=alibuda@linux.alibaba.com;NM=1;PH=DS;RN=8;SR=0;TI=SMTPD_---0VViBjTN_1669453430; Received: from j66a10360.sqa.eu95.tbsite.net(mailfrom:alibuda@linux.alibaba.com fp:SMTPD_---0VViBjTN_1669453430) by smtp.aliyun-inc.com; Sat, 26 Nov 2022 17:03:50 +0800 From: "D.Wythe" To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH net-next v6 3/7] net/smc: make SMC_LLC_FLOW_RKEY run concurrently Date: Sat, 26 Nov 2022 17:03:38 +0800 Message-Id: <1669453422-38152-4-git-send-email-alibuda@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: "D. Wythe" Once confirm/delete rkey response can be multiplex delivered, We can allow parallel execution of start (remote) or initialization (local) a SMC_LLC_FLOW_RKEY flow. This patch will count the flows executed in parallel, and only when the count reaches zero will the current flow type be removed. Signed-off-by: D. 
Signed-off-by: D. Wythe --- net/smc/smc_core.h | 1 + net/smc/smc_llc.c | 89 ++++++++++++++++++++++++++++++++++++++++++------------ net/smc/smc_llc.h | 6 ++++ 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e9adf4f..fb48f61 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -244,6 +244,7 @@ enum smc_llc_flowtype { struct smc_llc_flow { enum smc_llc_flowtype type; struct smc_llc_qentry *qentry; + refcount_t parallel_refcnt; }; struct smc_lgr_decision_maker; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index bdd1e77..4ae636f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -231,15 +231,23 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, flow->qentry = qentry; } -static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, +static bool smc_llc_flow_parallel(struct smc_link_group *lgr, struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) { u8 msg_type = qentry->msg.raw.hdr.common.llc_type; + u8 flow_type = flow->type; + + /* SMC_LLC_FLOW_RKEY can be parallel */ + if (flow_type == SMC_LLC_FLOW_RKEY && + (msg_type == SMC_LLC_CONFIRM_RKEY || msg_type == SMC_LLC_DELETE_RKEY)) { + refcount_inc(&flow->parallel_refcnt); + return true; + } if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && flow_type != msg_type && !lgr->delayed_event) { lgr->delayed_event = qentry; - return; + return false; } /* drop parallel or already-in-progress llc requests */ if (flow_type != msg_type) @@ -250,6 +258,7 @@ static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, qentry->msg.raw.hdr.common.type, flow_type, lgr->role); kfree(qentry); + return false; } /* try to start a new llc flow, initiated by an incoming llc msg */ @@ -257,13 +266,14 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) { struct smc_link_group *lgr = qentry->link->lgr; + bool allow_start = true; spin_lock_bh(&lgr->llc_flow_lock); if (flow->type) { /* a flow is already active */ - smc_llc_flow_parallel(lgr, flow->type, qentry); + allow_start = smc_llc_flow_parallel(lgr, flow, qentry); spin_unlock_bh(&lgr->llc_flow_lock); - return false; + return allow_start; } switch (qentry->msg.raw.hdr.common.llc_type) { case SMC_LLC_ADD_LINK: @@ -280,8 +290,9 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, flow->type = SMC_LLC_FLOW_NONE; } smc_llc_flow_qentry_set(flow, qentry); + refcount_set(&flow->parallel_refcnt, 1); spin_unlock_bh(&lgr->llc_flow_lock); - return true; + return allow_start; } /* start a new local llc flow, wait till current flow finished */ @@ -289,6 +300,7 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, enum smc_llc_flowtype type) { enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE; + bool accept = false; int rc; /* all flows except confirm_rkey and delete_rkey are exclusive, @@ -300,10 +312,39 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, if (list_empty(&lgr->list)) return -ENODEV; spin_lock_bh(&lgr->llc_flow_lock); - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && - (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || - lgr->llc_flow_rmt.type == allowed_remote)) { - lgr->llc_flow_lcl.type = type; + + /* Flow is initialized only if the following conditions are met: + * incoming flow local flow remote flow + * exclusive NONE NONE + * SMC_LLC_FLOW_RKEY SMC_LLC_FLOW_RKEY SMC_LLC_FLOW_RKEY + * SMC_LLC_FLOW_RKEY NONE SMC_LLC_FLOW_RKEY + * SMC_LLC_FLOW_RKEY SMC_LLC_FLOW_RKEY NONE + */ + switch (type) { + case
SMC_LLC_FLOW_RKEY: + if (!SMC_IS_PARALLEL_FLOW(lgr->llc_flow_lcl.type)) + break; + if (!SMC_IS_PARALLEL_FLOW(lgr->llc_flow_rmt.type)) + break; + /* accepted */ + accept = true; + break; + default: + if (!SMC_IS_NONE_FLOW(lgr->llc_flow_lcl.type)) + break; + if (!SMC_IS_NONE_FLOW(lgr->llc_flow_rmt.type)) + break; + /* accepted */ + accept = true; + break; + } + if (accept) { + if (SMC_IS_NONE_FLOW(lgr->llc_flow_lcl.type)) { + lgr->llc_flow_lcl.type = type; + refcount_set(&lgr->llc_flow_lcl.parallel_refcnt, 1); + } else { + refcount_inc(&lgr->llc_flow_lcl.parallel_refcnt); + } spin_unlock_bh(&lgr->llc_flow_lock); return 0; } @@ -322,6 +363,16 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow) { spin_lock_bh(&lgr->llc_flow_lock); + if (!refcount_dec_and_test(&flow->parallel_refcnt)) { + spin_unlock_bh(&lgr->llc_flow_lock); + return; + } + /* free the first parallel flow, At present, + * only confirm rkey and delete rkey flow will use it. + */ + if (flow->qentry) + smc_llc_flow_qentry_del(flow); + memset(flow, 0, sizeof(*flow)); flow->type = SMC_LLC_FLOW_NONE; spin_unlock_bh(&lgr->llc_flow_lock); @@ -1723,16 +1774,14 @@ static void smc_llc_delete_link_work(struct work_struct *work) } /* process a confirm_rkey request from peer, remote flow */ -static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) +static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr, struct smc_llc_qentry *qentry) { struct smc_llc_msg_confirm_rkey *llc; - struct smc_llc_qentry *qentry; struct smc_link *link; int num_entries; int rk_idx; int i; - qentry = lgr->llc_flow_rmt.qentry; llc = &qentry->msg.confirm_rkey; link = qentry->link; @@ -1759,19 +1808,19 @@ static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr) llc->hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_init_msg_hdr(&llc->hd, link->lgr, sizeof(*llc)); smc_llc_send_message(link, &qentry->msg); - smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); + /* parallel subflow, keep the first flow until ref cnt goes to zero */ + if (qentry != lgr->llc_flow_rmt.qentry) + kfree(qentry); } /* process a delete_rkey request from peer, remote flow */ -static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) +static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr, struct smc_llc_qentry *qentry) { struct smc_llc_msg_delete_rkey *llc; - struct smc_llc_qentry *qentry; struct smc_link *link; u8 err_mask = 0; int i, max; - qentry = lgr->llc_flow_rmt.qentry; llc = &qentry->msg.delete_rkey; link = qentry->link; @@ -1809,7 +1858,9 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) finish: llc->hd.flags |= SMC_LLC_FLAG_RESP; smc_llc_send_message(link, &qentry->msg); - smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); + /* parallel subflow, keep the first flow until ref cnt goes to zero */ + if (qentry != lgr->llc_flow_rmt.qentry) + kfree(qentry); } static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type) @@ -1910,7 +1961,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* new request from remote, assign to remote flow */ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) { /* process here, does not wait for more llc msgs */ - smc_llc_rmt_conf_rkey(lgr); + smc_llc_rmt_conf_rkey(lgr, qentry); smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; @@ -1923,7 +1974,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) /* new request from remote, assign to remote flow */ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) { /* 
process here, does not wait for more llc msgs */ - smc_llc_rmt_delete_rkey(lgr); + smc_llc_rmt_delete_rkey(lgr, qentry); smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 7e7a316..cb217793 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -49,6 +49,12 @@ enum smc_llc_msg_type { #define smc_link_downing(state) \ (cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE) +#define SMC_IS_NONE_FLOW(type) \ + ((type) == SMC_LLC_FLOW_NONE) + +#define SMC_IS_PARALLEL_FLOW(type) \ + (((type) == SMC_LLC_FLOW_RKEY) || SMC_IS_NONE_FLOW(type)) + /* LLC DELETE LINK Request Reason Codes */ #define SMC_LLC_DEL_LOST_PATH 0x00010000 #define SMC_LLC_DEL_OP_INIT_TERM 0x00020000

From patchwork Sat Nov 26 09:03:39 2022
X-Patchwork-Submitter: "D. Wythe"
X-Patchwork-Id: 13056401
From: "D.Wythe"
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com
Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH net-next v6 4/7] net/smc: llc_conf_mutex refactor, replace it with rw_semaphore
Date: Sat, 26 Nov 2022 17:03:39 +0800
Message-Id: <1669453422-38152-5-git-send-email-alibuda@linux.alibaba.com>
In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>
References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>
X-Mailing-List: linux-rdma@vger.kernel.org

From: "D. Wythe"

llc_conf_mutex was used to protect links and link-related configuration in the same link group, for example adding or deleting links. However, in most cases the protected critical sections have only read semantics and no write semantics at all, such as obtaining a usable link or an available rmb_desc.

This patch does simple code refactoring: replace the mutex with an rw_semaphore, mutex_lock with down_write, and mutex_unlock with up_write. Theoretically this replacement is equivalent, but after this patch we can distinguish lock granularity according to the different semantics of the critical sections.
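The direction of the refactor can be illustrated with POSIX rwlocks. This is a sketch with made-up function names; note that this patch itself still takes the lock for writing everywhere, and the read-side downgrade only comes in the following patches:

#include <pthread.h>

static pthread_rwlock_t llc_conf_lock = PTHREAD_RWLOCK_INITIALIZER;

/* link reconfiguration (add/delete link): exclusive writer */
static void reconfigure_links(void)
{
	pthread_rwlock_wrlock(&llc_conf_lock);
	/* ... modify the link array ... */
	pthread_rwlock_unlock(&llc_conf_lock);
}

/* read-mostly path, e.g. mapping a buffer to usable links: shared reader */
static void map_buf_to_links(void)
{
	pthread_rwlock_rdlock(&llc_conf_lock);
	/* ... walk the links without modifying them ... */
	pthread_rwlock_unlock(&llc_conf_lock);
}

With a plain mutex both paths serialize; with the rw_semaphore, the read-mostly paths can later run concurrently while writers still get exclusion.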
Signed-off-by: D. Wythe --- net/smc/af_smc.c | 8 ++++---- net/smc/smc_core.c | 20 ++++++++++---------- net/smc/smc_core.h | 2 +- net/smc/smc_llc.c | 18 +++++++++--------- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 52287ee..c4253b5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -496,7 +496,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link, return -EINVAL; /* protect against parallel smcr_link_reg_buf() */ - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&lgr->lnk[i])) continue; @@ -504,7 +504,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link, if (rc) break; } - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); return rc; } @@ -521,7 +521,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, /* protect against parallel smc_llc_cli_rkey_exchange() and * parallel smcr_link_reg_buf() */ - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (!smc_link_active(&lgr->lnk[i])) continue; @@ -538,7 +538,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, } rmb_desc->is_conf_rkey = true; out: - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 1c4d669..b571297 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1385,10 +1385,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); if (!rc) { /* protect against smc_llc_cli_rkey_exchange() */ - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); smc_llc_do_delete_rkey(lgr, buf_desc); buf_desc->is_conf_rkey = false; - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); } } @@ -1659,12 +1659,12 @@ static void smc_lgr_free(struct smc_link_group *lgr) int i; if (!lgr->is_smcd) { - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].state != SMC_LNK_UNUSED) smcr_link_clear(&lgr->lnk[i], false); } - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); smc_llc_lgr_clear(lgr); } @@ -1978,12 +1978,12 @@ static void smcr_link_down(struct smc_link *lnk) } else { if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* another llc task is ongoing */ - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) || lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); } if (!list_empty(&lgr->list)) { smc_llc_send_delete_link(to_lnk, del_link_id, @@ -2043,9 +2043,9 @@ static void smc_link_down_work(struct work_struct *work) if (list_empty(&lgr->list)) return; wake_up_all(&lgr->llc_msg_waiter); - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); smcr_link_down(link); - mutex_unlock(&lgr->llc_conf_mutex); + up_write(&lgr->llc_conf_mutex); } static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, @@ -2650,7 +2650,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, int i, rc = 0, cnt = 0; /* protect against parallel link reconfiguration */ - mutex_lock(&lgr->llc_conf_mutex); + down_write(&lgr->llc_conf_mutex); for (i = 0; i <
 		struct smc_link *lnk = &lgr->lnk[i];
@@ -2663,7 +2663,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 		cnt++;
 	}
 out:
-	mutex_unlock(&lgr->llc_conf_mutex);
+	up_write(&lgr->llc_conf_mutex);
 	if (!rc && !cnt)
 		rc = -EINVAL;
 	return rc;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fb48f61..f7ec04a 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -307,7 +307,7 @@ struct smc_link_group {
 						/* queue for llc events */
 			spinlock_t llc_event_q_lock;
 						/* protects llc_event_q */
-			struct mutex llc_conf_mutex;
+			struct rw_semaphore llc_conf_mutex;
 						/* protects lgr reconfig. */
 			struct work_struct llc_add_link_work;
 			struct work_struct llc_del_link_work;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4ae636f..221ffdc 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1244,12 +1244,12 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
 
 	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
 
-	mutex_lock(&lgr->llc_conf_mutex);
+	down_write(&lgr->llc_conf_mutex);
 	if (smc_llc_is_local_add_link(&qentry->msg))
 		smc_llc_cli_add_link_invite(qentry->link, qentry);
 	else
 		smc_llc_cli_add_link(qentry->link, qentry);
-	mutex_unlock(&lgr->llc_conf_mutex);
+	up_write(&lgr->llc_conf_mutex);
 }
 
 static int smc_llc_active_link_count(struct smc_link_group *lgr)
@@ -1551,13 +1551,13 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
 
 	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
 
-	mutex_lock(&lgr->llc_conf_mutex);
+	down_write(&lgr->llc_conf_mutex);
 	rc = smc_llc_srv_add_link(link, qentry);
 	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
 		/* delete any asymmetric link */
 		smc_llc_delete_asym_link(lgr);
 	}
-	mutex_unlock(&lgr->llc_conf_mutex);
+	up_write(&lgr->llc_conf_mutex);
 	kfree(qentry);
 }
 
@@ -1624,7 +1624,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 		smc_lgr_terminate_sched(lgr);
 		goto out;
 	}
-	mutex_lock(&lgr->llc_conf_mutex);
+	down_write(&lgr->llc_conf_mutex);
 	/* delete single link */
 	for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
 		if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
@@ -1658,7 +1658,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 		smc_lgr_terminate_sched(lgr);
 	}
 out_unlock:
-	mutex_unlock(&lgr->llc_conf_mutex);
+	up_write(&lgr->llc_conf_mutex);
 out:
 	kfree(qentry);
 }
@@ -1694,7 +1694,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 	int active_links;
 	int i;
 
-	mutex_lock(&lgr->llc_conf_mutex);
+	down_write(&lgr->llc_conf_mutex);
 	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
 	lnk = qentry->link;
 	del_llc = &qentry->msg.delete_link;
@@ -1750,7 +1750,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 		smc_llc_add_link_local(lnk);
 	}
 out:
-	mutex_unlock(&lgr->llc_conf_mutex);
+	up_write(&lgr->llc_conf_mutex);
 	kfree(qentry);
 }
 
@@ -2170,7 +2170,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 	spin_lock_init(&lgr->llc_flow_lock);
 	init_waitqueue_head(&lgr->llc_flow_waiter);
 	init_waitqueue_head(&lgr->llc_msg_waiter);
-	mutex_init(&lgr->llc_conf_mutex);
+	init_rwsem(&lgr->llc_conf_mutex);
 	lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
 }

From patchwork Sat Nov 26 09:03:40 2022
Wythe" X-Patchwork-Id: 13056397 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 923FCC47088 for ; Sat, 26 Nov 2022 09:04:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229610AbiKZJEA (ORCPT ); Sat, 26 Nov 2022 04:04:00 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33588 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229555AbiKZJD4 (ORCPT ); Sat, 26 Nov 2022 04:03:56 -0500 Received: from out30-44.freemail.mail.aliyun.com (out30-44.freemail.mail.aliyun.com [115.124.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 70CF22A71C; Sat, 26 Nov 2022 01:03:54 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R211e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045176;MF=alibuda@linux.alibaba.com;NM=1;PH=DS;RN=8;SR=0;TI=SMTPD_---0VViBjTj_1669453431; Received: from j66a10360.sqa.eu95.tbsite.net(mailfrom:alibuda@linux.alibaba.com fp:SMTPD_---0VViBjTj_1669453431) by smtp.aliyun-inc.com; Sat, 26 Nov 2022 17:03:52 +0800 From: "D.Wythe" To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH net-next v6 5/7] net/smc: use read semaphores to reduce unnecessary blocking in smc_buf_create() & smcr_buf_unuse() Date: Sat, 26 Nov 2022 17:03:40 +0800 Message-Id: <1669453422-38152-6-git-send-email-alibuda@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: "D. Wythe" Following is part of Off-CPU graph during frequent SMC-R short-lived processing: process_one_work (51.19%) smc_close_passive_work (28.36%) smcr_buf_unuse (28.34%) rwsem_down_write_slowpath (28.22%) smc_listen_work (22.83%) smc_clc_wait_msg (1.84%) smc_buf_create (20.45%) smcr_buf_map_usable_links rwsem_down_write_slowpath (20.43%) smcr_lgr_reg_rmbs (0.53%) rwsem_down_write_slowpath (0.43%) smc_llc_do_confirm_rkey (0.08%) We can clearly see that during the connection establishment time, waiting time of connections is not on IO, but on llc_conf_mutex. What is more important, the core critical area (smcr_buf_unuse() & smc_buf_create()) only perfroms read semantics on links, we can easily replace it with read semaphore. Signed-off-by: D. 
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/smc_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index b571297..2f261c3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1385,10 +1385,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
 		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
 		if (!rc) {
 			/* protect against smc_llc_cli_rkey_exchange() */
-			down_write(&lgr->llc_conf_mutex);
+			down_read(&lgr->llc_conf_mutex);
 			smc_llc_do_delete_rkey(lgr, buf_desc);
 			buf_desc->is_conf_rkey = false;
-			up_write(&lgr->llc_conf_mutex);
+			up_read(&lgr->llc_conf_mutex);
 			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 		}
 	}
@@ -2650,7 +2650,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 	int i, rc = 0, cnt = 0;
 
 	/* protect against parallel link reconfiguration */
-	down_write(&lgr->llc_conf_mutex);
+	down_read(&lgr->llc_conf_mutex);
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		struct smc_link *lnk = &lgr->lnk[i];
@@ -2663,7 +2663,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 		cnt++;
 	}
 out:
-	up_write(&lgr->llc_conf_mutex);
+	up_read(&lgr->llc_conf_mutex);
 	if (!rc && !cnt)
 		rc = -EINVAL;
 	return rc;

From patchwork Sat Nov 26 09:03:41 2022
X-Patchwork-Submitter: "D. Wythe" <alibuda@linux.alibaba.com>
X-Patchwork-Id: 13056399
From: "D.Wythe" <alibuda@linux.alibaba.com>
To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com
Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org,
    linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH net-next v6 6/7] net/smc: reduce unnecessary blocking in smcr_lgr_reg_rmbs()
Date: Sat, 26 Nov 2022 17:03:41 +0800
Message-Id: <1669453422-38152-7-git-send-email-alibuda@linux.alibaba.com>
In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>
References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com>

From: "D. Wythe" <alibuda@linux.alibaba.com>

Unlike smc_buf_create() and smcr_buf_unuse(), smcr_lgr_reg_rmbs() must
be exclusive while the assigned rmb_desc has not been registered yet,
although it can run in parallel once the rmb_desc is registered, since
it then only performs read semantics on it. Hence we cannot simply
replace the lock with a read semaphore.

The idea here is: if the assigned rmb_desc is already registered,
protect the critical section with the read semaphore; if it is not
registered yet, keep using the write semaphore to preserve exclusivity.
Thanks to the reusability of rmb_desc, this allows parallel execution
in most cases.
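Stripped of the surrounding function, the locking shape introduced by the
hunk below is roughly this (a simplified sketch; the real code checks
every active link and redoes the registration under the write lock):

    bool do_slow = false;

    down_read(&lgr->llc_conf_mutex);
    if (!rmb_desc->is_reg_mr[link->link_idx]) {
            /* not registered yet: drop the shared lock and take the
             * exclusive one; this is not an atomic upgrade, so the
             * slow path must redo its checks under the write lock
             */
            up_read(&lgr->llc_conf_mutex);
            down_write(&lgr->llc_conf_mutex);
            do_slow = true;
            /* ... smcr_link_reg_buf() on all active links ... */
    }
    /* ... smc_llc_do_confirm_rkey() runs under either lock mode ... */
    if (do_slow)
            up_write(&lgr->llc_conf_mutex);
    else
            up_read(&lgr->llc_conf_mutex);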
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/af_smc.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c4253b5..0af7b2c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -513,11 +513,26 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
 			     struct smc_buf_desc *rmb_desc)
 {
 	struct smc_link_group *lgr = link->lgr;
+	bool do_slow = false;
 	int i, rc = 0;
 
 	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
 	if (rc)
 		return rc;
+
+	down_read(&lgr->llc_conf_mutex);
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (!smc_link_active(&lgr->lnk[i]))
+			continue;
+		if (!rmb_desc->is_reg_mr[link->link_idx]) {
+			up_read(&lgr->llc_conf_mutex);
+			goto slow_path;
+		}
+	}
+	/* mr register already */
+	goto fast_path;
+slow_path:
+	do_slow = true;
 	/* protect against parallel smc_llc_cli_rkey_exchange() and
 	 * parallel smcr_link_reg_buf()
 	 */
@@ -529,7 +544,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
 		if (rc)
 			goto out;
 	}
-
+fast_path:
 	/* exchange confirm_rkey msg with peer */
 	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
 	if (rc) {
@@ -538,7 +553,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
 	}
 	rmb_desc->is_conf_rkey = true;
 out:
-	up_write(&lgr->llc_conf_mutex);
+	do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 	return rc;
 }

From patchwork Sat Nov 26 09:03:42 2022
Wythe" X-Patchwork-Id: 13056402 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 71827C47088 for ; Sat, 26 Nov 2022 09:04:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229563AbiKZJEE (ORCPT ); Sat, 26 Nov 2022 04:04:04 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33596 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229590AbiKZJD5 (ORCPT ); Sat, 26 Nov 2022 04:03:57 -0500 Received: from out30-42.freemail.mail.aliyun.com (out30-42.freemail.mail.aliyun.com [115.124.30.42]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 071E62A720; Sat, 26 Nov 2022 01:03:55 -0800 (PST) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R121e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018046051;MF=alibuda@linux.alibaba.com;NM=1;PH=DS;RN=8;SR=0;TI=SMTPD_---0VViBjU._1669453432; Received: from j66a10360.sqa.eu95.tbsite.net(mailfrom:alibuda@linux.alibaba.com fp:SMTPD_---0VViBjU._1669453432) by smtp.aliyun-inc.com; Sat, 26 Nov 2022 17:03:53 +0800 From: "D.Wythe" To: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com Cc: kuba@kernel.org, davem@davemloft.net, netdev@vger.kernel.org, linux-s390@vger.kernel.org, linux-rdma@vger.kernel.org Subject: [PATCH net-next v6 7/7] net/smc: replace mutex rmbs_lock and sndbufs_lock with rw_semaphore Date: Sat, 26 Nov 2022 17:03:42 +0800 Message-Id: <1669453422-38152-8-git-send-email-alibuda@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> References: <1669453422-38152-1-git-send-email-alibuda@linux.alibaba.com> Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: "D. Wythe" It's clear that rmbs_lock and sndbufs_lock are aims to protect the rmbs list or the sndbufs list. During connection establieshment, smc_buf_get_slot() will always be invoked, and it only performs read semantics in rmbs list and sndbufs list. Based on the above considerations, we replace mutex with rw_semaphore. Only smc_buf_get_slot() use down_read() to allow smc_buf_get_slot() run concurrently, other part use down_write() to keep exclusive semantics. Signed-off-by: D. 
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/smc_core.c | 55 +++++++++++++++++++++++++++---------------------------
 net/smc/smc_core.h |  4 ++--
 net/smc/smc_llc.c  | 16 ++++++++--------
 3 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2f261c3..3f6e70e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1129,8 +1129,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	lgr->freeing = 0;
 	lgr->vlan_id = ini->vlan_id;
 	refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
-	mutex_init(&lgr->sndbufs_lock);
-	mutex_init(&lgr->rmbs_lock);
+	init_rwsem(&lgr->sndbufs_lock);
+	init_rwsem(&lgr->rmbs_lock);
 	rwlock_init(&lgr->conns_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -1377,7 +1377,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
 			   struct smc_link_group *lgr)
 {
-	struct mutex *lock; /* lock buffer list */
+	struct rw_semaphore *lock; /* lock buffer list */
 	int rc;
 
 	if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
@@ -1397,9 +1397,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
 		/* buf registration failed, reuse not possible */
 		lock = is_rmb ? &lgr->rmbs_lock : &lgr->sndbufs_lock;
-		mutex_lock(lock);
+		down_write(lock);
 		list_del(&buf_desc->list);
-		mutex_unlock(lock);
+		up_write(lock);
 
 		smc_buf_free(lgr, is_rmb, buf_desc);
 	} else {
@@ -1503,15 +1503,16 @@ static void smcr_buf_unmap_lgr(struct smc_link *lnk)
 	int i;
 
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		mutex_lock(&lgr->rmbs_lock);
+		down_write(&lgr->rmbs_lock);
 		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
 			smcr_buf_unmap_link(buf_desc, true, lnk);
-		mutex_unlock(&lgr->rmbs_lock);
-		mutex_lock(&lgr->sndbufs_lock);
+		up_write(&lgr->rmbs_lock);
+
+		down_write(&lgr->sndbufs_lock);
 		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list)
 			smcr_buf_unmap_link(buf_desc, false, lnk);
-		mutex_unlock(&lgr->sndbufs_lock);
+		up_write(&lgr->sndbufs_lock);
 	}
 }
 
@@ -2393,19 +2394,19 @@ int smc_uncompress_bufsize(u8 compressed)
  * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
-					     struct mutex *lock,
+					     struct rw_semaphore *lock,
 					     struct list_head *buf_list)
 {
 	struct smc_buf_desc *buf_slot;
 
-	mutex_lock(lock);
+	down_read(lock);
 	list_for_each_entry(buf_slot, buf_list, list) {
 		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
-			mutex_unlock(lock);
+			up_read(lock);
 			return buf_slot;
 		}
 	}
-	mutex_unlock(lock);
+	up_read(lock);
 	return NULL;
 }
 
@@ -2514,13 +2515,13 @@ int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
 	return 0;
 }
 
-static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
 			     struct list_head *lst, bool is_rmb)
 {
 	struct smc_buf_desc *buf_desc, *bf;
 	int rc = 0;
 
-	mutex_lock(lock);
+	down_write(lock);
 	list_for_each_entry_safe(buf_desc, bf, lst, list) {
 		if (!buf_desc->used)
 			continue;
@@ -2529,7 +2530,7 @@ static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
 		goto out;
 	}
 out:
-	mutex_unlock(lock);
+	up_write(lock);
 	return rc;
 }
 
@@ -2562,37 +2563,37 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
 	int i, rc = 0;
 
 	/* reg all RMBs for a new link */
-	mutex_lock(&lgr->rmbs_lock);
+	down_write(&lgr->rmbs_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
 			if (!buf_desc->used)
 				continue;
 			rc = smcr_link_reg_buf(lnk, buf_desc);
 			if (rc) {
-				mutex_unlock(&lgr->rmbs_lock);
+				up_write(&lgr->rmbs_lock);
 				return rc;
 			}
 		}
 	}
-	mutex_unlock(&lgr->rmbs_lock);
+	up_write(&lgr->rmbs_lock);
 
 	if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
 		return rc;
 
 	/* reg all vzalloced sndbufs for a new link */
-	mutex_lock(&lgr->sndbufs_lock);
+	down_write(&lgr->sndbufs_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
 			if (!buf_desc->used || !buf_desc->is_vm)
 				continue;
 			rc = smcr_link_reg_buf(lnk, buf_desc);
 			if (rc) {
-				mutex_unlock(&lgr->sndbufs_lock);
+				up_write(&lgr->sndbufs_lock);
 				return rc;
 			}
 		}
 	}
-	mutex_unlock(&lgr->sndbufs_lock);
+	up_write(&lgr->sndbufs_lock);
 	return rc;
 }
 
@@ -2712,8 +2713,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 	struct smc_link_group *lgr = conn->lgr;
 	struct list_head *buf_list;
 	int bufsize, bufsize_short;
+	struct rw_semaphore *lock; /* lock buffer list */
 	bool is_dgraded = false;
-	struct mutex *lock; /* lock buffer list */
 	int sk_buf_size;
 
 	if (is_rmb)
@@ -2761,9 +2762,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
 		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
 		buf_desc->used = 1;
-		mutex_lock(lock);
+		down_write(lock);
 		list_add(&buf_desc->list, buf_list);
-		mutex_unlock(lock);
+		up_write(lock);
 		break; /* found */
 	}
 
@@ -2837,9 +2838,9 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 	/* create rmb */
 	rc = __smc_buf_create(smc, is_smcd, true);
 	if (rc) {
-		mutex_lock(&smc->conn.lgr->sndbufs_lock);
+		down_write(&smc->conn.lgr->sndbufs_lock);
 		list_del(&smc->conn.sndbuf_desc->list);
-		mutex_unlock(&smc->conn.lgr->sndbufs_lock);
+		up_write(&smc->conn.lgr->sndbufs_lock);
 		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
 		smc->conn.sndbuf_desc = NULL;
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index f7ec04a..756ed6e 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -260,9 +260,9 @@ struct smc_link_group {
 			unsigned short vlan_id; /* vlan id of link group */
 
 			struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
-			struct mutex sndbufs_lock; /* protects tx buffers */
+			struct rw_semaphore sndbufs_lock; /* protects tx buffers */
 			struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
-			struct mutex rmbs_lock; /* protects rx buffers */
+			struct rw_semaphore rmbs_lock; /* protects rx buffers */
 
 			u8 first_contact_done; /* if first contact succeed */
 			u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 221ffdc..47146ff 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -650,7 +650,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
 
 	prim_lnk_idx = link->link_idx;
 	lnk_idx = link_new->link_idx;
-	mutex_lock(&lgr->rmbs_lock);
+	down_write(&lgr->rmbs_lock);
 	ext->num_rkeys = lgr->conns_num;
 	if (!ext->num_rkeys)
 		goto out;
@@ -670,7 +670,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
 	}
 	len += i * sizeof(ext->rt[0]);
 out:
-	mutex_unlock(&lgr->rmbs_lock);
+	up_write(&lgr->rmbs_lock);
 	return len;
 }
 
@@ -931,7 +931,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
 	int rc = 0;
 	int i;
 
-	mutex_lock(&lgr->rmbs_lock);
+	down_write(&lgr->rmbs_lock);
 	num_rkeys_send = lgr->conns_num;
 	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
 	do {
@@ -958,7 +958,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
 			break;
 	} while (num_rkeys_send || num_rkeys_recv);
 
-	mutex_unlock(&lgr->rmbs_lock);
+	up_write(&lgr->rmbs_lock);
 	return rc;
 }
 
@@ -1041,14 +1041,14 @@ static void smc_llc_save_add_link_rkeys(struct smc_link *link,
 	ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
 						     SMC_WR_TX_SIZE);
 	max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
-	mutex_lock(&lgr->rmbs_lock);
+	down_write(&lgr->rmbs_lock);
 	for (i = 0; i < max; i++) {
 		smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
 			       ext->rt[i].rmb_key,
 			       ext->rt[i].rmb_vaddr_new,
 			       ext->rt[i].rmb_key_new);
 	}
-	mutex_unlock(&lgr->rmbs_lock);
+	up_write(&lgr->rmbs_lock);
 }
 
 static void smc_llc_save_add_link_info(struct smc_link *link,
@@ -1355,7 +1355,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
 	int rc = 0;
 	int i;
 
-	mutex_lock(&lgr->rmbs_lock);
+	down_write(&lgr->rmbs_lock);
 	num_rkeys_send = lgr->conns_num;
 	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
 	do {
@@ -1380,7 +1380,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 	} while (num_rkeys_send || num_rkeys_recv);
 out:
-	mutex_unlock(&lgr->rmbs_lock);
+	up_write(&lgr->rmbs_lock);
 	return rc;
 }