@@ -53,12 +53,7 @@
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
-static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
- * creation on server
- */
-static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
- * creation on client
- */
+static DEFINE_MUTEX(smcd_buf_pending); /* serialize SMC-D buf creation */
static struct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */
struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
@@ -1197,10 +1192,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (reason_code)
return reason_code;
- mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
if (reason_code) {
- mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
@@ -1292,7 +1285,6 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (reason_code)
goto connect_abort;
}
- mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
@@ -1302,7 +1294,6 @@ static int smc_connect_rdma(struct smc_sock *smc,
return 0;
connect_abort:
smc_conn_abort(smc, ini->first_contact_local);
- mutex_unlock(&smc_client_lgr_pending);
smc->connect_nonblock = 0;
return reason_code;
@@ -1348,16 +1339,15 @@ static int smc_connect_ism(struct smc_sock *smc,
}
ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid;
- /* there is only one lgr role for SMC-D; use server lock */
- mutex_lock(&smc_server_lgr_pending);
rc = smc_conn_create(smc, ini);
if (rc) {
- mutex_unlock(&smc_server_lgr_pending);
return rc;
}
+ mutex_lock(&smcd_buf_pending);
/* Create send and receive buffers */
rc = smc_buf_create(smc, true);
+ mutex_unlock(&smcd_buf_pending);
if (rc) {
rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
goto connect_abort;
@@ -1379,7 +1369,6 @@ static int smc_connect_ism(struct smc_sock *smc,
aclc->hdr.version, eid, NULL);
if (rc)
goto connect_abort;
- mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
@@ -1389,7 +1378,6 @@ static int smc_connect_ism(struct smc_sock *smc,
return 0;
connect_abort:
smc_conn_abort(smc, ini->first_contact_local);
- mutex_unlock(&smc_server_lgr_pending);
smc->connect_nonblock = 0;
return rc;
@@ -1505,6 +1493,9 @@ static int __smc_connect(struct smc_sock *smc)
SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
smc_connect_ism_vlan_cleanup(smc, ini);
+ if (ini->first_contact_local)
+ smc_lgr_decision_maker_on_first_contact_success(smc, ini);
+
kfree(buf);
kfree(ini);
return 0;
@@ -1513,6 +1504,8 @@ static int __smc_connect(struct smc_sock *smc)
smc_connect_ism_vlan_cleanup(smc, ini);
kfree(buf);
fallback:
+ if (ini->first_contact_local)
+ smc_lgr_decision_maker_on_first_contact_fail(ini);
kfree(ini);
return smc_connect_decline_fallback(smc, rc, version);
}
@@ -2001,8 +1994,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
if (rc)
return rc;
+ mutex_lock(&smcd_buf_pending);
/* Create send and receive buffers */
rc = smc_buf_create(new_smc, true);
+ mutex_unlock(&smcd_buf_pending);
if (rc) {
smc_conn_abort(new_smc, ini->first_contact_local);
return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
@@ -2379,7 +2374,6 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
- mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
@@ -2387,46 +2381,42 @@ static void smc_listen_work(struct work_struct *work)
/* determine ISM or RoCE device used for connection */
rc = smc_listen_find_device(new_smc, pclc, ini);
if (rc)
- goto out_unlock;
+ goto out_decl;
/* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
accept_version, ini->negotiated_eid);
if (rc)
- goto out_unlock;
-
- /* SMC-D does not need this lock any more */
- if (ini->is_smcd)
- mutex_unlock(&smc_server_lgr_pending);
+ goto out_decl;
/* receive SMC Confirm CLC message */
memset(buf, 0, sizeof(*buf));
cclc = (struct smc_clc_msg_accept_confirm *)buf;
rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
SMC_CLC_CONFIRM, CLC_WAIT_TIME);
- if (rc) {
- if (!ini->is_smcd)
- goto out_unlock;
+ if (rc)
goto out_decl;
- }
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
ini->first_contact_local, ini);
if (rc)
- goto out_unlock;
- mutex_unlock(&smc_server_lgr_pending);
+ goto out_decl;
}
+ smc_conn_leave_rtoken_pending(new_smc, ini);
smc_conn_save_peer_info(new_smc, cclc);
smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
+ if (ini->first_contact_local)
+ smc_lgr_decision_maker_on_first_contact_success(new_smc, ini);
goto out_free;
-out_unlock:
- mutex_unlock(&smc_server_lgr_pending);
out_decl:
+ smc_conn_leave_rtoken_pending(new_smc, ini);
+ if (ini && ini->first_contact_local)
+ smc_lgr_decision_maker_on_first_contact_fail(ini);
smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
proposal_version);
out_free:
@@ -40,12 +40,289 @@
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
+#define LDM_TBD 0
+#define LDM_RETRY 1
+#define LDM_FIRST_CONTACT 2
+
struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
.list = LIST_HEAD_INIT(smc_lgr_list.list),
.num = 0,
};
+struct smc_lgr_decision_maker {
+ struct rhash_head rnode; /* node for rhashtable */
+ struct wait_queue_head wq; /* queue for connection that have been
+ * decided to wait
+ */
+ spinlock_t lock; /* protection for decision maker */
+ refcount_t ref; /* refcount for decision maker */
+ int type; /* smc type */
+ int role; /* smc role */
+ unsigned long pending_capability;
+ /* maximum pending number of connections that
+ * need to wait.
+ */
+ unsigned long conns_pending;
+ /* the number of pending connections */
+};
+
+struct smcr_lgr_decision_maker {
+ struct smc_lgr_decision_maker ldm;
+ u8 peer_systemid[SMC_SYSTEMID_LEN];
+ u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
+ u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
+ int clcqpn;
+};
+
+struct smcd_lgr_decision_maker {
+ struct smc_lgr_decision_maker ldm;
+ u64 peer_gid;
+ struct smcd_dev *dev;
+};
+
+static int smcr_lgr_decision_maker_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct smcr_lgr_decision_maker *rldm = obj;
+ const struct smc_init_info *ini = arg->key;
+
+ if (ini->role != rldm->ldm.role)
+ return 1;
+
+ if (memcmp(ini->peer_systemid, rldm->peer_systemid, SMC_SYSTEMID_LEN))
+ return 1;
+
+ if (memcmp(ini->peer_gid, rldm->peer_gid, SMC_GID_SIZE))
+ return 1;
+
+ if ((ini->role == SMC_SERV || ini->ib_clcqpn == rldm->clcqpn) &&
+ (ini->smcr_version == SMC_V2 ||
+ !memcmp(ini->peer_mac, rldm->peer_mac, ETH_ALEN)))
+ return 0;
+
+ return 1;
+}
+
+static u32 smcr_lgr_decision_maker_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct smcr_lgr_decision_maker *rldm = data;
+
+ return jhash2((u32 *)rldm->peer_systemid, SMC_SYSTEMID_LEN / sizeof(u32), seed)
+ + ((rldm->ldm.role == SMC_SERV) ? 0 : rldm->clcqpn);
+}
+
+static u32 smcr_lgr_decision_maker_arg_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct smc_init_info *ini = data;
+
+ return jhash2((u32 *)ini->peer_systemid, SMC_SYSTEMID_LEN / sizeof(u32), seed)
+ + ((ini->role == SMC_SERV) ? 0 : ini->ib_clcqpn);
+}
+
+static void smcr_lgr_decision_maker_init(struct smc_lgr_decision_maker *ldm,
+ struct smc_init_info *ini)
+{
+ struct smcr_lgr_decision_maker *rldm = (struct smcr_lgr_decision_maker *)ldm;
+
+ memcpy(rldm->peer_systemid, ini->peer_systemid, SMC_SYSTEMID_LEN);
+ memcpy(rldm->peer_gid, ini->peer_gid, SMC_GID_SIZE);
+ memcpy(rldm->peer_mac, ini->peer_mac, ETH_ALEN);
+ rldm->clcqpn = ini->ib_clcqpn;
+}
+
+static int smcd_lgr_decision_maker_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct smcd_lgr_decision_maker *dldm = obj;
+ const struct smc_init_info *ini = arg->key;
+
+ if (ini->role != dldm->ldm.role)
+ return 1;
+
+ if (ini->ism_peer_gid[ini->ism_selected] != dldm->peer_gid)
+ return 1;
+
+ if (ini->ism_dev[ini->ism_selected] != dldm->dev)
+ return 1;
+
+ return 0;
+}
+
+static u32 smcd_lgr_decision_maker_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct smcd_lgr_decision_maker *dlcm = data;
+
+ return jhash2((u32 *)&dlcm->peer_gid, sizeof(dlcm->peer_gid) / sizeof(u32), seed);
+}
+
+static u32 smcd_lgr_decision_maker_arg_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct smc_init_info *ini = data;
+ u64 select_gid;
+
+ select_gid = ini->ism_peer_gid[ini->ism_selected];
+ return jhash2((u32 *)&select_gid, sizeof(select_gid) / sizeof(u32), seed);
+}
+
+static void smcd_lgr_decision_maker_init(struct smc_lgr_decision_maker *ldm,
+ struct smc_init_info *ini)
+{
+ struct smcd_lgr_decision_maker *dldm = (struct smcd_lgr_decision_maker *)ldm;
+
+ dldm->peer_gid = ini->ism_peer_gid[ini->ism_selected];
+ dldm->dev = ini->ism_dev[ini->ism_selected];
+}
+
+struct smc_lgr_decision_builder {
+ struct rhashtable map;
+ spinlock_t map_lock; /* protect map */
+ struct rhashtable_params default_params;
+ /* how to serach and insert decision maker by ini */
+ void (*init)(struct smc_lgr_decision_maker *ldm, struct smc_init_info *ini);
+ /* init maker by ini */
+ u32 sz; /* size */
+};
+
+static struct smc_lgr_decision_builder smc_lgr_decision_set[SMC_TYPE_D + 1] = {
+ /* SMC_TYPE_R = 0 */
+ {
+ .sz = sizeof(struct smcr_lgr_decision_maker),
+ .init = smcr_lgr_decision_maker_init,
+ .map_lock = __SPIN_LOCK_UNLOCKED(smc_lgr_decision_set[SMC_TYPE_R].map_lock),
+ .default_params = {
+ .head_offset = offsetof(struct smc_lgr_decision_maker, rnode),
+ .key_len = sizeof(struct smc_init_info),
+ .obj_cmpfn = smcr_lgr_decision_maker_cmpfn,
+ .obj_hashfn = smcr_lgr_decision_maker_hashfn,
+ .hashfn = smcr_lgr_decision_maker_arg_hashfn,
+ .automatic_shrinking = true,
+ },
+ },
+ /* SMC_TYPE_D = 1 */
+ {
+ .sz = sizeof(struct smcd_lgr_decision_maker),
+ .init = smcd_lgr_decision_maker_init,
+ .map_lock = __SPIN_LOCK_UNLOCKED(smc_lgr_decision_set[SMC_TYPE_D].map_lock),
+ .default_params = {
+ .head_offset = offsetof(struct smc_lgr_decision_maker, rnode),
+ .key_len = sizeof(struct smc_init_info),
+ .obj_cmpfn = smcd_lgr_decision_maker_cmpfn,
+ .obj_hashfn = smcd_lgr_decision_maker_hashfn,
+ .hashfn = smcd_lgr_decision_maker_arg_hashfn,
+ .automatic_shrinking = true,
+ },
+ },
+};
+
+/* hold a reference for smc_lgr_decision_maker */
+static inline void smc_lgr_decision_maker_hold(struct smc_lgr_decision_maker *ldm)
+{
+ if (likely(ldm))
+ refcount_inc(&ldm->ref);
+}
+
+/* release a reference for smc_lgr_decision_maker */
+static inline void smc_lgr_decision_maker_put(struct smc_lgr_decision_maker *ldm)
+{
+ bool do_free = false;
+ int type;
+
+ if (unlikely(!ldm))
+ return;
+
+ if (refcount_dec_not_one(&ldm->ref))
+ return;
+
+ type = ldm->type;
+
+ spin_lock_bh(&smc_lgr_decision_set[type].map_lock);
+ /* last ref */
+ if (refcount_dec_and_test(&ldm->ref)) {
+ do_free = true;
+ rhashtable_remove_fast(&smc_lgr_decision_set[type].map, &ldm->rnode,
+ smc_lgr_decision_set[type].default_params);
+ }
+ spin_unlock_bh(&smc_lgr_decision_set[type].map_lock);
+ if (do_free)
+ kfree(ldm);
+}
+
+static struct smc_lgr_decision_maker *
+smc_get_or_create_lgr_decision_maker(struct smc_init_info *ini)
+{
+ struct smc_lgr_decision_maker *ldm;
+ int err, type;
+
+ type = ini->is_smcd ? SMC_TYPE_D : SMC_TYPE_R;
+
+ spin_lock_bh(&smc_lgr_decision_set[type].map_lock);
+ ldm = rhashtable_lookup_fast(&smc_lgr_decision_set[type].map, ini,
+ smc_lgr_decision_set[type].default_params);
+ if (!ldm) {
+ ldm = kzalloc(smc_lgr_decision_set[type].sz, GFP_ATOMIC);
+ if (unlikely(!ldm))
+ goto fail;
+
+ /* common init */
+ spin_lock_init(&ldm->lock);
+ init_waitqueue_head(&ldm->wq);
+ refcount_set(&ldm->ref, 1);
+ ldm->type = type;
+ ldm->role = ini->role;
+
+ /* init */
+ if (smc_lgr_decision_set[type].init)
+ smc_lgr_decision_set[type].init(ldm, ini);
+
+ err = rhashtable_insert_fast(&smc_lgr_decision_set[type].map,
+ &ldm->rnode,
+ smc_lgr_decision_set[type].default_params);
+ if (unlikely(err)) {
+ pr_warn_ratelimited("smc: rhashtable_insert_fast failed (%d)", err);
+ kfree(ldm);
+ ldm = NULL;
+ }
+ } else {
+ smc_lgr_decision_maker_hold(ldm);
+ }
+fail:
+ spin_unlock_bh(&smc_lgr_decision_set[type].map_lock);
+ return ldm;
+}
+
+void smc_lgr_decision_maker_on_first_contact_done(struct smc_init_info *ini, bool success)
+{
+ struct smc_lgr_decision_maker *ldm;
+ int nr;
+
+ if (unlikely(!ini->first_contact_local))
+ return;
+
+ /* get lgr decision maker */
+ ldm = ini->ldm;
+
+ if (unlikely(!ldm))
+ return;
+
+ spin_lock_bh(&ldm->lock);
+ ldm->pending_capability -= (SMC_RMBS_PER_LGR_MAX - 1);
+ nr = SMC_RMBS_PER_LGR_MAX - 1;
+ if (unlikely(!success) && ldm->role == SMC_SERV) {
+ /* only to wake up one connection to perfrom
+ * first contact in server side, client MUST wake up
+ * all to decline.
+ */
+ nr = min(1, nr);
+ }
+ if (nr)
+ __wake_up(&ldm->wq, TASK_NORMAL, nr,
+ success ? (void *)LDM_RETRY : (void *)LDM_FIRST_CONTACT);
+
+ spin_unlock_bh(&ldm->lock);
+
+ /* hold in smc_lgr_create */
+ smc_lgr_decision_maker_put(ldm);
+}
+
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
@@ -756,6 +1033,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
lnk->link_id = smcr_next_link_id(lgr);
lnk->lgr = lgr;
smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
+ rwlock_init(&lnk->rtokens_lock);
lnk->link_idx = link_idx;
lnk->wr_rx_id_compl = 0;
smc_ibdev_cnt_inc(lnk);
@@ -915,6 +1193,11 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&lgr_cnt);
}
smc->conn.lgr = lgr;
+
+ lgr->ldm = ini->ldm;
+ /* smc_lgr_decision_maker_put in __smc_lgr_free() */
+ smc_lgr_decision_maker_hold(lgr->ldm);
+
spin_lock_bh(lgr_lock);
list_add_tail(&lgr->list, lgr_list);
spin_unlock_bh(lgr_lock);
@@ -1364,6 +1647,9 @@ static void __smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
+ /* smc_lgr_decision_maker_hold in smc_lgr_create() */
+ if (lgr->ldm)
+ smc_lgr_decision_maker_put(lgr->ldm);
kfree(lgr);
}
@@ -1824,6 +2110,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
lgr->role != role)
return false;
+ if (!READ_ONCE(lgr->first_contact_done))
+ return false;
+
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
lnk = &lgr->lnk[i];
@@ -1844,16 +2133,39 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
static bool smcd_lgr_match(struct smc_link_group *lgr,
struct smcd_dev *smcismdev, u64 peer_gid)
{
- return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
+ return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev &&
+ READ_ONCE(lgr->first_contact_done);
+}
+
+static int smc_ldm_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync,
+ void *key)
+{
+ struct smc_init_info *ini = wq_entry->private;
+ int ret;
+
+ wq_entry->private = ini->private;
+ ini->advise = (u8)(uintptr_t)key;
+
+ ret = woken_wake_function(wq_entry, mode, sync, NULL);
+ if (ret) {
+ /* only wake up once */
+ list_del_init_careful(&wq_entry->entry);
+ if (likely(ini->ldm))
+ ini->ldm->conns_pending--;
+ }
+ return ret;
}
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
+ struct smc_lgr_decision_maker *ldm = NULL;
struct smc_connection *conn = &smc->conn;
struct net *net = sock_net(&smc->sk);
+ struct wait_queue_entry wait;
struct list_head *lgr_list;
struct smc_link_group *lgr;
+ int timeo = CLC_WAIT_TIME;
enum smc_lgr_role role;
spinlock_t *lgr_lock;
int rc = 0;
@@ -1864,12 +2176,40 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
&smc_lgr_list.lock;
ini->first_contact_local = 1;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
- if (role == SMC_CLNT && ini->first_contact_peer)
+ ini->role = role;
+
+ ldm = smc_get_or_create_lgr_decision_maker(ini);
+ if (unlikely(!ldm))
+ return SMC_CLC_DECL_INTERR;
+
+ /* Considering a scenario, after find out the SMCDv2 device, a potential failures
+ * occur in smc_find_rdma_v2_device_serv, for example smc_buf_create failed. And then,
+ * it will continue to search for SMCDv1 devices or SMCR devices. Hence,
+ * smc_conn_create will called again with the same ini, if the ini performs the first
+ * contact logic, which means that the lgr that should have been created by it must be
+ * failed, and we need to actively trigger the failed logic here.
+ */
+ if (unlikely(ini->ldm)) {
+ smc_lgr_decision_maker_on_first_contact_fail(ini);
+ ini->ldm = NULL;
+ }
+
+ if (role == SMC_CLNT && ini->first_contact_peer) {
+ spin_lock_bh(&ldm->lock);
+ ldm->pending_capability += (SMC_RMBS_PER_LGR_MAX - 1);
+ spin_unlock_bh(&ldm->lock);
/* create new link group as well */
goto create;
+ }
/* determine if an existing link group can be reused */
spin_lock_bh(lgr_lock);
+ spin_lock(&ldm->lock);
+
+again:
+ /* init advise */
+ ini->advise = LDM_TBD;
+
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
@@ -1884,21 +2224,82 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
(lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
- !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
+ (SMC_RMBS_PER_LGR_MAX -
+ bitmap_weight(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)
+ > atomic_read(&lgr->rtoken_pendings))))) {
/* link group found */
ini->first_contact_local = 0;
conn->lgr = lgr;
rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
- if (!rc && delayed_work_pending(&lgr->free_work))
- cancel_delayed_work(&lgr->free_work);
+ if (!rc) {
+ smc_conn_enter_rtoken_pending(smc, ini);
+ if (delayed_work_pending(&lgr->free_work))
+ cancel_delayed_work(&lgr->free_work);
+ }
break;
}
write_unlock_bh(&lgr->conns_lock);
}
+
+ /* not found */
+ if (!rc && ini->first_contact_local) {
+ if (timeo && ldm->pending_capability > ldm->conns_pending) {
+ /* record pending connection, release
+ * in smc_lgr_decision_maker_on_first_contact_done() or after timeout
+ */
+ ldm->conns_pending++;
+
+ /* used in ldm_wakeup, clear after remove from queue */
+ ini->ldm = ldm;
+
+ /* init wait entry */
+ init_wait_entry(&wait, 0);
+ /* replace wait with new private & func */
+ ini->private = wait.private;
+ wait.private = ini;
+ wait.func = smc_ldm_wake_function;
+
+ /* add to wq */
+ add_wait_queue_exclusive(&ldm->wq, &wait);
+
+ spin_unlock(&ldm->lock);
+ spin_unlock_bh(lgr_lock);
+
+ /* wait woken */
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+
+ spin_lock_bh(lgr_lock);
+ spin_lock(&ldm->lock);
+
+ ini->ldm = NULL;
+
+ /* remove from wq */
+ remove_wait_queue(&ldm->wq, &wait);
+
+ /* timeout */
+ if (unlikely(!timeo || ini->advise == LDM_TBD)) {
+ if (ini->advise == LDM_TBD) {
+ ldm->conns_pending--;
+ ini->advise = LDM_RETRY;
+ }
+ }
+ } else {
+ ini->advise = LDM_FIRST_CONTACT;
+ }
+
+ if (ini->advise == LDM_RETRY)
+ goto again;
+
+ /* do first contact */
+ ldm->pending_capability += (SMC_RMBS_PER_LGR_MAX - 1);
+ }
+
+ spin_unlock(&ldm->lock);
spin_unlock_bh(lgr_lock);
+
if (rc)
- return rc;
+ goto out;
if (role == SMC_CLNT && !ini->first_contact_peer &&
ini->first_contact_local) {
@@ -1906,11 +2307,15 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
* a new one
* send out_of_sync decline, reason synchr. error
*/
- return SMC_CLC_DECL_SYNCERR;
+ rc = SMC_CLC_DECL_SYNCERR;
+ goto out;
}
create:
if (ini->first_contact_local) {
+ ini->ldm = ldm;
+ /* smc_lgr_decision_maker_put in first_contact_done() */
+ smc_lgr_decision_maker_hold(ldm);
rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
@@ -1943,6 +2348,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
#endif
out:
+ /* smc_lgr_decision_maker_hold in smc_get_or_create_lgr_decision_make() */
+ smc_lgr_decision_maker_put(ldm);
return rc;
}
@@ -2505,19 +2912,24 @@ int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
u32 rkey = ntohl(nw_rkey);
int i;
+ write_lock_bh(&lnk->rtokens_lock);
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
test_bit(i, lgr->rtokens_used_mask)) {
/* already in list */
+ write_unlock_bh(&lnk->rtokens_lock);
return i;
}
}
i = smc_rmb_reserve_rtoken_idx(lgr);
- if (i < 0)
+ if (i < 0) {
+ write_unlock_bh(&lnk->rtokens_lock);
return i;
+ }
lgr->rtokens[i][lnk->link_idx].rkey = rkey;
lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
+ write_unlock_bh(&lnk->rtokens_lock);
return i;
}
@@ -2528,6 +2940,7 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
u32 rkey = ntohl(nw_rkey);
int i, j;
+ write_lock_bh(&lnk->rtokens_lock);
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
test_bit(i, lgr->rtokens_used_mask)) {
@@ -2536,9 +2949,11 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
lgr->rtokens[i][j].dma_addr = 0;
}
clear_bit(i, lgr->rtokens_used_mask);
+ write_unlock_bh(&lnk->rtokens_lock);
return 0;
}
}
+ write_unlock_bh(&lnk->rtokens_lock);
return -ENOENT;
}
@@ -2604,12 +3019,31 @@ static int smc_core_reboot_event(struct notifier_block *this,
int __init smc_core_init(void)
{
+ int i;
+
+ /* init smc lgr decision maker builder */
+ for (i = 0; i <= SMC_TYPE_D; i++)
+ rhashtable_init(&smc_lgr_decision_set[i].map,
+ &smc_lgr_decision_set[i].default_params);
+
return register_reboot_notifier(&smc_reboot_notifier);
}
+static void smc_lgr_decision_maker_free_cb(void *ptr, void *arg)
+{
+ kfree(ptr);
+}
+
/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
+ int i;
+
unregister_reboot_notifier(&smc_reboot_notifier);
smc_lgrs_shutdown();
+
+ /* destroy smc lgr decision maker builder */
+ for (i = 0; i <= SMC_TYPE_D; i++)
+ rhashtable_free_and_destroy(&smc_lgr_decision_set[i].map,
+ smc_lgr_decision_maker_free_cb, NULL);
}
@@ -15,6 +15,7 @@
#include <linux/atomic.h>
#include <linux/smc.h>
#include <linux/pci.h>
+#include <linux/rhashtable.h>
#include <rdma/ib_verbs.h>
#include <net/genetlink.h>
@@ -107,6 +108,7 @@ struct smc_link {
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
atomic_t wr_tx_refcnt; /* tx refs to link */
+ rwlock_t rtokens_lock;
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -244,17 +246,23 @@ struct smc_llc_flow {
struct smc_llc_qentry *qentry;
};
+struct smc_lgr_decision_maker;
+
struct smc_link_group {
struct list_head list;
struct rb_root conns_all; /* connection tree */
rwlock_t conns_lock; /* protects conns_all */
unsigned int conns_num; /* current # of connections */
+ atomic_t rtoken_pendings;/* number of connection that
+ * lgr assigned but no rtoken got yet
+ */
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
struct mutex sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
struct mutex rmbs_lock; /* protects rx buffers */
+ u8 first_contact_done; /* if first contact succeed */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -335,6 +343,8 @@ struct smc_link_group {
/* peer triggered shutdownn */
};
};
+ struct smc_lgr_decision_maker *ldm;
+ /* who decides to create this lgr */
};
struct smc_clc_msg_local;
@@ -373,6 +383,9 @@ struct smc_init_info {
unsigned short vlan_id;
u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN];
+ struct smc_lgr_decision_maker *ldm;
+ u8 advise;
+ void *private;
/* SMC-R */
u8 smcr_version;
u8 check_smcrv2;
@@ -391,6 +404,7 @@ struct smc_init_info {
u8 ism_offered_cnt; /* # of ISM devices offered */
u8 ism_selected; /* index of selected ISM dev*/
u8 smcd_version;
+ u8 role;
};
/* Find the connection associated with the given alert token in the link group.
@@ -559,6 +573,38 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb);
int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb);
+void smc_lgr_decision_maker_on_first_contact_done(struct smc_init_info *ini, bool success);
+
+static inline void smc_lgr_decision_maker_on_first_contact_success(struct smc_sock *smc,
+ struct smc_init_info *ini)
+{
+ smc->conn.lgr->first_contact_done = 1;
+ /* make sure first_contact_done can be seen after wakeup */
+ smp_mb();
+ smc_lgr_decision_maker_on_first_contact_done(ini, 1 /* success */);
+}
+
+static inline void smc_lgr_decision_maker_on_first_contact_fail(struct smc_init_info *ini)
+{
+ smc_lgr_decision_maker_on_first_contact_done(ini, 0 /* failed */);
+}
+
+static inline void smc_conn_enter_rtoken_pending(struct smc_sock *smc, struct smc_init_info *ini)
+{
+ struct smc_link_group *lgr = smc->conn.lgr;
+
+ if (lgr && !ini->first_contact_local)
+ atomic_inc(&lgr->rtoken_pendings);
+}
+
+static inline void smc_conn_leave_rtoken_pending(struct smc_sock *smc, struct smc_init_info *ini)
+{
+ struct smc_link_group *lgr = smc->conn.lgr;
+
+ if (lgr && !ini->first_contact_local)
+ atomic_dec(&lgr->rtoken_pendings);
+}
+
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
return link->lgr;