@@ -41,9 +41,9 @@ void rxe_cq_cleanup(struct rxe_pool_elem *arg);
/* rxe_mcast.c */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
+void rxe_free_mcg(struct kref *kref);
int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
-void rxe_cleanup_mcg(struct kref *kref);
/* rxe_mmap.c */
struct rxe_mmap_info {
@@ -17,6 +17,12 @@
* mca is created. It holds a pointer to the qp and is added to a list
* of qp's that are attached to the mcg. The qp_list is used to replicate
* mcast packets in the rxe receive path.
+ *
+ * The highest performance operations are mca list traversal when
+ * processing incoming multicast packets which need to be fanned out
+ * to the attached qp's. This list is protected by RCU locking for read
+ * operations and a spinlock in the rxe_dev struct for write operations.
+ * The red-black tree is protected by the same spinlock.
*/
#include "rxe.h"
@@ -243,10 +249,10 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
}
/**
- * rxe_cleanup_mcg - cleanup mcg for kref_put
+ * rxe_free_mcg - free mcg for kref_put
* @kref: struct kref embnedded in mcg
*/
-void rxe_cleanup_mcg(struct kref *kref)
+void rxe_free_mcg(struct kref *kref)
{
struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
@@ -254,36 +260,21 @@ void rxe_cleanup_mcg(struct kref *kref)
}
/**
- * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock
+ * rxe_cleanup_mcg - cleanup mcg object
* @mcg: the mcg object
*
- * Context: caller is holding rxe->mcg_lock
- * no qp's are attached to mcg
+ * Context: no qp's are attached to mcg
*/
-static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
+static void rxe_cleanup_mcg(struct rxe_mcg *mcg)
{
struct rxe_dev *rxe = mcg->rxe;
+ unsigned long flags;
- /* remove mcg from red-black tree then drop ref */
+ spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
__rxe_remove_mcg(mcg);
- kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
-
+ kref_put(&mcg->ref_cnt, rxe_free_mcg);
rxe_mcast_delete(mcg->rxe, &mcg->mgid);
atomic_dec(&rxe->mcg_num);
-}
-
-/**
- * rxe_destroy_mcg - destroy mcg object
- * @mcg: the mcg object
- *
- * Context: no qp's are attached to mcg
- */
-static void rxe_destroy_mcg(struct rxe_mcg *mcg)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
- __rxe_destroy_mcg(mcg);
spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
}
@@ -322,11 +313,60 @@ static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg,
rxe_add_ref(qp);
mca->qp = qp;
- list_add_tail(&mca->qp_list, &mcg->qp_list);
+ kref_get(&mcg->ref_cnt);
+ mca->mcg = mcg;
+
+ list_add_tail_rcu(&mca->qp_list, &mcg->qp_list);
return 0;
}
+/**
+ * rxe_destroy_mca - free mca resources
+ * @head: rcu_head embedded in mca
+ *
+ * Context: caller must hold a reference to mcg
+ * all rcu read operations should be compelete
+ */
+static void rxe_destroy_mca(struct rcu_head *head)
+{
+ struct rxe_mca *mca = container_of(head, typeof(*mca), rcu);
+ struct rxe_mcg *mcg = mca->mcg;
+
+ /* let destroy_qp finish if this is the last ref */
+ rxe_drop_ref(mca->qp);
+
+ atomic_dec(&mcg->qp_num);
+ atomic_dec(&mcg->rxe->mcg_attach);
+
+ /* if the number of qp's attached to the
+ * mcast group falls to zero go ahead and
+ * tear it down. This will not free the
+ * object since we are still holding a ref
+ * from the caller
+ */
+ if (atomic_read(&mcg->qp_num) <= 0)
+ rxe_cleanup_mcg(mcg);
+
+ kfree(mca);
+}
+
+/**
+ * __rxe_cleanup_mca - cleanup mca object holding lock
+ * @mca: mca object
+ *
+ * Context: caller must hold a reference to mcg and rxe->mcg_lock
+ */
+static void __rxe_cleanup_mca(struct rxe_mca *mca)
+{
+ list_del_rcu(&mca->qp_list);
+
+ /* let destroy_qp start if this is the last mc attach */
+ atomic_dec(&mca->qp->mcg_num);
+
+ call_rcu(&mca->rcu, rxe_destroy_mca);
+}
+
/**
* rxe_attach_mcg - attach qp to mcg if not already attached
* @qp: qp object
@@ -343,14 +383,14 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
int err;
/* check to see if the qp is already a member of the group */
- spin_lock_irqsave(&rxe->mcg_lock, flags);
- list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ rcu_read_unlock();
return 0;
}
}
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ rcu_read_unlock();
/* speculative alloc new mca without using GFP_ATOMIC */
mca = kzalloc(sizeof(*mca), GFP_KERNEL);
@@ -375,25 +415,6 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
return err;
}
-/**
- * __rxe_cleanup_mca - cleanup mca object holding lock
- * @mca: mca object
- * @mcg: mcg object
- *
- * Context: caller must hold a reference to mcg and rxe->mcg_lock
- */
-static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
-{
- list_del(&mca->qp_list);
-
- atomic_dec(&mcg->qp_num);
- atomic_dec(&mcg->rxe->mcg_attach);
- atomic_dec(&mca->qp->mcg_num);
- rxe_drop_ref(mca->qp);
-
- kfree(mca);
-}
-
/**
* rxe_detach_mcg - detach qp from mcg
* @mcg: mcg object
@@ -404,30 +425,19 @@ static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg)
static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
struct rxe_dev *rxe = mcg->rxe;
- struct rxe_mca *mca, *tmp;
- unsigned long flags;
+ struct rxe_mca *mca;
- spin_lock_irqsave(&rxe->mcg_lock, flags);
- list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
+ spin_lock_bh(&rxe->mcg_lock);
+ list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
- __rxe_cleanup_mca(mca, mcg);
-
- /* if the number of qp's attached to the
- * mcast group falls to zero go ahead and
- * tear it down. This will not free the
- * object since we are still holding a ref
- * from the caller
- */
- if (atomic_read(&mcg->qp_num) <= 0)
- __rxe_destroy_mcg(mcg);
-
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ __rxe_cleanup_mca(mca);
+ spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
/* we didn't find the qp on the list */
- spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+ spin_unlock_bh(&rxe->mcg_lock);
return -EINVAL;
}
@@ -455,9 +465,9 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
/* if we failed to attach the first qp to mcg tear it down */
if (atomic_read(&mcg->qp_num) == 0)
- rxe_destroy_mcg(mcg);
+ rxe_cleanup_mcg(mcg);
- kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+ kref_put(&mcg->ref_cnt, rxe_free_mcg);
return err;
}
@@ -482,7 +492,7 @@ int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
return -EINVAL;
err = rxe_detach_mcg(mcg, qp);
- kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+ kref_put(&mcg->ref_cnt, rxe_free_mcg);
return err;
}
@@ -265,17 +265,17 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
qp_array = kmalloc_array(nmax, sizeof(qp), GFP_KERNEL);
n = 0;
- spin_lock_bh(&rxe->mcg_lock);
- list_for_each_entry(mca, &mcg->qp_list, qp_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(mca, &mcg->qp_list, qp_list) {
/* protect the qp pointers in the list */
rxe_add_ref(mca->qp);
qp_array[n++] = mca->qp;
if (n == nmax)
break;
}
- spin_unlock_bh(&rxe->mcg_lock);
+ rcu_read_unlock();
nmax = n;
- kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
+ kref_put(&mcg->ref_cnt, rxe_free_mcg);
for (n = 0; n < nmax; n++) {
qp = qp_array[n];
@@ -363,7 +363,9 @@ struct rxe_mcg {
struct rxe_mca {
struct list_head qp_list;
+ struct rcu_head rcu;
struct rxe_qp *qp;
+ struct rxe_mcg *mcg;
};
struct rxe_port {
Replace spinlock with rcu read locks for read side operations on mca in rxe_recv.c and rxe_mcast.c. Minor fixups and name changes for clarity. Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- v14: This is the last patch left in the series to move multicast out of rxe_pools. I finally found the cause of the failure in v12 and fixed it. I also make some cleanups to names and order of the subroutines to help readability. Also the rxe_destroy_mca subroutine which is called from call_rcu() doesn't have the spinlock set which required a small change. drivers/infiniband/sw/rxe/rxe_loc.h | 2 +- drivers/infiniband/sw/rxe/rxe_mcast.c | 144 ++++++++++++++------------ drivers/infiniband/sw/rxe/rxe_recv.c | 8 +- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 + 4 files changed, 84 insertions(+), 72 deletions(-)