[21/22] IB/ipoib: deserialize multicast joins

Message ID	781ced9840cd21c1ef8f671d6ababc6c634bc01d.1423703861.git.dledford@redhat.com (mailing list archive)
State	Rejected
Headers	show Return-Path: <linux-rdma-owner@kernel.org> From: Doug Ledford <dledford@redhat.com> To: linux-rdma@vger.kernel.org, roland@kernel.org Cc: Or Gerlitz <gerlitz.or@gmail.com>, Erez Shitrit <erezsh@mellanox.com>, Doug Ledford <dledford@redhat.com> Subject: [PATCH 21/22] IB/ipoib: deserialize multicast joins Date: Wed, 11 Feb 2015 20:43:44 -0500 Message-Id: <781ced9840cd21c1ef8f671d6ababc6c634bc01d.1423703861.git.dledford@redhat.com> In-Reply-To: <cover.1423703861.git.dledford@redhat.com> References: <cover.1423703861.git.dledford@redhat.com> In-Reply-To: <cover.1423703861.git.dledford@redhat.com> References: <cover.1423703861.git.dledford@redhat.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 68b47bed33a..355c320dc4d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -295,113 +295,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, return 0; } -static int -ipoib_mcast_sendonly_join_complete(int status, - struct ib_sa_multicast *multicast) -{ - struct ipoib_mcast *mcast = multicast->context; - struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); - - /* - * We have to take the mutex to force mcast_sendonly_join to - * return from ib_sa_multicast_join and set mcast->mc to a - * valid value. Otherwise we were racing with ourselves in - * that we might fail here, but get a valid return from - * ib_sa_multicast_join after we had cleared mcast->mc here, - * resulting in mis-matched joins and leaves and a deadlock - */ - mutex_lock(&mcast_mutex); - - /* We trap for port events ourselves. */ - if (status == -ENETRESET) { - status = 0; - goto out; - } - - if (!status) - status = ipoib_mcast_join_finish(mcast, &multicast->rec); - - if (status) { - if (mcast->logcount++ < 20) - ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast " - "join failed for %pI6, status %d\n", - mcast->mcmember.mgid.raw, status); - - /* Flush out any queued packets */ - netif_tx_lock_bh(dev); - while (!skb_queue_empty(&mcast->pkt_queue)) { - ++dev->stats.tx_dropped; - dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); - } - netif_tx_unlock_bh(dev); - __ipoib_mcast_continue_join_thread(priv, mcast, 1); - } else { - /* Join completed, so reset any backoff parameters */ - mcast->backoff = 1; - mcast->delay_until = jiffies; - __ipoib_mcast_continue_join_thread(priv, NULL, 0); - } -out: - clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - if (status) - mcast->mc = NULL; - complete(&mcast->done); - mutex_unlock(&mcast_mutex); - return status; -} - -static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) -{ - struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sa_mcmember_rec rec = { -#if 0 /* Some SMs don't support send-only yet */ - .join_state = 4 -#else - .join_state = 1 -#endif - }; - int ret = 0; - - if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { - ipoib_dbg_mcast(priv, "device shutting down, no sendonly " - "multicast joins\n"); - clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - complete(&mcast->done); - return -ENODEV; - } - - rec.mgid = mcast->mcmember.mgid; - rec.port_gid = priv->local_gid; - rec.pkey = cpu_to_be16(priv->pkey); - - mutex_lock(&mcast_mutex); - mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, - priv->port, &rec, - IB_SA_MCMEMBER_REC_MGID | - IB_SA_MCMEMBER_REC_PORT_GID | - IB_SA_MCMEMBER_REC_PKEY | - IB_SA_MCMEMBER_REC_JOIN_STATE, - GFP_ATOMIC, - ipoib_mcast_sendonly_join_complete, - mcast); - if (IS_ERR(mcast->mc)) { - ret = PTR_ERR(mcast->mc); - clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - complete(&mcast->done); - ipoib_warn(priv, "ib_sa_join_multicast for sendonly join " - "failed (ret = %d)\n", ret); - __ipoib_mcast_continue_join_thread(priv, NULL, 0); - } else { - ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting " - "sendonly join\n", mcast->mcmember.mgid.raw); - } - mutex_unlock(&mcast_mutex); - - return ret; -} - void ipoib_mcast_carrier_on_task(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, @@ -442,7 +335,9 @@ static int ipoib_mcast_join_complete(int status, struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", + ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n", + test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? + "sendonly " : "", mcast->mcmember.mgid.raw, status); /* @@ -467,27 +362,52 @@ static int ipoib_mcast_join_complete(int status, if (!status) { mcast->backoff = 1; mcast->delay_until = jiffies; - __ipoib_mcast_continue_join_thread(priv, NULL, 0); /* * Defer carrier on work to priv->wq to avoid a - * deadlock on rtnl_lock here. + * deadlock on rtnl_lock here. Requeue our multicast + * work too, which will end up happening right after + * our carrier on task work and will allow us to + * send out all of the non-broadcast joins */ - if (mcast == priv->broadcast) + if (mcast == priv->broadcast) { queue_work(priv->wq, &priv->carrier_on_task); + __ipoib_mcast_continue_join_thread(priv, NULL, 0); + } } else { if (mcast->logcount++ < 20) { if (status == -ETIMEDOUT || status == -EAGAIN) { - ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", + ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n", + test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); } else { - ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", + ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n", + test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); } } - /* Requeue this join task with a backoff delay */ - __ipoib_mcast_continue_join_thread(priv, mcast, 1); + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && + mcast->backoff >= 2) { + /* + * We only retry sendonly joins once before we drop + * the packet and quit trying to deal with the + * group. However, we leave the group in the + * mcast list as an unjoined group. If we want to + * try joining again, we simply queue up a packet + * and restart the join thread. The empty queue + * is why the join thread ignores this group. + */ + mcast->backoff = 1; + netif_tx_lock_bh(dev); + while (!skb_queue_empty(&mcast->pkt_queue)) { + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); + } + netif_tx_unlock_bh(dev); + } else + /* Requeue this join task with a backoff delay */ + __ipoib_mcast_continue_join_thread(priv, mcast, 1); } out: clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); @@ -626,6 +546,11 @@ void ipoib_mcast_join_task(struct work_struct *work) !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) { mcast = priv->broadcast; create = 0; + if (mcast->backoff != 1 && + time_before_eq(jiffies, mcast->delay_until)) { + delay_until = mcast->delay_until; + mcast = NULL; + } } goto out; } @@ -636,44 +561,45 @@ void ipoib_mcast_join_task(struct work_struct *work) list_for_each_entry(mcast, &priv->multicast_list, list) { if (IS_ERR_OR_NULL(mcast->mc) && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && - !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { + (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) || + !skb_queue_empty(&mcast->pkt_queue))) { if (mcast->backoff == 1 || - time_after_eq(jiffies, mcast->delay_until)) + time_after_eq(jiffies, mcast->delay_until)) { /* Found the next unjoined group */ - break; - else if (!delay_until || + init_completion(&mcast->done); + set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) + create = 0; + else + create = 1; + spin_unlock_irq(&priv->lock); + mutex_unlock(&mcast_mutex); + ipoib_mcast_join(dev, mcast, create); + mutex_lock(&mcast_mutex); + spin_lock_irq(&priv->lock); + } else if (!delay_until || time_before(mcast->delay_until, delay_until)) delay_until = mcast->delay_until; } } - if (&mcast->list == &priv->multicast_list) { - /* All done, unless we have delayed work from - * backoff retransmissions, but we will get - * restarted when the time is right, so we are - * done for now - */ - mcast = NULL; - ipoib_dbg_mcast(priv, "successfully joined all " - "multicast groups\n"); - if (delay_until) - queue_delayed_work(priv->wq, &priv->mcast_task, - delay_until - jiffies); - } + mcast = NULL; + ipoib_dbg_mcast(priv, "successfully started all multicast joins\n"); out: + if (delay_until) { + cancel_delayed_work(&priv->mcast_task); + queue_delayed_work(priv->wq, &priv->mcast_task, + delay_until - jiffies); + } if (mcast) { init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); } spin_unlock_irq(&priv->lock); mutex_unlock(&mcast_mutex); - if (mcast) { - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - ipoib_mcast_sendonly_join(mcast); - else - ipoib_mcast_join(dev, mcast, create); - } + if (mcast) + ipoib_mcast_join(dev, mcast, create); return; } @@ -717,8 +643,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) if (!IS_ERR_OR_NULL(mcast->mc)) ib_sa_free_multicast(mcast->mc); - else - ipoib_dbg(priv, "ipoib_mcast_leave with mcast->mc invalid\n"); if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", @@ -754,50 +678,38 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) } mcast = __ipoib_mcast_find(dev, mgid); - if (!mcast) { - /* Let's create a new send only group now */ - ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", - mgid); - - mcast = ipoib_mcast_alloc(dev, 0); + if (!mcast || !mcast->ah) { if (!mcast) { - ipoib_warn(priv, "unable to allocate memory for " - "multicast structure\n"); - ++dev->stats.tx_dropped; - dev_kfree_skb_any(skb); - goto out; - } - - set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); - memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); - __ipoib_mcast_add(dev, mcast); - list_add_tail(&mcast->list, &priv->multicast_list); - /* - * Make sure that if we are currently waiting for a backoff - * delay to expire, that we cancel that, run immediately, - * and then requeue our task to fire when the backoff - * timer is over. - */ - cancel_delayed_work(&priv->mcast_task); - __ipoib_mcast_continue_join_thread(priv, NULL, 0); - } + /* Let's create a new send only group now */ + ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", + mgid); + + mcast = ipoib_mcast_alloc(dev, 0); + if (!mcast) { + ipoib_warn(priv, "unable to allocate memory " + "for multicast structure\n"); + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + goto unlock; + } - if (!mcast->ah) { + set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); + memcpy(mcast->mcmember.mgid.raw, mgid, + sizeof (union ib_gid)); + __ipoib_mcast_add(dev, mcast); + list_add_tail(&mcast->list, &priv->multicast_list); + } if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) skb_queue_tail(&mcast->pkt_queue, skb); else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } - /* - * If lookup completes between here and out:, don't - * want to send packet twice. - */ - mcast = NULL; - } - -out: - if (mcast && mcast->ah) { + if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { + cancel_delayed_work(&priv->mcast_task); + __ipoib_mcast_continue_join_thread(priv, NULL, 0); + } + } else { struct ipoib_neigh *neigh; spin_unlock_irqrestore(&priv->lock, flags);

[21/22] IB/ipoib: deserialize multicast joins

Commit Message

Patch