diff mbox series

[for-next,2/6] RDMA/rxe: Handle loopback of mcast packets

Message ID 20231103204324.9606-3-rpearsonhpe@gmail.com (mailing list archive)
State Deferred
Headers show
Series RDMA/rxe: Make multicast actually work | expand

Commit Message

Bob Pearson Nov. 3, 2023, 8:43 p.m. UTC
Add a mask bit to indicate that a multicast packet has been locally
sent and use it to set the correct qpn for multicast packets.

Add code to rxe_xmit_packet() to correctly handle multicast packets
which must be sent on the wire and also duplicated to any local qps
which may belong to the multicast group, but not including the sender.

Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
 drivers/infiniband/sw/rxe/rxe_av.c     |  7 +++++++
 drivers/infiniband/sw/rxe/rxe_loc.h    |  1 +
 drivers/infiniband/sw/rxe/rxe_net.c    | 25 ++++++++++++++++++++++++-
 drivers/infiniband/sw/rxe/rxe_opcode.h |  2 +-
 drivers/infiniband/sw/rxe/rxe_recv.c   |  4 ++++
 drivers/infiniband/sw/rxe/rxe_req.c    | 11 +++++++++--
 6 files changed, 46 insertions(+), 4 deletions(-)

Comments

Zhu Yanjun Nov. 4, 2023, 12:30 p.m. UTC | #1
在 2023/11/4 4:43, Bob Pearson 写道:
> Add a mask bit to indicate that a multicast packet has been locally
> sent and use to set the correct qpn for multicast packets.
> 
> Add code to rxe_xmit_packet() to correctly handle multicast packets
> which must be sent on the wire and also duplicated to any local qps
> which may belong the multicast group, but not including the sender.
> 
> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
> Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
> ---
>   drivers/infiniband/sw/rxe/rxe_av.c     |  7 +++++++
>   drivers/infiniband/sw/rxe/rxe_loc.h    |  1 +
>   drivers/infiniband/sw/rxe/rxe_net.c    | 25 ++++++++++++++++++++++++-
>   drivers/infiniband/sw/rxe/rxe_opcode.h |  2 +-
>   drivers/infiniband/sw/rxe/rxe_recv.c   |  4 ++++
>   drivers/infiniband/sw/rxe/rxe_req.c    | 11 +++++++++--
>   6 files changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
> index 4ac17b8def28..022173eb5d75 100644
> --- a/drivers/infiniband/sw/rxe/rxe_av.c
> +++ b/drivers/infiniband/sw/rxe/rxe_av.c
> @@ -7,6 +7,13 @@
>   #include "rxe.h"
>   #include "rxe_loc.h"
>   
> +bool rxe_is_mcast_av(struct rxe_av *av)
> +{
> +	struct in6_addr *daddr = (struct in6_addr *)av->grh.dgid.raw;
> +
> +	return rdma_is_multicast_addr(daddr);
> +}
> +
>   void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
>   {
>   	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 3d2504a0ae56..62b2b25903fc 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -8,6 +8,7 @@
>   #define RXE_LOC_H
>   
>   /* rxe_av.c */
> +bool rxe_is_mcast_av(struct rxe_av *av);
>   void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
>   int rxe_chk_ah_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
>   void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index cd59666158b1..2fad56fc95e7 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -412,6 +412,27 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
>   	return 0;
>   }
>   
> +/* for a multicast packet must send remotely and looback to any local qps
> + * that may belong to the mcast group
> + */

https://www.kernel.org/doc/html/v4.15/process/coding-style.html
Please follow the preferred style for long (multi-line) comments in the 
above link.

Zhu Yanjun

> +static int rxe_loop_and_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
> +{
> +	struct sk_buff *cskb;
> +	int err, loc_err = 0;
> +
> +	if (atomic_read(&pkt->rxe->mcg_num)) {
> +		loc_err = -ENOMEM;
> +		cskb = skb_clone(skb, GFP_KERNEL);
> +		if (cskb)
> +			loc_err = rxe_loopback(cskb, pkt);
> +	}
> +
> +	err = rxe_send(skb, pkt);
> +	if (loc_err)
> +		err = loc_err;
> +	return err;
> +}
> +
>   int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>   		    struct sk_buff *skb)
>   {
> @@ -431,7 +452,9 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>   
>   	rxe_icrc_generate(skb, pkt);
>   
> -	if (pkt->mask & RXE_LOOPBACK_MASK)
> +	if (pkt->mask & RXE_MCAST_MASK)
> +		err = rxe_loop_and_send(skb, pkt);
> +	else if (pkt->mask & RXE_LOOPBACK_MASK)
>   		err = rxe_loopback(skb, pkt);
>   	else
>   		err = rxe_send(skb, pkt);
> diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
> index 5686b691d6b8..c4cf672ea26d 100644
> --- a/drivers/infiniband/sw/rxe/rxe_opcode.h
> +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
> @@ -85,7 +85,7 @@ enum rxe_hdr_mask {
>   	RXE_END_MASK		= BIT(NUM_HDR_TYPES + 11),
>   
>   	RXE_LOOPBACK_MASK	= BIT(NUM_HDR_TYPES + 12),
> -
> +	RXE_MCAST_MASK		= BIT(NUM_HDR_TYPES + 13),
>   	RXE_ATOMIC_WRITE_MASK   = BIT(NUM_HDR_TYPES + 14),
>   
>   	RXE_READ_OR_ATOMIC_MASK	= (RXE_READ_MASK | RXE_ATOMIC_MASK),
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 5861e4244049..7153de0799fc 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -217,6 +217,10 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>   	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
>   		qp = mca->qp;
>   
> +		/* don't reply packet to sender if locally sent */
> +		if (pkt->mask & RXE_MCAST_MASK && qp_num(qp) == deth_sqp(pkt))
> +			continue;
> +
>   		/* validate qp for incoming packet */
>   		err = check_type_state(rxe, pkt, qp);
>   		if (err)
> diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
> index d8c41fd626a9..599bec88cb54 100644
> --- a/drivers/infiniband/sw/rxe/rxe_req.c
> +++ b/drivers/infiniband/sw/rxe/rxe_req.c
> @@ -442,8 +442,12 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
>   			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
>   			(RXE_WRITE_MASK | RXE_IMMDT_MASK));
>   
> -	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
> -					 qp->attr.dest_qp_num;
> +	if (pkt->mask & RXE_MCAST_MASK)
> +		qp_num = IB_MULTICAST_QPN;
> +	else if (pkt->mask & RXE_DETH_MASK)
> +		qp_num = ibwr->wr.ud.remote_qpn;
> +	else
> +		qp_num = qp->attr.dest_qp_num;
>   
>   	ack_req = ((pkt->mask & RXE_END_MASK) ||
>   		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
> @@ -809,6 +813,9 @@ int rxe_requester(struct rxe_qp *qp)
>   		goto err;
>   	}
>   
> +	if (rxe_is_mcast_av(av))
> +		pkt.mask |= RXE_MCAST_MASK;
> +
>   	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
>   	if (unlikely(!skb)) {
>   		rxe_dbg_qp(qp, "Failed allocating skb\n");
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 4ac17b8def28..022173eb5d75 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -7,6 +7,13 @@ 
 #include "rxe.h"
 #include "rxe_loc.h"
 
+bool rxe_is_mcast_av(struct rxe_av *av)
+{
+	struct in6_addr *daddr = (struct in6_addr *)av->grh.dgid.raw;
+
+	return rdma_is_multicast_addr(daddr);
+}
+
 void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
 {
 	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 3d2504a0ae56..62b2b25903fc 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -8,6 +8,7 @@ 
 #define RXE_LOC_H
 
 /* rxe_av.c */
+bool rxe_is_mcast_av(struct rxe_av *av);
 void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
 int rxe_chk_ah_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
 void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index cd59666158b1..2fad56fc95e7 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -412,6 +412,27 @@  static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	return 0;
 }
 
+/* a multicast packet must be sent remotely and looped back to any local qps
+ * that may belong to the mcast group
+ */
+static int rxe_loop_and_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
+{
+	struct sk_buff *cskb;
+	int err, loc_err = 0;
+
+	if (atomic_read(&pkt->rxe->mcg_num)) {
+		loc_err = -ENOMEM;
+		cskb = skb_clone(skb, GFP_KERNEL);
+		if (cskb)
+			loc_err = rxe_loopback(cskb, pkt);
+	}
+
+	err = rxe_send(skb, pkt);
+	if (loc_err)
+		err = loc_err;
+	return err;
+}
+
 int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 		    struct sk_buff *skb)
 {
@@ -431,7 +452,9 @@  int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 
 	rxe_icrc_generate(skb, pkt);
 
-	if (pkt->mask & RXE_LOOPBACK_MASK)
+	if (pkt->mask & RXE_MCAST_MASK)
+		err = rxe_loop_and_send(skb, pkt);
+	else if (pkt->mask & RXE_LOOPBACK_MASK)
 		err = rxe_loopback(skb, pkt);
 	else
 		err = rxe_send(skb, pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 5686b691d6b8..c4cf672ea26d 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -85,7 +85,7 @@  enum rxe_hdr_mask {
 	RXE_END_MASK		= BIT(NUM_HDR_TYPES + 11),
 
 	RXE_LOOPBACK_MASK	= BIT(NUM_HDR_TYPES + 12),
-
+	RXE_MCAST_MASK		= BIT(NUM_HDR_TYPES + 13),
 	RXE_ATOMIC_WRITE_MASK   = BIT(NUM_HDR_TYPES + 14),
 
 	RXE_READ_OR_ATOMIC_MASK	= (RXE_READ_MASK | RXE_ATOMIC_MASK),
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 5861e4244049..7153de0799fc 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -217,6 +217,10 @@  static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
 		qp = mca->qp;
 
+		/* don't replay packet to sender if locally sent */
+		if (pkt->mask & RXE_MCAST_MASK && qp_num(qp) == deth_sqp(pkt))
+			continue;
+
 		/* validate qp for incoming packet */
 		err = check_type_state(rxe, pkt, qp);
 		if (err)
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index d8c41fd626a9..599bec88cb54 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -442,8 +442,12 @@  static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
 			(RXE_WRITE_MASK | RXE_IMMDT_MASK));
 
-	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
-					 qp->attr.dest_qp_num;
+	if (pkt->mask & RXE_MCAST_MASK)
+		qp_num = IB_MULTICAST_QPN;
+	else if (pkt->mask & RXE_DETH_MASK)
+		qp_num = ibwr->wr.ud.remote_qpn;
+	else
+		qp_num = qp->attr.dest_qp_num;
 
 	ack_req = ((pkt->mask & RXE_END_MASK) ||
 		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
@@ -809,6 +813,9 @@  int rxe_requester(struct rxe_qp *qp)
 		goto err;
 	}
 
+	if (rxe_is_mcast_av(av))
+		pkt.mask |= RXE_MCAST_MASK;
+
 	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
 	if (unlikely(!skb)) {
 		rxe_dbg_qp(qp, "Failed allocating skb\n");