Message ID | 4132fdbc9fbba5dca834c84ae383d7fe6a917760.1649083917.git.leonro@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | [rdma-rc] RDMA/cma: Limit join multicast to UD QP type only | expand |
On Mon, Apr 04, 2022 at 05:52:18PM +0300, Leon Romanovsky wrote: > -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > +static int cma_set_default_qkey(struct rdma_id_private *id_priv) > { > struct ib_sa_mcmember_rec rec; > int ret = 0; > > - if (id_priv->qkey) { > - if (qkey && id_priv->qkey != qkey) > - return -EINVAL; > - return 0; > - } > - > - if (qkey) { > - id_priv->qkey = qkey; > - return 0; > - } > - > switch (id_priv->id.ps) { > case RDMA_PS_UDP: > case RDMA_PS_IB: > @@ -528,9 +517,22 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > default: > break; > } > + > return ret; > } > > +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > +{ > + if (!qkey) > + return cma_set_default_qkey(id_priv); This should be called in the couple of places that are actually allowed to set a default qkey. We have some confusion about when that is supposed to happen and when a 0 qkey can be presented. But isn't this not the same? The original behavior was to make the set_default a NOP if the id_priv already had a qkey: - if (id_priv->qkey) { - if (qkey && id_priv->qkey != qkey) But that is gone now? I got this: diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3e315fc0ac16cb..ef980ea7153e51 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1102,7 +1102,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv, 0); + ret = cma_set_default_qkey(id_priv); if (ret) return ret; @@ -4430,14 +4430,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { - if (conn_param) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - conn_param->qkey, - conn_param->private_data, - conn_param->private_data_len); - else - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - 0, NULL, 0); + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->qkey, + conn_param->private_data, + conn_param->private_data_len); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); @@ -4685,7 +4681,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; - ret = cma_set_qkey(id_priv, 0); + ret = cma_set_default_qkey(id_priv); if (ret) return ret; > static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) > { > dev_addr->dev_type = ARPHRD_INFINIBAND; > @@ -4762,8 +4764,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, > cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); > > ib.rec.pkey = cpu_to_be16(0xffff); > - if (id_priv->id.ps == RDMA_PS_UDP) > - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); Why isn't this symetrical with the IB side: ret = cma_set_default_qkey(id_priv); if (ret) return ret; rec.qkey = cpu_to_be32(id_priv->qkey); ?? It fells like set_default_qkey() is the right thing to do incase the qkey was already set by something, just as IB does it. > @@ -4815,6 +4816,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, > READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) > return -EINVAL; > > + if (id_priv->id.qp_type != IB_QPT_UD) > + return -EINVAL; > + This makes sense Jason
On Fri, Apr 08, 2022 at 03:24:40PM -0300, Jason Gunthorpe wrote: > On Mon, Apr 04, 2022 at 05:52:18PM +0300, Leon Romanovsky wrote: > > -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > +static int cma_set_default_qkey(struct rdma_id_private *id_priv) > > { > > struct ib_sa_mcmember_rec rec; > > int ret = 0; > > > > - if (id_priv->qkey) { > > - if (qkey && id_priv->qkey != qkey) > > - return -EINVAL; > > - return 0; > > - } > > - > > - if (qkey) { > > - id_priv->qkey = qkey; > > - return 0; > > - } > > - > > switch (id_priv->id.ps) { > > case RDMA_PS_UDP: > > case RDMA_PS_IB: > > @@ -528,9 +517,22 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > default: > > break; > > } > > + > > return ret; > > } > > > > +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > +{ > > + if (!qkey) > > + return cma_set_default_qkey(id_priv); > > This should be called in the couple of places that are actually > allowed to set a default qkey. We have some confusion about when that > is supposed to happen and when a 0 qkey can be presented. > > But isn't this not the same? The original behavior was to make the > set_default a NOP if the id_priv already had a qkey: > > - if (id_priv->qkey) { > - if (qkey && id_priv->qkey != qkey) > > But that is gone now? When I reviewed, I got an impression what once we create id_priv and set qkey to default values, we won't hit this if (..). > > I got this: > > diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c > index 3e315fc0ac16cb..ef980ea7153e51 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -1102,7 +1102,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, > *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; > > if (id_priv->id.qp_type == IB_QPT_UD) { > - ret = cma_set_qkey(id_priv, 0); > + ret = cma_set_default_qkey(id_priv); This is ok > if (ret) > return ret; > > @@ -4430,14 +4430,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) > > if (rdma_cap_ib_cm(id->device, id->port_num)) { > if (id->qp_type == IB_QPT_UD) { > - if (conn_param) > - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, > - conn_param->qkey, > - conn_param->private_data, > - conn_param->private_data_len); > - else > - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, > - 0, NULL, 0); > + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, > + conn_param->qkey, > + conn_param->private_data, > + conn_param->private_data_len); It is ok too and we have many other places with not-possible "if (conn_param)". > } else { > if (conn_param) > ret = cma_accept_ib(id_priv, conn_param); > @@ -4685,7 +4681,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, > if (ret) > return ret; > > - ret = cma_set_qkey(id_priv, 0); > + ret = cma_set_default_qkey(id_priv); > if (ret) > return ret; > > > > static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) > > { > > dev_addr->dev_type = ARPHRD_INFINIBAND; > > @@ -4762,8 +4764,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, > > cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); > > > > ib.rec.pkey = cpu_to_be16(0xffff); > > - if (id_priv->id.ps == RDMA_PS_UDP) > > - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > Why isn't this symetrical with the IB side: > > ret = cma_set_default_qkey(id_priv); > if (ret) > return ret; > rec.qkey = cpu_to_be32(id_priv->qkey); > > > ?? The original code didn't touch id_priv. > > It fells like set_default_qkey() is the right thing to do incase the > qkey was already set by something, just as IB does it. > > > @@ -4815,6 +4816,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, > > READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) > > return -EINVAL; > > > > + if (id_priv->id.qp_type != IB_QPT_UD) > > + return -EINVAL; > > + > > This makes sense > > Jason
On Sun, Apr 10, 2022 at 03:03:24PM +0300, Leon Romanovsky wrote: > On Fri, Apr 08, 2022 at 03:24:40PM -0300, Jason Gunthorpe wrote: > > On Mon, Apr 04, 2022 at 05:52:18PM +0300, Leon Romanovsky wrote: > > > -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > +static int cma_set_default_qkey(struct rdma_id_private *id_priv) > > > { > > > struct ib_sa_mcmember_rec rec; > > > int ret = 0; > > > > > > - if (id_priv->qkey) { > > > - if (qkey && id_priv->qkey != qkey) > > > - return -EINVAL; > > > - return 0; > > > - } > > > - > > > - if (qkey) { > > > - id_priv->qkey = qkey; > > > - return 0; > > > - } > > > - > > > switch (id_priv->id.ps) { > > > case RDMA_PS_UDP: > > > case RDMA_PS_IB: > > > @@ -528,9 +517,22 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > default: > > > break; > > > } > > > + > > > return ret; > > > } > > > > > > +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > +{ > > > + if (!qkey) > > > + return cma_set_default_qkey(id_priv); > > > > This should be called in the couple of places that are actually > > allowed to set a default qkey. We have some confusion about when that > > is supposed to happen and when a 0 qkey can be presented. > > > > But isn't this not the same? The original behavior was to make the > > set_default a NOP if the id_priv already had a qkey: > > > > - if (id_priv->qkey) { > > - if (qkey && id_priv->qkey != qkey) > > > > But that is gone now? > > When I reviewed, I got an impression what once we create id_priv and set > qkey to default values, we won't hit this if (..). We don't set qkey during create, so I'm not so sure.. The only places setting non-default qkeys are SIDR, maybe nobody uses SIDR with multicast. > > > static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) > > > { > > > dev_addr->dev_type = ARPHRD_INFINIBAND; > > > @@ -4762,8 +4764,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, > > > cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); > > > > > > ib.rec.pkey = cpu_to_be16(0xffff); > > > - if (id_priv->id.ps == RDMA_PS_UDP) > > > - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > > + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > > > Why isn't this symetrical with the IB side: > > > > ret = cma_set_default_qkey(id_priv); > > if (ret) > > return ret; > > rec.qkey = cpu_to_be32(id_priv->qkey); > > > > > > ?? > > The original code didn't touch id_priv. I know, but I think that is a mistake, we should make it symmetric Jason
On Tue, 12 Apr 2022, Jason Gunthorpe wrote: > The only places setting non-default qkeys are SIDR, maybe nobody uses > SIDR with multicast. IP port numbers provided are ignored by the RDMA subsytem when doing multicast joins. Thus no need to do SIDRs with RDMA multicast. Some middleware solutions (like LLM by IBM and Confinity) are creating their own custom MGID because of this problem. The custom MGID will then contain the port number. On the subject of this patch: There is a usecase for Multicast with IBV_QPT_RAW_PACKET too. A multicast join is required to redirect traffic for a multicast group to the raw socket.
On Tue, Apr 12, 2022 at 05:01:36PM +0200, Christoph Lameter wrote: > On Tue, 12 Apr 2022, Jason Gunthorpe wrote: > > > The only places setting non-default qkeys are SIDR, maybe nobody uses > > SIDR with multicast. > > > IP port numbers provided are ignored by the RDMA subsytem when doing > multicast joins. Thus no need to do SIDRs with RDMA multicast. > > Some middleware solutions (like LLM by IBM and Confinity) are creating > their own custom MGID because of this problem. The custom MGID will then > contain the port number. > > On the subject of this patch: There is a usecase for Multicast with > IBV_QPT_RAW_PACKET too. A multicast join is required to redirect traffic > for a multicast group to the raw socket. The qp_type in rdma-cm is a little bit misleading and represents communication QP. It can be or RC or UD, which is hard coded in almost all rdma-cm code. The one of the places that receive it from the user space is ucma_get_qp_type() for RDMA_PS_IB flow, but librdmacm set it to RC too. 790 791 int rdma_create_id(struct rdma_event_channel *channel, 792 struct rdma_cm_id **id, void *context, 793 enum rdma_port_space ps) 794 { 795 enum ibv_qp_type qp_type; 796 797 qp_type = (ps == RDMA_PS_IPOIB || ps == RDMA_PS_UDP) ? 798 IBV_QPT_UD : IBV_QPT_RC; 799 return rdma_create_id2(channel, id, context, ps, qp_type); 800 } 801 Thanks
On Tue, Apr 12, 2022 at 11:11:34AM -0300, Jason Gunthorpe wrote: > On Sun, Apr 10, 2022 at 03:03:24PM +0300, Leon Romanovsky wrote: > > On Fri, Apr 08, 2022 at 03:24:40PM -0300, Jason Gunthorpe wrote: > > > On Mon, Apr 04, 2022 at 05:52:18PM +0300, Leon Romanovsky wrote: > > > > -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > > +static int cma_set_default_qkey(struct rdma_id_private *id_priv) > > > > { > > > > struct ib_sa_mcmember_rec rec; > > > > int ret = 0; > > > > > > > > - if (id_priv->qkey) { > > > > - if (qkey && id_priv->qkey != qkey) > > > > - return -EINVAL; > > > > - return 0; > > > > - } > > > > - > > > > - if (qkey) { > > > > - id_priv->qkey = qkey; > > > > - return 0; > > > > - } > > > > - > > > > switch (id_priv->id.ps) { > > > > case RDMA_PS_UDP: > > > > case RDMA_PS_IB: > > > > @@ -528,9 +517,22 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > > default: > > > > break; > > > > } > > > > + > > > > return ret; > > > > } > > > > > > > > +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) > > > > +{ > > > > + if (!qkey) > > > > + return cma_set_default_qkey(id_priv); > > > > > > This should be called in the couple of places that are actually > > > allowed to set a default qkey. We have some confusion about when that > > > is supposed to happen and when a 0 qkey can be presented. > > > > > > But isn't this not the same? The original behavior was to make the > > > set_default a NOP if the id_priv already had a qkey: > > > > > > - if (id_priv->qkey) { > > > - if (qkey && id_priv->qkey != qkey) > > > > > > But that is gone now? > > > > When I reviewed, I got an impression what once we create id_priv and set > > qkey to default values, we won't hit this if (..). > > We don't set qkey during create, so I'm not so sure.. > > The only places setting non-default qkeys are SIDR, maybe nobody uses > SIDR with multicast. > > > > > > static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) > > > > { > > > > dev_addr->dev_type = ARPHRD_INFINIBAND; > > > > @@ -4762,8 +4764,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, > > > > cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); > > > > > > > > ib.rec.pkey = cpu_to_be16(0xffff); > > > > - if (id_priv->id.ps == RDMA_PS_UDP) > > > > - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > > > + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); > > > > > > Why isn't this symetrical with the IB side: > > > > > > ret = cma_set_default_qkey(id_priv); > > > if (ret) > > > return ret; > > > rec.qkey = cpu_to_be32(id_priv->qkey); > > > > > > > > > ?? > > > > The original code didn't touch id_priv. > > I know, but I think that is a mistake, we should make it symmetric ok, I added it to regression. Thanks > > Jason
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fabca5e51e3d..3e315fc0ac16 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -496,22 +496,11 @@ static inline unsigned short cma_family(struct rdma_id_private *id_priv) return id_priv->id.route.addr.src_addr.ss_family; } -static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +static int cma_set_default_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; - if (id_priv->qkey) { - if (qkey && id_priv->qkey != qkey) - return -EINVAL; - return 0; - } - - if (qkey) { - id_priv->qkey = qkey; - return 0; - } - switch (id_priv->id.ps) { case RDMA_PS_UDP: case RDMA_PS_IB: @@ -528,9 +517,22 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) default: break; } + return ret; } +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) +{ + if (!qkey) + return cma_set_default_qkey(id_priv); + + if (id_priv->qkey && (id_priv->qkey != qkey)) + return -EINVAL; + + id_priv->qkey = qkey; + return 0; +} + static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { dev_addr->dev_type = ARPHRD_INFINIBAND; @@ -4762,8 +4764,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); ib.rec.pkey = cpu_to_be16(0xffff); - if (id_priv->id.ps == RDMA_PS_UDP) - ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); @@ -4815,6 +4816,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; + if (id_priv->id.qp_type != IB_QPT_UD) + return -EINVAL; + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM;