Message ID | 1434358036-15526-3-git-send-email-haggaie@mellanox.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Mon, Jun 15, 2015 at 11:47:07AM +0300, Haggai Eran wrote: > +/* Called with an RCU read lock taken */ Add _rcu to the name? That is the standard convention. > +/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index > + * and address, if one exists. */ > +static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, > + const union ib_gid *gid, > + u16 pkey_index, > + const struct sockaddr *addr) > +{ > + struct ipoib_dev_priv *child_priv; > + struct net_device *net_dev = NULL; > + > + if (priv->pkey_index == pkey_index && > + (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { > + net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev); > + if (net_dev) > + return net_dev; As I said already, this should not even look at the sockaddr unless there are multiple possible hits on the other parameters, and there should be a comment explaining the sockaddr is only a hack to make up for having an incomplete LLADDR. That way people not using same guid children do not get incorrect functionality.. > +static struct net_device *ipoib_get_net_dev_by_params( > + struct ib_device *dev, u8 port, u16 pkey, > + const union ib_gid *gid, const struct sockaddr *addr) [..] > + ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); > + if (ret) > + return NULL; > + > + if (!rdma_protocol_ib(dev, port)) > + return NULL; This if should be first I'd think. > + dev_list = ib_get_client_data(dev, &ipoib_client); > + if (!dev_list) > + return NULL; Is the locking OK here? This access protected by lists_rwsem - but for instance ib_unregister_device holds only the device_mutex when calling client->remove, which kfree's dev_list. Looks wrong to me. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 15/06/2015 20:22, Jason Gunthorpe wrote: > On Mon, Jun 15, 2015 at 11:47:07AM +0300, Haggai Eran wrote: > >> +/* Called with an RCU read lock taken */ > > Add _rcu to the name? That is the standard convention. Sure, I'll change that. > >> +/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index >> + * and address, if one exists. */ >> +static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, >> + const union ib_gid *gid, >> + u16 pkey_index, >> + const struct sockaddr *addr) >> +{ >> + struct ipoib_dev_priv *child_priv; >> + struct net_device *net_dev = NULL; >> + >> + if (priv->pkey_index == pkey_index && >> + (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { >> + net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev); >> + if (net_dev) >> + return net_dev; > > As I said already, this should not even look at the sockaddr unless > there are multiple possible hits on the other parameters, What is the goal here? The only difference omitting the IP check will make is when sending a request to a matching GID but with the wrong IP. Is it important that we pass these requests here so that they will be dropped at the rdma_cm module? Also, note that ipoib_get_net_dev_match_addr can return a different net_dev from the one ipoib created. When using bonding, it will find the IP address on the bonding device, and return the bonding net_dev instead. > and there > should be a comment explaining the sockaddr is only a hack to make up > for having an incomplete LLADDR. Sure, I'll add a comment. > > That way people not using same guid children do not get incorrect > functionality.. > >> +static struct net_device *ipoib_get_net_dev_by_params( >> + struct ib_device *dev, u8 port, u16 pkey, >> + const union ib_gid *gid, const struct sockaddr *addr) > > [..] > >> + ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); >> + if (ret) >> + return NULL; >> + >> + if (!rdma_protocol_ib(dev, port)) >> + return NULL; > > This if should be first I'd think. Okay. > > >> + dev_list = ib_get_client_data(dev, &ipoib_client); >> + if (!dev_list) >> + return NULL; > > Is the locking OK here? This access protected by lists_rwsem - > but for instance ib_unregister_device holds only the device_mutex when > calling client->remove, which kfree's dev_list. Looks wrong to me. I think you're right. Perhaps we can switch the client data to NULL in ib_unregister_device under the lists_rwsem. Then the ipoib_get_net_dev_by_params call will know to skip it. The remove() callback will need to be augmented with the client data as a parameter, because it won't be able to retrieve it using ib_get_client_data anymore. Haggai -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index da149c278cb8..15af32665a87 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -48,6 +48,9 @@ #include <linux/jhash.h> #include <net/arp.h> +#include <net/addrconf.h> +#include <linux/inetdevice.h> +#include <rdma/ib_cache.h> #define DRV_VERSION "1.0.0" @@ -91,11 +94,15 @@ struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device); static void ipoib_neigh_reclaim(struct rcu_head *rp); +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr); static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, - .remove = ipoib_remove_one + .remove = ipoib_remove_one, + .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; int ipoib_open(struct net_device *dev) @@ -222,6 +229,138 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* Called with an RCU read lock taken */ +static bool ipoib_is_dev_match_addr(const struct sockaddr *addr, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct in_device *in_dev; + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr; + __be32 ret_addr; + + switch (addr->sa_family) { + case AF_INET: + in_dev = in_dev_get(dev); + if (!in_dev) + return false; + + ret_addr = inet_confirm_addr(net, in_dev, 0, + addr_in->sin_addr.s_addr, + RT_SCOPE_HOST); + in_dev_put(in_dev); + if (ret_addr) + return true; + + break; + case AF_INET6: + if (IS_ENABLED(CONFIG_IPV6) && + ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1)) + return true; + + break; + } + return false; +} + +/** + * Find a net_device matching the given address, which is an upper device of + * the given net_device. + * @addr: IP address to look for. + * @dev: base IPoIB net_device + * + * If found, returns the net_device with a reference held. Otherwise return + * NULL. + */ +static struct net_device *ipoib_get_net_dev_match_addr( + const struct sockaddr *addr, struct net_device *dev) +{ + struct net_device *upper, + *result = NULL; + struct list_head *iter; + + rcu_read_lock(); + if (ipoib_is_dev_match_addr(addr, dev)) { + dev_hold(dev); + result = dev; + goto out; + } + + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { + if (ipoib_is_dev_match_addr(addr, upper)) { + dev_hold(upper); + result = upper; + break; + } + } +out: + rcu_read_unlock(); + return result; +} + +/* returns an IPoIB netdev on top a given ipoib device matching a pkey_index + * and address, if one exists. */ +static struct net_device *ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, + const union ib_gid *gid, + u16 pkey_index, + const struct sockaddr *addr) +{ + struct ipoib_dev_priv *child_priv; + struct net_device *net_dev = NULL; + + if (priv->pkey_index == pkey_index && + (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { + net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev); + if (net_dev) + return net_dev; + } + + /* Check child interfaces */ + down_read(&priv->vlan_rwsem); + list_for_each_entry(child_priv, &priv->child_intfs, list) { + net_dev = ipoib_match_gid_pkey_addr(child_priv, gid, + pkey_index, addr); + if (net_dev) + break; + } + up_read(&priv->vlan_rwsem); + + return net_dev; +} + +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr) +{ + struct ipoib_dev_priv *priv; + struct list_head *dev_list; + struct net_device *net_dev; + u16 pkey_index; + int ret; + + ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); + if (ret) + return NULL; + + if (!rdma_protocol_ib(dev, port)) + return NULL; + + dev_list = ib_get_client_data(dev, &ipoib_client); + if (!dev_list) + return NULL; + + list_for_each_entry(priv, dev_list, list) { + if (priv->port != port) + continue; + + net_dev = ipoib_match_gid_pkey_addr(priv, gid, pkey_index, + addr); + if (net_dev) + return net_dev; + } + return NULL; +} + int ipoib_set_mode(struct net_device *dev, const char *buf) { struct ipoib_dev_priv *priv = netdev_priv(dev);