
[rdma-next,2/2] RDMA/core: Add a netevent notifier to cma

Message ID 8c85028f89a877e9b4e6bb58bdd8a7f2cb4567a9.1649075034.git.leonro@nvidia.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Series Add gratuitous ARP support to RDMA-CM

Commit Message

Leon Romanovsky April 4, 2022, 12:27 p.m. UTC
From: Patrisious Haddad <phaddad@nvidia.com>

Add a netevent callback for cma, mainly to catch NETEVENT_NEIGH_UPDATE.

Previously, when a system with a failover MAC mechanism changed its MAC address
during a CM connection attempt, the RDMA-CM would take a long time to
disconnect and time out due to the incorrect MAC address.

Now, when we get a NETEVENT_NEIGH_UPDATE, we check whether it is due to a failover
MAC change and, if so, immediately destroy the CM ID and notify the user in order
to spare the unnecessary wait for the timeout.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/cma.c | 104 ++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

Comments

Jason Gunthorpe May 11, 2022, 12:04 a.m. UTC | #1
On Mon, Apr 04, 2022 at 03:27:27PM +0300, Leon Romanovsky wrote:

> @@ -5054,10 +5061,95 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
>  	return ret;
>  }
>  
> +static void cma_netevent_work_handler(struct work_struct *_work)
> +{
> +	struct cma_netevent_work *network =
> +		container_of(_work, struct cma_netevent_work, work);
> +	struct rdma_cm_event event = {};
> +
> +	mutex_lock(&network->id_priv->handler_mutex);
> +
> +	if (READ_ONCE(network->id_priv->state) == RDMA_CM_DESTROYING ||
> +	    READ_ONCE(network->id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
> +		goto out_unlock;
> +
> +	event.event = RDMA_CM_EVENT_UNREACHABLE;
> +	event.status = -ETIMEDOUT;
> +
> +	if (cma_cm_event_handler(network->id_priv, &event)) {
> +		__acquire(&network->id_priv->handler_mutex);

??

> +		network->id_priv->cm_id.ib = NULL;
> +		cma_id_put(network->id_priv);
> +		destroy_id_handler_unlock(network->id_priv);
> +		kfree(network);
> +		return;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&network->id_priv->handler_mutex);
> +	cma_id_put(network->id_priv);
> +	kfree(network);
> +}
> +
> +static int cma_netevent_callback(struct notifier_block *self,
> +				 unsigned long event, void *ctx)
> +{
> +	struct id_table_entry *ips_node = NULL;
> +	struct rdma_id_private *current_id;
> +	struct cma_netevent_work *network;
> +	struct neighbour *neigh = ctx;
> +	unsigned long flags;
> +
> +	if (event != NETEVENT_NEIGH_UPDATE)
> +		return NOTIFY_DONE;
> +
> +	spin_lock_irqsave(&id_table_lock, flags);
> +	if (neigh->tbl->family == AF_INET6) {
> +		struct sockaddr_in6 neigh_sock_6;
> +
> +		neigh_sock_6.sin6_family = AF_INET6;
> +		neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
> +		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
> +					     (struct sockaddr *)&neigh_sock_6);
> +	} else if (neigh->tbl->family == AF_INET) {
> +		struct sockaddr_in neigh_sock_4;
> +
> +		neigh_sock_4.sin_family = AF_INET;
> +		neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
> +		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
> +					     (struct sockaddr *)&neigh_sock_4);
> +	} else
> +		goto out;
> +
> +	if (!ips_node)
> +		goto out;
> +
> +	list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
> +		if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
> +			   neigh->ha, ETH_ALEN))
> +			continue;
> +		network = kzalloc(sizeof(*network), GFP_ATOMIC);
> +		if (!network)
> +			goto out;
> +
> +		INIT_WORK(&network->work, cma_netevent_work_handler);
> +		network->id_priv = current_id;
> +		cma_id_get(current_id);
> +		queue_work(cma_netevent_wq, &network->work);

It is pretty ugly that we need to do an atomic allocation for every
matching id.

It would be better to add the work directly to the rdma_cm_id and just
waste that memory.
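
For illustration only (not part of the posted patch), a minimal sketch of
that suggestion, with an assumed field name, would pre-allocate the work
item inside struct rdma_id_private so the notifier never has to allocate:

struct rdma_id_private {
	/* ... existing members ... */
	struct work_struct id_netevent_work; /* assumed name; INIT_WORK done at id creation */
};

static int cma_netevent_callback(struct notifier_block *self,
				 unsigned long event, void *ctx)
{
	/* ... same neighbour/address matching as in the patch ... */
	list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
		if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
			    neigh->ha, ETH_ALEN))
			continue;
		/* No GFP_ATOMIC allocation: the work item lives in the id. */
		cma_id_get(current_id);
		/* Drop the reference if the work was already queued. */
		if (!queue_work(cma_netevent_wq, &current_id->id_netevent_work))
			cma_id_put(current_id);
	}
	/* ... */
}

The work handler would then recover the id with
container_of(_work, struct rdma_id_private, id_netevent_work) instead of
freeing a separately allocated cma_netevent_work.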

> +	cma_netevent_wq = alloc_ordered_workqueue("rdma_cm_netevent", 0);
> +	if (!cma_netevent_wq) {
> +		ret = -ENOMEM;
> +		goto err_netevent_wq;
> +	}

Why do we need another WQ? Why does it need to be ordered?
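
For illustration only, if per-id ordering is not actually required, the
existing cma_wq could presumably be reused instead of a dedicated ordered
queue, e.g.:

	queue_work(cma_wq, &network->work);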

Jason

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bfe2b70daf39..c26fec94d032 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -21,6 +21,7 @@ 
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/netevent.h>
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/ip_fib.h>
@@ -173,6 +174,7 @@  static struct rb_root id_table = RB_ROOT;
 /* Serialize operations of id_table tree */
 static DEFINE_SPINLOCK(id_table_lock);
 static struct workqueue_struct *cma_wq;
+static struct workqueue_struct *cma_netevent_wq;
 static unsigned int cma_pernet_id;
 
 struct cma_pernet {
@@ -373,6 +375,11 @@  struct cma_work {
 	struct rdma_cm_event	event;
 };
 
+struct cma_netevent_work {
+	struct work_struct work;
+	struct rdma_id_private *id_priv;
+};
+
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -5054,10 +5061,95 @@  static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
 	return ret;
 }
 
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+	struct cma_netevent_work *network =
+		container_of(_work, struct cma_netevent_work, work);
+	struct rdma_cm_event event = {};
+
+	mutex_lock(&network->id_priv->handler_mutex);
+
+	if (READ_ONCE(network->id_priv->state) == RDMA_CM_DESTROYING ||
+	    READ_ONCE(network->id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+		goto out_unlock;
+
+	event.event = RDMA_CM_EVENT_UNREACHABLE;
+	event.status = -ETIMEDOUT;
+
+	if (cma_cm_event_handler(network->id_priv, &event)) {
+		__acquire(&network->id_priv->handler_mutex);
+		network->id_priv->cm_id.ib = NULL;
+		cma_id_put(network->id_priv);
+		destroy_id_handler_unlock(network->id_priv);
+		kfree(network);
+		return;
+	}
+
+out_unlock:
+	mutex_unlock(&network->id_priv->handler_mutex);
+	cma_id_put(network->id_priv);
+	kfree(network);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+				 unsigned long event, void *ctx)
+{
+	struct id_table_entry *ips_node = NULL;
+	struct rdma_id_private *current_id;
+	struct cma_netevent_work *network;
+	struct neighbour *neigh = ctx;
+	unsigned long flags;
+
+	if (event != NETEVENT_NEIGH_UPDATE)
+		return NOTIFY_DONE;
+
+	spin_lock_irqsave(&id_table_lock, flags);
+	if (neigh->tbl->family == AF_INET6) {
+		struct sockaddr_in6 neigh_sock_6;
+
+		neigh_sock_6.sin6_family = AF_INET6;
+		neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+					     (struct sockaddr *)&neigh_sock_6);
+	} else if (neigh->tbl->family == AF_INET) {
+		struct sockaddr_in neigh_sock_4;
+
+		neigh_sock_4.sin_family = AF_INET;
+		neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+					     (struct sockaddr *)&neigh_sock_4);
+	} else
+		goto out;
+
+	if (!ips_node)
+		goto out;
+
+	list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+		if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+			   neigh->ha, ETH_ALEN))
+			continue;
+		network = kzalloc(sizeof(*network), GFP_ATOMIC);
+		if (!network)
+			goto out;
+
+		INIT_WORK(&network->work, cma_netevent_work_handler);
+		network->id_priv = current_id;
+		cma_id_get(current_id);
+		queue_work(cma_netevent_wq, &network->work);
+	}
+out:
+	spin_unlock_irqrestore(&id_table_lock, flags);
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block cma_nb = {
 	.notifier_call = cma_netdev_callback
 };
 
+static struct notifier_block cma_netevent_cb = {
+	.notifier_call = cma_netevent_callback
+};
+
 static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
 {
 	struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5274,12 +5366,19 @@  static int __init cma_init(void)
 	if (!cma_wq)
 		return -ENOMEM;
 
+	cma_netevent_wq = alloc_ordered_workqueue("rdma_cm_netevent", 0);
+	if (!cma_netevent_wq) {
+		ret = -ENOMEM;
+		goto err_netevent_wq;
+	}
+
 	ret = register_pernet_subsys(&cma_pernet_operations);
 	if (ret)
 		goto err_wq;
 
 	ib_sa_register_client(&sa_client);
 	register_netdevice_notifier(&cma_nb);
+	register_netevent_notifier(&cma_netevent_cb);
 
 	ret = ib_register_client(&cma_client);
 	if (ret)
@@ -5294,10 +5393,13 @@  static int __init cma_init(void)
 err_ib:
 	ib_unregister_client(&cma_client);
 err:
+	unregister_netevent_notifier(&cma_netevent_cb);
 	unregister_netdevice_notifier(&cma_nb);
 	ib_sa_unregister_client(&sa_client);
 	unregister_pernet_subsys(&cma_pernet_operations);
 err_wq:
+	destroy_workqueue(cma_netevent_wq);
+err_netevent_wq:
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@@ -5306,9 +5408,11 @@  static void __exit cma_cleanup(void)
 {
 	cma_configfs_exit();
 	ib_unregister_client(&cma_client);
+	unregister_netevent_notifier(&cma_netevent_cb);
 	unregister_netdevice_notifier(&cma_nb);
 	ib_sa_unregister_client(&sa_client);
 	unregister_pernet_subsys(&cma_pernet_operations);
+	destroy_workqueue(cma_netevent_wq);
 	destroy_workqueue(cma_wq);
 }