diff mbox series

[for-next] IB/mlx4: Convert rej_tmout radix-tree to XArray

Message ID 1601989634-4595-1-git-send-email-haakon.bugge@oracle.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series [for-next] IB/mlx4: Convert rej_tmout radix-tree to XArray | expand

Commit Message

Haakon Bugge Oct. 6, 2020, 1:07 p.m. UTC
Fixes: b7d8e64fa9db ("IB/mlx4: Add support for REJ due to timeout")

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
---
 drivers/infiniband/hw/mlx4/cm.c      | 73 +++++++++++++++---------------------
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  4 +-
 2 files changed, 32 insertions(+), 45 deletions(-)

Comments

Gal Pressman Oct. 6, 2020, 1:14 p.m. UTC | #1
On 06/10/2020 16:07, Håkon Bugge wrote:
> Fixes: b7d8e64fa9db ("IB/mlx4: Add support for REJ due to timeout")

There shouldn't be a blank line here, and the commit hash doesn't exist.

> 
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Haakon Bugge Oct. 6, 2020, 1:34 p.m. UTC | #2
> On 6 Oct 2020, at 15:14, Gal Pressman <galpress@amazon.com> wrote:
> 
> On 06/10/2020 16:07, Håkon Bugge wrote:
>> Fixes: b7d8e64fa9db ("IB/mlx4: Add support for REJ due to timeout")
> 
> There shouldn't be a blank line here, and the commit hash doesn't exist.

Yep, you're right Gal. The Fixes line should read:

227a0e142e37 ("IB/mlx4: Add support for REJ due to timeout")

I'll let this one linger for a day to see if there are more comments and then send a v2.


Thanks, Håkon
Jason Gunthorpe Oct. 8, 2020, 6:56 p.m. UTC | #3
On Tue, Oct 06, 2020 at 03:07:14PM +0200, Håkon Bugge wrote:
> Fixes: b7d8e64fa9db ("IB/mlx4: Add support for REJ due to timeout")
> 
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>  drivers/infiniband/hw/mlx4/cm.c      | 73 +++++++++++++++---------------------
>  drivers/infiniband/hw/mlx4/mlx4_ib.h |  4 +-
>  2 files changed, 32 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
> index 0ce4b5a..6c7986b 100644
> +++ b/drivers/infiniband/hw/mlx4/cm.c
> @@ -58,9 +58,7 @@ struct rej_tmout_entry {
>  	int slave;
>  	u32 rem_pv_cm_id;
>  	struct delayed_work timeout;
> -	struct radix_tree_root *rej_tmout_root;
> -	/* Points to the mutex protecting this radix-tree */
> -	struct mutex *lock;
> +	struct xarray *xa_rej_tmout;
>  };
>  
>  struct cm_generic_msg {
> @@ -350,9 +348,7 @@ static void rej_tmout_timeout(struct work_struct *work)
>  	struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
>  	struct rej_tmout_entry *deleted;
>  
> -	mutex_lock(item->lock);
> -	deleted = radix_tree_delete_item(item->rej_tmout_root, item->rem_pv_cm_id, NULL);
> -	mutex_unlock(item->lock);
> +	deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
>  
>  	if (deleted != item)
>  		pr_debug("deleted(%p) != item(%p)\n", deleted, item);
> @@ -363,14 +359,13 @@ static void rej_tmout_timeout(struct work_struct *work)
>  static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
>  {
>  	struct rej_tmout_entry *item;
> -	int sts;
> +	struct rej_tmout_entry *old;
> +
> +	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);

The locking that was here looks wrong, rej_tmout_timeout() is a work
that could run at any time and kfree(item), so some kind of lock must
be held across every touch to item

Holding the xa_lock until the mod_delayed_work is done would be ok?

>  static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
>  {
>  	struct rej_tmout_entry *item;
>  
> -	mutex_lock(&sriov->rej_tmout_lock);
> -	item = radix_tree_lookup(&sriov->rej_tmout_root, (unsigned long)rem_pv_cm_id);
> -	mutex_unlock(&sriov->rej_tmout_lock);
> +	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
>  
> -	if (!item || IS_ERR(item)) {
> +	if (!item || xa_err(item)) {
>  		pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
> -			 rem_pv_cm_id, (int)PTR_ERR(item));
> -		return !item ? -ENOENT : PTR_ERR(item);
> +			 rem_pv_cm_id, xa_err(item));
> +		return !item ? -ENOENT : xa_err(item);
>  	}
>  
>  	return item->slave;

Here too

> +	xa_lock(&sriov->xa_rej_tmout);
> +	xa_for_each(&sriov->xa_rej_tmout, id, item) {
>  		if (slave < 0 || slave == item->slave) {
>  			mod_delayed_work(system_wq, &item->timeout, 0);
>  			flush_needed = true;
>  			++cnt;
>  		}
>  	}
> -	mutex_unlock(&sriov->rej_tmout_lock);
> +	xa_unlock(&sriov->xa_rej_tmout);

This is OK

Jason
Haakon Bugge Oct. 9, 2020, 2:09 p.m. UTC | #4
> On 8 Oct 2020, at 20:56, Jason Gunthorpe <jgg@ziepe.ca> wrote:
> 
> On Tue, Oct 06, 2020 at 03:07:14PM +0200, Håkon Bugge wrote:
>> Fixes: b7d8e64fa9db ("IB/mlx4: Add support for REJ due to timeout")
>> 
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> drivers/infiniband/hw/mlx4/cm.c      | 73 +++++++++++++++---------------------
>> drivers/infiniband/hw/mlx4/mlx4_ib.h |  4 +-
>> 2 files changed, 32 insertions(+), 45 deletions(-)
>> 
>> diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
>> index 0ce4b5a..6c7986b 100644
>> +++ b/drivers/infiniband/hw/mlx4/cm.c
>> @@ -58,9 +58,7 @@ struct rej_tmout_entry {
>> 	int slave;
>> 	u32 rem_pv_cm_id;
>> 	struct delayed_work timeout;
>> -	struct radix_tree_root *rej_tmout_root;
>> -	/* Points to the mutex protecting this radix-tree */
>> -	struct mutex *lock;
>> +	struct xarray *xa_rej_tmout;
>> };
>> 
>> struct cm_generic_msg {
>> @@ -350,9 +348,7 @@ static void rej_tmout_timeout(struct work_struct *work)
>> 	struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
>> 	struct rej_tmout_entry *deleted;
>> 
>> -	mutex_lock(item->lock);
>> -	deleted = radix_tree_delete_item(item->rej_tmout_root, item->rem_pv_cm_id, NULL);
>> -	mutex_unlock(item->lock);
>> +	deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
>> 
>> 	if (deleted != item)
>> 		pr_debug("deleted(%p) != item(%p)\n", deleted, item);
>> @@ -363,14 +359,13 @@ static void rej_tmout_timeout(struct work_struct *work)
>> static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
>> {
>> 	struct rej_tmout_entry *item;
>> -	int sts;
>> +	struct rej_tmout_entry *old;
>> +
>> +	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
> 
> The locking that was here looks wrong, rej_tmout_timeout() is a work
> that could run at any time and kfree(item), so some kind of lock must
> be held across every touch to item
> 
> Holding the xa_lock until the mod_delayed_work is done would be ok?


Good catch. I focused too much on the XArray itself. That works, but as you point out, dereferencing item with no locking is a no-no.

Will send a v2.


Thanks for the review, Håkon


> 
>> static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
>> {
>> 	struct rej_tmout_entry *item;
>> 
>> -	mutex_lock(&sriov->rej_tmout_lock);
>> -	item = radix_tree_lookup(&sriov->rej_tmout_root, (unsigned long)rem_pv_cm_id);
>> -	mutex_unlock(&sriov->rej_tmout_lock);
>> +	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
>> 
>> -	if (!item || IS_ERR(item)) {
>> +	if (!item || xa_err(item)) {
>> 		pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
>> -			 rem_pv_cm_id, (int)PTR_ERR(item));
>> -		return !item ? -ENOENT : PTR_ERR(item);
>> +			 rem_pv_cm_id, xa_err(item));
>> +		return !item ? -ENOENT : xa_err(item);
>> 	}
>> 
>> 	return item->slave;
> 
> Here too
> 
>> +	xa_lock(&sriov->xa_rej_tmout);
>> +	xa_for_each(&sriov->xa_rej_tmout, id, item) {
>> 		if (slave < 0 || slave == item->slave) {
>> 			mod_delayed_work(system_wq, &item->timeout, 0);
>> 			flush_needed = true;
>> 			++cnt;
>> 		}
>> 	}
>> -	mutex_unlock(&sriov->rej_tmout_lock);
>> +	xa_unlock(&sriov->xa_rej_tmout);
> 
> This is OK
> 
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 0ce4b5a..6c7986b 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -58,9 +58,7 @@  struct rej_tmout_entry {
 	int slave;
 	u32 rem_pv_cm_id;
 	struct delayed_work timeout;
-	struct radix_tree_root *rej_tmout_root;
-	/* Points to the mutex protecting this radix-tree */
-	struct mutex *lock;
+	struct xarray *xa_rej_tmout;
 };
 
 struct cm_generic_msg {
@@ -350,9 +348,7 @@  static void rej_tmout_timeout(struct work_struct *work)
 	struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
 	struct rej_tmout_entry *deleted;
 
-	mutex_lock(item->lock);
-	deleted = radix_tree_delete_item(item->rej_tmout_root, item->rem_pv_cm_id, NULL);
-	mutex_unlock(item->lock);
+	deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
 
 	if (deleted != item)
 		pr_debug("deleted(%p) != item(%p)\n", deleted, item);
@@ -363,14 +359,13 @@  static void rej_tmout_timeout(struct work_struct *work)
 static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
 {
 	struct rej_tmout_entry *item;
-	int sts;
+	struct rej_tmout_entry *old;
+
+	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
 
-	mutex_lock(&sriov->rej_tmout_lock);
-	item = radix_tree_lookup(&sriov->rej_tmout_root, (unsigned long)rem_pv_cm_id);
-	mutex_unlock(&sriov->rej_tmout_lock);
 	if (item) {
-		if (IS_ERR(item))
-			return PTR_ERR(item);
+		if (xa_err(item))
+			return xa_err(item);
 		/* If a retry, adjust delayed work */
 		mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
 		return 0;
@@ -383,36 +378,30 @@  static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
 	INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
 	item->slave = slave;
 	item->rem_pv_cm_id = rem_pv_cm_id;
-	item->rej_tmout_root = &sriov->rej_tmout_root;
-	item->lock = &sriov->rej_tmout_lock;
+	item->xa_rej_tmout = &sriov->xa_rej_tmout;
 
-	mutex_lock(&sriov->rej_tmout_lock);
-	sts = radix_tree_insert(&sriov->rej_tmout_root, (unsigned long)rem_pv_cm_id, item);
-	mutex_unlock(&sriov->rej_tmout_lock);
-	if (sts)
-		goto err_insert;
+	old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+	if (old) {
+		pr_debug("Non-null old entry (%p) or error (%d) when inserting\n", old, xa_err(old));
+		kfree(item);
+		return xa_err(old);
+	}
 
 	schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
 
 	return 0;
-
-err_insert:
-	kfree(item);
-	return sts;
 }
 
 static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
 {
 	struct rej_tmout_entry *item;
 
-	mutex_lock(&sriov->rej_tmout_lock);
-	item = radix_tree_lookup(&sriov->rej_tmout_root, (unsigned long)rem_pv_cm_id);
-	mutex_unlock(&sriov->rej_tmout_lock);
+	item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
 
-	if (!item || IS_ERR(item)) {
+	if (!item || xa_err(item)) {
 		pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
-			 rem_pv_cm_id, (int)PTR_ERR(item));
-		return !item ? -ENOENT : PTR_ERR(item);
+			 rem_pv_cm_id, xa_err(item));
+		return !item ? -ENOENT : xa_err(item);
 	}
 
 	return item->slave;
@@ -483,34 +472,34 @@  void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
 	INIT_LIST_HEAD(&dev->sriov.cm_list);
 	dev->sriov.sl_id_map = RB_ROOT;
 	xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
-	mutex_init(&dev->sriov.rej_tmout_lock);
-	INIT_RADIX_TREE(&dev->sriov.rej_tmout_root, GFP_KERNEL);
+	xa_init(&dev->sriov.xa_rej_tmout);
 }
 
-static void rej_tmout_tree_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
 {
-	struct radix_tree_iter iter;
+	struct rej_tmout_entry *item;
 	bool flush_needed = false;
-	__rcu void **slot;
+	unsigned long id;
 	int cnt = 0;
 
-	mutex_lock(&sriov->rej_tmout_lock);
-	radix_tree_for_each_slot(slot, &sriov->rej_tmout_root, &iter, 0) {
-		struct rej_tmout_entry *item = *slot;
-
+	xa_lock(&sriov->xa_rej_tmout);
+	xa_for_each(&sriov->xa_rej_tmout, id, item) {
 		if (slave < 0 || slave == item->slave) {
 			mod_delayed_work(system_wq, &item->timeout, 0);
 			flush_needed = true;
 			++cnt;
 		}
 	}
-	mutex_unlock(&sriov->rej_tmout_lock);
+	xa_unlock(&sriov->xa_rej_tmout);
 
 	if (flush_needed) {
 		flush_scheduled_work();
-		pr_debug("Deleted %d entries in radix_tree for slave %d during cleanup\n",
-			 slave, cnt);
+		pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+			 cnt, slave);
 	}
+
+	if (slave < 0)
+		WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
 }
 
 /* slave = -1 ==> all slaves */
@@ -581,5 +570,5 @@  void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
 		kfree(map);
 	}
 
-	rej_tmout_tree_cleanup(sriov, slave);
+	rej_tmout_xa_cleanup(sriov, slave);
 }
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 2ab83ed..d8add5f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -495,9 +495,7 @@  struct mlx4_ib_sriov {
 	spinlock_t id_map_lock;
 	struct rb_root sl_id_map;
 	struct list_head cm_list;
-	/* Protects the radix-tree */
-	struct mutex rej_tmout_lock;
-	struct radix_tree_root rej_tmout_root;
+	struct xarray xa_rej_tmout;
 };
 
 struct gid_cache_context {