diff mbox series

[2/2] maple_tree: reset mas->index and mas->last on write retries

Message ID 20240808163000.25053-2-sidhartha.kumar@oracle.com (mailing list archive)
State New
Headers show
Series [1/2] maple_tree: add test to replicate low memory race conditions | expand

Commit Message

Sidhartha Kumar Aug. 8, 2024, 4:30 p.m. UTC
The following scenario can result in a race condition:

Consider a node with the following indices and values

	a<------->b<----------->c<--------->d
	    0xA        NULL          0xB

	CPU 1			  CPU 2
      ---------        		---------
	mas_set_range(a,b)
	mas_erase()
		-> range is expanded (a,c) because of null expansion

	mas_nomem()
	mas_unlock()
				mas_store_range(b,c,0xC)

The node now looks like:

	a<------->b<----------->c<--------->d
	    0xA        0xC          0xB

	mas_lock()
	mas_erase() <------ range of erase is still (a,c)

The node is now NULL from (a,c) but the write from CPU 2 should have been
retained and range (b,c) should still have 0xC as its value. We can fix
this by re-initializing mas->index and mas->last to their original values on retry. This does not need
a cc: Stable as there are no users of the maple tree which use internal
locking and this condition is only possible with internal locking.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
 lib/maple_tree.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Liam R. Howlett Aug. 8, 2024, 5:31 p.m. UTC | #1
* Sidhartha Kumar <sidhartha.kumar@oracle.com> [240808 12:30]:
> The following scenario can result in a race condition:
> 
> Consider a node with the following indices and values
> 
> 	a<------->b<----------->c<--------->d
> 	    0xA        NULL          0xB
> 
> 	CPU 1			  CPU 2
>       ---------        		---------
> 	mas_set_range(a,b)
> 	mas_erase()
> 		-> range is expanded (a,c) because of null expansion
> 
> 	mas_nomem()
> 	mas_unlock()
> 				mas_store_range(b,c,0xC)
> 
> The node now looks like:
> 
> 	a<------->b<----------->c<--------->d
> 	    0xA        0xC          0xB
> 
> 	mas_lock()
> 	mas_erase() <------ range of erase is still (a,c)
> 
> The node is now NULL from (a,c) but the write from CPU 2 should have been
> retained and range (b,c) should still have 0xC as its value. We can fix
> this by re-initializing to the original index and last. This does not need
> a cc: Stable as there are no users of the maple tree which use internal
> locking and this condition is only possible with internal locking.
> 
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
> ---
>  lib/maple_tree.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index 65fba37ef999..6ba95a278326 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -5451,14 +5451,21 @@ EXPORT_SYMBOL_GPL(mas_store);
>   */
>  int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
>  {
> +	unsigned long index = mas->index;
> +	unsigned long last = mas->last;
>  	MA_WR_STATE(wr_mas, mas, entry);
>  
>  	mas_wr_store_setup(&wr_mas);
>  	trace_ma_write(__func__, mas, 0, entry);
>  retry:
>  	mas_wr_store_entry(&wr_mas);
> -	if (unlikely(mas_nomem(mas, gfp)))
> +	if (unlikely(mas_nomem(mas, gfp))) {
> +		if (!entry) {
> +			mas->index = index;
> +			mas->last = last;

__mas_set_range(mas, index, last);

> +		}
>  		goto retry;
> +	}
>  
>  	if (unlikely(mas_is_err(mas)))
>  		return xa_err(mas->node);
> @@ -6245,17 +6252,19 @@ EXPORT_SYMBOL_GPL(mas_find_range_rev);
>  void *mas_erase(struct ma_state *mas)
>  {
>  	void *entry;
> +	unsigned long index = mas->index;
>  	MA_WR_STATE(wr_mas, mas, NULL);
>  
>  	if (!mas_is_active(mas) || !mas_is_start(mas))
>  		mas->status = ma_start;
>  
> -	/* Retry unnecessary when holding the write lock. */
> +write_retry:
> +	/* reset mas->index and mas->last in case range of entry changed */
> +	mas->index = mas->last = index;

it might make sense to re-init in the mas_nomem() case only, to avoid
extra instructions in the fast path.

>  	entry = mas_state_walk(mas);
>  	if (!entry)
>  		return NULL;
>  
> -write_retry:
>  	/* Must reset to ensure spanning writes of last slot are detected */
>  	mas_reset(mas);
>  	mas_wr_store_setup(&wr_mas);
> -- 
> 2.46.0
>
diff mbox series

Patch

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 65fba37ef999..6ba95a278326 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5451,14 +5451,21 @@  EXPORT_SYMBOL_GPL(mas_store);
  */
 int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
 {
+	unsigned long index = mas->index;
+	unsigned long last = mas->last;
 	MA_WR_STATE(wr_mas, mas, entry);
 
 	mas_wr_store_setup(&wr_mas);
 	trace_ma_write(__func__, mas, 0, entry);
 retry:
 	mas_wr_store_entry(&wr_mas);
-	if (unlikely(mas_nomem(mas, gfp)))
+	if (unlikely(mas_nomem(mas, gfp))) {
+		if (!entry) {
+			mas->index = index;
+			mas->last = last;
+		}
 		goto retry;
+	}
 
 	if (unlikely(mas_is_err(mas)))
 		return xa_err(mas->node);
@@ -6245,17 +6252,19 @@  EXPORT_SYMBOL_GPL(mas_find_range_rev);
 void *mas_erase(struct ma_state *mas)
 {
 	void *entry;
+	unsigned long index = mas->index;
 	MA_WR_STATE(wr_mas, mas, NULL);
 
 	if (!mas_is_active(mas) || !mas_is_start(mas))
 		mas->status = ma_start;
 
-	/* Retry unnecessary when holding the write lock. */
+write_retry:
+	/* reset mas->index and mas->last in case range of entry changed */
+	mas->index = mas->last = index;
 	entry = mas_state_walk(mas);
 	if (!entry)
 		return NULL;
 
-write_retry:
 	/* Must reset to ensure spanning writes of last slot are detected */
 	mas_reset(mas);
 	mas_wr_store_setup(&wr_mas);