diff mbox series

[v2,1/2] maple_tree: simplify split calculation

Message ID 20241109134410.31792-2-richard.weiyang@gmail.com (mailing list archive)
State New
Headers show
Series simplify split calculation | expand

Commit Message

Wei Yang Nov. 9, 2024, 1:44 p.m. UTC
We have been too smart to calculate split value.

The purpose of current calculation is to avoid having a range less than
the slot count. But this seems to push too hard to suffer from jitter
problem.

Considering this only matters if the range is less than the slot count,
so the real world implications of the calculation will be negligible. So
we decide to simplify the calculation of split.

Also current code may lead to deficient node, the condition to check
should be (b_end - split - 1 > slot_min). After this change, this one is
gone together.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
CC: Liam R. Howlett <Liam.Howlett@Oracle.com>
CC: Sidhartha Kumar <sidhartha.kumar@oracle.com>
CC: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

---
v2:
  instead of fixing deficient split, simplify the calculation
---
 lib/maple_tree.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

Comments

Liam R. Howlett Nov. 12, 2024, 2:46 p.m. UTC | #1
* Wei Yang <richard.weiyang@gmail.com> [241109 08:45]:
> We have been too smart to calculate split value.
> 
> The purpose of current calculation is to avoid having a range less than
> the slot count. But this seems to push too hard to suffer from jitter
> problem.
> 
> Considering this only matters if the range is less than the slot count,
> so the real world implications of the calculation will be negligible. So
> we decide to simplify the calculation of split.
> 
> Also current code may lead to deficient node, the condition to check
> should be (b_end - split - 1 > slot_min). After this change, this one is
> gone together.

This comment is difficult to understand.

Maybe something like:
The current calculation for splitting nodes tries to enforce a minimum
span on the leaf nodes.  This code is complex and never worked correctly
to begin with, due to the min value being passed as 0 for all leaves.

The calculation should just split the data as equally as possible
between the new nodes.  Note that b_end will be one more than the data,
so the left side is still favoured in the calculation.

The current code may also lead to a deficient node by not leaving enough
data for the right side of the split. This issue is also addressed with
the split calculation change.

> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>

Fixes: ?
Cc: stable ?

> CC: Liam R. Howlett <Liam.Howlett@Oracle.com>
> CC: Sidhartha Kumar <sidhartha.kumar@oracle.com>
> CC: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> 
> ---
> v2:
>   instead of fixing deficient split, simplify the calculation
> ---
>  lib/maple_tree.c | 23 ++++++-----------------
>  1 file changed, 6 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index d0ae808f3a14..4f2950a1c38d 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -1863,11 +1863,11 @@ static inline int mab_no_null_split(struct maple_big_node *b_node,
>   * Return: The first split location.  The middle split is set in @mid_split.
>   */
>  static inline int mab_calc_split(struct ma_state *mas,
> -	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
> +	 struct maple_big_node *bn, unsigned char *mid_split)
>  {
>  	unsigned char b_end = bn->b_end;
>  	int split = b_end / 2; /* Assume equal split. */
> -	unsigned char slot_min, slot_count = mt_slots[bn->type];
> +	unsigned char slot_count = mt_slots[bn->type];
>  
>  	/*
>  	 * To support gap tracking, all NULL entries are kept together and a node cannot
> @@ -1900,18 +1900,7 @@ static inline int mab_calc_split(struct ma_state *mas,
>  		split = b_end / 3;
>  		*mid_split = split * 2;
>  	} else {
> -		slot_min = mt_min_slots[bn->type];
> -
>  		*mid_split = 0;
> -		/*
> -		 * Avoid having a range less than the slot count unless it
> -		 * causes one node to be deficient.
> -		 * NOTE: mt_min_slots is 1 based, b_end and split are zero.
> -		 */
> -		while ((split < slot_count - 1) &&
> -		       ((bn->pivot[split] - min) < slot_count - 1) &&
> -		       (b_end - split > slot_min))
> -			split++;
>  	}
>  
>  	/* Avoid ending a node on a NULL entry */
> @@ -2377,7 +2366,7 @@ static inline struct maple_enode
>  static inline unsigned char mas_mab_to_node(struct ma_state *mas,
>  	struct maple_big_node *b_node, struct maple_enode **left,
>  	struct maple_enode **right, struct maple_enode **middle,
> -	unsigned char *mid_split, unsigned long min)
> +	unsigned char *mid_split)
>  {
>  	unsigned char split = 0;
>  	unsigned char slot_count = mt_slots[b_node->type];
> @@ -2390,7 +2379,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas,
>  	if (b_node->b_end < slot_count) {
>  		split = b_node->b_end;
>  	} else {
> -		split = mab_calc_split(mas, b_node, mid_split, min);
> +		split = mab_calc_split(mas, b_node, mid_split);
>  		*right = mas_new_ma_node(mas, b_node);
>  	}
>  
> @@ -2877,7 +2866,7 @@ static void mas_spanning_rebalance(struct ma_state *mas,
>  		mast->bn->b_end--;
>  		mast->bn->type = mte_node_type(mast->orig_l->node);
>  		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
> -					&mid_split, mast->orig_l->min);
> +					&mid_split);
>  		mast_set_split_parents(mast, left, middle, right, split,
>  				       mid_split);
>  		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
> @@ -3365,7 +3354,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
>  		if (mas_push_data(mas, height, &mast, false))
>  			break;
>  
> -		split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
> +		split = mab_calc_split(mas, b_node, &mid_split);
>  		mast_split_data(&mast, mas, split);
>  		/*
>  		 * Usually correct, mab_mas_cp in the above call overwrites
> -- 
> 2.34.1
>
Wei Yang Nov. 13, 2024, 1:15 a.m. UTC | #2
On Tue, Nov 12, 2024 at 09:46:20AM -0500, Liam R. Howlett wrote:
>* Wei Yang <richard.weiyang@gmail.com> [241109 08:45]:
>> We have been too smart to calculate split value.
>> 
>> The purpose of current calculation is to avoid having a range less than
>> the slot count. But this seems to push too hard to suffer from jitter
>> problem.
>> 
>> Considering this only matters if the range is less than the slot count,
>> so the real world implications of the calculation will be negligible. So
>> we decide to simplify the calculation of split.
>> 
>> Also current code may lead to deficient node, the condition to check
>> should be (b_end - split - 1 > slot_min). After this change, this one is
>> gone together.
>
>This comment is difficult to understand.
>
>Maybe something like:
>The current calculation for splitting nodes tries to enforce a minimum
>span on the leaf nodes.  This code is complex and never worked correctly
>to begin with, due to the min value being passed as 0 for all leaves.
>
>The calculation should just split the data as equally as possible
>between the new nodes.  Note that b_end will be one more than the data,
>so the left side is still favoured in the calculation.
>
>The current code may also lead to a deficient node by not leaving enough
>data for the right side of the split. This issue is also addressed with
>the split calculation change.
>

Thanks, this looks much better :-)

>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>
>Fixes: ?
>Cc: stable ?
>

Will add this.

BTW, as this is a fix, do you think the test case in patch 2 of v1 is still
necessary?

>> CC: Liam R. Howlett <Liam.Howlett@Oracle.com>
>> CC: Sidhartha Kumar <sidhartha.kumar@oracle.com>
>> CC: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> 
>> ---
>> v2:
>>   instead of fixing deficient split, simplify the calculation
>> ---
>>  lib/maple_tree.c | 23 ++++++-----------------
>>  1 file changed, 6 insertions(+), 17 deletions(-)
>> 
>> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
>> index d0ae808f3a14..4f2950a1c38d 100644
>> --- a/lib/maple_tree.c
>> +++ b/lib/maple_tree.c
>> @@ -1863,11 +1863,11 @@ static inline int mab_no_null_split(struct maple_big_node *b_node,
>>   * Return: The first split location.  The middle split is set in @mid_split.
>>   */
>>  static inline int mab_calc_split(struct ma_state *mas,
>> -	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
>> +	 struct maple_big_node *bn, unsigned char *mid_split)
>>  {
>>  	unsigned char b_end = bn->b_end;
>>  	int split = b_end / 2; /* Assume equal split. */
>> -	unsigned char slot_min, slot_count = mt_slots[bn->type];
>> +	unsigned char slot_count = mt_slots[bn->type];
>>  
>>  	/*
>>  	 * To support gap tracking, all NULL entries are kept together and a node cannot
>> @@ -1900,18 +1900,7 @@ static inline int mab_calc_split(struct ma_state *mas,
>>  		split = b_end / 3;
>>  		*mid_split = split * 2;
>>  	} else {
>> -		slot_min = mt_min_slots[bn->type];
>> -
>>  		*mid_split = 0;
>> -		/*
>> -		 * Avoid having a range less than the slot count unless it
>> -		 * causes one node to be deficient.
>> -		 * NOTE: mt_min_slots is 1 based, b_end and split are zero.
>> -		 */
>> -		while ((split < slot_count - 1) &&
>> -		       ((bn->pivot[split] - min) < slot_count - 1) &&
>> -		       (b_end - split > slot_min))
>> -			split++;
>>  	}
>>  
>>  	/* Avoid ending a node on a NULL entry */
>> @@ -2377,7 +2366,7 @@ static inline struct maple_enode
>>  static inline unsigned char mas_mab_to_node(struct ma_state *mas,
>>  	struct maple_big_node *b_node, struct maple_enode **left,
>>  	struct maple_enode **right, struct maple_enode **middle,
>> -	unsigned char *mid_split, unsigned long min)
>> +	unsigned char *mid_split)
>>  {
>>  	unsigned char split = 0;
>>  	unsigned char slot_count = mt_slots[b_node->type];
>> @@ -2390,7 +2379,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas,
>>  	if (b_node->b_end < slot_count) {
>>  		split = b_node->b_end;
>>  	} else {
>> -		split = mab_calc_split(mas, b_node, mid_split, min);
>> +		split = mab_calc_split(mas, b_node, mid_split);
>>  		*right = mas_new_ma_node(mas, b_node);
>>  	}
>>  
>> @@ -2877,7 +2866,7 @@ static void mas_spanning_rebalance(struct ma_state *mas,
>>  		mast->bn->b_end--;
>>  		mast->bn->type = mte_node_type(mast->orig_l->node);
>>  		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
>> -					&mid_split, mast->orig_l->min);
>> +					&mid_split);
>>  		mast_set_split_parents(mast, left, middle, right, split,
>>  				       mid_split);
>>  		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
>> @@ -3365,7 +3354,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
>>  		if (mas_push_data(mas, height, &mast, false))
>>  			break;
>>  
>> -		split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
>> +		split = mab_calc_split(mas, b_node, &mid_split);
>>  		mast_split_data(&mast, mas, split);
>>  		/*
>>  		 * Usually correct, mab_mas_cp in the above call overwrites
>> -- 
>> 2.34.1
>>
Liam R. Howlett Nov. 13, 2024, 1:44 a.m. UTC | #3
* Wei Yang <richard.weiyang@gmail.com> [241112 20:15]:
> On Tue, Nov 12, 2024 at 09:46:20AM -0500, Liam R. Howlett wrote:
> >* Wei Yang <richard.weiyang@gmail.com> [241109 08:45]:
> >> We have been too smart to calculate split value.
> >> 
> >> The purpose of current calculation is to avoid having a range less than
> >> the slot count. But this seems to push too hard to suffer from jitter
> >> problem.
> >> 
> >> Considering this only matters if the range is less than the slot count,
> >> so the real world implications of the calculation will be negligible. So
> >> we decide to simplify the calculation of split.
> >> 
> >> Also current code may lead to deficient node, the condition to check
> >> should be (b_end - split - 1 > slot_min). After this change, this one is
> >> gone together.
> >
> >This comment is difficult to understand.
> >
> >Maybe something like:
> >The current calculation for splitting nodes tries to enforce a minimum
> >span on the leaf nodes.  This code is complex and never worked correctly
> >to begin with, due to the min value being passed as 0 for all leaves.
> >
> >The calculation should just split the data as equally as possible
> >between the new nodes.  Note that b_end will be one more than the data,
> >so the left side is still favoured in the calculation.
> >
> >The current code may also lead to a deficient node by not leaving enough
> >data for the right side of the split. This issue is also addressed with
> >the split calculation change.
> >
> 
> Thanks, this looks much better :-)
> 
> >> 
> >> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> >
> >Fixes: ?
> >Cc: stable ?
> >
> 
> Will add this.
> 
> BTW, as this is a fix, do you think the test case in patch 2 of v1 is still
> necessary?

Yes, please include it.

I keep all test cases from fixed bugs or added features.  This way we
know that the bug doesn't return in another unrelated change.  The
testcases continuously grow, but they are always useful.

Thanks,
Liam
diff mbox series

Patch

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index d0ae808f3a14..4f2950a1c38d 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -1863,11 +1863,11 @@  static inline int mab_no_null_split(struct maple_big_node *b_node,
  * Return: The first split location.  The middle split is set in @mid_split.
  */
 static inline int mab_calc_split(struct ma_state *mas,
-	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
+	 struct maple_big_node *bn, unsigned char *mid_split)
 {
 	unsigned char b_end = bn->b_end;
 	int split = b_end / 2; /* Assume equal split. */
-	unsigned char slot_min, slot_count = mt_slots[bn->type];
+	unsigned char slot_count = mt_slots[bn->type];
 
 	/*
 	 * To support gap tracking, all NULL entries are kept together and a node cannot
@@ -1900,18 +1900,7 @@  static inline int mab_calc_split(struct ma_state *mas,
 		split = b_end / 3;
 		*mid_split = split * 2;
 	} else {
-		slot_min = mt_min_slots[bn->type];
-
 		*mid_split = 0;
-		/*
-		 * Avoid having a range less than the slot count unless it
-		 * causes one node to be deficient.
-		 * NOTE: mt_min_slots is 1 based, b_end and split are zero.
-		 */
-		while ((split < slot_count - 1) &&
-		       ((bn->pivot[split] - min) < slot_count - 1) &&
-		       (b_end - split > slot_min))
-			split++;
 	}
 
 	/* Avoid ending a node on a NULL entry */
@@ -2377,7 +2366,7 @@  static inline struct maple_enode
 static inline unsigned char mas_mab_to_node(struct ma_state *mas,
 	struct maple_big_node *b_node, struct maple_enode **left,
 	struct maple_enode **right, struct maple_enode **middle,
-	unsigned char *mid_split, unsigned long min)
+	unsigned char *mid_split)
 {
 	unsigned char split = 0;
 	unsigned char slot_count = mt_slots[b_node->type];
@@ -2390,7 +2379,7 @@  static inline unsigned char mas_mab_to_node(struct ma_state *mas,
 	if (b_node->b_end < slot_count) {
 		split = b_node->b_end;
 	} else {
-		split = mab_calc_split(mas, b_node, mid_split, min);
+		split = mab_calc_split(mas, b_node, mid_split);
 		*right = mas_new_ma_node(mas, b_node);
 	}
 
@@ -2877,7 +2866,7 @@  static void mas_spanning_rebalance(struct ma_state *mas,
 		mast->bn->b_end--;
 		mast->bn->type = mte_node_type(mast->orig_l->node);
 		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
-					&mid_split, mast->orig_l->min);
+					&mid_split);
 		mast_set_split_parents(mast, left, middle, right, split,
 				       mid_split);
 		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
@@ -3365,7 +3354,7 @@  static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
 		if (mas_push_data(mas, height, &mast, false))
 			break;
 
-		split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
+		split = mab_calc_split(mas, b_node, &mid_split);
 		mast_split_data(&mast, mas, split);
 		/*
 		 * Usually correct, mab_mas_cp in the above call overwrites