diff mbox series

md/persistent-data: Set tm hash size via a module_param

Message ID 20241014134944.1264991-1-meir.elisha@volumez.com (mailing list archive)
State New, archived
Headers show
Series md/persistent-data: Set tm hash size via a module_param | expand

Commit Message

Meir Elisha Oct. 14, 2024, 1:49 p.m. UTC
Enlarging the hash table size can significantly improve transaction
manager hash operations. This commit adds the tm_hash_table_size
module_param that sets the hash size.

For demonstration, I created a thin volume, filled it with 4.8TiB of
sequential data and took a snapshot. Then, I overwrote the volume in a
way that caused the maximum number of btree nodes to be allocated. I
repeated the process for 5 snapshots and measured the first snapshot's
deletion time.

When using a 128k hash size (instead of the default value of 256), I was
able to reduce snapshot deletion time from 42min to 4min (a 10x improvement).

Signed-off-by: Meir Elisha <meir.elisha@volumez.com>
---

Script used for demonstration attached below.

 .../persistent-data/dm-transaction-manager.c  | 51 ++++++++++++++++---
 1 file changed, 43 insertions(+), 8 deletions(-)

Comments

Meir Elisha Nov. 18, 2024, 1:19 p.m. UTC | #1
On 10/14/24 16:49, Meir Elisha wrote:
> Enlarging the hash table size can significantly improve transaction
> manager hash operations. This commit adds the tm_hash_table_size
> module_param that sets the hash size.
> 
> For demonstration, I've created a thin volume, filled it with 4.8TiB of
> sequential data and took a snapshot. then, I overwritten the volume in a
> way that caused a maximum btree nodes to be allocated. I've repeated the
> process for 5 snapshots and measured the first snapshot deletion time.
> 
> When using 128k hash size (instead of the 256 default value) I was able
> to reduce snapshot deletion time from 42min to 4min (10x improvement).
> 
> Signed-off-by: Meir Elisha <meir.elisha@volumez.com>
> ---
> 
> Script used for demonstration attached below.
> 
>  .../persistent-data/dm-transaction-manager.c  | 51 ++++++++++++++++---
>  1 file changed, 43 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
> index c7ba4e6cbbc7..8d486b1e6693 100644
> --- a/drivers/md/persistent-data/dm-transaction-manager.c
> +++ b/drivers/md/persistent-data/dm-transaction-manager.c
> @@ -10,6 +10,7 @@
>  #include "dm-space-map-metadata.h"
>  #include "dm-persistent-data-internal.h"
>  
> +#include <linux/moduleparam.h>
>  #include <linux/export.h>
>  #include <linux/mutex.h>
>  #include <linux/hash.h>
> @@ -84,8 +85,35 @@ struct shadow_info {
>  /*
>   * It would be nice if we scaled with the size of transaction.
>   */
> -#define DM_HASH_SIZE 256
> -#define DM_HASH_MASK (DM_HASH_SIZE - 1)
> +static uint tm_hash_table_size = 256;
> +
> +static int param_set_hash_size(const char *val, const struct kernel_param *kp)
> +{
> +	unsigned int num;
> +	int ret;
> +
> +	if (!val)
> +		return -EINVAL;
> +
> +	ret = kstrtouint(val, 0, &num);
> +	if (ret)
> +		return ret;
> +
> +	/* Hash size must be a power of 2 */
> +	if (!(num && !(num & (num - 1))))
> +		return -EINVAL;
> +
> +	*((unsigned int *)kp->arg) = num;
> +	return 0;
> +}
> +
> +static const struct kernel_param_ops tm_hash_table_size_ops = {
> +	.set = param_set_hash_size,
> +	.get = param_get_uint
> +};
> +
> +module_param_cb(tm_hash_table_size, &tm_hash_table_size_ops, &tm_hash_table_size, 0644);
> +MODULE_PARM_DESC(tm_hash_table_size, "transaction manager hash size");
>  
>  struct dm_transaction_manager {
>  	int is_clone;
> @@ -95,8 +123,9 @@ struct dm_transaction_manager {
>  	struct dm_space_map *sm;
>  
>  	spinlock_t lock;
> -	struct hlist_head buckets[DM_HASH_SIZE];
> -
> +	struct hlist_head *buckets;
> +	uint hash_size;
> +	uint hash_mask;
>  	struct prefetch_set prefetches;
>  };
>  
> @@ -105,7 +134,7 @@ struct dm_transaction_manager {
>  static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
>  {
>  	int r = 0;
> -	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
> +	unsigned int bucket = dm_hash_block(b, tm->hash_mask);
>  	struct shadow_info *si;
>  
>  	spin_lock(&tm->lock);
> @@ -131,7 +160,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
>  	si = kmalloc(sizeof(*si), GFP_NOIO);
>  	if (si) {
>  		si->where = b;
> -		bucket = dm_hash_block(b, DM_HASH_MASK);
> +		bucket = dm_hash_block(b, tm->hash_mask);
>  		spin_lock(&tm->lock);
>  		hlist_add_head(&si->hlist, tm->buckets + bucket);
>  		spin_unlock(&tm->lock);
> @@ -146,7 +175,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
>  	int i;
>  
>  	spin_lock(&tm->lock);
> -	for (i = 0; i < DM_HASH_SIZE; i++) {
> +	for (i = 0; i < tm->hash_size; i++) {
>  		bucket = tm->buckets + i;
>  		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
>  			kfree(si);
> @@ -169,13 +198,19 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
>  	if (!tm)
>  		return ERR_PTR(-ENOMEM);
>  
> +	tm->hash_size = tm_hash_table_size;
> +	tm->buckets = kmalloc_array(tm->hash_size, sizeof(*tm->buckets), GFP_KERNEL);
> +	if (!tm->buckets)
> +		return ERR_PTR(-ENOMEM);
> +
>  	tm->is_clone = 0;
>  	tm->real = NULL;
>  	tm->bm = bm;
>  	tm->sm = sm;
> +	tm->hash_mask = tm->hash_size - 1;
>  
>  	spin_lock_init(&tm->lock);
> -	for (i = 0; i < DM_HASH_SIZE; i++)
> +	for (i = 0; i < tm->hash_size; i++)
>  		INIT_HLIST_HEAD(tm->buckets + i);
>  
>  	prefetch_init(&tm->prefetches);

Hi all,

I wanted to follow up on this patch. 
Could you please review it when you have a chance?
If you need any additional information or have concerns, let me know.
Thanks for your time.
Mikulas Patocka Nov. 18, 2024, 2:55 p.m. UTC | #2
On Mon, 18 Nov 2024, Meir Elisha wrote:

> > diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
> > index c7ba4e6cbbc7..8d486b1e6693 100644
> > --- a/drivers/md/persistent-data/dm-transaction-manager.c
> > +++ b/drivers/md/persistent-data/dm-transaction-manager.c
> > @@ -10,6 +10,7 @@
> >  #include "dm-space-map-metadata.h"
> >  #include "dm-persistent-data-internal.h"
> >  
> > +#include <linux/moduleparam.h>
> >  #include <linux/export.h>
> >  #include <linux/mutex.h>
> >  #include <linux/hash.h>
> > @@ -84,8 +85,35 @@ struct shadow_info {
> >  /*
> >   * It would be nice if we scaled with the size of transaction.
> >   */
> > -#define DM_HASH_SIZE 256
> > -#define DM_HASH_MASK (DM_HASH_SIZE - 1)
> > +static uint tm_hash_table_size = 256;
> > +
> > +static int param_set_hash_size(const char *val, const struct kernel_param *kp)
> > +{
> > +	unsigned int num;
> > +	int ret;
> > +
> > +	if (!val)
> > +		return -EINVAL;
> > +
> > +	ret = kstrtouint(val, 0, &num);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* Hash size must be a power of 2 */
> > +	if (!(num && !(num & (num - 1))))
> > +		return -EINVAL;
> > +
> > +	*((unsigned int *)kp->arg) = num;
> > +	return 0;
> > +}
> > +
> > +static const struct kernel_param_ops tm_hash_table_size_ops = {
> > +	.set = param_set_hash_size,
> > +	.get = param_get_uint
> > +};
> > +
> > +module_param_cb(tm_hash_table_size, &tm_hash_table_size_ops, &tm_hash_table_size, 0644);
> > +MODULE_PARM_DESC(tm_hash_table_size, "transaction manager hash size");
> >  
> >  struct dm_transaction_manager {
> >  	int is_clone;
> > @@ -95,8 +123,9 @@ struct dm_transaction_manager {
> >  	struct dm_space_map *sm;
> >  
> >  	spinlock_t lock;
> > -	struct hlist_head buckets[DM_HASH_SIZE];
> > -
> > +	struct hlist_head *buckets;
> > +	uint hash_size;
> > +	uint hash_mask;
> >  	struct prefetch_set prefetches;
> >  };
> >  
> > @@ -105,7 +134,7 @@ struct dm_transaction_manager {
> >  static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
> >  {
> >  	int r = 0;
> > -	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
> > +	unsigned int bucket = dm_hash_block(b, tm->hash_mask);
> >  	struct shadow_info *si;
> >  
> >  	spin_lock(&tm->lock);
> > @@ -131,7 +160,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
> >  	si = kmalloc(sizeof(*si), GFP_NOIO);
> >  	if (si) {
> >  		si->where = b;
> > -		bucket = dm_hash_block(b, DM_HASH_MASK);
> > +		bucket = dm_hash_block(b, tm->hash_mask);
> >  		spin_lock(&tm->lock);
> >  		hlist_add_head(&si->hlist, tm->buckets + bucket);
> >  		spin_unlock(&tm->lock);
> > @@ -146,7 +175,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
> >  	int i;
> >  
> >  	spin_lock(&tm->lock);
> > -	for (i = 0; i < DM_HASH_SIZE; i++) {
> > +	for (i = 0; i < tm->hash_size; i++) {
> >  		bucket = tm->buckets + i;
> >  		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
> >  			kfree(si);
> > @@ -169,13 +198,19 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
> >  	if (!tm)
> >  		return ERR_PTR(-ENOMEM);
> >  
> > +	tm->hash_size = tm_hash_table_size;
> > +	tm->buckets = kmalloc_array(tm->hash_size, sizeof(*tm->buckets), GFP_KERNEL);
> > +	if (!tm->buckets)
> > +		return ERR_PTR(-ENOMEM);
> > +
> >  	tm->is_clone = 0;
> >  	tm->real = NULL;
> >  	tm->bm = bm;
> >  	tm->sm = sm;
> > +	tm->hash_mask = tm->hash_size - 1;
> >  
> >  	spin_lock_init(&tm->lock);
> > -	for (i = 0; i < DM_HASH_SIZE; i++)
> > +	for (i = 0; i < tm->hash_size; i++)
> >  		INIT_HLIST_HEAD(tm->buckets + i);
> >  
> >  	prefetch_init(&tm->prefetches);
> 
> Hi all,
> 
> I wanted to follow up on this patch. 
> Could you please review it when you have a chance?
> If you need any additional information or have concerns, let me know.
> Thanks for your time.

Hi

I discussed it with Joe. We concluded that the patch is not very useful, 
because few users will know about this option and few users will change it 
if they hit bad performance.

Joe will be reworking the code so that it will use rb-trees instead of 
linked lists when there is a hash collision; this could improve 
performance without adding a tunable variable.

Mikulas
Meir Elisha Nov. 19, 2024, 3:54 p.m. UTC | #3
On 11/18/24 16:55, Mikulas Patocka wrote:
> 
> 
> On Mon, 18 Nov 2024, Meir Elisha wrote:
> 
>>> diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
>>> index c7ba4e6cbbc7..8d486b1e6693 100644
>>> --- a/drivers/md/persistent-data/dm-transaction-manager.c
>>> +++ b/drivers/md/persistent-data/dm-transaction-manager.c
>>> @@ -10,6 +10,7 @@
>>>  #include "dm-space-map-metadata.h"
>>>  #include "dm-persistent-data-internal.h"
>>>  
>>> +#include <linux/moduleparam.h>
>>>  #include <linux/export.h>
>>>  #include <linux/mutex.h>
>>>  #include <linux/hash.h>
>>> @@ -84,8 +85,35 @@ struct shadow_info {
>>>  /*
>>>   * It would be nice if we scaled with the size of transaction.
>>>   */
>>> -#define DM_HASH_SIZE 256
>>> -#define DM_HASH_MASK (DM_HASH_SIZE - 1)
>>> +static uint tm_hash_table_size = 256;
>>> +
>>> +static int param_set_hash_size(const char *val, const struct kernel_param *kp)
>>> +{
>>> +	unsigned int num;
>>> +	int ret;
>>> +
>>> +	if (!val)
>>> +		return -EINVAL;
>>> +
>>> +	ret = kstrtouint(val, 0, &num);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	/* Hash size must be a power of 2 */
>>> +	if (!(num && !(num & (num - 1))))
>>> +		return -EINVAL;
>>> +
>>> +	*((unsigned int *)kp->arg) = num;
>>> +	return 0;
>>> +}
>>> +
>>> +static const struct kernel_param_ops tm_hash_table_size_ops = {
>>> +	.set = param_set_hash_size,
>>> +	.get = param_get_uint
>>> +};
>>> +
>>> +module_param_cb(tm_hash_table_size, &tm_hash_table_size_ops, &tm_hash_table_size, 0644);
>>> +MODULE_PARM_DESC(tm_hash_table_size, "transaction manager hash size");
>>>  
>>>  struct dm_transaction_manager {
>>>  	int is_clone;
>>> @@ -95,8 +123,9 @@ struct dm_transaction_manager {
>>>  	struct dm_space_map *sm;
>>>  
>>>  	spinlock_t lock;
>>> -	struct hlist_head buckets[DM_HASH_SIZE];
>>> -
>>> +	struct hlist_head *buckets;
>>> +	uint hash_size;
>>> +	uint hash_mask;
>>>  	struct prefetch_set prefetches;
>>>  };
>>>  
>>> @@ -105,7 +134,7 @@ struct dm_transaction_manager {
>>>  static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
>>>  {
>>>  	int r = 0;
>>> -	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
>>> +	unsigned int bucket = dm_hash_block(b, tm->hash_mask);
>>>  	struct shadow_info *si;
>>>  
>>>  	spin_lock(&tm->lock);
>>> @@ -131,7 +160,7 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
>>>  	si = kmalloc(sizeof(*si), GFP_NOIO);
>>>  	if (si) {
>>>  		si->where = b;
>>> -		bucket = dm_hash_block(b, DM_HASH_MASK);
>>> +		bucket = dm_hash_block(b, tm->hash_mask);
>>>  		spin_lock(&tm->lock);
>>>  		hlist_add_head(&si->hlist, tm->buckets + bucket);
>>>  		spin_unlock(&tm->lock);
>>> @@ -146,7 +175,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
>>>  	int i;
>>>  
>>>  	spin_lock(&tm->lock);
>>> -	for (i = 0; i < DM_HASH_SIZE; i++) {
>>> +	for (i = 0; i < tm->hash_size; i++) {
>>>  		bucket = tm->buckets + i;
>>>  		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
>>>  			kfree(si);
>>> @@ -169,13 +198,19 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
>>>  	if (!tm)
>>>  		return ERR_PTR(-ENOMEM);
>>>  
>>> +	tm->hash_size = tm_hash_table_size;
>>> +	tm->buckets = kmalloc_array(tm->hash_size, sizeof(*tm->buckets), GFP_KERNEL);
>>> +	if (!tm->buckets)
>>> +		return ERR_PTR(-ENOMEM);
>>> +
>>>  	tm->is_clone = 0;
>>>  	tm->real = NULL;
>>>  	tm->bm = bm;
>>>  	tm->sm = sm;
>>> +	tm->hash_mask = tm->hash_size - 1;
>>>  
>>>  	spin_lock_init(&tm->lock);
>>> -	for (i = 0; i < DM_HASH_SIZE; i++)
>>> +	for (i = 0; i < tm->hash_size; i++)
>>>  		INIT_HLIST_HEAD(tm->buckets + i);
>>>  
>>>  	prefetch_init(&tm->prefetches);
>>
>> Hi all,
>>
>> I wanted to follow up on this patch. 
>> Could you please review it when you have a chance?
>> If you need any additional information or have concerns, let me know.
>> Thanks for your time.
> 
> Hi
> 
> I discussed it with Joe. We concluded that the patch is not much useful, 
> because few users will know about this option and few users will change it 
> if they hit bad performance.
> 
> Joe will be reworking the code so that it will use rb-trees instead of 
> linked lists when there will be hash collision, this could improve 
> performance without adding a tunable variable.
> 
> Mikulas
> 

First of all, thanks for the response.
I understand that probably not too many users will use the module_param,
but it has a significant performance impact in our use case.
Any general idea of when the rb-trees solution will be submitted?
diff mbox series

Patch

diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c7ba4e6cbbc7..8d486b1e6693 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -10,6 +10,7 @@ 
 #include "dm-space-map-metadata.h"
 #include "dm-persistent-data-internal.h"
 
+#include <linux/moduleparam.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/hash.h>
@@ -84,8 +85,35 @@  struct shadow_info {
 /*
  * It would be nice if we scaled with the size of transaction.
  */
-#define DM_HASH_SIZE 256
-#define DM_HASH_MASK (DM_HASH_SIZE - 1)
+static uint tm_hash_table_size = 256;
+
+static int param_set_hash_size(const char *val, const struct kernel_param *kp)
+{
+	unsigned int num;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = kstrtouint(val, 0, &num);
+	if (ret)
+		return ret;
+
+	/* Hash size must be a power of 2 */
+	if (!(num && !(num & (num - 1))))
+		return -EINVAL;
+
+	*((unsigned int *)kp->arg) = num;
+	return 0;
+}
+
+static const struct kernel_param_ops tm_hash_table_size_ops = {
+	.set = param_set_hash_size,
+	.get = param_get_uint
+};
+
+module_param_cb(tm_hash_table_size, &tm_hash_table_size_ops, &tm_hash_table_size, 0644);
+MODULE_PARM_DESC(tm_hash_table_size, "transaction manager hash size");
 
 struct dm_transaction_manager {
 	int is_clone;
@@ -95,8 +123,9 @@  struct dm_transaction_manager {
 	struct dm_space_map *sm;
 
 	spinlock_t lock;
-	struct hlist_head buckets[DM_HASH_SIZE];
-
+	struct hlist_head *buckets;
+	uint hash_size;
+	uint hash_mask;
 	struct prefetch_set prefetches;
 };
 
@@ -105,7 +134,7 @@  struct dm_transaction_manager {
 static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	int r = 0;
-	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
+	unsigned int bucket = dm_hash_block(b, tm->hash_mask);
 	struct shadow_info *si;
 
 	spin_lock(&tm->lock);
@@ -131,7 +160,7 @@  static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 	si = kmalloc(sizeof(*si), GFP_NOIO);
 	if (si) {
 		si->where = b;
-		bucket = dm_hash_block(b, DM_HASH_MASK);
+		bucket = dm_hash_block(b, tm->hash_mask);
 		spin_lock(&tm->lock);
 		hlist_add_head(&si->hlist, tm->buckets + bucket);
 		spin_unlock(&tm->lock);
@@ -146,7 +175,7 @@  static void wipe_shadow_table(struct dm_transaction_manager *tm)
 	int i;
 
 	spin_lock(&tm->lock);
-	for (i = 0; i < DM_HASH_SIZE; i++) {
+	for (i = 0; i < tm->hash_size; i++) {
 		bucket = tm->buckets + i;
 		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
 			kfree(si);
@@ -169,13 +198,19 @@  static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 	if (!tm)
 		return ERR_PTR(-ENOMEM);
 
+	tm->hash_size = tm_hash_table_size;
+	tm->buckets = kmalloc_array(tm->hash_size, sizeof(*tm->buckets), GFP_KERNEL);
+	if (!tm->buckets)
+		return ERR_PTR(-ENOMEM);
+
 	tm->is_clone = 0;
 	tm->real = NULL;
 	tm->bm = bm;
 	tm->sm = sm;
+	tm->hash_mask = tm->hash_size - 1;
 
 	spin_lock_init(&tm->lock);
-	for (i = 0; i < DM_HASH_SIZE; i++)
+	for (i = 0; i < tm->hash_size; i++)
 		INIT_HLIST_HEAD(tm->buckets + i);
 
 	prefetch_init(&tm->prefetches);