diff mbox series

accel/habanalabs: Make use of rhashtable

Message ID 20230426092813.44635-1-cai.huoqing@linux.dev (mailing list archive)
State New, archived
Headers show
Series accel/habanalabs: Make use of rhashtable | expand

Commit Message

Cai Huoqing April 26, 2023, 9:28 a.m. UTC
Using rhashtable to accelerate the search for userptr by address,
instead of using a list.

Preferably, the lookup complexity of a hash table is O(1).

This patch will speedup the method
hl_userptr_is_pinned by rhashtable_lookup_fast.

Signed-off-by: Cai Huoqing <cai.huoqing@linux.dev>
---
 .../habanalabs/common/command_submission.c    | 16 ++++++---
 drivers/accel/habanalabs/common/habanalabs.h  | 19 +++++-----
 drivers/accel/habanalabs/common/memory.c      | 35 +++++++++++++------
 drivers/accel/habanalabs/gaudi/gaudi.c        | 16 +++++----
 drivers/accel/habanalabs/goya/goya.c          | 14 +++++---
 5 files changed, 66 insertions(+), 34 deletions(-)

Comments

Cai Huoqing April 26, 2023, 11:22 a.m. UTC | #1
On 26 4月 23 17:28:02, Cai Huoqing wrote:
> Using rhashtable to accelerate the search for userptr by address,
> instead of using a list.
> 
> Preferably, the lookup complexity of a hash table is O(1).
> 
> This patch will speedup the method
> hl_userptr_is_pinned by rhashtable_lookup_fast.
> 
> Signed-off-by: Cai Huoqing <cai.huoqing@linux.dev>
> ---
>  .../habanalabs/common/command_submission.c    | 16 ++++++---
>  drivers/accel/habanalabs/common/habanalabs.h  | 19 +++++-----
>  drivers/accel/habanalabs/common/memory.c      | 35 +++++++++++++------
>  drivers/accel/habanalabs/gaudi/gaudi.c        | 16 +++++----
>  drivers/accel/habanalabs/goya/goya.c          | 14 +++++---
>  5 files changed, 66 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
> index af9d2e22c6e7..35c2ab934396 100644
> --- a/drivers/accel/habanalabs/common/command_submission.c
> +++ b/drivers/accel/habanalabs/common/command_submission.c
> @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
>  	parser.job_id = job->id;
>  
>  	parser.hw_queue_id = job->hw_queue_id;
> -	parser.job_userptr_list = &job->userptr_list;
> +	parser.job_userptr_ht = &job->userptr_ht;
>  	parser.patched_cb = NULL;
>  	parser.user_cb = job->user_cb;
>  	parser.user_cb_size = job->user_cb_size;
> @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
>  	struct hl_cs *cs = job->cs;
>  
>  	if (is_cb_patched(hdev, job)) {
> -		hl_userptr_delete_list(hdev, &job->userptr_list);
> +		hl_userptr_delete_list(hdev, &job->userptr_ht);
>  
>  		/*
>  		 * We might arrive here from rollback and patched CB wasn't
> @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
>  		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
>  {
>  	struct hl_cs_job *job;
> +	int rc;
>  
>  	job = kzalloc(sizeof(*job), GFP_ATOMIC);
>  	if (!job)
> @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
>  	job->queue_type = queue_type;
>  	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
>  
> -	if (is_cb_patched(hdev, job))
> -		INIT_LIST_HEAD(&job->userptr_list);
> +	if (is_cb_patched(hdev, job)) {
> +		rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
> +		if (rc)
> +			goto free_job;
> +	}
>  
>  	if (job->queue_type == QUEUE_TYPE_EXT)
>  		INIT_WORK(&job->finish_work, job_wq_completion);
>  
>  	return job;
> +
> +free_job:
> +	kfree(job);
> +	return NULL;
>  }
>  
>  static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
> diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
> index eaae69a9f817..9c876d1480d2 100644
> --- a/drivers/accel/habanalabs/common/habanalabs.h
> +++ b/drivers/accel/habanalabs/common/habanalabs.h
> @@ -19,6 +19,7 @@
>  #include <linux/dma-direction.h>
>  #include <linux/scatterlist.h>
>  #include <linux/hashtable.h>
> +#include <linux/rhashtable.h>
>  #include <linux/debugfs.h>
>  #include <linux/rwsem.h>
>  #include <linux/eventfd.h>
> @@ -540,6 +541,8 @@ struct hl_hints_range {
>  	u64 end_addr;
>  };
>  
> +extern const struct rhashtable_params hl_userptr_rht_params;
> +
>  /**
>   * struct asic_fixed_properties - ASIC specific immutable properties.
>   * @hw_queues_props: H/W queues properties.
> @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
>  /**
>   * struct hl_userptr - memory mapping chunk information
>   * @vm_type: type of the VM.
> - * @job_node: linked-list node for hanging the object on the Job's list.
> + * @job_node: hashtable node for hanging the object on the Job's list.
>   * @pages: pointer to struct page array
>   * @npages: size of @pages array
>   * @sgt: pointer to the scatter-gather table that holds the pages.
> @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
>   */
>  struct hl_userptr {
>  	enum vm_type		vm_type; /* must be first */
> -	struct list_head	job_node;
> +	struct rhash_head	job_node;
>  	struct page		**pages;
>  	unsigned int		npages;
>  	struct sg_table		*sgt;
> @@ -2028,7 +2031,7 @@ struct hl_cs {
>   * @patched_cb: in case of patching, this is internal CB which is submitted on
>   *		the queue instead of the CB we got from the IOCTL.
>   * @finish_work: workqueue object to run when job is completed.
> - * @userptr_list: linked-list of userptr mappings that belong to this job and
> + * @userptr_ht: hashtable of userptr mappings that belong to this job and
>   *			wait for completion.
>   * @debugfs_list: node in debugfs list of command submission jobs.
>   * @refcount: reference counter for usage of the CS job.
> @@ -2056,7 +2059,7 @@ struct hl_cs_job {
>  	struct hl_cb		*user_cb;
>  	struct hl_cb		*patched_cb;
>  	struct work_struct	finish_work;
> -	struct list_head	userptr_list;
> +	struct rhashtable	userptr_ht;
>  	struct list_head	debugfs_list;
>  	struct kref		refcount;
>  	enum hl_queue_type	queue_type;
> @@ -2075,7 +2078,7 @@ struct hl_cs_job {
>   * @user_cb: the CB we got from the user.
>   * @patched_cb: in case of patching, this is internal CB which is submitted on
>   *		the queue instead of the CB we got from the IOCTL.
> - * @job_userptr_list: linked-list of userptr mappings that belong to the related
> + * @job_userptr_ht: hashtable of userptr mappings that belong to the related
>   *			job and wait for completion.
>   * @cs_sequence: the sequence number of the related CS.
>   * @queue_type: the type of the H/W queue this job is submitted to.
> @@ -2098,7 +2101,7 @@ struct hl_cs_job {
>  struct hl_cs_parser {
>  	struct hl_cb		*user_cb;
>  	struct hl_cb		*patched_cb;
> -	struct list_head	*job_userptr_list;
> +	struct rhashtable	*job_userptr_ht;
>  	u64			cs_sequence;
>  	enum hl_queue_type	queue_type;
>  	u32			ctx_id;
> @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
>  			struct hl_userptr *userptr);
>  void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
>  void hl_userptr_delete_list(struct hl_device *hdev,
> -				struct list_head *userptr_list);
> +				struct rhashtable *userptr_ht);
>  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
> -				struct list_head *userptr_list,
> +				struct rhashtable *userptr_ht,
>  				struct hl_userptr **userptr);
>  
>  int hl_mmu_init(struct hl_device *hdev);
> diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
> index a7b6a273ce21..e5e7912b3b34 100644
> --- a/drivers/accel/habanalabs/common/memory.c
> +++ b/drivers/accel/habanalabs/common/memory.c
> @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
>  
>  #define MEM_HANDLE_INVALID	ULONG_MAX
>  
> +const struct rhashtable_params hl_userptr_rht_params = {
> +	.head_offset = offsetof(struct hl_userptr, job_node),
> +	.key_offset = offsetof(struct hl_userptr, addr),
> +	.key_len = sizeof(u64),
> +	.automatic_shrinking = true,
> +};
> +
>  static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
>  			struct hl_mem_in *args, u64 *handle);
>  
> @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
>  	userptr->size = size;
>  	userptr->addr = addr;
>  	userptr->dma_mapped = false;
> -	INIT_LIST_HEAD(&userptr->job_node);
>  
>  	rc = get_user_memory(hdev, addr, size, npages, start, offset,
>  				userptr);
> @@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
>  	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
>  	kvfree(userptr->pages);
>  
> -	list_del(&userptr->job_node);
> -
>  	sg_free_table(userptr->sgt);
>  	kfree(userptr->sgt);
>  }
> @@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
>  /**
>   * hl_userptr_delete_list() - clear userptr list.
>   * @hdev: pointer to the habanalabs device structure.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
>   *
>   * This function does the following:
>   * - Iterates over the list and unpins the host memory and frees the userptr
>   *   structure.
>   */
>  void hl_userptr_delete_list(struct hl_device *hdev,
> -				struct list_head *userptr_list)
> +				struct rhashtable *userptr_ht)
>  {
> -	struct hl_userptr *userptr, *tmp;
> +	struct hl_userptr *userptr;
> +	struct rhashtable_iter hti;
> +	struct rhash_head *pos;
>  
> -	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
> +	rhashtable_walk_enter(userptr_ht, &hti);
> +	rhashtable_walk_start(&hti);
> +	while ((pos = rhashtable_walk_next(&hti))) {

rhashtable_walk_next seems not stable,
will revert here, keep 'userptr_list' to do clear by list_for_each.
And send the v2 patch

Cai-
Thanks
> +		if (PTR_ERR(pos) == -EAGAIN)
> +			continue;
> +		rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
> +		userptr = rhashtable_walk_peek(&hti);
>  		hl_unpin_host_memory(hdev, userptr);
>  		kfree(userptr);
>  	}
>  
> -	INIT_LIST_HEAD(userptr_list);
> +	rhashtable_destroy(userptr_ht);
>  }
>  
>  /**
> @@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
>   * @hdev: pointer to the habanalabs device structure.
>   * @addr: user address to check.
>   * @size: user block size to check.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
>   * @userptr: pointer to userptr to check.
>   *
>   * This function does the following:
> @@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
>   *   pinned. If so, returns true, otherwise returns false.
>   */
>  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
> -				u32 size, struct list_head *userptr_list,
> +				u32 size, struct rhashtable *userptr_ht,
>  				struct hl_userptr **userptr)
>  {
> -	list_for_each_entry((*userptr), userptr_list, job_node) {
> +	(*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
> +	if (*userptr) {
>  		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
>  			return true;
>  	}
> diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
> index a29aa8f7b6f3..1e1433042413 100644
> --- a/drivers/accel/habanalabs/gaudi/gaudi.c
> +++ b/drivers/accel/habanalabs/gaudi/gaudi.c
> @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
>  	}
>  
>  free_job:
> -	hl_userptr_delete_list(hdev, &job->userptr_list);
> +	hl_userptr_delete_list(hdev, &job->userptr_ht);
>  	hl_debugfs_remove_job(hdev, job);
>  	kfree(job);
>  	atomic_dec(&cb->cs_cnt);
> @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	int rc;
>  
>  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr))
> +			parser->job_userptr_ht, &userptr))
>  		goto already_pinned;
>  
>  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	if (rc)
>  		goto free_userptr;
>  
> -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> +				    &userptr->job_node, hl_userptr_rht_params);
> +	if (rc)
> +		goto unpin_memory;
>  
>  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
>  	if (rc) {
> @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	return 0;
>  
>  unpin_memory:
> -	list_del(&userptr->job_node);
> +	rhashtable_remove_fast(parser->job_userptr_ht,
> +			       &userptr->job_node, hl_userptr_rht_params);
>  	hl_unpin_host_memory(hdev, userptr);
>  free_userptr:
>  	kfree(userptr);
> @@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
>  	if ((!skip_host_mem_pin) &&
>  		(!hl_userptr_is_pinned(hdev, addr,
>  					le32_to_cpu(user_dma_pkt->tsize),
> -					parser->job_userptr_list, &userptr))) {
> +					parser->job_userptr_ht, &userptr))) {
>  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
>  				addr, user_dma_pkt->tsize);
>  		return -EFAULT;
> @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
>  
>  free_userptr:
>  	if (rc)
> -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
>  	return rc;
>  }
>  
> diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
> index fb0ac9df841a..bfcbb9e8b126 100644
> --- a/drivers/accel/habanalabs/goya/goya.c
> +++ b/drivers/accel/habanalabs/goya/goya.c
> @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	int rc;
>  
>  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr))
> +			parser->job_userptr_ht, &userptr))
>  		goto already_pinned;
>  
>  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	if (rc)
>  		goto free_userptr;
>  
> -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> +				    &userptr->job_node, hl_userptr_rht_params);
> +	if (rc)
> +		goto unpin_memory;
>  
>  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
>  	if (rc) {
> @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	return 0;
>  
>  unpin_memory:
> -	list_del(&userptr->job_node);
> +	rhashtable_remove_fast(parser->job_userptr_ht,
> +			       &userptr->job_node, hl_userptr_rht_params);
>  	hl_unpin_host_memory(hdev, userptr);
>  free_userptr:
>  	kfree(userptr);
> @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
>  	if ((!skip_host_mem_pin) &&
>  		(hl_userptr_is_pinned(hdev, addr,
>  			le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr) == false)) {
> +			parser->job_userptr_ht, &userptr) == false)) {
>  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
>  				addr, user_dma_pkt->tsize);
>  		return -EFAULT;
> @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
>  
>  free_userptr:
>  	if (rc)
> -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
>  	return rc;
>  }
>  
> -- 
> 2.34.1
>
Cai Huoqing April 28, 2023, 3 p.m. UTC | #2
On 26 4月 23 19:22:43, Cai Huoqing wrote:
> On 26 4月 23 17:28:02, Cai Huoqing wrote:
> > Using rhashtable to accelerate the search for userptr by address,
> > instead of using a list.
> > 
> > Preferably, the lookup complexity of a hash table is O(1).
> > 
> > This patch will speedup the method
> > hl_userptr_is_pinned by rhashtable_lookup_fast.
> > 
> > Signed-off-by: Cai Huoqing <cai.huoqing@linux.dev>
> > ---
> >  .../habanalabs/common/command_submission.c    | 16 ++++++---
> >  drivers/accel/habanalabs/common/habanalabs.h  | 19 +++++-----
> >  drivers/accel/habanalabs/common/memory.c      | 35 +++++++++++++------
> >  drivers/accel/habanalabs/gaudi/gaudi.c        | 16 +++++----
> >  drivers/accel/habanalabs/goya/goya.c          | 14 +++++---
> >  5 files changed, 66 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
> > index af9d2e22c6e7..35c2ab934396 100644
> > --- a/drivers/accel/habanalabs/common/command_submission.c
> > +++ b/drivers/accel/habanalabs/common/command_submission.c
> > @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
> >  	parser.job_id = job->id;
> >  
> >  	parser.hw_queue_id = job->hw_queue_id;
> > -	parser.job_userptr_list = &job->userptr_list;
> > +	parser.job_userptr_ht = &job->userptr_ht;
> >  	parser.patched_cb = NULL;
> >  	parser.user_cb = job->user_cb;
> >  	parser.user_cb_size = job->user_cb_size;
> > @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
> >  	struct hl_cs *cs = job->cs;
> >  
> >  	if (is_cb_patched(hdev, job)) {
> > -		hl_userptr_delete_list(hdev, &job->userptr_list);
> > +		hl_userptr_delete_list(hdev, &job->userptr_ht);
> >  
> >  		/*
> >  		 * We might arrive here from rollback and patched CB wasn't
> > @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> >  		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
> >  {
> >  	struct hl_cs_job *job;
> > +	int rc;
> >  
> >  	job = kzalloc(sizeof(*job), GFP_ATOMIC);
> >  	if (!job)
> > @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
> >  	job->queue_type = queue_type;
> >  	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
> >  
> > -	if (is_cb_patched(hdev, job))
> > -		INIT_LIST_HEAD(&job->userptr_list);
> > +	if (is_cb_patched(hdev, job)) {
> > +		rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
> > +		if (rc)
> > +			goto free_job;
> > +	}
> >  
> >  	if (job->queue_type == QUEUE_TYPE_EXT)
> >  		INIT_WORK(&job->finish_work, job_wq_completion);
> >  
> >  	return job;
> > +
> > +free_job:
> > +	kfree(job);
> > +	return NULL;
> >  }
> >  
> >  static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
> > diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
> > index eaae69a9f817..9c876d1480d2 100644
> > --- a/drivers/accel/habanalabs/common/habanalabs.h
> > +++ b/drivers/accel/habanalabs/common/habanalabs.h
> > @@ -19,6 +19,7 @@
> >  #include <linux/dma-direction.h>
> >  #include <linux/scatterlist.h>
> >  #include <linux/hashtable.h>
> > +#include <linux/rhashtable.h>
> >  #include <linux/debugfs.h>
> >  #include <linux/rwsem.h>
> >  #include <linux/eventfd.h>
> > @@ -540,6 +541,8 @@ struct hl_hints_range {
> >  	u64 end_addr;
> >  };
> >  
> > +extern const struct rhashtable_params hl_userptr_rht_params;
> > +
> >  /**
> >   * struct asic_fixed_properties - ASIC specific immutable properties.
> >   * @hw_queues_props: H/W queues properties.
> > @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
> >  /**
> >   * struct hl_userptr - memory mapping chunk information
> >   * @vm_type: type of the VM.
> > - * @job_node: linked-list node for hanging the object on the Job's list.
> > + * @job_node: hashtable node for hanging the object on the Job's list.
> >   * @pages: pointer to struct page array
> >   * @npages: size of @pages array
> >   * @sgt: pointer to the scatter-gather table that holds the pages.
> > @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
> >   */
> >  struct hl_userptr {
> >  	enum vm_type		vm_type; /* must be first */
> > -	struct list_head	job_node;
> > +	struct rhash_head	job_node;
> >  	struct page		**pages;
> >  	unsigned int		npages;
> >  	struct sg_table		*sgt;
> > @@ -2028,7 +2031,7 @@ struct hl_cs {
> >   * @patched_cb: in case of patching, this is internal CB which is submitted on
> >   *		the queue instead of the CB we got from the IOCTL.
> >   * @finish_work: workqueue object to run when job is completed.
> > - * @userptr_list: linked-list of userptr mappings that belong to this job and
> > + * @userptr_ht: hashtable of userptr mappings that belong to this job and
> >   *			wait for completion.
> >   * @debugfs_list: node in debugfs list of command submission jobs.
> >   * @refcount: reference counter for usage of the CS job.
> > @@ -2056,7 +2059,7 @@ struct hl_cs_job {
> >  	struct hl_cb		*user_cb;
> >  	struct hl_cb		*patched_cb;
> >  	struct work_struct	finish_work;
> > -	struct list_head	userptr_list;
> > +	struct rhashtable	userptr_ht;
> >  	struct list_head	debugfs_list;
> >  	struct kref		refcount;
> >  	enum hl_queue_type	queue_type;
> > @@ -2075,7 +2078,7 @@ struct hl_cs_job {
> >   * @user_cb: the CB we got from the user.
> >   * @patched_cb: in case of patching, this is internal CB which is submitted on
> >   *		the queue instead of the CB we got from the IOCTL.
> > - * @job_userptr_list: linked-list of userptr mappings that belong to the related
> > + * @job_userptr_ht: hashtable of userptr mappings that belong to the related
> >   *			job and wait for completion.
> >   * @cs_sequence: the sequence number of the related CS.
> >   * @queue_type: the type of the H/W queue this job is submitted to.
> > @@ -2098,7 +2101,7 @@ struct hl_cs_job {
> >  struct hl_cs_parser {
> >  	struct hl_cb		*user_cb;
> >  	struct hl_cb		*patched_cb;
> > -	struct list_head	*job_userptr_list;
> > +	struct rhashtable	*job_userptr_ht;
> >  	u64			cs_sequence;
> >  	enum hl_queue_type	queue_type;
> >  	u32			ctx_id;
> > @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> >  			struct hl_userptr *userptr);
> >  void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
> >  void hl_userptr_delete_list(struct hl_device *hdev,
> > -				struct list_head *userptr_list);
> > +				struct rhashtable *userptr_ht);
> >  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
> > -				struct list_head *userptr_list,
> > +				struct rhashtable *userptr_ht,
> >  				struct hl_userptr **userptr);
> >  
> >  int hl_mmu_init(struct hl_device *hdev);
> > diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
> > index a7b6a273ce21..e5e7912b3b34 100644
> > --- a/drivers/accel/habanalabs/common/memory.c
> > +++ b/drivers/accel/habanalabs/common/memory.c
> > @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
> >  
> >  #define MEM_HANDLE_INVALID	ULONG_MAX
> >  
> > +const struct rhashtable_params hl_userptr_rht_params = {
> > +	.head_offset = offsetof(struct hl_userptr, job_node),
> > +	.key_offset = offsetof(struct hl_userptr, addr),
> > +	.key_len = sizeof(u64),
> > +	.automatic_shrinking = true,
> > +};
> > +
> >  static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
> >  			struct hl_mem_in *args, u64 *handle);
> >  
> > @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
> >  	userptr->size = size;
> >  	userptr->addr = addr;
> >  	userptr->dma_mapped = false;
> > -	INIT_LIST_HEAD(&userptr->job_node);
> >  
> >  	rc = get_user_memory(hdev, addr, size, npages, start, offset,
> >  				userptr);
> > @@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> >  	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
> >  	kvfree(userptr->pages);
> >  
> > -	list_del(&userptr->job_node);
> > -
> >  	sg_free_table(userptr->sgt);
> >  	kfree(userptr->sgt);
> >  }
> > @@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
> >  /**
> >   * hl_userptr_delete_list() - clear userptr list.
> >   * @hdev: pointer to the habanalabs device structure.
> > - * @userptr_list: pointer to the list to clear.
> > + * @userptr_ht: pointer to the hashtable to clear.
> >   *
> >   * This function does the following:
> >   * - Iterates over the list and unpins the host memory and frees the userptr
> >   *   structure.
> >   */
> >  void hl_userptr_delete_list(struct hl_device *hdev,
> > -				struct list_head *userptr_list)
> > +				struct rhashtable *userptr_ht)
> >  {
> > -	struct hl_userptr *userptr, *tmp;
> > +	struct hl_userptr *userptr;
> > +	struct rhashtable_iter hti;
> > +	struct rhash_head *pos;
> >  
> > -	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
> > +	rhashtable_walk_enter(userptr_ht, &hti);
> > +	rhashtable_walk_start(&hti);
> > +	while ((pos = rhashtable_walk_next(&hti))) {
> 
> rhashtable_walk_next seems not stable,
> will revert here, keep 'userptr_list' to do clear by list_for_each.
> And send the v2 patch
> 
> Cai-
> Thanks
rhashtable_free_and_destroy can be used here

I have sent v2 patch

https://lore.kernel.org/lkml/20230428144903.26048-1-cai.huoqing@linux.dev/

Thanks,
Cai-

> > +		if (PTR_ERR(pos) == -EAGAIN)
> > +			continue;
> > +		rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
> > +		userptr = rhashtable_walk_peek(&hti);
> >  		hl_unpin_host_memory(hdev, userptr);
> >  		kfree(userptr);
> >  	}
> >  
> > -	INIT_LIST_HEAD(userptr_list);
> > +	rhashtable_destroy(userptr_ht);
> >  }
> >  
> >  /**
> > @@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> >   * @hdev: pointer to the habanalabs device structure.
> >   * @addr: user address to check.
> >   * @size: user block size to check.
> > - * @userptr_list: pointer to the list to clear.
> > + * @userptr_ht: pointer to the hashtable to clear.
> >   * @userptr: pointer to userptr to check.
> >   *
> >   * This function does the following:
> > @@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
> >   *   pinned. If so, returns true, otherwise returns false.
> >   */
> >  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
> > -				u32 size, struct list_head *userptr_list,
> > +				u32 size, struct rhashtable *userptr_ht,
> >  				struct hl_userptr **userptr)
> >  {
> > -	list_for_each_entry((*userptr), userptr_list, job_node) {
> > +	(*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
> > +	if (*userptr) {
> >  		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
> >  			return true;
> >  	}
> > diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
> > index a29aa8f7b6f3..1e1433042413 100644
> > --- a/drivers/accel/habanalabs/gaudi/gaudi.c
> > +++ b/drivers/accel/habanalabs/gaudi/gaudi.c
> > @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
> >  	}
> >  
> >  free_job:
> > -	hl_userptr_delete_list(hdev, &job->userptr_list);
> > +	hl_userptr_delete_list(hdev, &job->userptr_ht);
> >  	hl_debugfs_remove_job(hdev, job);
> >  	kfree(job);
> >  	atomic_dec(&cb->cs_cnt);
> > @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> >  	int rc;
> >  
> >  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> > -			parser->job_userptr_list, &userptr))
> > +			parser->job_userptr_ht, &userptr))
> >  		goto already_pinned;
> >  
> >  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> > @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> >  	if (rc)
> >  		goto free_userptr;
> >  
> > -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> > +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> > +				    &userptr->job_node, hl_userptr_rht_params);
> > +	if (rc)
> > +		goto unpin_memory;
> >  
> >  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> >  	if (rc) {
> > @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
> >  	return 0;
> >  
> >  unpin_memory:
> > -	list_del(&userptr->job_node);
> > +	rhashtable_remove_fast(parser->job_userptr_ht,
> > +			       &userptr->job_node, hl_userptr_rht_params);
> >  	hl_unpin_host_memory(hdev, userptr);
> >  free_userptr:
> >  	kfree(userptr);
> > @@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
> >  	if ((!skip_host_mem_pin) &&
> >  		(!hl_userptr_is_pinned(hdev, addr,
> >  					le32_to_cpu(user_dma_pkt->tsize),
> > -					parser->job_userptr_list, &userptr))) {
> > +					parser->job_userptr_ht, &userptr))) {
> >  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> >  				addr, user_dma_pkt->tsize);
> >  		return -EFAULT;
> > @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
> >  
> >  free_userptr:
> >  	if (rc)
> > -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> > +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> >  	return rc;
> >  }
> >  
> > diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
> > index fb0ac9df841a..bfcbb9e8b126 100644
> > --- a/drivers/accel/habanalabs/goya/goya.c
> > +++ b/drivers/accel/habanalabs/goya/goya.c
> > @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> >  	int rc;
> >  
> >  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> > -			parser->job_userptr_list, &userptr))
> > +			parser->job_userptr_ht, &userptr))
> >  		goto already_pinned;
> >  
> >  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> > @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> >  	if (rc)
> >  		goto free_userptr;
> >  
> > -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> > +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> > +				    &userptr->job_node, hl_userptr_rht_params);
> > +	if (rc)
> > +		goto unpin_memory;
> >  
> >  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
> >  	if (rc) {
> > @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
> >  	return 0;
> >  
> >  unpin_memory:
> > -	list_del(&userptr->job_node);
> > +	rhashtable_remove_fast(parser->job_userptr_ht,
> > +			       &userptr->job_node, hl_userptr_rht_params);
> >  	hl_unpin_host_memory(hdev, userptr);
> >  free_userptr:
> >  	kfree(userptr);
> > @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
> >  	if ((!skip_host_mem_pin) &&
> >  		(hl_userptr_is_pinned(hdev, addr,
> >  			le32_to_cpu(user_dma_pkt->tsize),
> > -			parser->job_userptr_list, &userptr) == false)) {
> > +			parser->job_userptr_ht, &userptr) == false)) {
> >  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
> >  				addr, user_dma_pkt->tsize);
> >  		return -EFAULT;
> > @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
> >  
> >  free_userptr:
> >  	if (rc)
> > -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> > +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
> >  	return rc;
> >  }
> >  
> > -- 
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index af9d2e22c6e7..35c2ab934396 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -312,7 +312,7 @@  static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	parser.job_id = job->id;
 
 	parser.hw_queue_id = job->hw_queue_id;
-	parser.job_userptr_list = &job->userptr_list;
+	parser.job_userptr_ht = &job->userptr_ht;
 	parser.patched_cb = NULL;
 	parser.user_cb = job->user_cb;
 	parser.user_cb_size = job->user_cb_size;
@@ -351,7 +351,7 @@  static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 	struct hl_cs *cs = job->cs;
 
 	if (is_cb_patched(hdev, job)) {
-		hl_userptr_delete_list(hdev, &job->userptr_list);
+		hl_userptr_delete_list(hdev, &job->userptr_ht);
 
 		/*
 		 * We might arrive here from rollback and patched CB wasn't
@@ -1284,6 +1284,7 @@  struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 {
 	struct hl_cs_job *job;
+	int rc;
 
 	job = kzalloc(sizeof(*job), GFP_ATOMIC);
 	if (!job)
@@ -1296,13 +1297,20 @@  struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 	job->queue_type = queue_type;
 	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 
-	if (is_cb_patched(hdev, job))
-		INIT_LIST_HEAD(&job->userptr_list);
+	if (is_cb_patched(hdev, job)) {
+		rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
+		if (rc)
+			goto free_job;
+	}
 
 	if (job->queue_type == QUEUE_TYPE_EXT)
 		INIT_WORK(&job->finish_work, job_wq_completion);
 
 	return job;
+
+free_job:
+	kfree(job);
+	return NULL;
 }
 
 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index eaae69a9f817..9c876d1480d2 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -19,6 +19,7 @@ 
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
+#include <linux/rhashtable.h>
 #include <linux/debugfs.h>
 #include <linux/rwsem.h>
 #include <linux/eventfd.h>
@@ -540,6 +541,8 @@  struct hl_hints_range {
 	u64 end_addr;
 };
 
+extern const struct rhashtable_params hl_userptr_rht_params;
+
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
@@ -1915,7 +1918,7 @@  struct hl_ctx_mgr {
 /**
  * struct hl_userptr - memory mapping chunk information
  * @vm_type: type of the VM.
- * @job_node: linked-list node for hanging the object on the Job's list.
+ * @job_node: hashtable node for hanging the object on the Job's list.
  * @pages: pointer to struct page array
  * @npages: size of @pages array
  * @sgt: pointer to the scatter-gather table that holds the pages.
@@ -1928,7 +1931,7 @@  struct hl_ctx_mgr {
  */
 struct hl_userptr {
 	enum vm_type		vm_type; /* must be first */
-	struct list_head	job_node;
+	struct rhash_head	job_node;
 	struct page		**pages;
 	unsigned int		npages;
 	struct sg_table		*sgt;
@@ -2028,7 +2031,7 @@  struct hl_cs {
  * @patched_cb: in case of patching, this is internal CB which is submitted on
  *		the queue instead of the CB we got from the IOCTL.
  * @finish_work: workqueue object to run when job is completed.
- * @userptr_list: linked-list of userptr mappings that belong to this job and
+ * @userptr_ht: hashtable of userptr mappings that belong to this job and
  *			wait for completion.
  * @debugfs_list: node in debugfs list of command submission jobs.
  * @refcount: reference counter for usage of the CS job.
@@ -2056,7 +2059,7 @@  struct hl_cs_job {
 	struct hl_cb		*user_cb;
 	struct hl_cb		*patched_cb;
 	struct work_struct	finish_work;
-	struct list_head	userptr_list;
+	struct rhashtable	userptr_ht;
 	struct list_head	debugfs_list;
 	struct kref		refcount;
 	enum hl_queue_type	queue_type;
@@ -2075,7 +2078,7 @@  struct hl_cs_job {
  * @user_cb: the CB we got from the user.
  * @patched_cb: in case of patching, this is internal CB which is submitted on
  *		the queue instead of the CB we got from the IOCTL.
- * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ * @job_userptr_ht: hashtable of userptr mappings that belong to the related
  *			job and wait for completion.
  * @cs_sequence: the sequence number of the related CS.
  * @queue_type: the type of the H/W queue this job is submitted to.
@@ -2098,7 +2101,7 @@  struct hl_cs_job {
 struct hl_cs_parser {
 	struct hl_cb		*user_cb;
 	struct hl_cb		*patched_cb;
-	struct list_head	*job_userptr_list;
+	struct rhashtable	*job_userptr_ht;
 	u64			cs_sequence;
 	enum hl_queue_type	queue_type;
 	u32			ctx_id;
@@ -3760,9 +3763,9 @@  int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 			struct hl_userptr *userptr);
 void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
 void hl_userptr_delete_list(struct hl_device *hdev,
-				struct list_head *userptr_list);
+				struct rhashtable *userptr_ht);
 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
-				struct list_head *userptr_list,
+				struct rhashtable *userptr_ht,
 				struct hl_userptr **userptr);
 
 int hl_mmu_init(struct hl_device *hdev);
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index a7b6a273ce21..e5e7912b3b34 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -23,6 +23,13 @@  MODULE_IMPORT_NS(DMA_BUF);
 
 #define MEM_HANDLE_INVALID	ULONG_MAX
 
+const struct rhashtable_params hl_userptr_rht_params = {
+	.head_offset = offsetof(struct hl_userptr, job_node),
+	.key_offset = offsetof(struct hl_userptr, addr),
+	.key_len = sizeof(u64),
+	.automatic_shrinking = true,
+};
+
 static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
 			struct hl_mem_in *args, u64 *handle);
 
@@ -2483,7 +2490,6 @@  int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 	userptr->size = size;
 	userptr->addr = addr;
 	userptr->dma_mapped = false;
-	INIT_LIST_HEAD(&userptr->job_node);
 
 	rc = get_user_memory(hdev, addr, size, npages, start, offset,
 				userptr);
@@ -2522,8 +2528,6 @@  void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
 
-	list_del(&userptr->job_node);
-
 	sg_free_table(userptr->sgt);
 	kfree(userptr->sgt);
 }
@@ -2531,23 +2535,31 @@  void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 /**
  * hl_userptr_delete_list() - clear userptr list.
  * @hdev: pointer to the habanalabs device structure.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to clear.
  *
  * This function does the following:
  * - Iterates over the list and unpins the host memory and frees the userptr
  *   structure.
  */
 void hl_userptr_delete_list(struct hl_device *hdev,
-				struct list_head *userptr_list)
+				struct rhashtable *userptr_ht)
 {
-	struct hl_userptr *userptr, *tmp;
+	struct hl_userptr *userptr;
+	struct rhashtable_iter hti;
+	struct rhash_head *pos;
 
-	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
+	rhashtable_walk_enter(userptr_ht, &hti);
+	rhashtable_walk_start(&hti);
+	while ((pos = rhashtable_walk_next(&hti))) {
+		if (PTR_ERR(pos) == -EAGAIN)
+			continue;
+		rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
+		userptr = rhashtable_walk_peek(&hti);
 		hl_unpin_host_memory(hdev, userptr);
 		kfree(userptr);
 	}
 
-	INIT_LIST_HEAD(userptr_list);
+	rhashtable_destroy(userptr_ht);
 }
 
 /**
@@ -2555,7 +2567,7 @@  void hl_userptr_delete_list(struct hl_device *hdev,
  * @hdev: pointer to the habanalabs device structure.
  * @addr: user address to check.
  * @size: user block size to check.
- * @userptr_list: pointer to the list to clear.
+ * @userptr_ht: pointer to the hashtable to clear.
  * @userptr: pointer to userptr to check.
  *
  * This function does the following:
@@ -2563,10 +2575,11 @@  void hl_userptr_delete_list(struct hl_device *hdev,
  *   pinned. If so, returns true, otherwise returns false.
  */
 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
-				u32 size, struct list_head *userptr_list,
+				u32 size, struct rhashtable *userptr_ht,
 				struct hl_userptr **userptr)
 {
-	list_for_each_entry((*userptr), userptr_list, job_node) {
+	(*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
+	if (*userptr) {
 		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
 			return true;
 	}
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index a29aa8f7b6f3..1e1433042413 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -1031,7 +1031,7 @@  static int _gaudi_init_tpc_mem(struct hl_device *hdev,
 	}
 
 free_job:
-	hl_userptr_delete_list(hdev, &job->userptr_list);
+	hl_userptr_delete_list(hdev, &job->userptr_ht);
 	hl_debugfs_remove_job(hdev, job);
 	kfree(job);
 	atomic_dec(&cb->cs_cnt);
@@ -4901,7 +4901,7 @@  static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	int rc;
 
 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr))
+			parser->job_userptr_ht, &userptr))
 		goto already_pinned;
 
 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -4913,7 +4913,10 @@  static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	list_add_tail(&userptr->job_node, parser->job_userptr_list);
+	rc = rhashtable_insert_fast(parser->job_userptr_ht,
+				    &userptr->job_node, hl_userptr_rht_params);
+	if (rc)
+		goto unpin_memory;
 
 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
@@ -4931,7 +4934,8 @@  static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 	return 0;
 
 unpin_memory:
-	list_del(&userptr->job_node);
+	rhashtable_remove_fast(parser->job_userptr_ht,
+			       &userptr->job_node, hl_userptr_rht_params);
 	hl_unpin_host_memory(hdev, userptr);
 free_userptr:
 	kfree(userptr);
@@ -5175,7 +5179,7 @@  static int gaudi_patch_dma_packet(struct hl_device *hdev,
 	if ((!skip_host_mem_pin) &&
 		(!hl_userptr_is_pinned(hdev, addr,
 					le32_to_cpu(user_dma_pkt->tsize),
-					parser->job_userptr_list, &userptr))) {
+					parser->job_userptr_ht, &userptr))) {
 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
 				addr, user_dma_pkt->tsize);
 		return -EFAULT;
@@ -5472,7 +5476,7 @@  static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
 
 free_userptr:
 	if (rc)
-		hl_userptr_delete_list(hdev, parser->job_userptr_list);
+		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
 	return rc;
 }
 
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index fb0ac9df841a..bfcbb9e8b126 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -3347,7 +3347,7 @@  static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	int rc;
 
 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr))
+			parser->job_userptr_ht, &userptr))
 		goto already_pinned;
 
 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
@@ -3359,7 +3359,10 @@  static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	if (rc)
 		goto free_userptr;
 
-	list_add_tail(&userptr->job_node, parser->job_userptr_list);
+	rc = rhashtable_insert_fast(parser->job_userptr_ht,
+				    &userptr->job_node, hl_userptr_rht_params);
+	if (rc)
+		goto unpin_memory;
 
 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
@@ -3377,7 +3380,8 @@  static int goya_pin_memory_before_cs(struct hl_device *hdev,
 	return 0;
 
 unpin_memory:
-	list_del(&userptr->job_node);
+	rhashtable_remove_fast(parser->job_userptr_ht,
+			       &userptr->job_node, hl_userptr_rht_params);
 	hl_unpin_host_memory(hdev, userptr);
 free_userptr:
 	kfree(userptr);
@@ -3806,7 +3810,7 @@  static int goya_patch_dma_packet(struct hl_device *hdev,
 	if ((!skip_host_mem_pin) &&
 		(hl_userptr_is_pinned(hdev, addr,
 			le32_to_cpu(user_dma_pkt->tsize),
-			parser->job_userptr_list, &userptr) == false)) {
+			parser->job_userptr_ht, &userptr) == false)) {
 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
 				addr, user_dma_pkt->tsize);
 		return -EFAULT;
@@ -4104,7 +4108,7 @@  static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 
 free_userptr:
 	if (rc)
-		hl_userptr_delete_list(hdev, parser->job_userptr_list);
+		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
 	return rc;
 }