lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZEkJe5YZY9hCfP2j@chq-MS-7D45>
Date:   Wed, 26 Apr 2023 19:22:35 +0800
From:   Cai Huoqing <cai.huoqing@...ux.dev>
To:     Oded Gabbay <ogabbay@...nel.org>,
        Ohad Sharabi <osharabi@...ana.ai>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] accel/habanalabs: Make use of rhashtable

On 26 4月 23 17:28:02, Cai Huoqing wrote:
> Using rhashtable to accelerate the search for userptr by address,
> instead of using a list.
> 
> Ideally, the lookup complexity of a hash table is O(1).
> 
> This patch speeds up hl_userptr_is_pinned() by using
> rhashtable_lookup_fast().
> 
> Signed-off-by: Cai Huoqing <cai.huoqing@...ux.dev>
> ---
>  .../habanalabs/common/command_submission.c    | 16 ++++++---
>  drivers/accel/habanalabs/common/habanalabs.h  | 19 +++++-----
>  drivers/accel/habanalabs/common/memory.c      | 35 +++++++++++++------
>  drivers/accel/habanalabs/gaudi/gaudi.c        | 16 +++++----
>  drivers/accel/habanalabs/goya/goya.c          | 14 +++++---
>  5 files changed, 66 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
> index af9d2e22c6e7..35c2ab934396 100644
> --- a/drivers/accel/habanalabs/common/command_submission.c
> +++ b/drivers/accel/habanalabs/common/command_submission.c
> @@ -312,7 +312,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
>  	parser.job_id = job->id;
>  
>  	parser.hw_queue_id = job->hw_queue_id;
> -	parser.job_userptr_list = &job->userptr_list;
> +	parser.job_userptr_ht = &job->userptr_ht;
>  	parser.patched_cb = NULL;
>  	parser.user_cb = job->user_cb;
>  	parser.user_cb_size = job->user_cb_size;
> @@ -351,7 +351,7 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
>  	struct hl_cs *cs = job->cs;
>  
>  	if (is_cb_patched(hdev, job)) {
> -		hl_userptr_delete_list(hdev, &job->userptr_list);
> +		hl_userptr_delete_list(hdev, &job->userptr_ht);
>  
>  		/*
>  		 * We might arrive here from rollback and patched CB wasn't
> @@ -1284,6 +1284,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
>  		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
>  {
>  	struct hl_cs_job *job;
> +	int rc;
>  
>  	job = kzalloc(sizeof(*job), GFP_ATOMIC);
>  	if (!job)
> @@ -1296,13 +1297,20 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
>  	job->queue_type = queue_type;
>  	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
>  
> -	if (is_cb_patched(hdev, job))
> -		INIT_LIST_HEAD(&job->userptr_list);
> +	if (is_cb_patched(hdev, job)) {
> +		rc = rhashtable_init(&job->userptr_ht, &hl_userptr_rht_params);
> +		if (rc)
> +			goto free_job;
> +	}
>  
>  	if (job->queue_type == QUEUE_TYPE_EXT)
>  		INIT_WORK(&job->finish_work, job_wq_completion);
>  
>  	return job;
> +
> +free_job:
> +	kfree(job);
> +	return NULL;
>  }
>  
>  static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
> diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
> index eaae69a9f817..9c876d1480d2 100644
> --- a/drivers/accel/habanalabs/common/habanalabs.h
> +++ b/drivers/accel/habanalabs/common/habanalabs.h
> @@ -19,6 +19,7 @@
>  #include <linux/dma-direction.h>
>  #include <linux/scatterlist.h>
>  #include <linux/hashtable.h>
> +#include <linux/rhashtable.h>
>  #include <linux/debugfs.h>
>  #include <linux/rwsem.h>
>  #include <linux/eventfd.h>
> @@ -540,6 +541,8 @@ struct hl_hints_range {
>  	u64 end_addr;
>  };
>  
> +extern const struct rhashtable_params hl_userptr_rht_params;
> +
>  /**
>   * struct asic_fixed_properties - ASIC specific immutable properties.
>   * @hw_queues_props: H/W queues properties.
> @@ -1915,7 +1918,7 @@ struct hl_ctx_mgr {
>  /**
>   * struct hl_userptr - memory mapping chunk information
>   * @vm_type: type of the VM.
> - * @job_node: linked-list node for hanging the object on the Job's list.
> + * @job_node: hashtable node for hanging the object on the Job's list.
>   * @pages: pointer to struct page array
>   * @npages: size of @pages array
>   * @sgt: pointer to the scatter-gather table that holds the pages.
> @@ -1928,7 +1931,7 @@ struct hl_ctx_mgr {
>   */
>  struct hl_userptr {
>  	enum vm_type		vm_type; /* must be first */
> -	struct list_head	job_node;
> +	struct rhash_head	job_node;
>  	struct page		**pages;
>  	unsigned int		npages;
>  	struct sg_table		*sgt;
> @@ -2028,7 +2031,7 @@ struct hl_cs {
>   * @patched_cb: in case of patching, this is internal CB which is submitted on
>   *		the queue instead of the CB we got from the IOCTL.
>   * @finish_work: workqueue object to run when job is completed.
> - * @userptr_list: linked-list of userptr mappings that belong to this job and
> + * @userptr_ht: hashtable of userptr mappings that belong to this job and
>   *			wait for completion.
>   * @debugfs_list: node in debugfs list of command submission jobs.
>   * @refcount: reference counter for usage of the CS job.
> @@ -2056,7 +2059,7 @@ struct hl_cs_job {
>  	struct hl_cb		*user_cb;
>  	struct hl_cb		*patched_cb;
>  	struct work_struct	finish_work;
> -	struct list_head	userptr_list;
> +	struct rhashtable	userptr_ht;
>  	struct list_head	debugfs_list;
>  	struct kref		refcount;
>  	enum hl_queue_type	queue_type;
> @@ -2075,7 +2078,7 @@ struct hl_cs_job {
>   * @user_cb: the CB we got from the user.
>   * @patched_cb: in case of patching, this is internal CB which is submitted on
>   *		the queue instead of the CB we got from the IOCTL.
> - * @job_userptr_list: linked-list of userptr mappings that belong to the related
> + * @job_userptr_ht: hashtable of userptr mappings that belong to the related
>   *			job and wait for completion.
>   * @cs_sequence: the sequence number of the related CS.
>   * @queue_type: the type of the H/W queue this job is submitted to.
> @@ -2098,7 +2101,7 @@ struct hl_cs_job {
>  struct hl_cs_parser {
>  	struct hl_cb		*user_cb;
>  	struct hl_cb		*patched_cb;
> -	struct list_head	*job_userptr_list;
> +	struct rhashtable	*job_userptr_ht;
>  	u64			cs_sequence;
>  	enum hl_queue_type	queue_type;
>  	u32			ctx_id;
> @@ -3760,9 +3763,9 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
>  			struct hl_userptr *userptr);
>  void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
>  void hl_userptr_delete_list(struct hl_device *hdev,
> -				struct list_head *userptr_list);
> +				struct rhashtable *userptr_ht);
>  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
> -				struct list_head *userptr_list,
> +				struct rhashtable *userptr_ht,
>  				struct hl_userptr **userptr);
>  
>  int hl_mmu_init(struct hl_device *hdev);
> diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
> index a7b6a273ce21..e5e7912b3b34 100644
> --- a/drivers/accel/habanalabs/common/memory.c
> +++ b/drivers/accel/habanalabs/common/memory.c
> @@ -23,6 +23,13 @@ MODULE_IMPORT_NS(DMA_BUF);
>  
>  #define MEM_HANDLE_INVALID	ULONG_MAX
>  
> +const struct rhashtable_params hl_userptr_rht_params = {
> +	.head_offset = offsetof(struct hl_userptr, job_node),
> +	.key_offset = offsetof(struct hl_userptr, addr),
> +	.key_len = sizeof(u64),
> +	.automatic_shrinking = true,
> +};
> +
>  static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
>  			struct hl_mem_in *args, u64 *handle);
>  
> @@ -2483,7 +2490,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
>  	userptr->size = size;
>  	userptr->addr = addr;
>  	userptr->dma_mapped = false;
> -	INIT_LIST_HEAD(&userptr->job_node);
>  
>  	rc = get_user_memory(hdev, addr, size, npages, start, offset,
>  				userptr);
> @@ -2522,8 +2528,6 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
>  	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
>  	kvfree(userptr->pages);
>  
> -	list_del(&userptr->job_node);
> -
>  	sg_free_table(userptr->sgt);
>  	kfree(userptr->sgt);
>  }
> @@ -2531,23 +2535,31 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
>  /**
>   * hl_userptr_delete_list() - clear userptr list.
>   * @hdev: pointer to the habanalabs device structure.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
>   *
>   * This function does the following:
>   * - Iterates over the list and unpins the host memory and frees the userptr
>   *   structure.
>   */
>  void hl_userptr_delete_list(struct hl_device *hdev,
> -				struct list_head *userptr_list)
> +				struct rhashtable *userptr_ht)
>  {
> -	struct hl_userptr *userptr, *tmp;
> +	struct hl_userptr *userptr;
> +	struct rhashtable_iter hti;
> +	struct rhash_head *pos;
>  
> -	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
> +	rhashtable_walk_enter(userptr_ht, &hti);
> +	rhashtable_walk_start(&hti);
> +	while ((pos = rhashtable_walk_next(&hti))) {

rhashtable_walk_next() does not seem to be stable here, so I will revert
this part and keep 'userptr_list', doing the clearing with list_for_each.
I will send a v2 patch.

Cai-
Thanks
> +		if (PTR_ERR(pos) == -EAGAIN)
> +			continue;
> +		rhashtable_remove_fast(userptr_ht, hti.p, hl_userptr_rht_params);
> +		userptr = rhashtable_walk_peek(&hti);
>  		hl_unpin_host_memory(hdev, userptr);
>  		kfree(userptr);
>  	}
>  
> -	INIT_LIST_HEAD(userptr_list);
> +	rhashtable_destroy(userptr_ht);
>  }
>  
>  /**
> @@ -2555,7 +2567,7 @@ void hl_userptr_delete_list(struct hl_device *hdev,
>   * @hdev: pointer to the habanalabs device structure.
>   * @addr: user address to check.
>   * @size: user block size to check.
> - * @userptr_list: pointer to the list to clear.
> + * @userptr_ht: pointer to the hashtable to clear.
>   * @userptr: pointer to userptr to check.
>   *
>   * This function does the following:
> @@ -2563,10 +2575,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
>   *   pinned. If so, returns true, otherwise returns false.
>   */
>  bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
> -				u32 size, struct list_head *userptr_list,
> +				u32 size, struct rhashtable *userptr_ht,
>  				struct hl_userptr **userptr)
>  {
> -	list_for_each_entry((*userptr), userptr_list, job_node) {
> +	(*userptr) = rhashtable_lookup_fast(userptr_ht, &addr, hl_userptr_rht_params);
> +	if (*userptr) {
>  		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
>  			return true;
>  	}
> diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
> index a29aa8f7b6f3..1e1433042413 100644
> --- a/drivers/accel/habanalabs/gaudi/gaudi.c
> +++ b/drivers/accel/habanalabs/gaudi/gaudi.c
> @@ -1031,7 +1031,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev,
>  	}
>  
>  free_job:
> -	hl_userptr_delete_list(hdev, &job->userptr_list);
> +	hl_userptr_delete_list(hdev, &job->userptr_ht);
>  	hl_debugfs_remove_job(hdev, job);
>  	kfree(job);
>  	atomic_dec(&cb->cs_cnt);
> @@ -4901,7 +4901,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	int rc;
>  
>  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr))
> +			parser->job_userptr_ht, &userptr))
>  		goto already_pinned;
>  
>  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -4913,7 +4913,10 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	if (rc)
>  		goto free_userptr;
>  
> -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> +				    &userptr->job_node, hl_userptr_rht_params);
> +	if (rc)
> +		goto unpin_memory;
>  
>  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
>  	if (rc) {
> @@ -4931,7 +4934,8 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
>  	return 0;
>  
>  unpin_memory:
> -	list_del(&userptr->job_node);
> +	rhashtable_remove_fast(parser->job_userptr_ht,
> +			       &userptr->job_node, hl_userptr_rht_params);
>  	hl_unpin_host_memory(hdev, userptr);
>  free_userptr:
>  	kfree(userptr);
> @@ -5175,7 +5179,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
>  	if ((!skip_host_mem_pin) &&
>  		(!hl_userptr_is_pinned(hdev, addr,
>  					le32_to_cpu(user_dma_pkt->tsize),
> -					parser->job_userptr_list, &userptr))) {
> +					parser->job_userptr_ht, &userptr))) {
>  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
>  				addr, user_dma_pkt->tsize);
>  		return -EFAULT;
> @@ -5472,7 +5476,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
>  
>  free_userptr:
>  	if (rc)
> -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
>  	return rc;
>  }
>  
> diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
> index fb0ac9df841a..bfcbb9e8b126 100644
> --- a/drivers/accel/habanalabs/goya/goya.c
> +++ b/drivers/accel/habanalabs/goya/goya.c
> @@ -3347,7 +3347,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	int rc;
>  
>  	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr))
> +			parser->job_userptr_ht, &userptr))
>  		goto already_pinned;
>  
>  	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
> @@ -3359,7 +3359,10 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	if (rc)
>  		goto free_userptr;
>  
> -	list_add_tail(&userptr->job_node, parser->job_userptr_list);
> +	rc = rhashtable_insert_fast(parser->job_userptr_ht,
> +				    &userptr->job_node, hl_userptr_rht_params);
> +	if (rc)
> +		goto unpin_memory;
>  
>  	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
>  	if (rc) {
> @@ -3377,7 +3380,8 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
>  	return 0;
>  
>  unpin_memory:
> -	list_del(&userptr->job_node);
> +	rhashtable_remove_fast(parser->job_userptr_ht,
> +			       &userptr->job_node, hl_userptr_rht_params);
>  	hl_unpin_host_memory(hdev, userptr);
>  free_userptr:
>  	kfree(userptr);
> @@ -3806,7 +3810,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
>  	if ((!skip_host_mem_pin) &&
>  		(hl_userptr_is_pinned(hdev, addr,
>  			le32_to_cpu(user_dma_pkt->tsize),
> -			parser->job_userptr_list, &userptr) == false)) {
> +			parser->job_userptr_ht, &userptr) == false)) {
>  		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
>  				addr, user_dma_pkt->tsize);
>  		return -EFAULT;
> @@ -4104,7 +4108,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
>  
>  free_userptr:
>  	if (rc)
> -		hl_userptr_delete_list(hdev, parser->job_userptr_list);
> +		hl_userptr_delete_list(hdev, parser->job_userptr_ht);
>  	return rc;
>  }
>  
> -- 
> 2.34.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ