lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <611ef809-248f-424e-993c-f5727ed2e18c@arm.com>
Date:   Tue, 24 Oct 2023 13:38:11 +0100
From:   Robin Murphy <robin.murphy@....com>
To:     Huang Jiaqing <jiaqing.huang@...el.com>, joro@...tes.org,
        will@...nel.org, dwmw2@...radead.org, baolu.lu@...ux.intel.com,
        linux-kernel@...r.kernel.org, iommu@...ts.linux.dev
Cc:     jacob.jun.pan@...ux.intel.com, kevin.tian@...el.com,
        yi.y.sun@...el.com, kvm@...r.kernel.org
Subject: Re: [PATCH 1/2] iommu: Introduce a rb_tree for looking up device

On 24/10/2023 9:41 am, Huang Jiaqing wrote:
> The existing IO page fault handler locates the PCI device by calling
> pci_get_domain_bus_and_slot(), which searches the list of all PCI
> devices until the desired PCI device is found. This is inefficient
> because the algorithm efficiency of searching a list is O(n). In the
> critical path of handling an IO page fault, this is not performance
> friendly given that I/O page fault handling patch is performance
> critical, and parallel heavy dsa_test may cause cpu stuck due to
> the low efficiency and lock competition in current path.
> 
> To improve the performance of the IO page fault handler, replace
> pci_get_domain_bus_and_slot() with a local red-black tree. A red-black
> tree is a self-balancing binary search tree, which means that the
> average time complexity of searching a red-black tree is O(log(n)). This
> is significantly faster than O(n), so it can significantly improve the
> performance of the IO page fault handler.
> 
> In addition, we can only insert the affected devices (those that have IO
> page fault enabled) into the red-black tree. This can further improve
> the performance of the IO page fault handler.

Have we had the rbtree vs. xarray debate for this one yet? :)

However, how many devices do we actually expect to be sharing the same 
queue? If it's a small number then it's quite possible that tracking 
separate per-queue sets of devices is the real win here, and a simple 
list without the additional overheads of more complex structures could 
still be sufficient.

Thanks,
Robin.

> This series depends on "deliver page faults to user space" patch-set:
> https://lore.kernel.org/linux-iommu/20230928042734.16134-1-baolu.lu@linux.intel.com/
> 
> Signed-off-by: Huang Jiaqing <jiaqing.huang@...el.com>
> ---
>   drivers/iommu/io-pgfault.c | 104 ++++++++++++++++++++++++++++++++++++-
>   include/linux/iommu.h      |  16 ++++++
>   2 files changed, 118 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> index 1dbacc4fdf72..68e85dc6b1b6 100644
> --- a/drivers/iommu/io-pgfault.c
> +++ b/drivers/iommu/io-pgfault.c
> @@ -7,6 +7,7 @@
>   
>   #include <linux/iommu.h>
>   #include <linux/list.h>
> +#include <linux/pci.h>
>   #include <linux/sched/mm.h>
>   #include <linux/slab.h>
>   #include <linux/workqueue.h>
> @@ -392,6 +393,55 @@ int iopf_queue_discard_partial(struct iopf_queue *queue)
>   }
>   EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
>   
> +static int iopf_queue_pci_rbtree_insert(struct iopf_queue *queue, struct pci_dev *pdev)
> +{
> +	int ret;
> +	struct rb_node **new, *parent = NULL;
> +	struct iommu_fault_param *iopf_param = iopf_get_dev_fault_param(&pdev->dev);
> +
> +	if (!iopf_param)
> +		return -ENODEV;
> +
> +	down_write(&queue->pci_dev_sem);
> +	new = &(queue->pci_dev_rbtree.rb_node);
> +	while (*new) {
> +		struct iommu_fault_param *this = container_of(*new, struct iommu_fault_param, node);
> +		struct pci_dev *this_pdev = to_pci_dev(this->dev);
> +		s16 result = RB_NODE_CMP(pdev->bus->number, pdev->devfn, this_pdev->bus->number, this_pdev->devfn);
> +
> +		parent = *new;
> +		if (result < 0)
> +			new = &((*new)->rb_left);
> +		else if (result > 0)
> +			new = &((*new)->rb_right);
> +		else {
> +			ret = -EEXIST;
> +			goto err_unlock;
> +		}
> +	}
> +
> +	rb_link_node(&iopf_param->node, parent, new);
> +	rb_insert_color(&iopf_param->node, &queue->pci_dev_rbtree);
> +
> +	up_write(&queue->pci_dev_sem);
> +	return 0;
> +err_unlock:
> +	up_write(&queue->pci_dev_sem);
> +	iopf_put_dev_fault_param(iopf_param);
> +	return ret;
> +}
> +
> +/* Caller must have inserted iopf_param by calling iopf_queue_pci_rbtree_insert() */
> +static void iopf_queue_pci_rbtree_remove(struct iopf_queue *queue, struct iommu_fault_param *iopf_param)
> +{
> +	down_write(&queue->pci_dev_sem);
> +	rb_erase(&iopf_param->node, &queue->pci_dev_rbtree);
> +	up_write(&queue->pci_dev_sem);
> +
> +	/* paired with iopf_queue_pci_rbtree_insert() */
> +	iopf_put_dev_fault_param(iopf_param);
> +}
> +
>   /**
>    * iopf_queue_add_device - Add producer to the fault queue
>    * @queue: IOPF queue
> @@ -434,7 +484,13 @@ int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
>   	mutex_unlock(&param->lock);
>   	mutex_unlock(&queue->lock);
>   
> -	return ret;
> +	if (ret)
> +		return ret;
> +
> +	if (dev_is_pci(dev))
> +		return iopf_queue_pci_rbtree_insert(queue, to_pci_dev(dev));
> +
> +	return 0;
>   }
>   EXPORT_SYMBOL_GPL(iopf_queue_add_device);
>   
> @@ -486,7 +542,13 @@ int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
>   	mutex_unlock(&param->lock);
>   	mutex_unlock(&queue->lock);
>   
> -	return ret;
> +	if (ret)
> +		return ret;
> +
> +	if (dev_is_pci(dev))
> +		iopf_queue_pci_rbtree_remove(queue, fault_param);
> +
> +	return 0;
>   }
>   EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
>   
> @@ -519,6 +581,9 @@ struct iopf_queue *iopf_queue_alloc(const char *name)
>   	INIT_LIST_HEAD(&queue->devices);
>   	mutex_init(&queue->lock);
>   
> +	queue->pci_dev_rbtree = RB_ROOT;
> +	init_rwsem(&queue->pci_dev_sem);
> +
>   	return queue;
>   }
>   EXPORT_SYMBOL_GPL(iopf_queue_alloc);
> @@ -544,3 +609,38 @@ void iopf_queue_free(struct iopf_queue *queue)
>   	kfree(queue);
>   }
>   EXPORT_SYMBOL_GPL(iopf_queue_free);
> +
> +/**
> + * iopf_queue_find_pdev - Lookup pci device in iopf_queue rbtree
> + * @queue: IOPF queue
> + * @bus: bus number of pci device to lookup
> + * @devfn: devfn of pci device to lookup
> + *
> + * Return: the pci device on success and NULL on not found.
> + */
> +struct pci_dev *iopf_queue_find_pdev(struct iopf_queue *queue, u8 bus, u8 devfn)
> +{
> +	struct iommu_fault_param *data = NULL;
> +	struct pci_dev *pdev = NULL;
> +	struct rb_node *node;
> +
> +	down_read(&queue->pci_dev_sem);
> +
> +	node = queue->pci_dev_rbtree.rb_node;
> +	while (node) {
> +		data = container_of(node, struct iommu_fault_param, node);
> +		pdev = to_pci_dev(data->dev);
> +		s16 result = RB_NODE_CMP(bus, devfn, pdev->bus->number, pdev->devfn);
> +
> +		if (result < 0)
> +			node = node->rb_left;
> +		else if (result > 0)
> +			node = node->rb_right;
> +		else
> +			break;
> +	}
> +	up_read(&queue->pci_dev_sem);
> +
> +	return node ? pdev : NULL;
> +}
> +EXPORT_SYMBOL_GPL(iopf_queue_find_pdev);
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index bcec7e91dfc4..b29bbb0d1843 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -136,11 +136,15 @@ struct iopf_group {
>    * @wq: the fault workqueue
>    * @devices: devices attached to this queue
>    * @lock: protects the device list
> + * @pci_dev_rbtree: pci devices for looking up
> + * @pci_dev_sem: protects the rb_tree
>    */
>   struct iopf_queue {
>   	struct workqueue_struct *wq;
>   	struct list_head devices;
>   	struct mutex lock;
> +	struct rb_root pci_dev_rbtree;
> +	struct rw_semaphore pci_dev_sem;
>   };
>   
>   /* iommu fault flags */
> @@ -483,6 +487,8 @@ struct iommu_device {
>   	u32 max_pasids;
>   };
>   
> +#define RB_NODE_CMP(bus1, devfn1, bus2, devfn2) ((s16)(PCI_DEVID(bus1, devfn1) - PCI_DEVID(bus2, devfn2)))
> +
>   /**
>    * struct iommu_fault_param - per-device IOMMU fault data
>    * @lock: protect pending faults list
> @@ -494,6 +500,7 @@ struct iommu_device {
>    * @partial: faults that are part of a Page Request Group for which the last
>    *           request hasn't been submitted yet.
>    * @faults: holds the pending faults which needs response
> + * @node: pci device tracking node(lookup by (bus, devfn))
>    */
>   struct iommu_fault_param {
>   	struct mutex lock;
> @@ -505,6 +512,7 @@ struct iommu_fault_param {
>   
>   	struct list_head partial;
>   	struct list_head faults;
> +	struct rb_node node;
>   };
>   
>   /**
> @@ -1286,6 +1294,8 @@ int iopf_queue_discard_dev_pasid(struct device *dev, ioasid_t pasid);
>   struct iopf_queue *iopf_queue_alloc(const char *name);
>   void iopf_queue_free(struct iopf_queue *queue);
>   int iopf_queue_discard_partial(struct iopf_queue *queue);
> +struct pci_dev *iopf_queue_find_pdev(struct iopf_queue *queue,
> +				u8 bus, u8 devfn);
>   void iopf_free_group(struct iopf_group *group);
>   int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt);
>   int iommu_page_response(struct device *dev, struct iommu_page_response *msg);
> @@ -1321,6 +1331,12 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
>   	return -ENODEV;
>   }
>   
> +static inline struct pci_dev *iopf_queue_find_pdev(struct iopf_queue *queue,
> +						u8 bus, u8 devfn)
> +{
> +	return NULL;
> +}
> +
>   static inline void iopf_free_group(struct iopf_group *group)
>   {
>   }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ