lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210713161906.457857-3-stefanha@redhat.com>
Date:   Tue, 13 Jul 2021 17:19:05 +0100
From:   Stefan Hajnoczi <stefanha@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     Daniel Lezcano <daniel.lezcano@...aro.org>,
        Stefano Garzarella <sgarzare@...hat.com>,
        Ming Lei <ming.lei@...hat.com>,
        "Michael S . Tsirkin" <mst@...hat.com>,
        Marcelo Tosatti <mtosatti@...hat.com>,
        Jens Axboe <axboe@...nel.dk>, Jason Wang <jasowang@...hat.com>,
        linux-block@...r.kernel.org,
        "Rafael J. Wysocki" <rjw@...ysocki.net>,
        virtualization@...ts.linux-foundation.org,
        linux-pm@...r.kernel.org, Christoph Hellwig <hch@...radead.org>,
        Stefan Hajnoczi <stefanha@...hat.com>
Subject: [RFC 2/3] virtio: add poll_source virtqueue polling

VIRTIO drivers can cheaply disable interrupts by setting
RING_EVENT_FLAGS_DISABLE in the packed virtqueues's Driver Event
Suppression structure. See "2.7.10 Driver and Device Event Suppression"
in the VIRTIO 1.1 specification for details
(https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html).

Add a per-virtqueue poll_source that disables events in ->start(),
processes completed virtqueue buffers in ->poll(), and re-enables events
in ->stop().

This optimization avoids interrupt injection during cpuidle polling.
Workloads that submit requests and then halt the vCPU until completion
benefit from this.

To enable this optimization:

1. Enable the cpuidle haltpoll driver:
   https://www.kernel.org/doc/html/latest/virt/guest-halt-polling.html

2. Enable poll_source on the virtio device:
   # echo -n 1 > /sys/block/vda/device/poll_source

Note that this feature currently as no effect on split virtqueues when
VIRTIO_F_EVENT_IDX is negotiated. It may be possible to tweak
virtqueue_disable_cb_split() but I haven't attempted it here.

This optimization has been benchmarked successfully with virtio-blk
devices. Currently it does not improve virtio-net performance, probably
because it doesn't combine with NAPI polling.

Signed-off-by: Stefan Hajnoczi <stefanha@...hat.com>
---
 drivers/virtio/virtio_pci_common.h |  7 +++
 include/linux/virtio.h             |  2 +
 include/linux/virtio_config.h      |  2 +
 drivers/virtio/virtio.c            | 34 ++++++++++++
 drivers/virtio/virtio_pci_common.c | 86 ++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci_modern.c |  2 +
 6 files changed, 133 insertions(+)

diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index beec047a8f8d..630746043183 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/poll_source.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
@@ -38,6 +39,9 @@ struct virtio_pci_vq_info {
 
 	/* MSI-X vector (or none) */
 	unsigned msix_vector;
+
+	/* the cpuidle poll_source */
+	struct poll_source poll_source;
 };
 
 /* Our device structure */
@@ -102,6 +106,9 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 	return container_of(vdev, struct virtio_pci_device, vdev);
 }
 
+/* enable poll_source API for vq polling */
+int vp_enable_poll_source(struct virtio_device *vdev, bool enable);
+
 /* wait for pending irq handlers */
 void vp_synchronize_vectors(struct virtio_device *vdev);
 /* the notify function used when creating a virt queue */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b1894e0323fa..baaa3c8fadb1 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -93,6 +93,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
  * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
+ * @poll_source_enabled: poll_source API enabled for vq polling
  * @config_enabled: configuration change reporting enabled
  * @config_change_pending: configuration change reported while disabled
  * @config_lock: protects configuration change reporting
@@ -107,6 +108,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 struct virtio_device {
 	int index;
 	bool failed;
+	bool poll_source_enabled;
 	bool config_enabled;
 	bool config_change_pending;
 	spinlock_t config_lock;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8519b3ae5d52..5fb78d56fd44 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -72,6 +72,7 @@ struct virtio_shm_region {
  * @set_vq_affinity: set the affinity for a virtqueue (optional).
  * @get_vq_affinity: get the affinity for a virtqueue (optional).
  * @get_shm_region: get a shared memory region based on the index.
+ * @enable_poll_source: enable/disable poll_source API vq polling (optional).
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -97,6 +98,7 @@ struct virtio_config_ops {
 			int index);
 	bool (*get_shm_region)(struct virtio_device *vdev,
 			       struct virtio_shm_region *region, u8 id);
+	int (*enable_poll_source)(struct virtio_device *dev, bool enable);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 4b15c00c0a0a..22fee71bbe34 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -59,12 +59,44 @@ static ssize_t features_show(struct device *_d,
 }
 static DEVICE_ATTR_RO(features);
 
+static ssize_t poll_source_show(struct device *_d,
+				struct device_attribute *attr, char *buf)
+{
+	struct virtio_device *dev = dev_to_virtio(_d);
+	return sprintf(buf, "%d\n", dev->poll_source_enabled);
+}
+
+static ssize_t poll_source_store(struct device *_d,
+		                 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct virtio_device *dev = dev_to_virtio(_d);
+	bool val;
+	int rc;
+
+	rc = kstrtobool(buf, &val);
+	if (rc)
+		return rc;
+
+	if (val == dev->poll_source_enabled)
+		return count;
+	if (!dev->config->enable_poll_source)
+		return -ENOTSUPP;
+
+	rc = dev->config->enable_poll_source(dev, val);
+	if (rc)
+		return rc;
+	return count;
+}
+static DEVICE_ATTR_RW(poll_source);
+
 static struct attribute *virtio_dev_attrs[] = {
 	&dev_attr_device.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_status.attr,
 	&dev_attr_modalias.attr,
 	&dev_attr_features.attr,
+	&dev_attr_poll_source.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(virtio_dev);
@@ -343,6 +375,8 @@ int register_virtio_device(struct virtio_device *dev)
 	dev->index = err;
 	dev_set_name(&dev->dev, "virtio%u", dev->index);
 
+	dev->poll_source_enabled = false;
+
 	spin_lock_init(&dev->config_lock);
 	dev->config_enabled = false;
 	dev->config_change_pending = false;
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 222d630c41fc..6de372e12afb 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -24,6 +24,83 @@ MODULE_PARM_DESC(force_legacy,
 		 "Force legacy mode for transitional virtio 1 devices");
 #endif
 
+static void vp_poll_source_start(struct poll_source *src)
+{
+	struct virtio_pci_vq_info *info =
+		container_of(src, struct virtio_pci_vq_info, poll_source);
+
+	/* This API does not require a lock */
+	virtqueue_disable_cb(info->vq);
+}
+
+static void vp_poll_source_stop(struct poll_source *src)
+{
+	struct virtio_pci_vq_info *info =
+		container_of(src, struct virtio_pci_vq_info, poll_source);
+
+	/* Poll one last time in case */
+	/* TODO allow driver to provide spinlock */
+	if (!virtqueue_enable_cb(info->vq))
+		vring_interrupt(0 /* ignored */, info->vq);
+}
+
+static void vp_poll_source_poll(struct poll_source *src)
+{
+	struct virtio_pci_vq_info *info =
+		container_of(src, struct virtio_pci_vq_info, poll_source);
+
+	vring_interrupt(0 /* ignored */, info->vq);
+}
+
+static const struct poll_source_ops vp_poll_source_ops = {
+	.start = vp_poll_source_start,
+	.stop = vp_poll_source_stop,
+	.poll = vp_poll_source_poll,
+};
+
+/* call this when irq affinity changes to update cpuidle poll_source */
+/* TODO this function waits for a smp_call_function_single() completion, is that allowed in all caller contexts? */
+/* TODO this function is not thread-safe, do all callers hold the same lock? */
+static int vp_update_poll_source(struct virtio_device *vdev, int index)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct poll_source *src = &vp_dev->vqs[index]->poll_source;
+	const struct cpumask *mask;
+
+	if (!list_empty(&src->node))
+		poll_source_unregister(src);
+
+	if (!vdev->poll_source_enabled)
+		return 0;
+
+	mask = vp_get_vq_affinity(vdev, index);
+	if (!mask)
+		return -ENOTTY;
+
+	/* Update the poll_source cpu */
+	src->cpu = cpumask_first(mask);
+
+	/* Don't use poll_source if interrupts are handled on multiple CPUs */
+	if (cpumask_next(src->cpu, mask) < nr_cpu_ids)
+		return 0;
+
+	return poll_source_register(src);
+}
+
+/* TODO add this to virtio_pci_legacy config ops? */
+int vp_enable_poll_source(struct virtio_device *vdev, bool enable)
+{
+	struct virtqueue *vq;
+
+	vdev->poll_source_enabled = enable;
+
+	/* TODO locking */
+	list_for_each_entry(vq, &vdev->vqs, list) {
+		vp_update_poll_source(vdev, vq->index); /* TODO handle errors? */
+	}
+	return 0;
+}
+
 /* wait for pending irq handlers */
 void vp_synchronize_vectors(struct virtio_device *vdev)
 {
@@ -186,6 +263,9 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
+	info->poll_source.ops = &vp_poll_source_ops;
+	INIT_LIST_HEAD(&info->poll_source.node);
+
 	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
 			      msix_vec);
 	if (IS_ERR(vq))
@@ -237,6 +317,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 				int irq = pci_irq_vector(vp_dev->pci_dev, v);
 
 				irq_set_affinity_hint(irq, NULL);
+				vp_update_poll_source(vdev, vq->index);
 				free_irq(irq, vq);
 			}
 		}
@@ -342,6 +423,9 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 				  vqs[i]);
 		if (err)
 			goto error_find;
+
+		if (desc)
+			vp_update_poll_source(vdev, i);
 	}
 	return 0;
 
@@ -440,6 +524,8 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
 			cpumask_copy(mask, cpu_mask);
 			irq_set_affinity_hint(irq, mask);
 		}
+
+		vp_update_poll_source(vdev, vq->index);
 	}
 	return 0;
 }
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 30654d3a0b41..417d568590f2 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -394,6 +394,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
 	.set_vq_affinity = vp_set_vq_affinity,
 	.get_vq_affinity = vp_get_vq_affinity,
 	.get_shm_region  = vp_get_shm_region,
+	.enable_poll_source = vp_enable_poll_source,
 };
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -411,6 +412,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
 	.set_vq_affinity = vp_set_vq_affinity,
 	.get_vq_affinity = vp_get_vq_affinity,
 	.get_shm_region  = vp_get_shm_region,
+	.enable_poll_source = vp_enable_poll_source,
 };
 
 /* the PCI probing function */
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ