Message-Id: <20221214163025.103075-4-sgarzare@redhat.com>
Date: Wed, 14 Dec 2022 17:30:22 +0100
From: Stefano Garzarella <sgarzare@...hat.com>
To: virtualization@...ts.linux-foundation.org
Cc: Jason Wang <jasowang@...hat.com>,
Andrey Zhadchenko <andrey.zhadchenko@...tuozzo.com>,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
"Michael S. Tsirkin" <mst@...hat.com>, eperezma@...hat.com,
stefanha@...hat.com, netdev@...r.kernel.org,
Stefano Garzarella <sgarzare@...hat.com>
Subject: [RFC PATCH 3/6] vringh: support VA with iotlb
vDPA supports the possibility to use user virtual addresses (VA) in the
IOTLB messages.
So, let's add support for user VA in vringh as well, so that the vDPA
simulators can use it.
Signed-off-by: Stefano Garzarella <sgarzare@...hat.com>
---
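For reference, below is a minimal sketch (not part of this patch) of how a
simulator whose IOTLB entries hold user VA could set up a vring with the new
flag. The function and parameter names are illustrative only; the calls to
vringh_init_iotlb() and vringh_set_iotlb() use the signatures introduced or
already present in this series:

    #include <linux/spinlock.h>
    #include <linux/vhost_iotlb.h>
    #include <linux/vringh.h>

    /* Illustrative only: a queue-ready path in a simulator whose IOTLB
     * entries contain user VA. Passing use_va = true makes vringh use the
     * new iovec/copy_{from,to}_iter paths instead of bio_vec + kmap_atomic().
     */
    static int example_queue_ready(struct vringh *vrh, struct vhost_iotlb *iotlb,
                                   spinlock_t *iotlb_lock, u64 features,
                                   unsigned int num, u64 desc_addr,
                                   u64 driver_addr, u64 device_addr)
    {
            int err;

            err = vringh_init_iotlb(vrh, features, num,
                                    false, /* weak_barriers */
                                    true,  /* use_va: IOTLB contains user VA */
                                    (struct vring_desc *)(uintptr_t)desc_addr,
                                    (struct vring_avail *)(uintptr_t)driver_addr,
                                    (struct vring_used *)(uintptr_t)device_addr);
            if (err)
                    return err;

            vringh_set_iotlb(vrh, iotlb, iotlb_lock);
            return 0;
    }

The existing in-kernel users (mlx5 CVQ and vdpa_sim) keep passing false for
use_va, so their behaviour is unchanged by this patch.
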
include/linux/vringh.h | 5 +-
drivers/vdpa/mlx5/core/resources.c | 3 +-
drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +-
drivers/vhost/vringh.c | 250 +++++++++++++++++++++++------
5 files changed, 207 insertions(+), 57 deletions(-)
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 212892cf9822..c70962f16b1f 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -32,6 +32,9 @@ struct vringh {
/* Can we get away with weak barriers? */
bool weak_barriers;
+ /* Use user's VA */
+ bool use_va;
+
/* Last available index we saw (ie. where we're up to). */
u16 last_avail_idx;
@@ -279,7 +282,7 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
spinlock_t *iotlb_lock);
int vringh_init_iotlb(struct vringh *vrh, u64 features,
- unsigned int num, bool weak_barriers,
+ unsigned int num, bool weak_barriers, bool use_va,
struct vring_desc *desc,
struct vring_avail *avail,
struct vring_used *used);
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 9800f9bec225..e0bab3458b40 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -233,7 +233,8 @@ static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
if (!mvdev->cvq.iotlb)
return -ENOMEM;
- vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
+ vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb,
+ &mvdev->cvq.iommu_lock);
return 0;
}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 90913365def4..81ba0867e2c8 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2504,7 +2504,7 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
- MLX5_CVQ_MAX_ENT, false,
+ MLX5_CVQ_MAX_ENT, false, false,
(struct vring_desc *)(uintptr_t)cvq->desc_addr,
(struct vring_avail *)(uintptr_t)cvq->driver_addr,
(struct vring_used *)(uintptr_t)cvq->device_addr);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index b20689f8fe89..2e0ee7280aa8 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -67,7 +67,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
- vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
(struct vring_desc *)(uintptr_t)vq->desc_addr,
(struct vring_avail *)
(uintptr_t)vq->driver_addr,
@@ -87,7 +87,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
vq->cb = NULL;
vq->private = NULL;
vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
- VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+ VDPASIM_QUEUE_MAX, false, false, NULL, NULL, NULL);
vq->vring.notify = NULL;
}
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 11f59dd06a74..c1f77dc93482 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1094,15 +1094,99 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
-static int iotlb_translate(const struct vringh *vrh,
- u64 addr, u64 len, u64 *translated,
- struct bio_vec iov[],
- int iov_size, u32 perm)
+static int iotlb_translate_va(const struct vringh *vrh,
+ u64 addr, u64 len, u64 *translated,
+ struct iovec iov[],
+ int iov_size, u32 perm)
{
struct vhost_iotlb_map *map;
struct vhost_iotlb *iotlb = vrh->iotlb;
+ u64 s = 0, last = addr + len - 1;
+ int ret = 0;
+
+ spin_lock(vrh->iotlb_lock);
+
+ while (len > s) {
+ u64 size;
+
+ if (unlikely(ret >= iov_size)) {
+ ret = -ENOBUFS;
+ break;
+ }
+
+ map = vhost_iotlb_itree_first(iotlb, addr, last);
+ if (!map || map->start > addr) {
+ ret = -EINVAL;
+ break;
+ } else if (!(map->perm & perm)) {
+ ret = -EPERM;
+ break;
+ }
+
+ size = map->size - addr + map->start;
+ iov[ret].iov_len = min(len - s, size);
+ iov[ret].iov_base = (void __user *)(unsigned long)
+ (map->addr + addr - map->start);
+ s += size;
+ addr += size;
+ ++ret;
+ }
+
+ spin_unlock(vrh->iotlb_lock);
+
+ if (translated)
+ *translated = min(len, s);
+
+ return ret;
+}
+
+static inline int copy_from_va(const struct vringh *vrh, void *dst, void *src,
+ u64 len, u64 *translated)
+{
+ struct iovec iov[16];
+ struct iov_iter iter;
+ int ret;
+
+ ret = iotlb_translate_va(vrh, (u64)(uintptr_t)src, len, translated, iov,
+ ARRAY_SIZE(iov), VHOST_MAP_RO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
+
+ iov_iter_init(&iter, READ, iov, ret, *translated);
+
+ return copy_from_iter(dst, *translated, &iter);
+}
+
+static inline int copy_to_va(const struct vringh *vrh, void *dst, void *src,
+ u64 len, u64 *translated)
+{
+ struct iovec iov[16];
+ struct iov_iter iter;
+ int ret;
+
+ ret = iotlb_translate_va(vrh, (u64)(uintptr_t)dst, len, translated, iov,
+ ARRAY_SIZE(iov), VHOST_MAP_WO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
+
+ iov_iter_init(&iter, WRITE, iov, ret, *translated);
+
+ return copy_to_iter(src, *translated, &iter);
+}
+
+static int iotlb_translate_pa(const struct vringh *vrh,
+ u64 addr, u64 len, u64 *translated,
+ struct bio_vec iov[],
+ int iov_size, u32 perm)
+{
+ struct vhost_iotlb_map *map;
+ struct vhost_iotlb *iotlb = vrh->iotlb;
+ u64 s = 0, last = addr + len - 1;
int ret = 0;
- u64 s = 0;
spin_lock(vrh->iotlb_lock);
@@ -1114,8 +1198,7 @@ static int iotlb_translate(const struct vringh *vrh,
break;
}
- map = vhost_iotlb_itree_first(iotlb, addr,
- addr + len - 1);
+ map = vhost_iotlb_itree_first(iotlb, addr, last);
if (!map || map->start > addr) {
ret = -EINVAL;
break;
@@ -1143,28 +1226,61 @@ static int iotlb_translate(const struct vringh *vrh,
return ret;
}
+static inline int copy_from_pa(const struct vringh *vrh, void *dst, void *src,
+ u64 len, u64 *translated)
+{
+ struct bio_vec iov[16];
+ struct iov_iter iter;
+ int ret;
+
+ ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)src, len, translated, iov,
+ ARRAY_SIZE(iov), VHOST_MAP_RO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
+
+ iov_iter_bvec(&iter, READ, iov, ret, *translated);
+
+ return copy_from_iter(dst, *translated, &iter);
+}
+
+static inline int copy_to_pa(const struct vringh *vrh, void *dst, void *src,
+ u64 len, u64 *translated)
+{
+ struct bio_vec iov[16];
+ struct iov_iter iter;
+ int ret;
+
+ ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)dst, len, translated, iov,
+ ARRAY_SIZE(iov), VHOST_MAP_WO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
+
+ iov_iter_bvec(&iter, WRITE, iov, ret, *translated);
+
+ return copy_to_iter(src, *translated, &iter);
+}
+
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
u64 total_translated = 0;
while (total_translated < len) {
- struct bio_vec iov[16];
- struct iov_iter iter;
u64 translated;
int ret;
- ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
- len - total_translated, &translated,
- iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
- if (ret == -ENOBUFS)
- ret = ARRAY_SIZE(iov);
- else if (ret < 0)
- return ret;
-
- iov_iter_bvec(&iter, READ, iov, ret, translated);
+ if (vrh->use_va) {
+ ret = copy_from_va(vrh, dst, src,
+ len - total_translated, &translated);
+ } else {
+ ret = copy_from_pa(vrh, dst, src,
+ len - total_translated, &translated);
+ }
- ret = copy_from_iter(dst, translated, &iter);
if (ret < 0)
return ret;
@@ -1182,22 +1298,17 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
u64 total_translated = 0;
while (total_translated < len) {
- struct bio_vec iov[16];
- struct iov_iter iter;
u64 translated;
int ret;
- ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
- len - total_translated, &translated,
- iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
- if (ret == -ENOBUFS)
- ret = ARRAY_SIZE(iov);
- else if (ret < 0)
- return ret;
-
- iov_iter_bvec(&iter, WRITE, iov, ret, translated);
+ if (vrh->use_va) {
+ ret = copy_to_va(vrh, dst, src,
+ len - total_translated, &translated);
+ } else {
+ ret = copy_to_pa(vrh, dst, src,
+ len - total_translated, &translated);
+ }
- ret = copy_to_iter(src, translated, &iter);
if (ret < 0)
return ret;
@@ -1212,20 +1323,36 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
static inline int getu16_iotlb(const struct vringh *vrh,
u16 *val, const __virtio16 *p)
{
- struct bio_vec iov;
- void *kaddr, *from;
int ret;
/* Atomic read is needed for getu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
- &iov, 1, VHOST_MAP_RO);
- if (ret < 0)
- return ret;
+ if (vrh->use_va) {
+ struct iovec iov;
+
+ ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ NULL, &iov, 1, VHOST_MAP_RO);
+ if (ret < 0)
+ return ret;
- kaddr = kmap_atomic(iov.bv_page);
- from = kaddr + iov.bv_offset;
- *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
- kunmap_atomic(kaddr);
+ ret = __get_user(*val, (__virtio16 *)iov.iov_base);
+ if (ret)
+ return ret;
+
+ *val = vringh16_to_cpu(vrh, *val);
+ } else {
+ struct bio_vec iov;
+ void *kaddr, *from;
+
+ ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ NULL, &iov, 1, VHOST_MAP_RO);
+ if (ret < 0)
+ return ret;
+
+ kaddr = kmap_atomic(iov.bv_page);
+ from = kaddr + iov.bv_offset;
+ *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
+ kunmap_atomic(kaddr);
+ }
return 0;
}
@@ -1233,20 +1360,36 @@ static inline int getu16_iotlb(const struct vringh *vrh,
static inline int putu16_iotlb(const struct vringh *vrh,
__virtio16 *p, u16 val)
{
- struct bio_vec iov;
- void *kaddr, *to;
int ret;
/* Atomic write is needed for putu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
- &iov, 1, VHOST_MAP_WO);
- if (ret < 0)
- return ret;
+ if (vrh->use_va) {
+ struct iovec iov;
- kaddr = kmap_atomic(iov.bv_page);
- to = kaddr + iov.bv_offset;
- WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
- kunmap_atomic(kaddr);
+ ret = iotlb_translate_va(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ NULL, &iov, 1, VHOST_MAP_WO);
+ if (ret < 0)
+ return ret;
+
+ val = cpu_to_vringh16(vrh, val);
+
+ ret = __put_user(val, (__virtio16 *)iov.iov_base);
+ if (ret)
+ return ret;
+ } else {
+ struct bio_vec iov;
+ void *kaddr, *to;
+
+ ret = iotlb_translate_pa(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
+ &iov, 1, VHOST_MAP_WO);
+ if (ret < 0)
+ return ret;
+
+ kaddr = kmap_atomic(iov.bv_page);
+ to = kaddr + iov.bv_offset;
+ WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
+ kunmap_atomic(kaddr);
+ }
return 0;
}
@@ -1308,6 +1451,7 @@ static inline int putused_iotlb(const struct vringh *vrh,
* @features: the feature bits for this ring.
* @num: the number of elements.
* @weak_barriers: true if we only need memory barriers, not I/O.
+ * @use_va: true if IOTLB contains user VA
* @desc: the userpace descriptor pointer.
* @avail: the userpace avail pointer.
* @used: the userpace used pointer.
@@ -1315,11 +1459,13 @@ static inline int putused_iotlb(const struct vringh *vrh,
* Returns an error if num is invalid.
*/
int vringh_init_iotlb(struct vringh *vrh, u64 features,
- unsigned int num, bool weak_barriers,
+ unsigned int num, bool weak_barriers, bool use_va,
struct vring_desc *desc,
struct vring_avail *avail,
struct vring_used *used)
{
+ vrh->use_va = use_va;
+
return vringh_init_kern(vrh, features, num, weak_barriers,
desc, avail, used);
}
--
2.38.1