lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <zrho753sfvo6wiovm5fkkfwuu3thurxu22j2ei2sx2r674d6gy@t45ugqstjibm>
Date:   Thu, 22 Jun 2023 18:31:37 +0200
From:   Stefano Garzarella <sgarzare@...hat.com>
To:     Bobby Eshleman <bobby.eshleman@...edance.com>
Cc:     Stefan Hajnoczi <stefanha@...hat.com>,
        "Michael S. Tsirkin" <mst@...hat.com>,
        Jason Wang <jasowang@...hat.com>,
        Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        "K. Y. Srinivasan" <kys@...rosoft.com>,
        Haiyang Zhang <haiyangz@...rosoft.com>,
        Wei Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>,
        Bryan Tan <bryantan@...are.com>,
        Vishnu Dasa <vdasa@...are.com>,
        VMware PV-Drivers Reviewers <pv-drivers@...are.com>,
        Dan Carpenter <dan.carpenter@...aro.org>,
        Simon Horman <simon.horman@...igine.com>,
        Krasnov Arseniy <oxffffaa@...il.com>, kvm@...r.kernel.org,
        virtualization@...ts.linux-foundation.org, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-hyperv@...r.kernel.org,
        bpf@...r.kernel.org
Subject: Re: [PATCH RFC net-next v4 6/8] virtio/vsock: support dgrams

On Sat, Jun 10, 2023 at 12:58:33AM +0000, Bobby Eshleman wrote:
>This commit adds support for datagrams over virtio/vsock.
>
>Message boundaries are preserved on a per-skb and per-vq entry basis.
>Messages are copied in whole from the user to an SKB, which in turn is
>added to the scatterlist for the virtqueue in whole for the device.
>Messages do not straddle skbs and they do not straddle packets.
>Messages may be truncated by the receiving user if their buffer is
>shorter than the message.
>
>Other properties of vsock datagrams:
>- Datagrams self-throttle at the per-socket sk_sndbuf threshold.
>- The same virtqueue is used as is used for streams and seqpacket flows
>- Credits are not used for datagrams
>- Packets are dropped silently by the device, which means the virtqueue
>  will still get kicked even during high packet loss, so long as the
>  socket does not exceed sk_sndbuf.
>
>Future work might include finding a way to reduce the virtqueue kick
>rate for datagram flows with high packet loss.
>
>Signed-off-by: Bobby Eshleman <bobby.eshleman@...edance.com>
>---
> drivers/vhost/vsock.c                   |  27 ++++-
> include/linux/virtio_vsock.h            |   5 +-
> include/net/af_vsock.h                  |   1 +
> include/uapi/linux/virtio_vsock.h       |   1 +
> net/vmw_vsock/af_vsock.c                |  58 +++++++--
> net/vmw_vsock/virtio_transport.c        |  23 +++-
> net/vmw_vsock/virtio_transport_common.c | 207 ++++++++++++++++++++++++--------
> net/vmw_vsock/vsock_loopback.c          |   8 +-
> 8 files changed, 264 insertions(+), 66 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index 8f0082da5e70..159c1a22c1a8 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -32,7 +32,8 @@
> enum {
> 	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
> 			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
>-			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
>+			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET) |
>+			       (1ULL << VIRTIO_VSOCK_F_DGRAM)
> };
>
> enum {
>@@ -56,6 +57,7 @@ struct vhost_vsock {
> 	atomic_t queued_replies;
>
> 	u32 guest_cid;
>+	bool dgram_allow;
> 	bool seqpacket_allow;
> };
>
>@@ -394,6 +396,7 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
> 	return val < vq->num;
> }
>
>+static bool vhost_transport_dgram_allow(u32 cid, u32 port);
> static bool vhost_transport_seqpacket_allow(u32 remote_cid);
>
> static struct virtio_transport vhost_transport = {
>@@ -410,10 +413,11 @@ static struct virtio_transport vhost_transport = {
> 		.cancel_pkt               = vhost_transport_cancel_pkt,
>
> 		.dgram_enqueue            = virtio_transport_dgram_enqueue,
>-		.dgram_allow              = virtio_transport_dgram_allow,
>+		.dgram_allow              = vhost_transport_dgram_allow,
> 		.dgram_get_cid		  = virtio_transport_dgram_get_cid,
> 		.dgram_get_port		  = virtio_transport_dgram_get_port,
> 		.dgram_get_length	  = virtio_transport_dgram_get_length,
>+		.dgram_payload_offset	  = 0,
>
> 		.stream_enqueue           = virtio_transport_stream_enqueue,
> 		.stream_dequeue           = virtio_transport_stream_dequeue,
>@@ -446,6 +450,22 @@ static struct virtio_transport vhost_transport = {
> 	.send_pkt = vhost_transport_send_pkt,
> };
>
>+static bool vhost_transport_dgram_allow(u32 cid, u32 port)
>+{
>+	struct vhost_vsock *vsock;
>+	bool dgram_allow = false;
>+
>+	rcu_read_lock();
>+	vsock = vhost_vsock_get(cid);
>+
>+	if (vsock)
>+		dgram_allow = vsock->dgram_allow;
>+
>+	rcu_read_unlock();
>+
>+	return dgram_allow;
>+}
>+
> static bool vhost_transport_seqpacket_allow(u32 remote_cid)
> {
> 	struct vhost_vsock *vsock;
>@@ -802,6 +822,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
> 	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
> 		vsock->seqpacket_allow = true;
>
>+	if (features & (1ULL << VIRTIO_VSOCK_F_DGRAM))
>+		vsock->dgram_allow = true;
>+
> 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
> 		vq = &vsock->vqs[i];
> 		mutex_lock(&vq->mutex);
>diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>index 73afa09f4585..237ca87a2ecd 100644
>--- a/include/linux/virtio_vsock.h
>+++ b/include/linux/virtio_vsock.h
>@@ -216,7 +216,6 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val);
> u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk);
> bool virtio_transport_stream_is_active(struct vsock_sock *vsk);
> bool virtio_transport_stream_allow(u32 cid, u32 port);
>-bool virtio_transport_dgram_allow(u32 cid, u32 port);
> int virtio_transport_dgram_get_cid(struct sk_buff *skb, unsigned int *cid);
> int virtio_transport_dgram_get_port(struct sk_buff *skb, unsigned int *port);
> int virtio_transport_dgram_get_length(struct sk_buff *skb, size_t *len);
>@@ -247,4 +246,8 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
> void virtio_transport_deliver_tap_pkt(struct sk_buff *skb);
> int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *list);
> int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t read_actor);
>+void virtio_transport_init_dgram_bind_tables(void);
>+int virtio_transport_dgram_get_cid(struct sk_buff *skb, unsigned int *cid);
>+int virtio_transport_dgram_get_port(struct sk_buff *skb, unsigned int *port);
>+int virtio_transport_dgram_get_length(struct sk_buff *skb, size_t *len);
> #endif /* _LINUX_VIRTIO_VSOCK_H */
>diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>index 7bedb9ee7e3e..c115e655b4f5 100644
>--- a/include/net/af_vsock.h
>+++ b/include/net/af_vsock.h
>@@ -225,6 +225,7 @@ void vsock_for_each_connected_socket(struct vsock_transport *transport,
> 				     void (*fn)(struct sock *sk));
> int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
> bool vsock_find_cid(unsigned int cid);
>+struct sock *vsock_find_bound_dgram_socket(struct sockaddr_vm *addr);
>
> /**** TAP ****/
>
>diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>index 9c25f267bbc0..27b4b2b8bf13 100644
>--- a/include/uapi/linux/virtio_vsock.h
>+++ b/include/uapi/linux/virtio_vsock.h
>@@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> enum virtio_vsock_type {
> 	VIRTIO_VSOCK_TYPE_STREAM = 1,
> 	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>+	VIRTIO_VSOCK_TYPE_DGRAM = 3,
> };
>
> enum virtio_vsock_op {
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index 7a3ca4270446..b0b18e7f4299 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c

I would split this patch in 2, one with the changes in af_vsock.c,
of for the virtio changes.

>@@ -114,6 +114,7 @@
> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> static void vsock_sk_destruct(struct sock *sk);
> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>+static bool sock_type_connectible(u16 type);
>
> /* Protocol family. */
> struct proto vsock_proto = {
>@@ -180,6 +181,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
> EXPORT_SYMBOL_GPL(vsock_connected_table);
> DEFINE_SPINLOCK(vsock_table_lock);
> EXPORT_SYMBOL_GPL(vsock_table_lock);
>+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE];
>+static DEFINE_SPINLOCK(vsock_dgram_table_lock);
>
> /* Autobind this socket to the local address if necessary. */
> static int vsock_auto_bind(struct vsock_sock *vsk)
>@@ -202,6 +205,9 @@ static void vsock_init_tables(void)
>
> 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
> 		INIT_LIST_HEAD(&vsock_connected_table[i]);
>+
>+	for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++)
>+		INIT_LIST_HEAD(&vsock_dgram_bind_table[i]);
> }
>
> static void __vsock_insert_bound(struct list_head *list,
>@@ -230,8 +236,8 @@ static void __vsock_remove_connected(struct vsock_sock *vsk)
> 	sock_put(&vsk->sk);
> }
>
>-struct sock *vsock_find_bound_socket_common(struct sockaddr_vm *addr,
>-					    struct list_head *bind_table)
>+static struct sock *vsock_find_bound_socket_common(struct sockaddr_vm *addr,
>+						   struct list_head *bind_table)
> {
> 	struct vsock_sock *vsk;
>
>@@ -248,6 +254,23 @@ struct sock *vsock_find_bound_socket_common(struct sockaddr_vm *addr,
> 	return NULL;
> }
>
>+struct sock *
>+vsock_find_bound_dgram_socket(struct sockaddr_vm *addr)
>+{
>+	struct sock *sk;
>+
>+	spin_lock_bh(&vsock_dgram_table_lock);
>+	sk = vsock_find_bound_socket_common(addr,
>+					    &vsock_dgram_bind_table[VSOCK_HASH(addr)]);
>+	if (sk)
>+		sock_hold(sk);
>+
>+	spin_unlock_bh(&vsock_dgram_table_lock);
>+
>+	return sk;
>+}
>+EXPORT_SYMBOL_GPL(vsock_find_bound_dgram_socket);
>+
> static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
> {
> 	return vsock_find_bound_socket_common(addr, vsock_bound_sockets(addr));
>@@ -287,6 +310,14 @@ void vsock_insert_connected(struct vsock_sock *vsk)
> }
> EXPORT_SYMBOL_GPL(vsock_insert_connected);
>
>+static void vsock_remove_dgram_bound(struct vsock_sock *vsk)
>+{
>+	spin_lock_bh(&vsock_dgram_table_lock);
>+	if (__vsock_in_bound_table(vsk))
>+		__vsock_remove_bound(vsk);
>+	spin_unlock_bh(&vsock_dgram_table_lock);
>+}
>+
> void vsock_remove_bound(struct vsock_sock *vsk)
> {
> 	spin_lock_bh(&vsock_table_lock);
>@@ -338,7 +369,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
>
> void vsock_remove_sock(struct vsock_sock *vsk)
> {
>-	vsock_remove_bound(vsk);
>+	if (sock_type_connectible(sk_vsock(vsk)->sk_type))
>+		vsock_remove_bound(vsk);
>+	else
>+		vsock_remove_dgram_bound(vsk);
> 	vsock_remove_connected(vsk);
> }
> EXPORT_SYMBOL_GPL(vsock_remove_sock);
>@@ -720,11 +754,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> 	return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1);
> }
>
>-static int __vsock_bind_dgram(struct vsock_sock *vsk,
>-			      struct sockaddr_vm *addr)
>+static int vsock_bind_dgram(struct vsock_sock *vsk,
>+			    struct sockaddr_vm *addr)
> {
>-	if (!vsk->transport || !vsk->transport->dgram_bind)
>-		return -EINVAL;
>+	if (!vsk->transport || !vsk->transport->dgram_bind) {
>+		int retval;
>+
>+		spin_lock_bh(&vsock_dgram_table_lock);
>+		retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table,
>+					   VSOCK_HASH_SIZE);
>+		spin_unlock_bh(&vsock_dgram_table_lock);
>+
>+		return retval;
>+	}
>
> 	return vsk->transport->dgram_bind(vsk, addr);
> }
>@@ -755,7 +797,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
> 		break;
>
> 	case SOCK_DGRAM:
>-		retval = __vsock_bind_dgram(vsk, addr);
>+		retval = vsock_bind_dgram(vsk, addr);
> 		break;
>
> 	default:
>diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>index 1b7843a7779a..7160a3104218 100644
>--- a/net/vmw_vsock/virtio_transport.c
>+++ b/net/vmw_vsock/virtio_transport.c
>@@ -63,6 +63,7 @@ struct virtio_vsock {
>
> 	u32 guest_cid;
> 	bool seqpacket_allow;
>+	bool dgram_allow;
> };
>
> static u32 virtio_transport_get_local_cid(void)
>@@ -413,6 +414,7 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
> 	queue_work(virtio_vsock_workqueue, &vsock->rx_work);
> }
>
>+static bool virtio_transport_dgram_allow(u32 cid, u32 port);
> static bool virtio_transport_seqpacket_allow(u32 remote_cid);
>
> static struct virtio_transport virtio_transport = {
>@@ -465,6 +467,21 @@ static struct virtio_transport virtio_transport = {
> 	.send_pkt = virtio_transport_send_pkt,
> };
>
>+static bool virtio_transport_dgram_allow(u32 cid, u32 port)
>+{
>+	struct virtio_vsock *vsock;
>+	bool dgram_allow;
>+
>+	dgram_allow = false;
>+	rcu_read_lock();
>+	vsock = rcu_dereference(the_virtio_vsock);
>+	if (vsock)
>+		dgram_allow = vsock->dgram_allow;
>+	rcu_read_unlock();
>+
>+	return dgram_allow;
>+}
>+
> static bool virtio_transport_seqpacket_allow(u32 remote_cid)
> {
> 	struct virtio_vsock *vsock;
>@@ -658,6 +675,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
> 	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
> 		vsock->seqpacket_allow = true;
>
>+	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_DGRAM))
>+		vsock->dgram_allow = true;
>+
> 	vdev->priv = vsock;
>
> 	ret = virtio_vsock_vqs_init(vsock);
>@@ -750,7 +770,8 @@ static struct virtio_device_id id_table[] = {
> };
>
> static unsigned int features[] = {
>-	VIRTIO_VSOCK_F_SEQPACKET
>+	VIRTIO_VSOCK_F_SEQPACKET,
>+	VIRTIO_VSOCK_F_DGRAM
> };
>
> static struct virtio_driver virtio_vsock_driver = {
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index d5a3c8efe84b..bc9d459723f5 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -37,6 +37,35 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
> 	return container_of(t, struct virtio_transport, transport);
> }
>
>+/* Requires info->msg and info->vsk */
>+static struct sk_buff *
>+virtio_transport_sock_alloc_send_skb(struct virtio_vsock_pkt_info *info, unsigned int size,
>+				     gfp_t mask, int *err)
>+{
>+	struct sk_buff *skb;
>+	struct sock *sk;
>+	int noblock;
>+
>+	if (size < VIRTIO_VSOCK_SKB_HEADROOM) {
>+		*err = -EINVAL;
>+		return NULL;
>+	}
>+
>+	if (info->msg)
>+		noblock = info->msg->msg_flags & MSG_DONTWAIT;
>+	else
>+		noblock = 1;
>+
>+	sk = sk_vsock(info->vsk);
>+	sk->sk_allocation = mask;
>+	skb = sock_alloc_send_skb(sk, size, noblock, err);
>+	if (!skb)
>+		return NULL;
>+
>+	skb_reserve(skb, VIRTIO_VSOCK_SKB_HEADROOM);
>+	return skb;
>+}
>+
> /* Returns a new packet on success, otherwise returns NULL.
>  *
>  * If NULL is returned, errp is set to a negative errno.
      ^
So this comment was wrong before this change?

>@@ -47,7 +76,8 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
> 			   u32 src_cid,
> 			   u32 src_port,
> 			   u32 dst_cid,
>-			   u32 dst_port)
>+			   u32 dst_port,
>+			   int *errp)
> {
> 	const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
> 	struct virtio_vsock_hdr *hdr;
>@@ -55,9 +85,21 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
> 	void *payload;
> 	int err;
>
>-	skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
>-	if (!skb)
>+	/* dgrams do not use credits, self-throttle according to sk_sndbuf
>+	 * using sock_alloc_send_skb. This helps avoid triggering the OOM.
>+	 */

I'm thinking if we should do somenthing similar also for other types...

>+	if (info->vsk && info->type == VIRTIO_VSOCK_TYPE_DGRAM) {
>+		skb = virtio_transport_sock_alloc_send_skb(info, 
>skb_len, GFP_KERNEL, &err);

Why not using errp here?

>+	} else {
>+		skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);

Maybe we can pass errp also to virtio_vsock_alloc_skb.


The rest LGTM.

Anyway, the implementation seems to work well, so I think now we should
discuss the virtio-spec changes, that with this approach should be
not big, right?

Thanks,
Stefano

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ