[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <juxkmz3vskdopukejobv745j6qqx45hhcdjtjw7gcpgz6fj5ws@ckz7dvyup6mq>
Date: Thu, 6 Nov 2025 17:21:35 +0100
From: Stefano Garzarella <sgarzare@...hat.com>
To: Bobby Eshleman <bobbyeshleman@...il.com>
Cc: Shuah Khan <shuah@...nel.org>, "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Stefan Hajnoczi <stefanha@...hat.com>, "Michael S. Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Eugenio Pérez <eperezma@...hat.com>, "K. Y. Srinivasan" <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>, Wei Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>,
Bryan Tan <bryan-bt.tan@...adcom.com>, Vishnu Dasa <vishnu.dasa@...adcom.com>,
Broadcom internal kernel review list <bcm-kernel-feedback-list@...adcom.com>, virtualization@...ts.linux.dev, netdev@...r.kernel.org,
linux-kselftest@...r.kernel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-hyperv@...r.kernel.org, berrange@...hat.com, Bobby Eshleman <bobbyeshleman@...a.com>
Subject: Re: [PATCH net-next v8 07/14] vhost/vsock: add netns support
On Thu, Oct 23, 2025 at 11:27:46AM -0700, Bobby Eshleman wrote:
>From: Bobby Eshleman <bobbyeshleman@...a.com>
>
>Add the ability to isolate vhost-vsock flows using namespaces.
>
>The VM, via the vhost_vsock struct, inherits its namespace from the
>process that opens the vhost-vsock device. vhost_vsock lookup functions
>are modified to take into account the mode (e.g., if CIDs are matching
>but modes don't align, then return NULL).
>
>vhost_vsock now acquires a reference to the namespace.
>
>Signed-off-by: Bobby Eshleman <bobbyeshleman@...a.com>
>---
>Changes in v7:
>- remove the check_global flag of vhost_vsock_get(), that logic was both
> wrong and not necessary, reuse vsock_net_check_mode() instead
>- remove 'delete me' comment
>Changes in v5:
>- respect pid namespaces when assigning namespace to vhost_vsock
>---
> drivers/vhost/vsock.c | 44 ++++++++++++++++++++++++++++++++++----------
> 1 file changed, 34 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index 34adf0cf9124..df6136633cd8 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -46,6 +46,11 @@ static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
> struct vhost_vsock {
> struct vhost_dev dev;
> struct vhost_virtqueue vqs[2];
>+ struct net *net;
>+ netns_tracker ns_tracker;
>+
>+ /* The ns mode at the time vhost_vsock was created */
>+ enum vsock_net_mode net_mode;
>
> /* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
> struct hlist_node hash;
>@@ -67,7 +72,8 @@ static u32 vhost_transport_get_local_cid(void)
> /* Callers that dereference the return value must hold vhost_vsock_mutex or the
> * RCU read lock.
> */
>-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net,
>+ enum vsock_net_mode mode)
> {
> struct vhost_vsock *vsock;
>
>@@ -78,9 +84,9 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> if (other_cid == 0)
> continue;
>
>- if (other_cid == guest_cid)
>+ if (other_cid == guest_cid &&
>+ vsock_net_check_mode(net, mode, vsock->net, vsock->net_mode))
> return vsock;
>-
> }
>
> return NULL;
>@@ -271,14 +277,16 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work)
> static int
> vhost_transport_send_pkt(struct sk_buff *skb)
> {
>+ enum vsock_net_mode mode = virtio_vsock_skb_net_mode(skb);
> struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
>+ struct net *net = virtio_vsock_skb_net(skb);
> struct vhost_vsock *vsock;
> int len = skb->len;
>
> rcu_read_lock();
>
> /* Find the vhost_vsock according to guest context id */
>- vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
>+ vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid), net, mode);
> if (!vsock) {
> rcu_read_unlock();
> kfree_skb(skb);
>@@ -305,7 +313,8 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
> rcu_read_lock();
>
> /* Find the vhost_vsock according to guest context id */
>- vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
>+ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid,
>+ sock_net(sk_vsock(vsk)), vsk->net_mode);
> if (!vsock)
> goto out;
>
>@@ -327,7 +336,7 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
> }
>
> static struct sk_buff *
>-vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
>+vhost_vsock_alloc_skb(struct vhost_vsock *vsock, struct vhost_virtqueue *vq,
> unsigned int out, unsigned int in)
> {
> struct virtio_vsock_hdr *hdr;
>@@ -353,6 +362,9 @@ vhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
> if (!skb)
> return NULL;
>
>+ virtio_vsock_skb_set_net(skb, vsock->net);
>+ virtio_vsock_skb_set_net_mode(skb, vsock->net_mode);
>+
> iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);
>
> hdr = virtio_vsock_hdr(skb);
>@@ -462,11 +474,12 @@ static struct virtio_transport vhost_transport = {
>
> static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
> {
>+ struct net *net = sock_net(sk_vsock(vsk));
> struct vhost_vsock *vsock;
> bool seqpacket_allow = false;
>
> rcu_read_lock();
>- vsock = vhost_vsock_get(remote_cid);
>+ vsock = vhost_vsock_get(remote_cid, net, vsk->net_mode);
>
> if (vsock)
> seqpacket_allow = vsock->seqpacket_allow;
>@@ -520,7 +533,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> break;
> }
>
>- skb = vhost_vsock_alloc_skb(vq, out, in);
>+ skb = vhost_vsock_alloc_skb(vsock, vq, out, in);
> if (!skb) {
> vq_err(vq, "Faulted on pkt\n");
> continue;
>@@ -652,8 +665,10 @@ static void vhost_vsock_free(struct vhost_vsock *vsock)
>
> static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
> {
>+
> struct vhost_virtqueue **vqs;
> struct vhost_vsock *vsock;
>+ struct net *net;
> int ret;
>
> /* This struct is large and allocation could fail, fall back to vmalloc
>@@ -669,6 +684,14 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
> goto out;
> }
>
>+ net = current->nsproxy->net_ns;
>+ vsock->net = get_net_track(net, &vsock->ns_tracker, GFP_KERNEL);
>+
>+ /* Cache the mode of the namespace so that if that netns mode changes,
>+ * the vhost_vsock will continue to function as expected.
>+ */
I think we should also document this in the commit description, and in
both places we should add the reason as well. (IIRC, it was to keep
things simple: it prevents a VM from changing modes while running, which
would otherwise require tracking all of its packets.)
>+ vsock->net_mode = vsock_net_mode(net);
>+
> vsock->guest_cid = 0; /* no CID assigned yet */
> vsock->seqpacket_allow = false;
>
>@@ -708,7 +731,7 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
> */
>
> /* If the peer is still valid, no need to reset connection */
>- if (vhost_vsock_get(vsk->remote_addr.svm_cid))
>+ if (vhost_vsock_get(vsk->remote_addr.svm_cid, sock_net(sk), vsk->net_mode))
> return;
>
> /* If the close timeout is pending, let it expire. This avoids races
>@@ -753,6 +776,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
> virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
>
> vhost_dev_cleanup(&vsock->dev);
>+ put_net_track(vsock->net, &vsock->ns_tracker);
Doing this after virtio_vsock_skb_queue_purge() should ensure that all
skbs have been drained, so no skb should still be in flight with a
pointer to this netns. Perhaps this clarifies my doubts about the skb
net, but should we do something similar for loopback as well?
And maybe we should also document that in virtio_vsock_skb_cb.
The rest LGTM.
Thanks,
Stefano
> kfree(vsock->dev.vqs);
> vhost_vsock_free(vsock);
> return 0;
>@@ -779,7 +803,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
>
> /* Refuse if CID is already in use */
> mutex_lock(&vhost_vsock_mutex);
>- other = vhost_vsock_get(guest_cid);
>+ other = vhost_vsock_get(guest_cid, vsock->net, vsock->net_mode);
> if (other && other != vsock) {
> mutex_unlock(&vhost_vsock_mutex);
> return -EADDRINUSE;
>
>--
>2.47.3
>
Powered by blists - more mailing lists