[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6sudpk4z2d3rgesfbpcc4ianmdrjeo7necmqoxmukmuxrhzgrf@6suyste6ilxt>
Date: Tue, 18 Nov 2025 19:11:07 +0100
From: Stefano Garzarella <sgarzare@...hat.com>
To: Bobby Eshleman <bobbyeshleman@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Stefan Hajnoczi <stefanha@...hat.com>, "Michael S. Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>, Eugenio Pérez <eperezma@...hat.com>,
Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, "K. Y. Srinivasan" <kys@...rosoft.com>,
Haiyang Zhang <haiyangz@...rosoft.com>, Wei Liu <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>,
Bryan Tan <bryan-bt.tan@...adcom.com>, Vishnu Dasa <vishnu.dasa@...adcom.com>,
Broadcom internal kernel review list <bcm-kernel-feedback-list@...adcom.com>, Shuah Khan <shuah@...nel.org>, linux-kernel@...r.kernel.org,
virtualization@...ts.linux.dev, netdev@...r.kernel.org, kvm@...r.kernel.org,
linux-hyperv@...r.kernel.org, linux-kselftest@...r.kernel.org,
Sargun Dhillon <sargun@...gun.me>, berrange@...hat.com, Bobby Eshleman <bobbyeshleman@...a.com>
Subject: Re: [PATCH net-next v10 04/11] vsock: add netns support to virtio
transports
On Mon, Nov 17, 2025 at 06:00:27PM -0800, Bobby Eshleman wrote:
>From: Bobby Eshleman <bobbyeshleman@...a.com>
>
>Add netns support to loopback and vhost. Keep netns disabled for
>virtio-vsock, but add necessary changes to comply with common API
>updates.
>
>Signed-off-by: Bobby Eshleman <bobbyeshleman@...a.com>
>---
>Changes in v10:
>- Splitting the patches complicates the series with meaningless placeholder
> values that eventually get replaced anyway, so to avoid that, this patch
> combines them into one. Links
Yeah, looking at the result, this is better IMO, thanks!
> to previous patches here:
> - Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-3-852787a37bed@meta.com/
> - Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-6-852787a37bed@meta.com/
> - Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-7-852787a37bed@meta.com/
>- remove placeholder values (Stefano)
>- update comment describing net/net_mode for
> virtio_transport_reset_no_sock()
>---
> drivers/vhost/vsock.c | 45 +++++++++++++++++------
> include/linux/virtio_vsock.h | 8 +++--
> net/vmw_vsock/virtio_transport.c | 10 ++++--
> net/vmw_vsock/virtio_transport_common.c | 63 ++++++++++++++++++++++++---------
> net/vmw_vsock/vsock_loopback.c | 8 +++--
> 5 files changed, 102 insertions(+), 32 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index c8319cd1c232..2846076d484f 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -46,6 +46,11 @@ static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
> struct vhost_vsock {
> struct vhost_dev dev;
> struct vhost_virtqueue vqs[2];
>+ struct net *net;
>+ netns_tracker ns_tracker;
>+
>+ /* The ns mode at the time vhost_vsock was created */
>+ enum vsock_net_mode net_mode;
>
> /* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
> struct hlist_node hash;
>@@ -72,7 +77,8 @@ static bool vhost_transport_supports_local_mode(void)
> /* Callers that dereference the return value must hold vhost_vsock_mutex or the
> * RCU read lock.
> */
>-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net,
>+ enum vsock_net_mode mode)
> {
> struct vhost_vsock *vsock;
>
>@@ -83,9 +89,10 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
> if (other_cid == 0)
> continue;
>
>- if (other_cid == guest_cid)
>+ if (other_cid == guest_cid &&
>+ vsock_net_check_mode(net, mode, vsock->net,
>+ vsock->net_mode))
> return vsock;
>-
> }
>
> return NULL;
>@@ -274,7 +281,8 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work)
> }
>
> static int
>-vhost_transport_send_pkt(struct sk_buff *skb)
>+vhost_transport_send_pkt(struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode)
> {
> struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
> struct vhost_vsock *vsock;
>@@ -283,7 +291,7 @@ vhost_transport_send_pkt(struct sk_buff *skb)
> rcu_read_lock();
>
> /* Find the vhost_vsock according to guest context id */
>- vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
>+ vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid), net, net_mode);
> if (!vsock) {
> rcu_read_unlock();
> kfree_skb(skb);
>@@ -310,7 +318,8 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
> rcu_read_lock();
>
> /* Find the vhost_vsock according to guest context id */
>- vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
>+ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid,
>+ sock_net(sk_vsock(vsk)), vsk->net_mode);
> if (!vsock)
> goto out;
>
>@@ -470,11 +479,12 @@ static struct virtio_transport vhost_transport = {
> static bool
> vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
> {
>+ struct net *net = sock_net(sk_vsock(vsk));
> struct vhost_vsock *vsock;
> bool seqpacket_allow = false;
>
> rcu_read_lock();
>- vsock = vhost_vsock_get(remote_cid);
>+ vsock = vhost_vsock_get(remote_cid, net, vsk->net_mode);
>
> if (vsock)
> seqpacket_allow = vsock->seqpacket_allow;
>@@ -545,7 +555,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
> if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
> le64_to_cpu(hdr->dst_cid) ==
> vhost_transport_get_local_cid())
>- virtio_transport_recv_pkt(&vhost_transport, skb);
>+ virtio_transport_recv_pkt(&vhost_transport, skb,
>+ vsock->net, vsock->net_mode);
> else
> kfree_skb(skb);
>
>@@ -662,6 +673,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
> {
> struct vhost_virtqueue **vqs;
> struct vhost_vsock *vsock;
>+ struct net *net;
> int ret;
>
> /* This struct is large and allocation could fail, fall back to vmalloc
>@@ -677,6 +689,17 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
> goto out;
> }
>
>+ net = current->nsproxy->net_ns;
>+ vsock->net = get_net_track(net, &vsock->ns_tracker, GFP_KERNEL);
>+
>+ /* Store the mode of the namespace at the time of creation. If this
>+ * namespace later changes from "global" to "local", we want this vsock
>+ * to continue operating normally and not suddenly break. For that
>+ * reason, we save the mode here and later use it when performing
>+ * socket lookups with vsock_net_check_mode() (see vhost_vsock_get()).
>+ */
>+ vsock->net_mode = vsock_net_mode(net);
>+
> vsock->guest_cid = 0; /* no CID assigned yet */
> vsock->seqpacket_allow = false;
>
>@@ -716,7 +739,8 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
> */
>
> /* If the peer is still valid, no need to reset connection */
>- if (vhost_vsock_get(vsk->remote_addr.svm_cid))
>+ if (vhost_vsock_get(vsk->remote_addr.svm_cid, sock_net(sk),
>+ vsk->net_mode))
> return;
>
> /* If the close timeout is pending, let it expire. This avoids races
>@@ -761,6 +785,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
> virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
>
> vhost_dev_cleanup(&vsock->dev);
>+ put_net_track(vsock->net, &vsock->ns_tracker);
> kfree(vsock->dev.vqs);
> vhost_vsock_free(vsock);
> return 0;
>@@ -787,7 +812,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
>
> /* Refuse if CID is already in use */
> mutex_lock(&vhost_vsock_mutex);
>- other = vhost_vsock_get(guest_cid);
>+ other = vhost_vsock_get(guest_cid, vsock->net, vsock->net_mode);
> if (other && other != vsock) {
> mutex_unlock(&vhost_vsock_mutex);
> return -EADDRINUSE;
>diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>index 0c67543a45c8..5ed6136a4ed4 100644
>--- a/include/linux/virtio_vsock.h
>+++ b/include/linux/virtio_vsock.h
>@@ -173,6 +173,8 @@ struct virtio_vsock_pkt_info {
> u32 remote_cid, remote_port;
> struct vsock_sock *vsk;
> struct msghdr *msg;
>+ struct net *net;
>+ enum vsock_net_mode net_mode;
> u32 pkt_len;
> u16 type;
> u16 op;
>@@ -185,7 +187,8 @@ struct virtio_transport {
> struct vsock_transport transport;
>
> /* Takes ownership of the packet */
>- int (*send_pkt)(struct sk_buff *skb);
>+ int (*send_pkt)(struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode);
>
> /* Used in MSG_ZEROCOPY mode. Checks, that provided data
> * (number of buffers) could be transmitted with zerocopy
>@@ -280,7 +283,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> void virtio_transport_destruct(struct vsock_sock *vsk);
>
> void virtio_transport_recv_pkt(struct virtio_transport *t,
>- struct sk_buff *skb);
>+ struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode);
> void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb);
> u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
> void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
>diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>index e585cb66c6f5..bc266bdb7faa 100644
>--- a/net/vmw_vsock/virtio_transport.c
>+++ b/net/vmw_vsock/virtio_transport.c
>@@ -243,7 +243,8 @@ static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struc
> }
>
> static int
>-virtio_transport_send_pkt(struct sk_buff *skb)
>+virtio_transport_send_pkt(struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode)
> {
> struct virtio_vsock_hdr *hdr;
> struct virtio_vsock *vsock;
>@@ -675,7 +676,12 @@ static void virtio_transport_rx_work(struct work_struct *work)
> virtio_vsock_skb_put(skb, payload_len);
>
> virtio_transport_deliver_tap_pkt(skb);
>- virtio_transport_recv_pkt(&virtio_transport, skb);
>+
>+ /* Force virtio-transport into global mode since it
>+ * does not yet support local-mode namespacing.
>+ */
>+ virtio_transport_recv_pkt(&virtio_transport, skb,
>+ NULL, VSOCK_NET_MODE_GLOBAL);
> }
> } while (!virtqueue_enable_cb(vq));
>
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index dcc8a1d5851e..168e7517a3f0 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -413,7 +413,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>
> virtio_transport_inc_tx_pkt(vvs, skb);
>
>- ret = t_ops->send_pkt(skb);
>+ ret = t_ops->send_pkt(skb, info->net, info->net_mode);
> if (ret < 0)
> break;
>
>@@ -527,6 +527,8 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
> struct virtio_vsock_pkt_info info = {
> .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> return virtio_transport_send_pkt_info(vsk, &info);
>@@ -1067,6 +1069,8 @@ int virtio_transport_connect(struct vsock_sock *vsk)
> struct virtio_vsock_pkt_info info = {
> .op = VIRTIO_VSOCK_OP_REQUEST,
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> return virtio_transport_send_pkt_info(vsk, &info);
>@@ -1082,6 +1086,8 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
> (mode & SEND_SHUTDOWN ?
> VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> return virtio_transport_send_pkt_info(vsk, &info);
>@@ -1108,6 +1114,8 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
> .msg = msg,
> .pkt_len = len,
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> return virtio_transport_send_pkt_info(vsk, &info);
>@@ -1145,6 +1153,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
> .op = VIRTIO_VSOCK_OP_RST,
> .reply = !!skb,
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> /* Send RST only if the original pkt is not a RST pkt */
>@@ -1156,15 +1166,27 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
>
> /* Normally packets are associated with a socket. There may be no socket if an
> * attempt was made to connect to a socket that does not exist.
>+ *
>+ * net and net_mode refer to the namespace of whoever sent the invalid message.
>+ * For loopback, this is the namespace of the socket. For vhost, this is the
>+ * namespace of the VM (i.e., vhost_vsock).
> */
> static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
>- struct sk_buff *skb)
>+ struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode)
> {
> struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
> struct virtio_vsock_pkt_info info = {
> .op = VIRTIO_VSOCK_OP_RST,
> .type = le16_to_cpu(hdr->type),
> .reply = true,
>+
>+ /* net or net_mode are not defined here because we pass
>+ * net and net_mode directly to t->send_pkt(), instead of
>+ * relying on virtio_transport_send_pkt_info() to pass them to
>+ * t->send_pkt(). They are not needed by
>+ * virtio_transport_alloc_skb().
>+ */
> };
> struct sk_buff *reply;
>
>@@ -1183,7 +1205,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
> if (!reply)
> return -ENOMEM;
>
>- return t->send_pkt(reply);
>+ return t->send_pkt(reply, net, net_mode);
> }
>
> /* This function should be called with sk_lock held and SOCK_DONE set */
>@@ -1465,6 +1487,8 @@ virtio_transport_send_response(struct vsock_sock *vsk,
> .remote_port = le32_to_cpu(hdr->src_port),
> .reply = true,
> .vsk = vsk,
>+ .net = sock_net(sk_vsock(vsk)),
>+ .net_mode = vsk->net_mode,
> };
>
> return virtio_transport_send_pkt_info(vsk, &info);
>@@ -1507,12 +1531,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> int ret;
>
> if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
>- virtio_transport_reset_no_sock(t, skb);
>+ virtio_transport_reset_no_sock(t, skb, sock_net(sk),
>+ vsk->net_mode);
> return -EINVAL;
> }
>
> if (sk_acceptq_is_full(sk)) {
>- virtio_transport_reset_no_sock(t, skb);
>+ virtio_transport_reset_no_sock(t, skb, sock_net(sk),
>+ vsk->net_mode);
> return -ENOMEM;
> }
>
>@@ -1520,13 +1546,15 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> * Subsequent enqueues would lead to a memory leak.
> */
> if (sk->sk_shutdown == SHUTDOWN_MASK) {
>- virtio_transport_reset_no_sock(t, skb);
>+ virtio_transport_reset_no_sock(t, skb, sock_net(sk),
>+ vsk->net_mode);
> return -ESHUTDOWN;
> }
>
> child = vsock_create_connected(sk);
> if (!child) {
>- virtio_transport_reset_no_sock(t, skb);
>+ virtio_transport_reset_no_sock(t, skb, sock_net(sk),
>+ vsk->net_mode);
> return -ENOMEM;
> }
>
>@@ -1548,7 +1576,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> */
> if (ret || vchild->transport != &t->transport) {
> release_sock(child);
>- virtio_transport_reset_no_sock(t, skb);
>+ virtio_transport_reset_no_sock(t, skb, sock_net(sk),
>+ vsk->net_mode);
> sock_put(child);
> return ret;
> }
>@@ -1576,7 +1605,8 @@ static bool virtio_transport_valid_type(u16 type)
> * lock.
> */
> void virtio_transport_recv_pkt(struct virtio_transport *t,
>- struct sk_buff *skb)
>+ struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode)
> {
> struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
> struct sockaddr_vm src, dst;
>@@ -1599,24 +1629,25 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> le32_to_cpu(hdr->fwd_cnt));
>
> if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ (void)virtio_transport_reset_no_sock(t, skb, net, net_mode);
> goto free_pkt;
> }
>
> /* The socket must be in connected or bound table
> * otherwise send reset back
> */
>- sk = vsock_find_connected_socket(&src, &dst);
>+ sk = vsock_find_connected_socket_net(&src, &dst, net, net_mode);
> if (!sk) {
>- sk = vsock_find_bound_socket(&dst);
>+ sk = vsock_find_bound_socket_net(&dst, net, net_mode);
> if (!sk) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ (void)virtio_transport_reset_no_sock(t, skb, net,
>+ net_mode);
> goto free_pkt;
> }
> }
>
> if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ (void)virtio_transport_reset_no_sock(t, skb, net, net_mode);
> sock_put(sk);
> goto free_pkt;
> }
>@@ -1635,7 +1666,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> */
> if (sock_flag(sk, SOCK_DONE) ||
> (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ (void)virtio_transport_reset_no_sock(t, skb, net, net_mode);
> release_sock(sk);
> sock_put(sk);
> goto free_pkt;
>@@ -1667,7 +1698,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> kfree_skb(skb);
> break;
> default:
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ (void)virtio_transport_reset_no_sock(t, skb, net, net_mode);
> kfree_skb(skb);
> break;
> }
>diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
>index 1e25c1a6b43f..a730fa74d2d9 100644
>--- a/net/vmw_vsock/vsock_loopback.c
>+++ b/net/vmw_vsock/vsock_loopback.c
>@@ -31,7 +31,8 @@ static bool vsock_loopback_supports_local_mode(void)
> return true;
> }
>
>-static int vsock_loopback_send_pkt(struct sk_buff *skb)
>+static int vsock_loopback_send_pkt(struct sk_buff *skb, struct net *net,
>+ enum vsock_net_mode net_mode)
> {
> struct vsock_loopback *vsock = &the_vsock_loopback;
> int len = skb->len;
>@@ -138,7 +139,10 @@ static void vsock_loopback_work(struct work_struct *work)
> */
> virtio_transport_consume_skb_sent(skb, false);
> virtio_transport_deliver_tap_pkt(skb);
>- virtio_transport_recv_pkt(&loopback_transport, skb);
>+
>+ virtio_transport_recv_pkt(&loopback_transport, skb,
>+ sock_net(skb->sk),
This will crash without the next patch, right? (I'll also comment there, but
I think the order of patches here is important.)
That said, should we add a WARN_ONCE/WARN_ON_ONCE here with a check on
skb->sk?
The rest LGTM,
Stefano
>+ vsock_sk(skb->sk)->net_mode);
> }
> }
>
>
>--
>2.47.3
>
Powered by blists - more mailing lists