Message-ID: <20251228104521-mutt-send-email-mst@kernel.org>
Date: Sun, 28 Dec 2025 14:31:36 -0500
From: "Michael S. Tsirkin" <mst@...hat.com>
To: Cong Wang <xiyou.wangcong@...il.com>
Cc: netdev@...r.kernel.org, virtualization@...ts.linux.dev,
kvm@...r.kernel.org, Cong Wang <cwang@...tikernel.io>,
Stefan Hajnoczi <stefanha@...hat.com>,
Stefano Garzarella <sgarzare@...hat.com>
Subject: Re: [Patch net] vsock: fix DMA cacheline overlap warning using
coherent memory
On Sat, Dec 27, 2025 at 05:54:51PM -0800, Cong Wang wrote:
> From: Cong Wang <cwang@...tikernel.io>
>
> The virtio-vsock driver triggers a DMA debug warning during probe:
>
> [ 9.267139] ------------[ cut here ]------------
> [ 9.268694] DMA-API: virtio-pci 0000:08:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported
> [ 9.271297] WARNING: kernel/dma/debug.c:601 at add_dma_entry+0x220/0x278, CPU#3: swapper/0/1
> [ 9.273628] CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.19.0-rc1+ #1383 PREEMPT(voluntary)
> [ 9.276124] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
> [ 9.278232] RIP: 0010:add_dma_entry+0x223/0x278
> [ 9.279456] Code: e8 63 ad 30 00 4c 8b 6d 00 48 89 ef e8 d5 6d aa 00 48 89 c6 eb 0a 49 c7 c5 80 55 90 82 4c 89 ee 48 8d 3d 4c 2a 1c 03 4c 89 ea <67> 48 0f b9 3a 48 89 df e8 de f1 ff ff 83 3d 85 e8 19 03 00 74 86
> [ 9.284284] RSP: 0018:ffff8880077ff6a8 EFLAGS: 00010246
> [ 9.285541] RAX: ffffffff82c433a0 RBX: ffff888007aed200 RCX: ffffffff81ec0591
> [ 9.287124] RDX: ffff88800ae7a830 RSI: ffffffff82c433a0 RDI: ffffffff845dc1f0
> [ 9.288801] RBP: ffff88800b2610c8 R08: 0000000000000007 R09: 0000000000000000
> [ 9.290407] R10: ffffffff814d2dcf R11: fffffbfff08b6fd4 R12: 1ffff11000effed5
> [ 9.292111] R13: ffff88800ae7a830 R14: 00000000ffffffef R15: 0000000000000202
> [ 9.293736] FS: 0000000000000000(0000) GS:ffff8880e7da8000(0000) knlGS:0000000000000000
> [ 9.295595] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 9.297095] CR2: 0000000000000000 CR3: 0000000003ca0000 CR4: 0000000000350ef0
> [ 9.298712] Call Trace:
> [ 9.299229] <TASK>
> [ 9.299709] ? __pfx_add_dma_entry+0x10/0x10
> [ 9.300729] ? _raw_spin_unlock_irqrestore+0x2e/0x44
> [ 9.301786] ? dma_entry_alloc+0x120/0x131
> [ 9.302650] ? debug_dma_map_phys+0xf2/0x118
> [ 9.303553] dma_map_phys+0x1b3/0x1c6
> [ 9.304392] vring_map_one_sg+0xdf/0x111
> [ 9.305312] virtqueue_add_split+0x348/0x767
> [ 9.306243] ? __pfx_virtqueue_add_split+0x10/0x10
> [ 9.307243] ? lock_acquire.part.0+0xb0/0x1c6
> [ 9.308246] ? find_held_lock+0x2b/0x71
> [ 9.309078] ? local_clock_noinstr+0x32/0x9c
> [ 9.310070] ? local_clock+0x11/0x24
> [ 9.310915] ? virtqueue_add+0x3e/0x89
> [ 9.311881] virtqueue_add_inbuf+0x73/0x9a
> [ 9.312840] ? __pfx_virtqueue_add_inbuf+0x10/0x10
> [ 9.313968] ? sg_assign_page+0xd/0x32
> [ 9.314907] ? sg_init_one+0x75/0x84
> [ 9.316025] virtio_vsock_event_fill_one.isra.0+0x86/0xae
> [ 9.317236] ? __pfx_virtio_vsock_event_fill_one.isra.0+0x10/0x10
> [ 9.318529] virtio_vsock_vqs_start+0xab/0xf7
> [ 9.319453] virtio_vsock_probe.part.0+0x3aa/0x3f0
> [ 9.320546] virtio_dev_probe+0x397/0x454
> [ 9.321431] ? __pfx_virtio_dev_probe+0x10/0x10
> [ 9.322382] ? kernfs_create_link+0xc1/0xec
> [ 9.323290] ? kernfs_put+0x19/0x33
> [ 9.324106] ? sysfs_do_create_link_sd+0x7a/0xc0
> [ 9.325104] really_probe+0x167/0x316
> [ 9.325932] ? __pfx___driver_attach+0x10/0x10
> [ 9.326905] __driver_probe_device+0x11e/0x155
> [ 9.327934] driver_probe_device+0x4a/0xc4
> [ 9.328828] __driver_attach+0x129/0x14c
> [ 9.329832] bus_for_each_dev+0xd9/0x12b
> [ 9.330741] ? __pfx_bus_for_each_dev+0x10/0x10
> [ 9.331851] ? __lock_release.isra.0+0xdb/0x193
> [ 9.332847] ? bus_add_driver+0xef/0x246
> [ 9.333700] bus_add_driver+0x10f/0x246
> [ 9.334521] driver_register+0x12c/0x181
> [ 9.335572] ? __pfx_virtio_vsock_init+0x10/0x10
> [ 9.336949] virtio_vsock_init+0x4f/0x75
> [ 9.337998] do_one_initcall+0x15e/0x371
> [ 9.339056] ? __pfx_do_one_initcall+0x10/0x10
> [ 9.340313] ? parameqn+0x11/0x6b
> [ 9.341205] ? poison_kmalloc_redzone+0x44/0x69
> [ 9.342435] ? kasan_save_track+0x10/0x29
> [ 9.343513] ? rcu_is_watching+0x1c/0x3c
> [ 9.344696] ? trace_kmalloc+0x82/0x97
> [ 9.345733] ? __kmalloc_noprof+0x41c/0x446
> [ 9.346888] ? do_initcalls+0x2c/0x15e
> [ 9.348064] do_initcalls+0x131/0x15e
> [ 9.349051] kernel_init_freeable+0x250/0x2a2
> [ 9.350201] ? __pfx_kernel_init+0x10/0x10
> [ 9.351285] kernel_init+0x18/0x136
> [ 9.352307] ? __pfx_kernel_init+0x10/0x10
> [ 9.353383] ret_from_fork+0x78/0x2e5
> [ 9.354371] ? __pfx_ret_from_fork+0x10/0x10
> [ 9.355501] ? __switch_to+0x453/0x4c2
> [ 9.356591] ? __pfx_kernel_init+0x10/0x10
> [ 9.357666] ret_from_fork_asm+0x1a/0x30
> [ 9.358713] </TASK>
> [ 9.359305] irq event stamp: 1580331
> [ 9.360349] hardirqs last enabled at (1580341): [<ffffffff813c0bf5>] __up_console_sem+0x53/0x59
> [ 9.362650] hardirqs last disabled at (1580348): [<ffffffff813c0bda>] __up_console_sem+0x38/0x59
> [ 9.365096] softirqs last enabled at (1580150): [<ffffffff812ecf1e>] handle_softirqs+0x46b/0x4bd
> [ 9.367426] softirqs last disabled at (1580145): [<ffffffff812ecfcb>] __irq_exit_rcu+0x4b/0xc3
> [ 9.369750] ---[ end trace 0000000000000000 ]---
> [ 9.370965] DMA-API: Mapped at:
> [ 9.371885] dma_entry_alloc+0x115/0x131
> [ 9.372921] debug_dma_map_phys+0x4c/0x118
> [ 9.374014] dma_map_phys+0x1b3/0x1c6
> [ 9.375000] vring_map_one_sg+0xdf/0x111
> [ 9.376161] virtqueue_add_split+0x348/0x767
>
> This occurs because event_list[8] contains 8 struct virtio_vsock_event
> entries, each only 4 bytes (__le32 id). When virtio_vsock_event_fill()
> creates DMA mappings for all 8 events via virtqueue_add_inbuf(), these
> 32 bytes all fit within a single 64-byte cacheline.
>
> The DMA debug subsystem warns about this because multiple DMA_FROM_DEVICE
> mappings within the same cacheline can cause data corruption: if the CPU
> writes to one event while the device is writing another event in the same
> cacheline, the CPU cache writeback could overwrite device data.
But the CPU never writes into one of these buffers, or did I miss something?
Isn't the real issue other data sharing the same cache line?
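To spell out what dma-debug is actually checking: it flags any two
streaming mappings that land in the same 64-byte cacheline, whether or
not the CPU ever touches the buffers. Rough sketch, not the driver code
(dev here stands for whatever struct device does the mapping):

        /* 8 events * 4 bytes = 32 bytes, all within one 64-byte cacheline */
        struct virtio_vsock_event *ev = kmalloc_array(8, sizeof(*ev), GFP_KERNEL);
        dma_addr_t a, b;

        /* first mapping records the cacheline in dma-debug's tracking tree */
        a = dma_map_single(dev, &ev[0], sizeof(ev[0]), DMA_FROM_DEVICE);
        /* second mapping overlaps the tracked cacheline ->
         * "cacheline tracking EEXIST, overlapping mappings aren't supported"
         */
        b = dma_map_single(dev, &ev[1], sizeof(ev[1]), DMA_FROM_DEVICE);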
> Fix this by allocating the event buffers from DMA coherent memory using
> dma_alloc_coherent(). This memory is always coherent between CPU and
> device, eliminating the cacheline overlap issue. The premapped virtqueue
> API (virtqueue_add_inbuf_premapped) is used to prevent virtio from
> performing redundant DMA mapping on the already-coherent memory.
>
> Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
> Cc: "Michael S. Tsirkin" <mst@...hat.com>
> Cc: Stefan Hajnoczi <stefanha@...hat.com>
> Cc: Stefano Garzarella <sgarzare@...hat.com>
> Signed-off-by: Cong Wang <cwang@...tikernel.io>
> ---
> net/vmw_vsock/virtio_transport.c | 47 ++++++++++++++++++++++++++------
> 1 file changed, 38 insertions(+), 9 deletions(-)
>
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index 8c867023a2e5..34606de587c0 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -26,6 +26,8 @@ static struct virtio_vsock __rcu *the_virtio_vsock;
> static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
> static struct virtio_transport virtio_transport; /* forward declaration */
>
> +#define VIRTIO_VSOCK_EVENT_BUFS 8
> +
> struct virtio_vsock {
> struct virtio_device *vdev;
> struct virtqueue *vqs[VSOCK_VQ_MAX];
> @@ -59,7 +61,8 @@ struct virtio_vsock {
> */
> struct mutex event_lock;
> bool event_run;
> - struct virtio_vsock_event event_list[8];
> + struct virtio_vsock_event *event_list; /* DMA coherent memory */
> + dma_addr_t event_list_dma;
>
> u32 guest_cid;
> bool seqpacket_allow;
> @@ -381,16 +384,19 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
>
> /* event_lock must be held */
> static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
> - struct virtio_vsock_event *event)
> + struct virtio_vsock_event *event,
> + dma_addr_t dma_addr)
> {
> struct scatterlist sg;
> struct virtqueue *vq;
>
> vq = vsock->vqs[VSOCK_VQ_EVENT];
>
> - sg_init_one(&sg, event, sizeof(*event));
> + sg_init_table(&sg, 1);
> + sg_dma_address(&sg) = dma_addr;
> + sg_dma_len(&sg) = sizeof(*event);
>
> - return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
> + return virtqueue_add_inbuf_premapped(vq, &sg, 1, event, NULL, GFP_KERNEL);
> }
>
> /* event_lock must be held */
> @@ -398,10 +404,12 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
> {
> size_t i;
>
> - for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
> + for (i = 0; i < VIRTIO_VSOCK_EVENT_BUFS; i++) {
> struct virtio_vsock_event *event = &vsock->event_list[i];
> + dma_addr_t dma_addr = vsock->event_list_dma +
> + i * sizeof(*event);
>
> - virtio_vsock_event_fill_one(vsock, event);
> + virtio_vsock_event_fill_one(vsock, event, dma_addr);
> }
>
> virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
> @@ -461,10 +469,14 @@ static void virtio_transport_event_work(struct work_struct *work)
>
> virtqueue_disable_cb(vq);
> while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
> + size_t idx = event - vsock->event_list;
> + dma_addr_t dma_addr = vsock->event_list_dma +
> + idx * sizeof(*event);
> +
> if (len == sizeof(*event))
> virtio_vsock_event_handle(vsock, event);
>
> - virtio_vsock_event_fill_one(vsock, event);
> + virtio_vsock_event_fill_one(vsock, event, dma_addr);
> }
> } while (!virtqueue_enable_cb(vq));
>
> @@ -796,6 +808,15 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
>
> vsock->vdev = vdev;
>
> + vsock->event_list = dma_alloc_coherent(vdev->dev.parent,
> + VIRTIO_VSOCK_EVENT_BUFS *
> + sizeof(*vsock->event_list),
> + &vsock->event_list_dma,
> + GFP_KERNEL);
> + if (!vsock->event_list) {
> + ret = -ENOMEM;
> + goto out_free_vsock;
> + }
>
> mutex_init(&vsock->tx_lock);
> mutex_init(&vsock->rx_lock);
> @@ -813,7 +834,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
>
> ret = virtio_vsock_vqs_init(vsock);
> if (ret < 0)
> - goto out;
> + goto out_free_event_list;
>
> for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++)
> vsock->out_sgs[i] = &vsock->out_bufs[i];
> @@ -825,8 +846,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
>
> return 0;
>
> -out:
> +out_free_event_list:
> + dma_free_coherent(vdev->dev.parent,
> + VIRTIO_VSOCK_EVENT_BUFS * sizeof(*vsock->event_list),
> + vsock->event_list, vsock->event_list_dma);
> +out_free_vsock:
> kfree(vsock);
> +out:
> mutex_unlock(&the_virtio_vsock_mutex);
> return ret;
> }
> @@ -853,6 +879,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
>
> mutex_unlock(&the_virtio_vsock_mutex);
>
> + dma_free_coherent(vdev->dev.parent,
> + VIRTIO_VSOCK_EVENT_BUFS * sizeof(*vsock->event_list),
> + vsock->event_list, vsock->event_list_dma);
> kfree(vsock);
You want virtqueue_map_alloc_coherent/virtqueue_map_free_coherent
methinks.
Then you can use normal inbuf/outbuf and not muck around with premapped.
I prefer keeping the fancy premapped APIs for perf-sensitive code;
let virtio manage the DMA API otherwise.
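Something like this, roughly (sketch only - I'm assuming here that the
virtqueue_map_alloc_coherent()/virtqueue_map_free_coherent() prototypes
mirror dma_alloc_coherent()/dma_free_coherent() with the vq as the
first argument, so check the actual prototypes):

        /* probe, once the event vq exists */
        vsock->event_list =
                virtqueue_map_alloc_coherent(vsock->vqs[VSOCK_VQ_EVENT],
                                             VIRTIO_VSOCK_EVENT_BUFS *
                                             sizeof(*vsock->event_list),
                                             &vsock->event_list_dma,
                                             GFP_KERNEL);

        /* remove / error path */
        virtqueue_map_free_coherent(vsock->vqs[VSOCK_VQ_EVENT],
                                    VIRTIO_VSOCK_EVENT_BUFS *
                                    sizeof(*vsock->event_list),
                                    vsock->event_list,
                                    vsock->event_list_dma);

The allocation would then have to move after virtio_vsock_vqs_init(),
since it needs the event vq, and the fill path can go back to plain
sg_init_one()/virtqueue_add_inbuf().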
> }
>
> --
> 2.34.1