Message-Id: <20200930051722.389587-3-anant.thazhemadam@gmail.com>
Date: Wed, 30 Sep 2020 10:47:22 +0530
From: Anant Thazhemadam <anant.thazhemadam@...il.com>
To: mst@...hat.com, jasowang@...hat.com, davem@...emloft.net,
kuba@...nel.org, ast@...nel.org, daniel@...earbox.net,
hawk@...nel.org, john.fastabend@...il.com, kafai@...com,
songliubraving@...com, yhs@...com, andriin@...com,
kpsingh@...omium.org
Cc: Anant Thazhemadam <anant.thazhemadam@...il.com>,
linux-kernel-mentees@...ts.linuxfoundation.org,
virtualization@...ts.linux-foundation.org, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: [Linux-kernel-mentees][PATCH 2/2] net: reorder members of receive_queue in virtio_net for optimization
Analysis of the structure receive_queue using pahole gives the
following stats:
    /* size: 1280, cachelines: 20, members: 11 */
    /* sum members: 1220, holes: 1, sum holes: 60 */
    /* paddings: 2, sum paddings: 44 */
    /* forced alignments: 2, forced holes: 1, sum forced holes: 60 */
Reordering the members of receive_queue packs the byte hole in the
middle of receive_queue, and also lets more members fit fully within a
cacheline (of size 64 bytes) instead of unnecessarily straddling
cacheline boundaries.
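
As a loose, generic illustration of the idea (the structs and member
names below are made up, not taken from the driver), declaring the more
strictly aligned members first and grouping small members together
removes alignment holes:

        struct unordered {
                char tag;       /* offset  0, size 1; 7-byte hole follows  */
                long counter;   /* offset  8, size 8                       */
                char flag;      /* offset 16, size 1; 7 bytes tail padding */
        };                      /* total: 24 bytes on LP64                 */

        struct reordered {
                long counter;   /* offset 0, size 8                        */
                char tag;       /* offset 8, size 1                        */
                char flag;      /* offset 9, size 1; 6 bytes tail padding  */
        };                      /* total: 16 bytes on LP64                 */

pahole reports the 7-byte hole for the first layout and no holes for
the second.
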
Analysis using pahole after reordering the members gives the following
stats:
    /* size: 1280, cachelines: 20, members: 11 */
    /* padding: 60 */
    /* paddings: 2, sum paddings: 44 */
    /* forced alignments: 2 */
Signed-off-by: Anant Thazhemadam <anant.thazhemadam@...il.com>
---
The complete analysis done by pahole can be found below.
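
For reference, output of this form can be reproduced with an invocation
along the lines of the following (assuming the object file was built
with debug info; the path is only illustrative):

        $ pahole -C receive_queue drivers/net/virtio_net.o
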
Before the change:
struct receive_queue {
        struct virtqueue * vq; /* 0 8 */
        struct napi_struct napi __attribute__((__aligned__(8))); /* 8 392 */
        /* XXX last struct has 4 bytes of padding */
        /* --- cacheline 6 boundary (384 bytes) was 16 bytes ago --- */
        struct bpf_prog * xdp_prog; /* 400 8 */
        struct virtnet_rq_stats stats; /* 408 64 */
        /* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */
        struct page * pages; /* 472 8 */
        struct ewma_pkt_len mrg_avg_pkt_len; /* 480 8 */
        struct page_frag alloc_frag; /* 488 16 */
        struct scatterlist sg[19]; /* 504 608 */
        /* --- cacheline 17 boundary (1088 bytes) was 24 bytes ago --- */
        unsigned int min_buf_len; /* 1112 4 */
        char name[40]; /* 1116 40 */
        /* XXX 60 bytes hole, try to pack */
        /* --- cacheline 19 boundary (1216 bytes) --- */
        struct xdp_rxq_info xdp_rxq __attribute__((__aligned__(64))); /* 1216 64 */
        /* XXX last struct has 40 bytes of padding */

        /* size: 1280, cachelines: 20, members: 11 */
        /* sum members: 1220, holes: 1, sum holes: 60 */
        /* paddings: 2, sum paddings: 44 */
        /* forced alignments: 2, forced holes: 1, sum forced holes: 60 */
} __attribute__((__aligned__(64)));
After the change:
struct receive_queue {
        struct virtqueue * vq; /* 0 8 */
        struct napi_struct napi __attribute__((__aligned__(8))); /* 8 392 */
        /* XXX last struct has 4 bytes of padding */
        /* --- cacheline 6 boundary (384 bytes) was 16 bytes ago --- */
        char name[40]; /* 400 40 */
        struct bpf_prog * xdp_prog; /* 440 8 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct virtnet_rq_stats stats; /* 448 64 */
        /* --- cacheline 8 boundary (512 bytes) --- */
        struct scatterlist sg[19]; /* 512 608 */
        /* --- cacheline 17 boundary (1088 bytes) was 32 bytes ago --- */
        struct page_frag alloc_frag; /* 1120 16 */
        struct page * pages; /* 1136 8 */
        struct ewma_pkt_len mrg_avg_pkt_len; /* 1144 8 */
        /* --- cacheline 18 boundary (1152 bytes) --- */
        struct xdp_rxq_info xdp_rxq __attribute__((__aligned__(64))); /* 1152 64 */
        /* XXX last struct has 40 bytes of padding */
        /* --- cacheline 19 boundary (1216 bytes) --- */
        unsigned int min_buf_len; /* 1216 4 */

        /* size: 1280, cachelines: 20, members: 11 */
        /* padding: 60 */
        /* paddings: 2, sum paddings: 44 */
        /* forced alignments: 2 */
} __attribute__((__aligned__(64)));
It can be observed that the 60-byte hole has been eliminated. Also, more
members of receive_queue are now accommodated within a cacheline
(instead of unnecessarily crossing a cacheline boundary).
There are 60 bytes of padding at the end, since min_buf_len is only 4
bytes in size while xdp_rxq is 64 bytes and forced to 64-byte alignment.
If min_buf_len were declared anywhere other than at the end, the 60-byte
hole would open up again.
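
As a loose illustration of this effect (the structs and member names
below are made up, not taken from the driver), a small member placed in
front of a 64-byte-aligned member opens an interior hole, while placing
it after the aligned member only leaves tail padding, so the overall
size is unchanged either way:

        struct with_hole {
                unsigned int small;     /* offset 0, size 4 */
                /* 60-byte hole: "big" must start on a 64-byte boundary */
                char big[64] __attribute__((__aligned__(64)));
                                        /* offset 64, size 64 */
        };                              /* size 128 */

        struct without_hole {
                char big[64] __attribute__((__aligned__(64)));
                                        /* offset 0, size 64 */
                unsigned int small;     /* offset 64, size 4 */
                /* 60 bytes of tail padding keep the 64-byte alignment */
        };                              /* size 128 */
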
drivers/net/virtio_net.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f7bd85001cf0..b52db0b4879a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
struct napi_struct napi;
+ /* Name of this receive queue: input.$index */
+ char name[40];
+
struct bpf_prog __rcu *xdp_prog;
struct virtnet_rq_stats stats;
+ /* RX: fragments + linear part + virtio header */
+ struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+ /* Page frag for packet buffer allocation. */
+ struct page_frag alloc_frag;
+
/* Chain pages by the private ptr. */
struct page *pages;
/* Average packet length for mergeable receive buffers. */
struct ewma_pkt_len mrg_avg_pkt_len;
- /* Page frag for packet buffer allocation. */
- struct page_frag alloc_frag;
-
- /* RX: fragments + linear part + virtio header */
- struct scatterlist sg[MAX_SKB_FRAGS + 2];
+ struct xdp_rxq_info xdp_rxq;
/* Min single buffer size for mergeable buffers case. */
unsigned int min_buf_len;
-
- /* Name of this receive queue: input.$index */
- char name[40];
-
- struct xdp_rxq_info xdp_rxq;
};
/* Control VQ buffers: protected by the rtnl lock */
--
2.25.1