[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180331195900.183604-18-edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:58 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>
Cc: netdev <netdev@...r.kernel.org>, Florian Westphal <fw@...len.de>,
Herbert Xu <herbert@...dor.apana.org.au>,
Thomas Graf <tgraf@...g.ch>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Alexander Aring <alex.aring@...il.com>,
Stefan Schmidt <stefan@....samsung.com>,
Kirill Tkhai <ktkhai@...tuozzo.com>,
Eric Dumazet <edumazet@...gle.com>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: [PATCH v4 net-next 17/19] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately
this integer is currently in a different cache line than skb->next,
meaning that we use two cache lines per skb when finding the insertion point.
By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields
in a single cache line and save precious memory bandwidth.
Note that after the fast path added by Changli Gao in commit
d6bebca92c66 ("fragment: add fast path for in-order fragments")
this change won't help the fast path, since we still need
to access prev->len (2nd cache line), but will show great
benefits when the slow path is entered, since we perform
a linear scan of a potentially long list.
Also, note that this potentially long list is an attack vector;
we might eventually consider using an rb-tree there.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/linux/skbuff.h | 1 +
net/ipv4/ip_fragment.c | 35 ++++++++++++++---------------------
2 files changed, 15 insertions(+), 21 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47082f54ec1f645bed95a37d8c101efd160c60dc..9065477ed255a48f7e01b8a28ea6321cce9127f5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -672,6 +672,7 @@ struct sk_buff {
* UDP receive path is one user.
*/
unsigned long dev_scratch;
+ int ip_defrag_offset;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb185d9a5cc771778fb650695e2d9894724efc23..994fa70a910f472ebecc336ddd62d1442014eaba 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,14 +57,6 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -370,7 +362,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* any overlaps are eliminated.
*/
if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+ int i = (prev->ip_defrag_offset + prev->len) - offset;
if (i > 0) {
offset += i;
@@ -387,8 +379,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
err = -ENOMEM;
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+ while (next && next->ip_defrag_offset < end) {
+ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
if (i < next->len) {
/* Eat head of the next overlapped fragment
@@ -396,7 +388,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
*/
if (!pskb_pull(next, i))
goto err;
- FRAG_CB(next)->offset += i;
+ next->ip_defrag_offset += i;
qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
@@ -420,7 +412,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
}
- FRAG_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -431,11 +429,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
else
qp->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -511,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
--
2.17.0.rc1.321.gba9d0f2565-goog
Powered by blists - more mailing lists