[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200117150913.29302-1-maximmi@mellanox.com>
Date: Fri, 17 Jan 2020 15:09:44 +0000
From: Maxim Mikityanskiy <maximmi@...lanox.com>
To: Alexander Lobakin <alobakin@...nk.ru>,
Edward Cree <ecree@...arflare.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"David S. Miller" <davem@...emloft.net>,
Tariq Toukan <tariqt@...lanox.com>,
Jiri Pirko <jiri@...lanox.com>,
Eric Dumazet <edumazet@...gle.com>,
Maxim Mikityanskiy <maximmi@...lanox.com>
Subject: [PATCH net] net: Fix packet reordering caused by GRO and listified RX
cooperation
Commit 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in
napi_gro_receive()") introduces batching of GRO_NORMAL packets in
napi_skb_finish. However, dev_gro_receive, that is called just before
napi_skb_finish, can also pass skbs to the networking stack: e.g., when
the GRO session is flushed, napi_gro_complete is called, which passes pp
directly to netif_receive_skb_internal, skipping napi->rx_list. It means
that the packet stored in pp will be handled by the stack earlier than
the packets that arrived before, but are still waiting in napi->rx_list.
It leads to TCP reorderings that can be observed in the TCPOFOQueue
counter in netstat.
This commit fixes the reordering issue by making napi_gro_complete also
use napi->rx_list, so that all packets going through GRO will keep their
order.
Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()")
Signed-off-by: Maxim Mikityanskiy <maximmi@...lanox.com>
Cc: Alexander Lobakin <alobakin@...nk.ru>
Cc: Edward Cree <ecree@...arflare.com>
---
Alexander and Edward, please verify the correctness of this patch. If
it's necessary to pass that SKB to the networking stack right away, I
can change this patch to flush napi->rx_list by calling gro_normal_list
first, instead of putting the SKB in the list.
net/core/dev.c | 55 +++++++++++++++++++++++++-------------------------
1 file changed, 28 insertions(+), 27 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ad39c87b7fd..db7a105bbc77 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5491,9 +5491,29 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
-static int napi_gro_complete(struct sk_buff *skb)
+static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5526,7 +5546,8 @@ static int napi_gro_complete(struct sk_buff *skb)
}
out:
- return netif_receive_skb_internal(skb);
+ gro_normal_one(napi, skb);
+ return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -5539,7 +5560,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
skb_list_del_init(skb);
- napi_gro_complete(skb);
+ napi_gro_complete(napi, skb);
napi->gro_hash[index].count--;
}
@@ -5641,7 +5662,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
}
}
-static void gro_flush_oldest(struct list_head *head)
+static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
{
struct sk_buff *oldest;
@@ -5657,7 +5678,7 @@ static void gro_flush_oldest(struct list_head *head)
* SKB to the chain.
*/
skb_list_del_init(oldest);
- napi_gro_complete(oldest);
+ napi_gro_complete(napi, oldest);
}
INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
@@ -5733,7 +5754,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (pp) {
skb_list_del_init(pp);
- napi_gro_complete(pp);
+ napi_gro_complete(napi, pp);
napi->gro_hash[hash].count--;
}
@@ -5744,7 +5765,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
goto normal;
if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
- gro_flush_oldest(gro_head);
+ gro_flush_oldest(napi, gro_head);
} else {
napi->gro_hash[hash].count++;
}
@@ -5802,26 +5823,6 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
--
2.20.1
Powered by blists - more mailing lists