[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <8ec3fd0e-7ec6-4f8b-7c44-fdc853bc9d9d@solarflare.com>
Date: Tue, 4 Dec 2018 17:37:57 +0000
From: Edward Cree <ecree@...arflare.com>
To: <davem@...emloft.net>
CC: <linux-net-drivers@...arflare.com>, <netdev@...r.kernel.org>
Subject: [PATCH net] net: use skb_list_del_init() to remove from RX sublists
list_del() leaves the skb->next pointer poisoned, which can then lead to
a crash in e.g. OVS forwarding. For example, setting up an OVS VXLAN
forwarding bridge on sfc as per:
========
$ ovs-vsctl show
5dfd9c47-f04b-4aaa-aa96-4fbb0a522a30
Bridge "br0"
Port "br0"
Interface "br0"
type: internal
Port "enp6s0f0"
Interface "enp6s0f0"
Port "vxlan0"
Interface "vxlan0"
type: vxlan
options: {key="1", local_ip="10.0.0.5", remote_ip="10.0.0.4"}
ovs_version: "2.5.0"
========
(where 10.0.0.5 is an address on enp6s0f1)
and sending traffic across it will lead to the following panic:
========
general protection fault: 0000 [#1] SMP PTI
CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.20.0-rc3-ehc+ #701
Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013
RIP: 0010:dev_hard_start_xmit+0x38/0x200
Code: 53 48 89 fb 48 83 ec 20 48 85 ff 48 89 54 24 08 48 89 4c 24 18 0f 84 ab 01 00 00 48 8d 86 90 00 00 00 48 89 f5 48 89 44 24 10 <4c> 8b 33 48 c7 03 00 00 00 00 48 8b 05 c7 d1 b3 00 4d 85 f6 0f 95
RSP: 0018:ffff888627b437e0 EFLAGS: 00010202
RAX: 0000000000000000 RBX: dead000000000100 RCX: ffff88862279c000
RDX: ffff888614a342c0 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff888618a88000 R08: 0000000000000001 R09: 00000000000003e8
R10: 0000000000000000 R11: ffff888614a34140 R12: 0000000000000000
R13: 0000000000000062 R14: dead000000000100 R15: ffff888616430000
FS: 0000000000000000(0000) GS:ffff888627b40000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6d2bc6d000 CR3: 000000000200a000 CR4: 00000000000006e0
Call Trace:
<IRQ>
__dev_queue_xmit+0x623/0x870
? masked_flow_lookup+0xf7/0x220 [openvswitch]
? ep_poll_callback+0x101/0x310
do_execute_actions+0xaba/0xaf0 [openvswitch]
? __wake_up_common+0x8a/0x150
? __wake_up_common_lock+0x87/0xc0
? queue_userspace_packet+0x31c/0x5b0 [openvswitch]
ovs_execute_actions+0x47/0x120 [openvswitch]
ovs_dp_process_packet+0x7d/0x110 [openvswitch]
ovs_vport_receive+0x6e/0xd0 [openvswitch]
? dst_alloc+0x64/0x90
? rt_dst_alloc+0x50/0xd0
? ip_route_input_slow+0x19a/0x9a0
? __udp_enqueue_schedule_skb+0x198/0x1b0
? __udp4_lib_rcv+0x856/0xa30
? __udp4_lib_rcv+0x856/0xa30
? cpumask_next_and+0x19/0x20
? find_busiest_group+0x12d/0xcd0
netdev_frame_hook+0xce/0x150 [openvswitch]
__netif_receive_skb_core+0x205/0xae0
__netif_receive_skb_list_core+0x11e/0x220
netif_receive_skb_list+0x203/0x460
? __efx_rx_packet+0x335/0x5e0 [sfc]
efx_poll+0x182/0x320 [sfc]
net_rx_action+0x294/0x3c0
__do_softirq+0xca/0x297
irq_exit+0xa6/0xb0
do_IRQ+0x54/0xd0
common_interrupt+0xf/0xf
</IRQ>
========
So, in all listified-receive handling, instead pull skbs off the lists with
skb_list_del_init().
Fixes: 9af86f933894 ("net: core: fix use-after-free in __netif_receive_skb_list_core")
Fixes: 7da517a3bc52 ("net: core: Another step of skb receive list processing")
Fixes: a4ca8b7df73c ("net: ipv4: fix drop handling in ip_list_rcv() and ip_list_rcv_finish()")
Fixes: d8269e2cbf90 ("net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()")
Signed-off-by: Edward Cree <ecree@...arflare.com>
---
I'm not sure if these are the right Fixes tags, or if I should instead be
fingering some commit that made dev_hard_start_xmit() more sensitive to
skb->next.
Also, I only saw a crash from the list_del() in __netif_receive_skb_list_core()
but I converted all of them in the listified RX path, in case any others
have similar ways to escape into paths that care about skb->next.
net/core/dev.c | 8 ++++----
net/ipv4/ip_input.c | 4 ++--
net/ipv6/ip6_input.c | 4 ++--
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3470e7fff1f4..265dd2f8ded8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5014,7 +5014,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
- list_del(&skb->list);
+ skb_list_del_init(skb);
__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
@@ -5170,7 +5170,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(netdev_tstamp_prequeue, skb);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
}
@@ -5181,7 +5181,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
rcu_read_lock();
list_for_each_entry_safe(skb, next, head, list) {
xdp_prog = rcu_dereference(skb->dev->xdp_prog);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
list_add_tail(&skb->list, &sublist);
}
@@ -5200,7 +5200,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
if (cpu >= 0) {
/* Will be handled, remove from list */
- list_del(&skb->list);
+ skb_list_del_init(skb);
enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
}
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 35a786c0aaa0..e609b08c9df4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip_rcv_core(skb, net);
if (skb == NULL)
continue;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 96577e742afd..c1d85830c906 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -296,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
continue;
Powered by blists - more mailing lists