lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 22 Feb 2017 20:30:31 -0800
From:   Jakub Kicinski <jakub.kicinski@...ronome.com>
To:     Roopa Prabhu <roopa@...ulusnetworks.com>,
        Pravin B Shelar <pshelar@....org>
Cc:     netdev@...r.kernel.org
Subject: Re: VXLAN RCU error

On Wed, 22 Feb 2017 14:27:45 -0800, Jakub Kicinski wrote:
> Hi Roopa!

Ah, sorry, it seems like this splat may be coming all the way from
c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.").

> I get this RCU error on net 12d656af4e3d2781b9b9f52538593e1717e7c979:
> 
> [ 1571.067134] ===============================
> [ 1571.071842] [ ERR: suspicious RCU usage.  ]
> [ 1571.076546] 4.10.0-debug-03232-g12d656af4e3d #1 Tainted: G        W  O   
> [ 1571.084166] -------------------------------
> [ 1571.088867] ../drivers/net/vxlan.c:2111 suspicious rcu_dereference_check() usage!
> [ 1571.097286] 
> [ 1571.097286] other info that might help us debug this:
> [ 1571.097286] 
> [ 1571.106305] 
> [ 1571.106305] rcu_scheduler_active = 2, debug_locks = 1
> [ 1571.113654] 3 locks held by ping/13826:
> [ 1571.117968]  #0:  (sk_lock-AF_INET){+.+.+.}, at: [<ffffffffa1cd4972>] raw_sendmsg+0x14e2/0x2e40
> [ 1571.127758]  #1:  (rcu_read_lock_bh){......}, at: [<ffffffffa1be9594>] ip_finish_output2+0x274/0x1390
> [ 1571.138135]  #2:  (rcu_read_lock_bh){......}, at: [<ffffffffa1a9b63c>] __dev_queue_xmit+0x1ec/0x2750
> [ 1571.148408] 
> [ 1571.148408] stack backtrace:
> [ 1571.153326] CPU: 10 PID: 13826 Comm: ping Tainted: G        W  O    4.10.0-debug-03232-g12d656af4e3d #1
> [ 1571.163877] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.3.4 11/08/2016
> [ 1571.172290] Call Trace:
> [ 1571.175053]  dump_stack+0xcd/0x134
> [ 1571.178881]  ? _atomic_dec_and_lock+0xcc/0xcc
> [ 1571.183782]  ? print_lock+0xb2/0xb5
> [ 1571.187711]  lockdep_rcu_suspicious+0x123/0x170
> [ 1571.192807]  vxlan_xmit_one+0x1931/0x4270 [vxlan]
> [ 1571.198126]  ? encap_bypass_if_local+0x380/0x380 [vxlan]
> [ 1571.204109]  ? sched_clock+0x9/0x10
> [ 1571.208034]  ? sched_clock_cpu+0x20/0x2c0
> [ 1571.212541]  ? unwind_get_return_address+0x1b8/0x2b0
> [ 1571.218132]  ? __lock_acquire+0x6d6/0x3160
> [ 1571.222740]  vxlan_xmit+0x756/0x4f90 [vxlan]
> [ 1571.227541]  ? vxlan_xmit_one+0x4270/0x4270 [vxlan]
> [ 1571.233014]  ? netif_skb_features+0x2be/0xba0
> [ 1571.237919]  dev_hard_start_xmit+0x1ab/0xa70
> [ 1571.242724]  __dev_queue_xmit+0x137b/0x2750
> [ 1571.247425]  ? __dev_queue_xmit+0x1ec/0x2750
> [ 1571.252228]  ? netdev_pick_tx+0x330/0x330
> [ 1571.256735]  ? debug_smp_processor_id+0x17/0x20
> [ 1571.261826]  ? get_lock_stats+0x1d/0x160
> [ 1571.266241]  ? mark_held_locks+0x105/0x280
> [ 1571.270850]  ? memcpy+0x45/0x50
> [ 1571.274391]  dev_queue_xmit+0x10/0x20
> [ 1571.278511]  neigh_resolve_output+0x43e/0x7f0
> [ 1571.283405]  ? ip_finish_output2+0x69d/0x1390
> [ 1571.288308]  ip_finish_output2+0x69d/0x1390
> [ 1571.293008]  ? ip_finish_output2+0x274/0x1390
> [ 1571.297909]  ? ip_copy_metadata+0x7e0/0x7e0
> [ 1571.302610]  ? get_lock_stats+0x1d/0x160
> [ 1571.307027]  ip_finish_output+0x598/0xc50
> [ 1571.311537]  ip_output+0x371/0x630
> [ 1571.315362]  ? ip_output+0x1dc/0x630
> [ 1571.319383]  ? ip_mc_output+0xe70/0xe70
> [ 1571.323694]  ? kfree+0x372/0x5a0
> [ 1571.327325]  ? mark_held_locks+0x105/0x280
> [ 1571.331933]  ? __ip_make_skb+0xdd1/0x2200
> [ 1571.336457]  ip_local_out+0x8f/0x180
> [ 1571.340480]  ip_send_skb+0x44/0xf0
> [ 1571.344306]  ip_push_pending_frames+0x5a/0x80
> [ 1571.349203]  raw_sendmsg+0x164d/0x2e40
> [ 1571.353422]  ? debug_check_no_locks_freed+0x350/0x350
> [ 1571.359099]  ? dst_output+0x1b0/0x1b0
> [ 1571.363217]  ? get_lock_stats+0x1d/0x160
> [ 1571.367640]  ? __might_fault+0x199/0x230
> [ 1571.372052]  ? kasan_check_write+0x14/0x20
> [ 1571.382002]  ? _copy_from_user+0xb9/0x130
> [ 1571.386513]  ? rw_copy_check_uvector+0x8d/0x490
> [ 1571.391609]  ? import_iovec+0xae/0x5d0
> [ 1571.395826]  ? push_pipe+0xd00/0xd00
> [ 1571.399847]  ? kasan_check_write+0x14/0x20
> [ 1571.404450]  ? _copy_from_user+0xb9/0x130
> [ 1571.408960]  inet_sendmsg+0x19f/0x5f0
> [ 1571.413071]  ? inet_recvmsg+0x980/0x980
> [ 1571.417386]  sock_sendmsg+0xe2/0x170
> [ 1571.421408]  ___sys_sendmsg+0x66e/0x960
> [ 1571.425726]  ? mem_cgroup_commit_charge+0x144/0x2720
> [ 1571.431303]  ? copy_msghdr_from_user+0x610/0x610
> [ 1571.436495]  ? debug_smp_processor_id+0x17/0x20
> [ 1571.441584]  ? get_lock_stats+0x1d/0x160
> [ 1571.445995]  ? mem_cgroup_uncharge_swap+0x250/0x250
> [ 1571.451474]  ? page_add_new_anon_rmap+0x173/0x3a0
> [ 1571.456762]  ? handle_mm_fault+0x1589/0x3820
> [ 1571.461566]  ? handle_mm_fault+0x1589/0x3820
> [ 1571.466362]  ? handle_mm_fault+0x191/0x3820
> [ 1571.471070]  ? __fdget+0x13/0x20
> [ 1571.474702]  ? get_lock_stats+0x1d/0x160
> [ 1571.479116]  __sys_sendmsg+0xc6/0x150
> [ 1571.483234]  ? SyS_shutdown+0x1b0/0x1b0
> [ 1571.487551]  ? __do_page_fault+0x556/0xe50
> [ 1571.492158]  ? trace_hardirqs_on_thunk+0x1a/0x1c
> [ 1571.497340]  SyS_sendmsg+0x12/0x20
> [ 1571.501166]  entry_SYSCALL_64_fastpath+0x23/0xc6
> [ 1571.506354] RIP: 0033:0x7fca2d0384a0
> [ 1571.510374] RSP: 002b:00007ffd18d7fe88 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
> [ 1571.518886] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 00007fca2d0384a0
> [ 1571.526889] RDX: 0000000000000000 RSI: 000000000060a300 RDI: 0000000000000003
> [ 1571.534892] RBP: 0000000000000046 R08: 0000000000000020 R09: 000000000000003e
> [ 1571.542897] R10: 00007ffd18d7fc50 R11: 0000000000000246 R12: 00000000000000c0
> [ 1571.550900] R13: 0000000000000004 R14: 00007ffd18d81608 R15: 00007ffd18d810b0

Judging from the splat, do we just need to use rcu_dereference_bh() on the
TX path?

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4e27c5b09600..8aa3e837cd6c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2109,7 +2109,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                     vxlan->cfg.port_max, true);
 
        if (dst->sa.sa_family == AF_INET) {
-               struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+               struct vxlan_sock *sock4 = rcu_dereference_bh(vxlan->vn4_sock);
                struct rtable *rt;
                __be16 df = 0;
 
@@ -2148,7 +2148,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+               struct vxlan_sock *sock6 = rcu_dereference_bh(vxlan->vn6_sock);
 
                ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
                                        rdst ? rdst->remote_ifindex : 0, tos,


> Some of Netronome's VXLAN tests are also failing but I need to dig a
> bit to see what's wrong there...

Test failures turned out to be unrelated.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ