netdev - Re: kernel panic in latest vanilla stable, while using nameif with "alive" pppoe interfaces

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <200910191436.36218.denys@visp.net.lb>
Date:	Mon, 19 Oct 2009 14:36:36 +0300
From:	Denys Fedoryschenko <denys@...p.net.lb>
To:	Michal Ostrowski <mostrows@...il.com>
Cc:	netdev <netdev@...r.kernel.org>, linux-ppp@...r.kernel.org,
	paulus@...ba.org, mostrows@...thlink.net
Subject: Re: kernel panic in latest vanilla stable, while using nameif with  "alive" pppoe interfaces

Can you send me patch as attachment please?

On Monday 19 October 2009 06:34:06 Michal Ostrowski wrote:
> Here's my theory on this after an inital look...
>
> Looking at the oops report and disassembly of the actual module binary
> that caused the oops, one can deduce that:
>
> Execution was in pppoe_flush_dev().  %ebx contained the pointer "struct
> pppox_sock *po", which is what we faulted on, excuting "cmp %eax,
> 0x190(%ebx)". %ebx value was 0xffffffff (hence we got "NULL pointer
> dereference at 0x18f").
>
> At this point "i" (stored in %esi) is 15 (valid), meaning that we got a
> value of 0xffffffff in pn->hash_table[i].
>
> From this I'd hypothesize that the combination of dev_put() and
> release_sock() may have allowed us to free "pn".  At the bottom of the loop
> we alreayd recognize that since locks are dropped we're responsible for
> handling invalidation of objects, and perhaps that should be extended to
> "pn" as well. --
> Michal Ostrowski
> mostrows@...il.com
>
>
> ---
>  drivers/net/pppoe.c |   86 ++++++++++++++++++++++++++----
> --------------------
>  1 files changed, 45 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
> index 7cbf6f9..720c4ea 100644
> --- a/drivers/net/pppoe.c
> +++ b/drivers/net/pppoe.c
> @@ -296,6 +296,7 @@ static void pppoe_flush_dev(struct net_device *dev)
>
>        BUG_ON(dev == NULL);
>
> +restart:
>        pn = pppoe_pernet(dev_net(dev));
>        if (!pn) /* already freed */
>                return;
> @@ -303,48 +304,51 @@ static void pppoe_flush_dev(struct net_device *dev)
>        write_lock_bh(&pn->hash_lock);
>        for (i = 0; i < PPPOE_HASH_SIZE; i++) {
>                struct pppox_sock *po = pn->hash_table[i];
> +                struct sock *sk;
>
> -               while (po != NULL) {
> -                       struct sock *sk;
> -                       if (po->pppoe_dev != dev) {
> -                               po = po->next;
> -                               continue;
> -                       }
> -                       sk = sk_pppox(po);
> -                       spin_lock(&flush_lock);
> -                       po->pppoe_dev = NULL;
> -                       spin_unlock(&flush_lock);
> -                       dev_put(dev);
> -
> -                       /* We always grab the socket lock, followed by the
> -                        * hash_lock, in that order.  Since we should
> -                        * hold the sock lock while doing any unbinding,
> -                        * we need to release the lock we're holding.
> -                        * Hold a reference to the sock so it doesn't
> disappear -                        * as we're jumping between locks.
> -                        */
> +                while (po && po->pppoe_dev != dev) {
> +                        po = po->next;
> +                }
>
> -                       sock_hold(sk);
> +                if (po == NULL) {
> +                        continue;
> +                }
>
> -                       write_unlock_bh(&pn->hash_lock);
> -                       lock_sock(sk);
> +                sk = sk_pppox(po);
>
> -                       if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))
> { -                               pppox_unbind_sock(sk);
> -                               sk->sk_state = PPPOX_ZOMBIE;
> -                               sk->sk_state_change(sk);
> -                       }
> +                spin_lock(&flush_lock);
> +                po->pppoe_dev = NULL;
> +                spin_unlock(&flush_lock);
>
> -                       release_sock(sk);
> -                       sock_put(sk);
> +                dev_put(dev);
>
> -                       /* Restart scan at the beginning of this hash
> chain. -                        * While the lock was dropped the chain
> contents may -                        * have changed.
> -                        */
> -                       write_lock_bh(&pn->hash_lock);
> -                       po = pn->hash_table[i];
> -               }
> +                /* We always grab the socket lock, followed by the
> +                 * hash_lock, in that order.  Since we should
> +                 * hold the sock lock while doing any unbinding,
> +                 * we need to release the lock we're holding.
> +                 * Hold a reference to the sock so it doesn't disappear
> +                 * as we're jumping between locks.
> +                 */
> +
> +                sock_hold(sk);
> +
> +                write_unlock_bh(&pn->hash_lock);
> +                lock_sock(sk);
> +
> +                if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
> +                        pppox_unbind_sock(sk);
> +                        sk->sk_state = PPPOX_ZOMBIE;
> +                        sk->sk_state_change(sk);
> +                }
> +
> +                release_sock(sk);
> +                sock_put(sk);
> +
> +                /* Restart the flush process from the beginning.  While
> +                 * the lock was dropped the chain contents may have
> +                 * changed, and sock_put may have made things go away.
> +                 */
> +                goto restart;
>        }
>        write_unlock_bh(&pn->hash_lock);
>  }
> --
> 1.6.3.3
>
> On Sun, Oct 18, 2009 at 4:02 PM, Denys Fedoryschenko <denys@...p.net.lb> 
wrote:
> > I have server running as pppoe NAS.
> > Tried to rename customers without dropping pppd connections first, got
> > panic after few seconds.
> > Panic triggerable at 2.6.30.4 and 2.6.31.4
> > pppoe users running on eth2
> > pppoe flags:
> >  1457 root     /usr/sbin/pppoe-server -I eth2 -k -L 172.16.1.1 -R
> > 172.16.1.2 -N 253 -C gpzone -S gpzone
> >
> >
> > Commands sequence that i think triggered that:
> >
> > ip link set eth0 down
> > ip link set eth1 down
> > ip link set eth2 down
> > nameif etherx 00:16:76:8D:83:BA
> > nameif eth0 00:19:e0:72:4a:37
> > nameif eth1 00:19:e0:72:4a:4b
> >
> > ip addr flush dev eth0
> > ip addr flush dev eth1
> > ip addr add X.X.X.X/29 dev eth0
> > ip addr add 192.168.2.177/24 dev eth0
> > ip addr add 192.168.0.1/32 dev eth1
> > ip addr add 127.0.0.0/8 dev lo
> > #ip link set eth0 up
> > ip link set eth0 up
> > ip link set eth1 up
> > ip link set lo up
> > ip route add 0.0.0.0/0 via X.X.X.X
> >
> >
> > [  103.428591] r8169: eth0: link up
> > [  103.430360] r8169: eth1: link up
> > [  113.361528] BUG: unable to handle kernel
> > NULL pointer dereference
> > at 0000018f
> > [  113.361717] IP:
> > [<f8868269>] pppoe_device_event+0x80/0x12c [pppoe]
> > [  113.361853] *pdpt = 000000003411a001
> > *pde = 0000000000000000
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362012] Oops: 0000 [#1]
> > SMP
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362166] last sysfs file: /sys/devices/virtual/vc/vcs3/dev
> > [  113.362246] Modules linked in:
> > netconsole
> > configfs
> > act_skbedit
> > sch_ingress
> > sch_prio
> > cls_flow
> > cls_u32
> > em_meta
> > cls_basic
> > xt_dscp
> > xt_DSCP
> > ipt_REJECT
> > ts_bm
> > xt_string
> > xt_hl
> > ifb
> > cls_fw
> > sch_tbf
> > sch_htb
> > act_ipt
> > act_mirred
> > xt_MARK
> > pppoe
> > pppox
> > ppp_generic
> > slhc
> > xt_TCPMSS
> > xt_mark
> > xt_tcpudp
> > iptable_mangle
> > iptable_nat
> > nf_nat
> > rtc_cmos
> > nf_conntrack_ipv4
> > rtc_core
> > nf_conntrack
> > rtc_lib
> > nf_defrag_ipv4
> > iptable_filter
> > ip_tables
> > x_tables
> > 8021q
> > garp
> > stp
> > llc
> > loop
> > sata_sil
> > pata_atiixp
> > pata_acpi
> > ata_generic
> > libata
> > 8139cp
> > usb_storage
> > mtdblock
> > mtd_blkdevs
> > mtd
> > sr_mod
> > cdrom
> > tulip
> > r8169
> > sky2
> > via_velocity
> > via_rhine
> > sis900
> > ne2k_pci
> > 8390
> > skge
> > tg3
> > libphy
> > 8139too
> > e1000
> > e100
> > usbhid
> > ohci_hcd
> > uhci_hcd
> > ehci_hcd
> > usbcore
> > nls_base
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362344]
> > [  113.362344] Pid: 2858, comm: pppd Not tainted (2.6.31.4-build-0047 #7)
> > [  113.362344] EIP: 0060:[<f8868269>] EFLAGS: 00010286 CPU: 0
> > [  113.362344] EIP is at pppoe_device_event+0x80/0x12c [pppoe]
> > [  113.362344] EAX: f4fbe000 EBX: ffffffff ECX: f6cea5a0 EDX: f7403680
> > [  113.362344] ESI: 0000000f EDI: f6cea5e0 EBP: f4145e34 ESP: f4145e1c
> > [  113.362344]  DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
> > [  113.362344] Process pppd (pid: 2858, ti=f4145000 task=f4112ff0
> > task.ti=f4145000)
> > [  113.362344] Stack:
> > [  113.362344]  f4fbe220
> > f4fbe000
> > f6cea5a0
> > f886a430
> > fffffff5
> > 00000000
> > f4145e54
> > c01422b3
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362344] <0>
> > f4fbe000
> > 00000009
> > f8a457d8
> > f4fbe000
> > f8850190
> > 00001091
> > f4145e64
> > c0142361
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362344] <0>
> > ffffffff
> > 00000000
> > f4145e74
> > c029ffbf
> > f4fbe000
> > 000010d0
> > f4145e90
> > c029fa70
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362344] Call Trace:
> > [  113.362344]  [<c01422b3>] ? notifier_call_chain+0x2b/0x4a
> > [  113.362344]  [<c0142361>] ? raw_notifier_call_chain+0xc/0xe
> > [  113.362344]  [<c029ffbf>] ? dev_close+0x4c/0x8c
> > [  113.362344]  [<c029fa70>] ? dev_change_flags+0xa5/0x158
> > [  113.362344]  [<c02da633>] ? devinet_ioctl+0x21a/0x503
> > [  113.362344]  [<c02db693>] ? inet_ioctl+0x8d/0xa6
> > [  113.362344]  [<c0292b21>] ? sock_ioctl+0x1c8/0x1ec
> > [  113.362344]  [<c0292959>] ? sock_ioctl+0x0/0x1ec
> > [  113.362344]  [<c019af2b>] ? vfs_ioctl+0x22/0x69
> > [  113.362344]  [<c019b435>] ? do_vfs_ioctl+0x41f/0x459
> > [  113.362344]  [<c02934eb>] ? sys_send+0x18/0x1a
> > [  113.362344]  [<c011942f>] ? do_page_fault+0x242/0x26f
> > [  113.362344]  [<c019b49b>] ? sys_ioctl+0x2c/0x45
> > [  113.362344]  [<c0102975>] ? syscall_call+0x7/0xb
> > [  113.362344] Code:
> > c9
> > 00
> > 00
> > 00
> > 89
> > c7
> > 31
> > f6
> > 83
> > c7
> > 40
> > 89
> > f8
> > e8
> > cc
> > 60
> > a9
> > c7
> > 8b
> > 45
> > ec
> > 05
> > 20
> > 02
> > 00
> > 00
> > 89
> > 45
> > e8
> > 8b
> > 4d
> > f0
> > 8b
> > 1c
> > b1
> > e9
> > 8c
> > 00
> > 00
> > 00
> > 8b
> > 45
> > ec
> > Oct 18 23:59:40 194.146.153.93
> > 83
> > 90
> > 01
> > 00
> > 00
> > 74
> > 08
> > 8b
> > 9b
> > 8c
> > 01
> > 00
> > 00
> > eb
> > 79
> > b8
> > c0
> > a6
> > 86
> > f8
> > Oct 18 23:59:40 194.146.153.93
> > [  113.362344] EIP: [<f8868269>]
> > pppoe_device_event+0x80/0x12c [pppoe]
> > SS:ESP 0068:f4145e1c
> > [  113.362344] CR2: 000000000000018f
> > [  113.373124] ---[ end trace f6fe64a307e97f3b ]---
> > [  113.373203] Kernel panic - not syncing: Fatal exception in interrupt
> > [  113.373286] Pid: 2858, comm: pppd Tainted: G      D  
> >  2.6.31.4-build-0047 #7
> > [  113.373379] Call Trace:
> > [  113.373479]  [<c02fc496>] ? printk+0xf/0x11
> > [  113.373561]  [<c02fc3e7>] panic+0x39/0xd9
> > [  113.373656]  [<c01059b7>] oops_end+0x8b/0x9a
> > [  113.373727]  [<c0118f6d>] no_context+0x13d/0x147
> > [  113.373800]  [<c011908a>] __bad_area_nosemaphore+0x113/0x11b
> > [  113.373881]  [<c02953b3>] ? sock_alloc_send_pskb+0x8b/0x24a
> > [  113.373959]  [<c0121801>] ? __wake_up_sync_key+0x3b/0x45
> > [  113.374030]  [<c0131967>] ? irq_exit+0x39/0x5c
> > [  113.374107]  [<c0104393>] ? do_IRQ+0x80/0x96
> > [  113.374183]  [<c0102f49>] ? common_interrupt+0x29/0x30
> > [  113.374259]  [<c011909f>] bad_area_nosemaphore+0xd/0x10
> > [  113.374348]  [<c0119301>] do_page_fault+0x114/0x26f
> > [  113.374526]  [<c01191ed>] ? do_page_fault+0x0/0x26f
> > [  113.374605]  [<c02fe506>] error_code+0x66/0x6c
> > [  113.374683]  [<c02d007b>] ? tcp_v4_send_ack+0xa3/0x10e
> > [  113.374764]  [<c01191ed>] ? do_page_fault+0x0/0x26f
> > [  113.374850]  [<f8868269>] ? pppoe_device_event+0x80/0x12c [pppoe]
> > [  113.374928]  [<c01422b3>] notifier_call_chain+0x2b/0x4a
> > [  113.375012]  [<c0142361>] raw_notifier_call_chain+0xc/0xe
> > [  113.375097]  [<c029ffbf>] dev_close+0x4c/0x8c
> > [  113.375169]  [<c029fa70>] dev_change_flags+0xa5/0x158
> > [  113.375239]  [<c02da633>] devinet_ioctl+0x21a/0x503
> > [  113.375318]  [<c02db693>] inet_ioctl+0x8d/0xa6
> > [  113.375411]  [<c0292b21>] sock_ioctl+0x1c8/0x1ec
> > [  113.375491]  [<c0292959>] ? sock_ioctl+0x0/0x1ec
> > [  113.375574]  [<c019af2b>] vfs_ioctl+0x22/0x69
> > [  113.375653]  [<c019b435>] do_vfs_ioctl+0x41f/0x459
> > [  113.375734]  [<c02934eb>] ? sys_send+0x18/0x1a
> > [  113.375813]  [<c011942f>] ? do_page_fault+0x242/0x26f
> > [  113.375884]  [<c019b49b>] sys_ioctl+0x2c/0x45
> > [  113.375960]  [<c0102975>] syscall_call+0x7/0xb
> > [  113.376041] Rebooting in 5 seconds..


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html