[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100123165657.187c11e4@neptune.home>
Date:	Sat, 23 Jan 2010 16:56:57 +0100
From:	Bruno Prémont <bonbons@...ux-vserver.org>
To:	Eric Dumazet <eric.dumazet@...il.com>,
	"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [2.6.33-rc5 regression] NULL pointer dereference in vlan_skb_recv -
 probably introduced by commit 9793241fe92f7d9303fb221e43fc598eb065f267
Today I tried out 2.6.33-rc5, but the system always crashes:
[   26.390576] BUG: unable to handle kernel NULL pointer dereference at (null)
[   26.396369] IP: [<df856b89>] vlan_skb_recv+0x89/0x280 [8021q]
[   26.400534] *pde = 00000000 
[   26.400534] Oops: 0002 [#1] 
[   26.400534] last sysfs file: /sys/devices/pci0000:00/0000:00:1f.1/host0/target0:0:0/0:0:0:0/block/sda/uevent
[   26.400534] Modules linked in: squashfs zlib_inflate nfs lockd nfs_acl sunrpc 8021q snd_pcm_oss snd_mixer_oss xfs exportfs loop snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm ehci_hcd snd_timer uhci_hcd usbcore pcspkr i2c_i801 nsc_ircc snd irda snd_page_alloc crc_ccitt
[   26.400534] 
[   26.400534] Pid: 0, comm: swapper Not tainted 2.6.33-rc5-00001-g0d9d71d #5 TravelMate 660/TravelMate 660
[   26.400534] EIP: 0060:[<df856b89>] EFLAGS: 00010246 CPU: 0
[   26.400534] EIP is at vlan_skb_recv+0x89/0x280 [8021q]
[   26.400534] EAX: da74d000 EBX: 00000000 ECX: da046140 EDX: 00000004
[   26.400534] ESI: 00000000 EDI: da023000 EBP: c13f7f10 ESP: c13f7ef4
[   26.400534]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[   26.400534] Process swapper (pid: 0, ti=c13f7000 task=c13fbb40 task.ti=c13f4000)
[   26.400534] Stack:
[   26.400534]  c13f7f10 c10420c9 4b5b0d11 da099430 ddbb8800 da023000 df8584c0 c13f7f44
[   26.400534] <0> c1249bdc ddbb8800 c142b000 da023098 ddbb8800 c142b000 df8584c0 00000000
[   26.400534] <0> 00000081 da023000 ddbb8b60 da023000 c13f7fa4 c1208213 c123e9ea da059b40
[   26.400534] Call Trace:
[   26.400534]  [<c10420c9>] ? ktime_get_real+0x19/0x40
[   26.400534]  [<c1249bdc>] ? netif_receive_skb+0x2cc/0x380
[   26.400534]  [<c1208213>] ? b44_poll+0x4e3/0x580
[   26.400534]  [<c123e9ea>] ? __kfree_skb+0x3a/0x90
[   26.400534]  [<c124a38a>] ? net_rx_action+0x9a/0x130
[   26.400534]  [<c102bb66>] ? __do_softirq+0x76/0x100
[   26.400534]  [<c102baf0>] ? __do_softirq+0x0/0x100
[   26.400534]  <IRQ> 
[   26.400534]  [<c102b965>] ? irq_exit+0x55/0x70
[   26.400534]  [<c1004582>] ? do_IRQ+0x42/0xb0
[   26.400534]  [<c103f13d>] ? sched_clock_local+0xdd/0x180
[   26.400534]  [<c1003169>] ? common_interrupt+0x29/0x30
[   26.400534]  [<c103007b>] ? run_timer_softirq+0x18b/0x1c0
[   26.400534]  [<c1165673>] ? acpi_idle_enter_simple+0xe1/0x10c
[   26.400534]  [<c116544b>] ? acpi_idle_enter_bm+0xb9/0x200
[   26.400534]  [<c122cefb>] ? cpuidle_idle_call+0x8b/0xe0
[   26.400534]  [<c1001d7c>] ? cpu_idle+0x3c/0x60
[   26.400534]  [<c12d514d>] ? rest_init+0x4d/0x60
[   26.400534]  [<c142d6cf>] ? start_kernel+0x21f/0x259
[   26.400534]  [<c142d28b>] ? unknown_bootoption+0x0/0x1d3
[   26.400534]  [<c142d067>] ? i386_start_kernel+0x67/0x69
[   26.400534] Code: 89 f0 0f b7 da 89 da 81 e2 ff 0f 00 00 e8 90 e4 ff ff 85 c0 89 47 14 0f 84 15 01 00 00 8b b6 e4 03 00 00 ba 04 00 00 00 c1 eb 0d <ff> 06 8b 47 4c 01 46 04 89 f8 e8 c8 77 9e e1 8b 57 14 89 55 e8 
[   26.400534] EIP: [<df856b89>] vlan_skb_recv+0x89/0x280 [8021q] SS:ESP 0068:c13f7ef4
[   26.400534] CR2: 0000000000000000
[   26.710884] ---[ end trace 0c4b7a45bd92e2ba ]---
[   26.718562] Kernel panic - not syncing: Fatal exception in interrupt
[   26.726395] Pid: 0, comm: swapper Tainted: G      D    2.6.33-rc5-00001-g0d9d71d #5
[   26.734339] Call Trace:
[   26.742290]  [<c12db5d0>] ? printk+0x18/0x20
[   26.750384]  [<c12db508>] panic+0x44/0xf4
[   26.758522]  [<c1005bce>] oops_end+0x7e/0x90
[   26.766754]  [<c101cc0f>] no_context+0xbf/0x150
[   26.774945]  [<c101ccef>] __bad_area_nosemaphore+0x4f/0x180
[   26.783227]  [<c1248775>] ? dev_hard_start_xmit+0x205/0x2b0
[   26.791583]  [<c103999b>] ? autoremove_wake_function+0x1b/0x50
[   26.800005]  [<c12588bc>] ? sch_direct_xmit+0xec/0x140
[   26.808462]  [<c12489e9>] ? dev_queue_xmit+0x1c9/0x4b0
[   26.816921]  [<df85653c>] ? vlan_dev_hard_start_xmit+0x13c/0x1c0 [8021q]
[   26.825443]  [<c101ce32>] bad_area_nosemaphore+0x12/0x20
[   26.833978]  [<c101d21c>] do_page_fault+0x25c/0x2f0
[   26.842561]  [<c1257df5>] ? eth_header+0x25/0xc0
[   26.851196]  [<c101cfc0>] ? do_page_fault+0x0/0x2f0
[   26.859859]  [<c12dd406>] error_code+0x5e/0x64
[   26.868578]  [<c104007b>] ? sys_getgroups+0x1b/0xc0
[   26.877206]  [<c101cfc0>] ? do_page_fault+0x0/0x2f0
[   26.885747]  [<df856b89>] ? vlan_skb_recv+0x89/0x280 [8021q]
[   26.894283]  [<c10420c9>] ? ktime_get_real+0x19/0x40
[   26.902859]  [<c1249bdc>] netif_receive_skb+0x2cc/0x380
[   26.911318]  [<c1208213>] b44_poll+0x4e3/0x580
[   26.919466]  [<c123e9ea>] ? __kfree_skb+0x3a/0x90
[   26.927297]  [<c124a38a>] net_rx_action+0x9a/0x130
[   26.934979]  [<c102bb66>] __do_softirq+0x76/0x100
[   26.942530]  [<c102baf0>] ? __do_softirq+0x0/0x100
[   26.949952]  <IRQ>  [<c102b965>] ? irq_exit+0x55/0x70
[   26.957401]  [<c1004582>] ? do_IRQ+0x42/0xb0
[   26.964803]  [<c103f13d>] ? sched_clock_local+0xdd/0x180
[   26.972247]  [<c1003169>] ? common_interrupt+0x29/0x30
[   26.979667]  [<c103007b>] ? run_timer_softirq+0x18b/0x1c0
[   26.987033]  [<c1165673>] ? acpi_idle_enter_simple+0xe1/0x10c
[   26.994363]  [<c116544b>] ? acpi_idle_enter_bm+0xb9/0x200
[   27.001573]  [<c122cefb>] ? cpuidle_idle_call+0x8b/0xe0
[   27.008683]  [<c1001d7c>] ? cpu_idle+0x3c/0x60
[   27.015733]  [<c12d514d>] ? rest_init+0x4d/0x60
[   27.022683]  [<c142d6cf>] ? start_kernel+0x21f/0x259
[   27.029627]  [<c142d28b>] ? unknown_bootoption+0x0/0x1d3
[   27.036592]  [<c142d067>] ? i386_start_kernel+0x67/0x69
[   27.043549] [drm:drm_fb_helper_panic] *ERROR* panic occurred, switching back to text console
The following block is then repeated many times until the kernel finally
ends up in a panic.
[   27.050739] BUG: scheduling while atomic: swapper/0/0x10000100
[   27.057977] Modules linked in: squashfs zlib_inflate nfs lockd nfs_acl sunrpc 8021q snd_pcm_oss snd_mixer_oss xfs exportfs loop snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm ehci_hcd snd_timer uhci_hcd usbcore pcspkr i2c_i801 nsc_ircc snd irda snd_page_alloc crc_ccitt
[   27.074086] Modules linked in: squashfs zlib_inflate nfs lockd nfs_acl sunrpc 8021q snd_pcm_oss snd_mixer_oss xfs exportfs loop snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm ehci_hcd snd_timer uhci_hcd usbcore pcspkr i2c_i801 nsc_ircc snd irda snd_page_alloc crc_ccitt
[   27.091490] 
[   27.100093] Pid: 0, comm: swapper Tainted: G      D    2.6.33-rc5-00001-g0d9d71d #5 TravelMate 660/TravelMate 660
[   27.109086] EIP: 0060:[<c1165673>] EFLAGS: 00000286 CPU: 0
[   27.118106] EIP is at acpi_idle_enter_simple+0xe1/0x10c
[   27.127138] EAX: c13f4f68 EBX: 00000333 ECX: 00000000 EDX: 00000006
[   27.136253] ESI: 00000000 EDI: dd851c48 EBP: c13f4f88 ESP: c13f4f68
[   27.145445]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[   27.154661] Process swapper (pid: 0, ti=c13f7000 task=c13fbb40 task.ti=c13f4000)
[   27.164033] Stack:
[   27.173342]  000f4240 00000000 dd851800 000000e5 00000000 dd851934 dd85181c dd851cbc
[   27.173636] <0> c13f4fac c116544b dd851800 00000000 00000055 00000001 dd851934 dd85181c
[   27.183296] <0> c13f5000 c13f4fbc c122cefb c1455940 0009fe00 c13f4fc4 c1001d7c c13f4fcc
[   27.202566] Call Trace:
[   27.212012]  [<c116544b>] ? acpi_idle_enter_bm+0xb9/0x200
[   27.221307]  [<c122cefb>] ? cpuidle_idle_call+0x8b/0xe0
[   27.230476]  [<c1001d7c>] ? cpu_idle+0x3c/0x60
[   27.239548]  [<c12d514d>] ? rest_init+0x4d/0x60
[   27.248602]  [<c142d6cf>] ? start_kernel+0x21f/0x259
[   27.257627]  [<c142d28b>] ? unknown_bootoption+0x0/0x1d3
[   27.266563]  [<c142d067>] ? i386_start_kernel+0x67/0x69
[   27.275418] Code: 69 4d f0 99 9e 36 00 8d 14 11 e8 69 1b fb ff 89 c3 b8 17 01 00 00 69 ca 17 01 00 00 89 d6 f7 e3 8d 14 11 e8 20 9d ed ff fb 89 e0 <31> c9 25 00 f0 ff ff 89 fa 83 48 0c 04 ff 47 18 8b 45 e8 e8 f3 
[   27.294819] Call Trace:
[   27.304169]  [<c116544b>] acpi_idle_enter_bm+0xb9/0x200
[   27.313634]  [<c122cefb>] cpuidle_idle_call+0x8b/0xe0
[   27.323086]  [<c1001d7c>] cpu_idle+0x3c/0x60
[   27.332507]  [<c12d514d>] rest_init+0x4d/0x60
[   27.341898]  [<c142d6cf>] start_kernel+0x21f/0x259
[   27.351278]  [<c142d28b>] ? unknown_bootoption+0x0/0x1d3
[   27.360691]  [<c142d067>] i386_start_kernel+0x67/0x69
The kernel boots fine up to init, but sometime before reaching the login prompt
it floods the console with traces until finally panicking. This always happens
after network startup (my B44 interface is renamed to lan and I have one
vlan active on top of it, with both the vlan and the parent device in use).
2.6.32 (with a few DRM patches from 2.6.33-rc0 applied) works fine.
Objdump of 8021q.ko, vlan_skb_recv() around the crashing code:
00001b00 <vlan_skb_recv>:
    1b00:       55                      push   %ebp
    1b01:       89 e5                   mov    %esp,%ebp
    1b03:       57                      push   %edi
    1b04:       56                      push   %esi
    1b05:       89 d6                   mov    %edx,%esi
    1b07:       53                      push   %ebx
    1b08:       89 c3                   mov    %eax,%ebx
    1b0a:       83 ec 10                sub    $0x10,%esp
    1b0d:       89 df                   mov    %ebx,%edi
    1b0f:       8b 80 98 00 00 00       mov    0x98(%eax),%eax
    1b15:       48                      dec    %eax
    1b16:       74 15                   je     1b2d <vlan_skb_recv+0x2d>
    1b18:       ba 20 00 00 00          mov    $0x20,%edx
    1b1d:       89 d8                   mov    %ebx,%eax
    1b1f:       e8 fc ff ff ff          call   1b20 <vlan_skb_recv+0x20>
    1b24:       89 c7                   mov    %eax,%edi
    1b26:       89 d8                   mov    %ebx,%eax
    1b28:       e8 fc ff ff ff          call   1b29 <vlan_skb_recv+0x29>
    1b2d:       85 ff                   test   %edi,%edi
    1b2f:       0f 84 5b 01 00 00       je     1c90 <vlan_skb_recv+0x190>
    1b35:       8b 47 4c                mov    0x4c(%edi),%eax
    1b38:       8b 57 50                mov    0x50(%edi),%edx
    1b3b:       89 c1                   mov    %eax,%ecx
    1b3d:       29 d1                   sub    %edx,%ecx
    1b3f:       83 f9 03                cmp    $0x3,%ecx
    1b42:       0f 86 60 01 00 00       jbe    1ca8 <vlan_skb_recv+0x1a8>
    1b48:       8b 87 90 00 00 00       mov    0x90(%edi),%eax
    1b4e:       89 45 f0                mov    %eax,-0x10(%ebp)
    1b51:       0f b7 00                movzwl (%eax),%eax
    1b54:       89 c2                   mov    %eax,%edx
    1b56:       c1 e2 08                shl    $0x8,%edx
    1b59:       c1 e8 08                shr    $0x8,%eax
    1b5c:       09 c2                   or     %eax,%edx
    1b5e:       89 f0                   mov    %esi,%eax
    1b60:       0f b7 da                movzwl %dx,%ebx
    1b63:       89 da                   mov    %ebx,%edx
    1b65:       81 e2 ff 0f 00 00       and    $0xfff,%edx
    1b6b:       e8 fc ff ff ff          call   1b6c <vlan_skb_recv+0x6c>
    1b70:       85 c0                   test   %eax,%eax
    1b72:       89 47 14                mov    %eax,0x14(%edi)
    1b75:       0f 84 15 01 00 00       je     1c90 <vlan_skb_recv+0x190>
    1b7b:       8b b6 e4 03 00 00       mov    0x3e4(%esi),%esi
    1b81:       ba 04 00 00 00          mov    $0x4,%edx
    1b86:       c1 eb 0d                shr    $0xd,%ebx
    1b89:       ff 06                   incl   (%esi)
       ^___ this matches the faulting function offset (+0x89), and ESI is
            NULL according to the trace
    1b8b:       8b 47 4c                mov    0x4c(%edi),%eax
    1b8e:       01 46 04                add    %eax,0x4(%esi)
    1b91:       89 f8                   mov    %edi,%eax
    1b93:       e8 fc ff ff ff          call   1b94 <vlan_skb_recv+0x94>
    1b98:       8b 57 14                mov    0x14(%edi),%edx
    1b9b:       89 55 e8                mov    %edx,-0x18(%ebp)
    1b9e:       8b 84 9a 64 03 00 00    mov    0x364(%edx,%ebx,4),%eax
    1ba5:       0f b6 4f 61             movzbl 0x61(%edi),%ecx
    1ba9:       89 47 5c                mov    %eax,0x5c(%edi)
    1bac:       88 c8                   mov    %cl,%al
    1bae:       24 07                   and    $0x7,%al
int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
                  struct packet_type *ptype, struct net_device *orig_dev)
{
        struct vlan_hdr *vhdr;
        struct vlan_rx_stats *rx_stats;
        u16 vlan_id;
        u16 vlan_tci;
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (skb == NULL)
                goto err_free;
        if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
                goto err_free;
        vhdr = (struct vlan_hdr *)skb->data;
        vlan_tci = ntohs(vhdr->h_vlan_TCI);
        vlan_id = vlan_tci & VLAN_VID_MASK;
        rcu_read_lock();
        skb->dev = __find_vlan_dev(dev, vlan_id);
        if (!skb->dev) {
                pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
                         __func__, vlan_id, dev->name);
                goto err_unlock;
        }
        rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats,
                               smp_processor_id());
        rx_stats->rx_packets++;
           ^___ looks like it is crashing here... (line 168 in net/8021q/vlan_dev.c)
        rx_stats->rx_bytes += skb->len;
The above part of the code changed between 2.6.32 and 2.6.33-rc5 with commit
9793241fe92f7d9303fb221e43fc598eb065f267 (vlan: Precise RX stats accounting)
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=9793241fe92f7d9303fb221e43fc598eb065f267
Note: my kernel is built with SMP=n, as I have only a single-core CPU
in this laptop. The kernel config file is attached. If the complete kernel
log is needed, just ask.
Bruno
View attachment ".config" of type "text/plain" (65751 bytes)
Powered by blists - more mailing lists
 
