[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <op.wyolrqyo9ey1ta@localhost>
Date: Fri, 14 Jun 2013 21:32:04 +0300
From: nirinA raseliarison <nirina.raseliarison@...il.com>
To: "Bjorn Helgaas" <bhelgaas@...gle.com>,
"Ming Lei" <ming.lei@...onical.com>
Cc: "nirinA raseliarison" <nirina.raseliarison@...il.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"Francois Romieu" <romieu@...zoreil.com>, nic_swsd@...ltek.com,
"Hayes Wang" <hayeswang@...ltek.com>,
"Guenter Roeck" <linux@...ck-us.net>
Subject: Re: BUG: unable to handle kernel NULL pointer dereference at
0000000000000040
on Fri, 14 Jun 2013 20:02:25 +0300, Ming Lei <ming.lei@...onical.com>
wrote:
> On Fri, Jun 14, 2013 at 10:30 PM, Bjorn Helgaas <bhelgaas@...gle.com>
> wrote:
>> [+cc Ming, Hayes, Francois, r8169 list]
>>
>> On Fri, Jun 14, 2013 at 6:49 AM, nirinA raseliarison
>> <nirina.raseliarison@...il.com> wrote:
>>> hello there,
>>> i have this ethernet controller:
>>>
>>> Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast
>>> Ethernet
>>> controller (rev 05)
>>>
>>> that uses the r8169 module.
>>> it works fine, but sometimes after a reboot and issuing:
>>>
>>> ifconfig eth0 192.168.1.1 up
>>>
>>> i get the message below. after another reboot the
>>> message disappears. i also get the same message with 3.9.5 and 3.9.4.
>>>
>>> it seems i caught my first oops and don't know what to do with it.
>>> currently running:
>>>
>>> cat /proc/version
>>> Linux version 3.9.6.20130614 (root@...ernova) (gcc version 4.8.1
>>> (GCC) ) #1
>>> SMP Fri Jun 14 09:14:50 EAT 2013
>>>
>>> uname -a
>>> Linux supernova 3.9.6.20130614 #1 SMP Fri Jun 14 09:14:50 EAT 2013
>>> x86_64
>>> Intel(R) Celeron(R) CPU G1610 @ 2.60GHz GenuineIntel GNU/Linux
>>>
>>> thanks,
>>> -----------------8<------------------------------8<---------------------------------------
>>>
>>> [ 57.877560] BUG: unable to handle kernel NULL pointer dereference at
>>> 0000000000000040
>>> [ 57.877603] IP: [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>>> [ 57.877634] PGD 21330a067 PUD 211a3a067 PMD 0
>>> [ 57.877660] Oops: 0002 [#1] SMP
>>> [ 57.877681] Modules linked in: fuse coretemp kvm_intel kvm evdev
>>> r8169
>>> microcode mii
>>> [ 57.877735] CPU 0
>>> [ 57.877746] Pid: 1950, comm: firmware Not tainted 3.9.6.20130614 #1
>>> To be
>>> filled by O.E.M. To be filled by O.E.M./ONDA H61V Ver:4.01
>>> [ 57.877790] RIP: 0010:[<ffffffff81491844>] [<ffffffff81491844>]
>>> fw_load_abort.isra.5+0x4/0x20
>>> [ 57.877824] RSP: 0018:ffff8802119a7e80 EFLAGS: 00010246
>>> [ 57.877844] RAX: ffff8802158fe250 RBX: ffff880211a03b40 RCX:
>>> 0000000000000000
>>> [ 57.877869] RDX: ffffffff81c742c8 RSI: ffff8802158fe250 RDI:
>>> 0000000000000000
>>> [ 57.877895] RBP: ffff8802119a7e80 R08: ffff8802119a6000 R09:
>>> 00000000000005aa
>>> [ 57.877920] R10: 0000000000000000 R11: 0000000000000000 R12:
>>> ffffffffffffffff
>>> [ 57.877945] R13: ffff880213d34088 R14: 0000000000000003 R15:
>>> ffff88020eafc230
>>> [ 57.877970] FS: 00007f3c6cb2a740(0000) GS:ffff88021f200000(0000)
>>> knlGS:0000000000000000
>>> [ 57.877998] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [ 57.878019] CR2: 0000000000000040 CR3: 0000000203155000 CR4:
>>> 00000000001407f0
>>> [ 57.878044] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>>> 0000000000000000
>>> [ 57.878069] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
>>> 0000000000000400
>>> [ 57.878094] Process firmware (pid: 1950, threadinfo
>>> ffff8802119a6000,
>>> task ffff8802158fe250)
>>> [ 57.878124] Stack:
>>> [ 57.878133] ffff8802119a7eb0 ffffffff81491917 ffff880211a4d5a0
>>> 0000000000000003
>>> [ 57.878168] ffff8802119a7f50 ffffffff818765a0 ffff8802119a7ec0
>>> ffffffff81483063
>>> [ 57.878203] ffff8802119a7f08 ffffffff8119bc9e ffff880213d34098
>>> ffff880211a4d5c0
>>> [ 57.878237] Call Trace:
>>> [ 57.878251] [<ffffffff81491917>] firmware_loading_store+0x77/0x150
>>> [ 57.878275] [<ffffffff81483063>] dev_attr_store+0x13/0x20
>>> [ 57.878297] [<ffffffff8119bc9e>] sysfs_write_file+0xce/0x140
>>> [ 57.878320] [<ffffffff81133e8a>] vfs_write+0x9a/0x160
>>> [ 57.878340] [<ffffffff81134164>] sys_write+0x44/0x90
>>> [ 57.878360] [<ffffffff817d70ed>] system_call_fastpath+0x1a/0x1f
>>> [ 57.879379] Code: 6b ff ff ff 48 89 df 31 db e8 b9 b0 c9 ff e9 79
>>> ff ff
>>> ff 0f 1f 40 00 48 83 c4 10 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 00 55 48
>>> 89 e5
>>> <f0> 80 4f 40 04 48 83 c7 18 e8 8e a9 bd ff 5d c3 66 66 66 2e 0f
>>> [ 57.881753] RIP [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>>> [ 57.882888] RSP <ffff8802119a7e80>
>>> [ 57.884019] CR2: 0000000000000040
>>> [ 57.885166] ---[ end trace 6705f6d4ce6b6a12 ]---
>
> Looks like it is a double abort race; could you try the patch below?
> (also attached for applying)
i've also applied this patch and so far, after
rebooting a few times, everything seems to work fine.
thanks,
> --
> diff --git a/drivers/base/firmware_class.c
> b/drivers/base/firmware_class.c
> index 6ede229..a217ba8 100644
> --- a/drivers/base/firmware_class.c
> +++ b/drivers/base/firmware_class.c
> @@ -550,7 +550,12 @@ static ssize_t firmware_loading_show(struct device
> *dev,
> struct device_attribute *attr, char *buf)
> {
> struct firmware_priv *fw_priv = to_firmware_priv(dev);
> - int loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
> + int loading = 0;
> +
> + mutex_lock(&fw_lock);
> + if (fw_priv->buf)
> + loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
> + mutex_unlock(&fw_lock);
>
> return sprintf(buf, "%d\n", loading);
> }
> @@ -592,12 +597,12 @@ static ssize_t firmware_loading_store(struct
> device *dev,
> const char *buf, size_t count)
> {
> struct firmware_priv *fw_priv = to_firmware_priv(dev);
> - struct firmware_buf *fw_buf = fw_priv->buf;
> + struct firmware_buf *fw_buf;
> int loading = simple_strtol(buf, NULL, 10);
> int i;
>
> mutex_lock(&fw_lock);
> -
> + fw_buf = fw_priv->buf;
> if (!fw_buf)
> goto out;
>
> @@ -636,6 +641,7 @@ static ssize_t firmware_loading_store(struct device
> *dev,
> /* fallthrough */
> case -1:
> fw_load_abort(fw_buf);
> + fw_priv->buf = NULL;
> break;
> }
> out:
> @@ -704,6 +710,7 @@ static int fw_realloc_buffer(struct firmware_priv
> *fw_priv, int min_size)
> GFP_KERNEL);
> if (!new_pages) {
> fw_load_abort(buf);
> + fw_priv->buf = NULL;
> return -ENOMEM;
> }
> memcpy(new_pages, buf->pages,
> @@ -721,6 +728,7 @@ static int fw_realloc_buffer(struct firmware_priv
> *fw_priv, int min_size)
>
> if (!buf->pages[buf->nr_pages]) {
> fw_load_abort(buf);
> + fw_priv->buf = NULL;
> return -ENOMEM;
> }
> buf->nr_pages++;
> @@ -805,6 +813,7 @@ static void firmware_class_timeout_work(struct
> work_struct *work)
> return;
> }
> fw_load_abort(fw_priv->buf);
> + fw_priv->buf = NULL;
> mutex_unlock(&fw_lock);
> }
>
> @@ -886,8 +895,6 @@ static int _request_firmware_load(struct
> firmware_priv *fw_priv, bool uevent,
>
> cancel_delayed_work_sync(&fw_priv->timeout_work);
>
> - fw_priv->buf = NULL;
> -
> device_remove_file(f_dev, &dev_attr_loading);
> err_del_bin_attr:
> device_remove_bin_file(f_dev, &firmware_attr_data);
>
>
> Thanks,
> --
> Ming Lei
--
nirinA
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists