lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACVXFVPSGcBHj1tPfr16ORkuhkjTvDN3CHJv1Ke=W731pFxU4g@mail.gmail.com>
Date:	Sat, 15 Jun 2013 01:02:25 +0800
From:	Ming Lei <ming.lei@...onical.com>
To:	Bjorn Helgaas <bhelgaas@...gle.com>
Cc:	nirinA raseliarison <nirina.raseliarison@...il.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Francois Romieu <romieu@...zoreil.com>, nic_swsd@...ltek.com,
	Hayes Wang <hayeswang@...ltek.com>,
	Guenter Roeck <linux@...ck-us.net>
Subject: Re: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040

On Fri, Jun 14, 2013 at 10:30 PM, Bjorn Helgaas <bhelgaas@...gle.com> wrote:
> [+cc Ming, Hayes, Francois, r8169 list]
>
> On Fri, Jun 14, 2013 at 6:49 AM, nirinA raseliarison
> <nirina.raseliarison@...il.com> wrote:
>> hello there,
>> i have this ethernet controler:
>>
>>  Realtek Semiconductor Co., Ltd. RTL8101E/RTL8102E PCI Express Fast Ethernet
>> controller (rev 05)
>>
>> that uses the r8169 module.
>> it works fine, but sometimes after a reboot and issueing:
>>
>>  ifconfig eth0 192.168.1.1 up
>>
>> i got the message below. after another reboot the
>> message disappears. i also get the same message this 3.9.5 and 3.9.4.
>>
>> it seems i catch my first oops and don't know what to do with it.
>> currently running:
>>
>>  cat /proc/version
>>  Linux version 3.9.6.20130614 (root@...ernova) (gcc version 4.8.1 (GCC) ) #1
>> SMP Fri Jun 14 09:14:50 EAT 2013
>>
>>  uname -a
>>  Linux supernova 3.9.6.20130614 #1 SMP Fri Jun 14 09:14:50 EAT 2013 x86_64
>> Intel(R) Celeron(R) CPU G1610 @ 2.60GHz GenuineIntel GNU/Linux
>>
>> thanks,
>> -----------------8<------------------------------8<---------------------------------------
>>
>> [   57.877560] BUG: unable to handle kernel NULL pointer dereference at
>> 0000000000000040
>> [   57.877603] IP: [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>> [   57.877634] PGD 21330a067 PUD 211a3a067 PMD 0
>> [   57.877660] Oops: 0002 [#1] SMP
>> [   57.877681] Modules linked in: fuse coretemp kvm_intel kvm evdev r8169
>> microcode mii
>> [   57.877735] CPU 0
>> [   57.877746] Pid: 1950, comm: firmware Not tainted 3.9.6.20130614 #1 To be
>> filled by O.E.M. To be filled by O.E.M./ONDA H61V Ver:4.01
>> [   57.877790] RIP: 0010:[<ffffffff81491844>]  [<ffffffff81491844>]
>> fw_load_abort.isra.5+0x4/0x20
>> [   57.877824] RSP: 0018:ffff8802119a7e80  EFLAGS: 00010246
>> [   57.877844] RAX: ffff8802158fe250 RBX: ffff880211a03b40 RCX:
>> 0000000000000000
>> [   57.877869] RDX: ffffffff81c742c8 RSI: ffff8802158fe250 RDI:
>> 0000000000000000
>> [   57.877895] RBP: ffff8802119a7e80 R08: ffff8802119a6000 R09:
>> 00000000000005aa
>> [   57.877920] R10: 0000000000000000 R11: 0000000000000000 R12:
>> ffffffffffffffff
>> [   57.877945] R13: ffff880213d34088 R14: 0000000000000003 R15:
>> ffff88020eafc230
>> [   57.877970] FS:  00007f3c6cb2a740(0000) GS:ffff88021f200000(0000)
>> knlGS:0000000000000000
>> [   57.877998] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   57.878019] CR2: 0000000000000040 CR3: 0000000203155000 CR4:
>> 00000000001407f0
>> [   57.878044] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>> 0000000000000000
>> [   57.878069] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
>> 0000000000000400
>> [   57.878094] Process firmware (pid: 1950, threadinfo ffff8802119a6000,
>> task ffff8802158fe250)
>> [   57.878124] Stack:
>> [   57.878133]  ffff8802119a7eb0 ffffffff81491917 ffff880211a4d5a0
>> 0000000000000003
>> [   57.878168]  ffff8802119a7f50 ffffffff818765a0 ffff8802119a7ec0
>> ffffffff81483063
>> [   57.878203]  ffff8802119a7f08 ffffffff8119bc9e ffff880213d34098
>> ffff880211a4d5c0
>> [   57.878237] Call Trace:
>> [   57.878251]  [<ffffffff81491917>] firmware_loading_store+0x77/0x150
>> [   57.878275]  [<ffffffff81483063>] dev_attr_store+0x13/0x20
>> [   57.878297]  [<ffffffff8119bc9e>] sysfs_write_file+0xce/0x140
>> [   57.878320]  [<ffffffff81133e8a>] vfs_write+0x9a/0x160
>> [   57.878340]  [<ffffffff81134164>] sys_write+0x44/0x90
>> [   57.878360]  [<ffffffff817d70ed>] system_call_fastpath+0x1a/0x1f
>> [   57.879379] Code: 6b ff ff ff 48 89 df 31 db e8 b9 b0 c9 ff e9 79 ff ff
>> ff 0f 1f 40 00 48 83 c4 10 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 00 55 48 89 e5
>> <f0> 80 4f 40 04 48 83 c7 18 e8 8e a9 bd ff 5d c3 66 66 66 2e 0f
>> [   57.881753] RIP  [<ffffffff81491844>] fw_load_abort.isra.5+0x4/0x20
>> [   57.882888]  RSP <ffff8802119a7e80>
>> [   57.884019] CR2: 0000000000000040
>> [   57.885166] ---[ end trace 6705f6d4ce6b6a12 ]---

It looks like a double-abort race; could you try the patch below?
(It is also attached, for easier applying.)

--
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 6ede229..a217ba8 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -550,7 +550,12 @@ static ssize_t firmware_loading_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	int loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
+	int loading = 0;
+
+	mutex_lock(&fw_lock);
+	if (fw_priv->buf)
+		loading = test_bit(FW_STATUS_LOADING, &fw_priv->buf->status);
+	mutex_unlock(&fw_lock);

 	return sprintf(buf, "%d\n", loading);
 }
@@ -592,12 +597,12 @@ static ssize_t firmware_loading_store(struct device *dev,
 				      const char *buf, size_t count)
 {
 	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-	struct firmware_buf *fw_buf = fw_priv->buf;
+	struct firmware_buf *fw_buf;
 	int loading = simple_strtol(buf, NULL, 10);
 	int i;

 	mutex_lock(&fw_lock);
-
+	fw_buf = fw_priv->buf;
 	if (!fw_buf)
 		goto out;

@@ -636,6 +641,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 		/* fallthrough */
 	case -1:
 		fw_load_abort(fw_buf);
+		fw_priv->buf = NULL;
 		break;
 	}
 out:
@@ -704,6 +710,7 @@ static int fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 				    GFP_KERNEL);
 		if (!new_pages) {
 			fw_load_abort(buf);
+			fw_priv->buf = NULL;
 			return -ENOMEM;
 		}
 		memcpy(new_pages, buf->pages,
@@ -721,6 +728,7 @@ static int fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)

 		if (!buf->pages[buf->nr_pages]) {
 			fw_load_abort(buf);
+			fw_priv->buf = NULL;
 			return -ENOMEM;
 		}
 		buf->nr_pages++;
@@ -805,6 +813,7 @@ static void firmware_class_timeout_work(struct work_struct *work)
 		return;
 	}
 	fw_load_abort(fw_priv->buf);
+	fw_priv->buf = NULL;
 	mutex_unlock(&fw_lock);
 }

@@ -886,8 +895,6 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,

 	cancel_delayed_work_sync(&fw_priv->timeout_work);

-	fw_priv->buf = NULL;
-
 	device_remove_file(f_dev, &dev_attr_loading);
 err_del_bin_attr:
 	device_remove_bin_file(f_dev, &firmware_attr_data);


Thanks,
--
Ming Lei

Download attachment "fw-double-abort.patch" of type "application/octet-stream" (2170 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ