lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 29 Mar 2007 20:47:18 +0200
From:	Philipp Reisner <>
To:	Evgeniy Polyakov <>
Subject: Issue with connector/netlink

Hi Evgeniy,

Once again we have run into an issue in the connector/netlink code
path. This time we were not able to create a fix ourselves, but
please allow me to describe everything we have found:



general protection fault: 0000 [1] SMP
Modules linked in: tun nfs lockd nfs_acl sunrpc ipv6 bridge kvm_intel kvm drbd cn tsde
v i2c_i801 psmouse i2c_core floppy pcspkr serio_raw parport_pc parport evdev shpchp pc
i_hotplug ext3 jbd mbcache dm_mirror dm_snapshot dm_mod raid1 raid0 md_mod ide_generic
 sd_mod ata_piix libata scsi_mod generic ide_core ehci_hcd uhci_hcd e1000 thermal proc
essor fan
Pid: 1948, comm: cqueue/0 Not tainted #2
RIP: 0010:[<ffffffff8024f904>]  [<ffffffff8024f904>] netlink_broadcast+0x123/0x2de
RSP: 0018:ffff8100379bddc0  EFLAGS: 00010297
RAX: 656b736968772d31 RBX: ffff810079d7f800 RCX: 0000000000000004
RDX: ffff81007e113000 RSI: ffff810079d68280 RDI: ffffffff804c6a80
RBP: ffff810079d68280 R08: 00000000000000d0 R09: ffff810079d68280
R10: 0000000000000002 R11: ffff81007fd6fac0 R12: 0000000000000020
R13: 0000000000000000 R14: ffff810079d7f818 R15: 0000000000000003
FS:  0000000000000000(0000) GS:ffffffff804d6000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00002b2acc1ecb40 CR3: 0000000079ac1000 CR4: 00000000000026e0
Process cqueue/0 (pid: 1948, threadinfo ffff8100379bc000, task ffff810037fd8040)
Stack:  ffff810079d7f400 00000000000000d0 ffff81007e113000 000000007e069a24
 0000000000000000 0000000000000100 ffff810079d7f400 ffff81007e069a10
 ffff81007e069a24 ffffffff881e9d00 ffff81007cf07800 ffffffff881d5c23
Call Trace:
 [<ffffffff881d5c23>] :drbd:drbd_connector_callback+0x14f/0x19c
 [<ffffffff881b70c3>] :cn:cn_queue_wrapper+0x0/0x33
 [<ffffffff881b70d8>] :cn:cn_queue_wrapper+0x15/0x33
 [<ffffffff881b70c3>] :cn:cn_queue_wrapper+0x0/0x33
 [<ffffffff80247176>] run_workqueue+0x8f/0x137
 [<ffffffff80243ddc>] worker_thread+0x0/0x14a
 [<ffffffff8028e63b>] keventd_create_kthread+0x0/0x65
 [<ffffffff80243ef0>] worker_thread+0x114/0x14a
 [<ffffffff8027c586>] default_wake_function+0x0/0xe
 [<ffffffff8022ef0a>] kthread+0xd1/0x100
 [<ffffffff80256ec8>] child_rip+0xa/0x12
 [<ffffffff8028e63b>] keventd_create_kthread+0x0/0x65
 [<ffffffff8022ee39>] kthread+0x0/0x100
 [<ffffffff80256ebe>] child_rip+0x0/0x12

Code: 44 0f a3 38 19 c0 85 c0 0f 84 17 01 00 00 83 7c 24 24 00 74


>>RIP; ffffffff8024f904 <netlink_broadcast+123/2de>   <=====

>>RAX; 656b736968772d31 <phys_startup_64+656b736968572c31/ffffffff7fffff00>
>>RBX; ffff810079d7f800 <phys_startup_64+ffff810079b7f700/ffffffff7fffff00>
>>RDX; ffff81007e113000 <phys_startup_64+ffff81007df12f00/ffffffff7fffff00>
>>RSI; ffff810079d68280 <phys_startup_64+ffff810079b68180/ffffffff7fffff00>
>>RDI; ffffffff804c6a80 <nl_table_lock+0/10>
>>RBP; ffff810079d68280 <phys_startup_64+ffff810079b68180/ffffffff7fffff00>
>>R09; ffff810079d68280 <phys_startup_64+ffff810079b68180/ffffffff7fffff00>
>>R11; ffff81007fd6fac0 <phys_startup_64+ffff81007fb6f9c0/ffffffff7fffff00>
>>R14; ffff810079d7f818 <phys_startup_64+ffff810079b7f718/ffffffff7fffff00>

Trace; ffffffff881d5c23 <_end+7c0ffb3/7f03a390>
Trace; ffffffff881b70c3 <_end+7bf1453/7f03a390>
Trace; ffffffff881b70d8 <_end+7bf1468/7f03a390>
Trace; ffffffff881b70c3 <_end+7bf1453/7f03a390>
Trace; ffffffff80247176 <run_workqueue+8f/137>
Trace; ffffffff80243ddc <worker_thread+0/14a>
Trace; ffffffff8028e63b <keventd_create_kthread+0/65>
Trace; ffffffff80243ef0 <worker_thread+114/14a>
Trace; ffffffff8027c586 <default_wake_function+0/e>
Trace; ffffffff8022ef0a <kthread+d1/100>
Trace; ffffffff80256ec8 <child_rip+a/12>
Trace; ffffffff8028e63b <keventd_create_kthread+0/65>
Trace; ffffffff8022ee39 <kthread+0/100>
Trace; ffffffff80256ebe <child_rip+0/12>

Code;  ffffffff8024f904 <netlink_broadcast+123/2de>
0000000000000000 <_RIP>:
Code;  ffffffff8024f904 <netlink_broadcast+123/2de>   <=====
   0:   44 0f a3 38               bt     %r15d,(%rax)   <=====
Code;  ffffffff8024f908 <netlink_broadcast+127/2de>
   4:   19 c0                     sbb    %eax,%eax
Code;  ffffffff8024f90a <netlink_broadcast+129/2de>
   6:   85 c0                     test   %eax,%eax
Code;  ffffffff8024f90c <netlink_broadcast+12b/2de>
   8:   0f 84 17 01 00 00         je     125 <_RIP+0x125>
Code;  ffffffff8024f912 <netlink_broadcast+131/2de>
   e:   83 7c 24 24 00            cmpl   $0x0,0x24(%rsp)
Code;  ffffffff8024f917 <netlink_broadcast+136/2de>
  13:   74 00                     je     15 <_RIP+0x15>

The fault happens in netlink_broadcast(), which according to the trace
is called from drbd_connector_callback(). In fact, drbd_connector_callback()
calls cn_netlink_send(), which in turn calls netlink_broadcast().
   I guess cn_netlink_send() is missing from the trace because
   the call to netlink_broadcast() is made in the return
   statement of cn_netlink_send() (presumably compiled as a tail call).

netlink_broadcast() in turn calls the inlined function
do_one_broadcast(), in which the oops happens. The faulting instruction
belongs to the test_bit() call marked with an arrow below:

static inline int do_one_broadcast(struct sock *sk,
                                   struct netlink_broadcast_data *p)
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups)) <=<<==<<<===<<<<====<<<<<======
                goto out;

        if (p->failure) {
                goto out;

        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                         * skb ownership may have been set when
                         * delivered to a previous socket.
        if (p->skb2 == NULL) {
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;

        return 0;

Here is a bit more of the context in assembler source:

        .stabn  68,0,937,.LM391-netlink_broadcast
        movzbl  57(%rdx), %eax
        imulq   $80, %rax, %rax
        addq    nl_table(%rip), %rax
        movq    40(%rax), %r14
        .stabn  68,0,875,.LM392-netlink_broadcast
        movl    $0, 28(%rsp)
        movl    $0, 32(%rsp)
        movl    $0, 36(%rsp)
        jmp     .L239
        movl    12(%rsp), %eax
        cmpl    %eax, 544(%rbx)
        je      .L241
        cmpl    564(%rbx), %r15d
        jae     .L241
        movq    568(%rbx), %rax
        .stabs  "include/asm/bitops.h",132,0,0,.Ltext105
        .stabn  68,0,243,.LM393-netlink_broadcast
        btl %r15d,(%rax) <=<<==<<<===<<<<====<<<<<=====<<<<<<======
        sbbl %eax,%eax
        .stabs  "net/netlink/af_netlink.c",132,0,0,.Ltext106
        .stabn  68,0,875,.LM394-netlink_broadcast
        testl   %eax, %eax
        je      .L241
        .stabn  68,0,879,.LM395-netlink_broadcast
        cmpl    $0, 36(%rsp)
        je      .L245
        .stabn  68,0,880,.LM396-netlink_broadcast
        movq    %rbx, %rdi
        call    netlink_overrun
        jmp     .L241

I hope that all of this helps you to understand the issue... That's too much
of the networking internals for me...

To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists