lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 29 Nov 2017 03:22:51 +0800
From:   "Yang Shi" <yang.s@...baba-inc.com>
To:     Waiman Long <longman@...hat.com>, tglx@...utronix.de
Cc:     linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2 v8] lib: debugobjects: touch watchdog to avoid
 softlockup when !CONFIG_PREEMPT



On 11/28/17 11:20 AM, Waiman Long wrote:
> On 11/28/2017 02:13 PM, Yang Shi wrote:
>> Sorry for the typo, it should be v2 in the subject.
>>
>> Yang
>>
>>
>> On 11/28/17 11:12 AM, Yang Shi wrote:
>>> There are nested loops on debug objects free path, sometimes it may take
>>> over hundred thousands of loops, then cause soft lockup with
>>> !CONFIG_PREEMPT
>>> occasionally, like below:
>>>
>>>    NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s!
>>> [stress-ng-getde:110342]
>>>    Modules linked in: binfmt_misc(E) tcp_diag(E)
>>> inet_diag(E) bonding(E) intel_rapl(E) iosf_mbi(E)
>>> x86_pkg_temp_thermal(E) coretemp(E) iTCO_wdt(E) iTCO_vendor_support(E)
>>> kvm_intel(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E)
>>> dcdbas(E) ghash_clmulni_intel(E) aesni_intel(E) lrw(E) gf128mul(E)
>>> glue_helper(E) ablk_helper(E) ipmi_devintf(E) sg(E) cryptd(E) pcspkr(E)
>>> mei_me(E) lpc_ich(E) ipmi_si(E) mfd_core(E) mei(E) shpchp(E) wmi(E)
>>> ipmi_msghandler(E) acpi_power_meter(E) nfsd(E) auth_rpcgss(E) nfs_acl(E)
>>> lockd(E) grace(E) sunrpc(E) ip_tables(E) ext4(E) jbd2(E) mbcache(E)
>>> sd_mod(E) mgag200(E) igb(E) drm_kms_helper(E) ixgbe(E) syscopyarea(E)
>>> mdio(E) sysfillrect(E) sysimgblt(E) ptp(E) fb_sys_fops(E) pps_core(E)
>>> ttm(E) drm(E) crc32c_intel(E) i2c_algo_bit(E) i2c_core(E)
>>> megaraid_sas(E)
>>>     dca(E)
>>>    irq event stamp: 4340444
>>>    hardirqs last  enabled at (4340443): [<ffffffff817d6476>]
>>> _raw_spin_unlock_irqrestore+0x36/0x60
>>>    hardirqs last disabled at (4340444): [<ffffffff817d8c81>]
>>> apic_timer_interrupt+0x91/0xa0
>>>    softirqs last  enabled at (4340398): [<ffffffff817da179>]
>>> __do_softirq+0x349/0x50e
>>>    softirqs last disabled at (4340391): [<ffffffff810a5255>]
>>> irq_exit+0xf5/0x110
>>>    CPU: 15 PID: 110342 Comm: stress-ng-getde Tainted: G
>>> E   4.9.44-003.ali3000.alios7.x86_64.debug #1
>>>    Hardware name: Dell Inc. PowerEdge R720xd/0X6FFV, BIOS
>>> 1.6.0 03/07/2013
>>>    task: ffff884cbb0d0000 task.stack: ffff884cabc70000
>>>    RIP: 0010:[<ffffffff817d647b>]  [<ffffffff817d647b>]
>>> _raw_spin_unlock_irqrestore+0x3b/0x60
>>>    RSP: 0018:ffff884cabc77b78  EFLAGS: 00000292
>>>    RAX: ffff884cbb0d0000 RBX: 0000000000000292 RCX: 0000000000000000
>>>    RDX: ffff884cbb0d0000 RSI: 0000000000000001 RDI: 0000000000000292
>>>    RBP: ffff884cabc77b88 R08: 0000000000000000 R09: 0000000000000000
>>>    R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff8357a0d8
>>>    R13: ffff884cabc77bc8 R14: ffffffff8357a0d0 R15: 00000000000000fc
>>>    FS:  00002aee845fd2c0(0000) GS:ffff8852bd400000(0000)
>>> knlGS:0000000000000000
>>>    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>    CR2: 0000000002991808 CR3: 0000005123abf000 CR4: 00000000000406e0
>>>    Stack:
>>>     ffff884ff4fe0000 ffff884ff4fd8000 ffff884cabc77c00 ffffffff8141177e
>>>     0000000000000202 ffff884cbb0d0000 ffff884cabc77bc8 0000000000000006
>>>     ffff884ff4fda000 ffffffff8357a0d8 0000000000000000 91f5d976f6020b6c
>>>    Call Trace:
>>>     [<ffffffff8141177e>] debug_check_no_obj_freed+0x13e/0x220
>>>     [<ffffffff811f8751>] __free_pages_ok+0x1f1/0x5c0
>>>     [<ffffffff811fa785>] __free_pages+0x25/0x40
>>>     [<ffffffff812638db>] __free_slab+0x19b/0x270
>>>     [<ffffffff812639e9>] discard_slab+0x39/0x50
>>>     [<ffffffff812679f7>] __slab_free+0x207/0x270
>>>     [<ffffffff81269966>] ___cache_free+0xa6/0xb0
>>>     [<ffffffff8126c267>] qlist_free_all+0x47/0x80
>>>     [<ffffffff8126c5a9>] quarantine_reduce+0x159/0x190
>>>     [<ffffffff8126b3bf>] kasan_kmalloc+0xaf/0xc0
>>>     [<ffffffff8126b8a2>] kasan_slab_alloc+0x12/0x20
>>>     [<ffffffff81265e8a>] kmem_cache_alloc+0xfa/0x360
>>>     [<ffffffff812abc8f>] ? getname_flags+0x4f/0x1f0
>>>     [<ffffffff812abc8f>] getname_flags+0x4f/0x1f0
>>>     [<ffffffff812abe42>] getname+0x12/0x20
>>>     [<ffffffff81298da9>] do_sys_open+0xf9/0x210
>>>     [<ffffffff81298ede>] SyS_open+0x1e/0x20
>>>     [<ffffffff817d6e01>] entry_SYSCALL_64_fastpath+0x1f/0xc2
>>>    Code: 7f 18 53 48 8b 55 08 48 89 f3 be 01 00 00 00 e8 3c
>>> cd 92 ff 4c 89 e7 e8 f4 0e 93 ff f6 c7 02 74 1b e8 3a ac 92 ff 48 89 df
>>> 57 9d <66> 66 90 66 90 65 ff 0d d1 ff 83 7e 5b 41 5c 5d c3 48 89 df 57
>>>
>>> The code path might be called in either atomic or non-atomic context,
>>> so touching softlockup watchdog instead of calling cond_resched() which
>>> might fall asleep. However, it is unnecessary to touch the watchdog
>>> every loop, so just touch the watchdog at every 10000 (best estimate)
>>> loops.
>>>
>>> And, introduce a new knob:
>>> /sys/kernel/debug/debug_objects/suppress_lockup.
>>> 0 value means not suppress the softlockup message by touching the
>>> watchdog, non-zero value means suppress the softlockup message.
>>> The default value is zero.
>>>
>>> Signed-off-by: Yang Shi <yang.s@...baba-inc.com>
>>> CC: Waiman Long <longman@...hat.com>
>>> CC: Thomas Gleixner <tglx@...utronix.de>
>>> ---
>>> v1 --> v2:
>>>    * Added suppress_lockup knob in debugfs per Waiman's suggestion
>>>
>>>    lib/debugobjects.c | 30 ++++++++++++++++++++++++++++--
>>>    1 file changed, 28 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
>>> index 166488d..f009a21 100644
>>> --- a/lib/debugobjects.c
>>> +++ b/lib/debugobjects.c
>>> @@ -19,6 +19,7 @@
>>>    #include <linux/slab.h>
>>>    #include <linux/hash.h>
>>>    #include <linux/kmemleak.h>
>>> +#include <linux/nmi.h>
>>>      #define ODEBUG_HASH_BITS    14
>>>    #define ODEBUG_HASH_SIZE    (1 << ODEBUG_HASH_BITS)
>>> @@ -67,6 +68,8 @@ struct debug_bucket {
>>>    static int            debug_objects_allocated;
>>>    static int            debug_objects_freed;
>>>    +static int            suppress_lockup;
>>> +
>>>    static void free_obj_work(struct work_struct *work);
>>>    static DECLARE_WORK(debug_obj_work, free_obj_work);
>>>    @@ -768,6 +771,10 @@ static void __debug_check_no_obj_freed(const
>>> void *address, unsigned long size)
>>>                debug_objects_maxchain = cnt;
>>>              max_loops += cnt;
>>> +
>>> +        if (max_loops > 10000 && ((max_loops % 10000) == 0)
>>> +            && suppress_lockup != 0)
>>> +            touch_softlockup_watchdog();
>>>        }
>>>          if (max_loops > debug_objects_maxloops)
>>> @@ -810,9 +817,23 @@ static int debug_stats_open(struct inode *inode,
>>> struct file *filp)
>>>        .release    = single_release,
>>>    };
>>>    +static int suppress_lockup_get(void *data, u64 *val)
>>> +{
>>> +    *val = (u64) suppress_lockup;
>>> +    return 0;
>>> +}
>>> +
>>> +static int suppress_lockup_set(void *data, u64 val)
>>> +{
>>> +    suppress_lockup = (u32) val;
>>> +    return 0;
>>> +}
>>> +DEFINE_DEBUGFS_ATTRIBUTE(suppress_lockup_fops,
>>> +        suppress_lockup_get, suppress_lockup_set, "%llu\n");
>>> +
>>>    static int __init debug_objects_init_debugfs(void)
>>>    {
>>> -    struct dentry *dbgdir, *dbgstats;
>>> +    struct dentry *dbgdir, *dbgstats, *dbglockup;
>>>          if (!debug_objects_enabled)
>>>            return 0;
>>> @@ -826,10 +847,15 @@ static int __init debug_objects_init_debugfs(void)
>>>        if (!dbgstats)
>>>            goto err;
>>>    +    dbglockup = debugfs_create_file_unsafe("suppress_lockup",
>>> 0644, dbgdir,
>>> +                    NULL, &suppress_lockup_fops);
>>> +    if (!dbglockup)
>>> +        goto err;
>>> +
> 
> It is actually easier to use helper function like debugfs_create_u32()
> to create the file without defining any file ops.

Aha, thanks for mentioning the helper API. Will adopt the API in v3.

Regards,
Yang

> 
> Cheers,
> Longman
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ