[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20151223085428.69ac2522@tlielax.poochiereds.net>
Date: Wed, 23 Dec 2015 08:54:28 -0500
From: Jeff Layton <jlayton@...chiereds.net>
To: Dmitry Vyukov <dvyukov@...gle.com>
Cc: "J. Bruce Fields" <bfields@...ldses.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
"linux-fsdevel@...r.kernel.org" <linux-fsdevel@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
syzkaller <syzkaller@...glegroups.com>,
Kostya Serebryany <kcc@...gle.com>,
Alexander Potapenko <glider@...gle.com>,
Sasha Levin <sasha.levin@...cle.com>,
Eric Dumazet <edumazet@...gle.com>
Subject: Re: fs: WARNING in locks_free_lock_context()
On Wed, 23 Dec 2015 11:37:39 +0100
Dmitry Vyukov <dvyukov@...gle.com> wrote:
> Hello,
>
> The following program triggers
> WARN_ON_ONCE(!list_empty(&ctx->flc_posix)) warning in
> locks_free_lock_context (run it in a loop):
>
> // autogenerated by syzkaller (http://github.com/google/syzkaller)
> #include <unistd.h>
> #include <sys/syscall.h>
> #include <string.h>
> #include <stdint.h>
> #include <pthread.h>
>
> #ifndef SYS_memfd_create
> #define SYS_memfd_create 319
> #endif
>
> long r[15];
> long done[14];
>
> void *thr(void *arg)
> {
> if (rand()%2)
> usleep(100);
>
> switch ((long)arg) {
> case 0:
> r[0] = syscall(SYS_mmap, 0x20000000ul, 0x5000ul,
> 0x3ul, 0x32ul, 0xfffffffffffffffful, 0x0ul);
> break;
> case 1:
> memcpy((void*)0x20000c49,
> "\xb6\x70\x70\x70\x31\x73\x65\x63\x75\x72\x69\x74\x79\x9e\x00", 15);
> r[2] = syscall(SYS_memfd_create, 0x20000c49ul, 0x3ul,
> 0, 0, 0, 0);
> break;
> case 2:
> r[3] = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul,
> 0x20001000ul, 0, 0);
> if (r[3] != -1)
> r[4] = *(uint32_t*)0x20001000;
> if (r[3] != -1)
> r[5] = *(uint32_t*)0x20001004;
> break;
> case 3:
> *(uint16_t*)0x20000000 = (uint16_t)0x0;
> *(uint16_t*)0x20000002 = (uint16_t)0x1;
> *(uint64_t*)0x20000008 = (uint64_t)0x6;
> *(uint64_t*)0x20000010 = (uint64_t)0xad;
> *(uint32_t*)0x20000018 = (uint32_t)0x0;
> r[11] = syscall(SYS_fcntl, r[5], 0x7ul, 0x20000000ul, 0, 0, 0);
> break;
> case 4:
> r[12] = syscall(SYS_write, r[5], 0x200006cbul,
> 0x1000ul, 0, 0, 0);
> break;
> case 5:
> r[13] = syscall(SYS_close, r[5], 0, 0, 0, 0, 0);
> break;
> case 6:
> r[14] = syscall(SYS_dup2, r[2], r[4], 0, 0, 0, 0);
> break;
> }
> done[(long)arg] = 1;
> return 0;
> }
>
> int main()
> {
> long i, j;
> pthread_t th[14];
>
> srand(time(0)+getpid());
> memset(r, -1, sizeof(r));
> for (i = 0; i < 7; i++) {
> pthread_create(&th[i], 0, thr, (void*)i);
> for (j = 0; j < 10; j++) {
> if (done[i])
> break;
> usleep(100);
> }
> }
> for (i = 0; i < 7; i++)
> done[i] = 0;
> for (i = 0; i < 7; i++) {
> pthread_create(&th[7+i], 0, thr, (void*)i);
> if (rand()%2)
> continue;
> for (j = 0; j < 10; j++) {
> if (done[i])
> break;
> usleep(100);
> }
> }
> usleep(100);
> return 0;
> }
>
>
> ------------[ cut here ]------------
> WARNING: CPU: 3 PID: 1975 at fs/locks.c:241
> locks_free_lock_context+0x118/0x180()
> Modules linked in:
> CPU: 3 PID: 1975 Comm: a.out Not tainted 4.4.0-rc6+ #173
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> 00000000ffffffff ffff880068e67bf8 ffffffff82899ffd 0000000000000000
> ffff88006130af00 ffffffff85e17d60 ffff880068e67c38 ffffffff812ebbb9
> ffffffff818162d8 ffffffff85e17d60 00000000000000f1 ffff8800685c2828
> Call Trace:
> [< inline >] __dump_stack lib/dump_stack.c:15
> [<ffffffff82899ffd>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
> [<ffffffff812ebbb9>] warn_slowpath_common+0xd9/0x140 kernel/panic.c:460
> [<ffffffff812ebde9>] warn_slowpath_null+0x29/0x30 kernel/panic.c:493
> [<ffffffff818162d8>] locks_free_lock_context+0x118/0x180 fs/locks.c:241
> [<ffffffff81765783>] __destroy_inode+0x1d3/0x4d0 fs/inode.c:228
> [<ffffffff81765acb>] destroy_inode+0x4b/0x120 fs/inode.c:253
> [<ffffffff81765ec0>] evict+0x320/0x4f0 fs/inode.c:559
> [< inline >] iput_final fs/inode.c:1477
> [<ffffffff817665dc>] iput+0x45c/0x850 fs/inode.c:1504
> [< inline >] dentry_iput fs/dcache.c:358
> [<ffffffff81757237>] __dentry_kill+0x457/0x620 fs/dcache.c:543
> [< inline >] dentry_kill fs/dcache.c:587
> [<ffffffff8175c499>] dput+0x659/0x740 fs/dcache.c:796
> [<ffffffff817162fc>] __fput+0x42c/0x780 fs/file_table.c:226
> [<ffffffff817166d5>] ____fput+0x15/0x20 fs/file_table.c:244
> [<ffffffff8134679b>] task_work_run+0x16b/0x200 kernel/task_work.c:115
> [< inline >] tracehook_notify_resume include/linux/tracehook.h:191
> [<ffffffff81003990>] exit_to_usermode_loop+0x180/0x1a0
> arch/x86/entry/common.c:251
> [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:282
> [<ffffffff8100631f>] syscall_return_slowpath+0x19f/0x210
> arch/x86/entry/common.c:344
> [<ffffffff85ccea22>] int_ret_from_sys_call+0x25/0x9f
> arch/x86/entry/entry_64.S:281
> ---[ end trace 2dde0624dd974a19 ]---
>
>
> On commit 4ef7675344d687a0ef5b0d7c0cee12da005870c0 (Dec 20).
Ooh, nice catch...and just in time for Christmas.
filp_close does this after the fd has been detached from the file table
in __close_fd:
if (likely(!(filp->f_mode & FMODE_PATH))) {
dnotify_flush(filp, id);
locks_remove_posix(filp, id);
}
fput(filp);
...and fcntl_setlk does this:
/*
* Attempt to detect a close/fcntl race and recover by
* releasing the lock that was just acquired.
*/
/*
* we need that spin_lock here - it prevents reordering between
* update of i_flctx->flc_posix and check for it done in close().
* rcu_read_lock() wouldn't do.
*/
spin_lock(&current->files->file_lock);
f = fcheck(fd);
spin_unlock(&current->files->file_lock);
if (!error && f != filp && flock.l_type != F_UNLCK) {
flock.l_type = F_UNLCK;
goto again;
}
...so in principle that should keep new locks from racing onto the list
just after we call filp_close. Hmm...I'll see if I can reproduce and
figure out how this could happen.
Thanks,
--
Jeff Layton <jlayton@...chiereds.net>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists