lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 29 Jan 2016 11:06:30 +0100
From:	Dmitry Vyukov <dvyukov@...gle.com>
To:	"Kirill A. Shutemov" <kirill@...temov.name>
Cc:	Vlastimil Babka <vbabka@...e.cz>,
	Doug Gilbert <dgilbert@...erlog.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	David Rientjes <rientjes@...gle.com>,
	Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Shiraz Hashim <shashim@...eaurora.org>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Hugh Dickins <hughd@...gle.com>,
	Sasha Levin <sasha.levin@...cle.com>,
	syzkaller <syzkaller@...glegroups.com>,
	Kostya Serebryany <kcc@...gle.com>,
	Alexander Potapenko <glider@...gle.com>,
	linux-scsi <linux-scsi@...r.kernel.org>
Subject: Re: mm: another VM_BUG_ON_PAGE(PageTail(page))

On Thu, Jan 28, 2016 at 12:40 PM, Kirill A. Shutemov
<kirill@...temov.name> wrote:
> On Thu, Jan 28, 2016 at 11:55:14AM +0100, Dmitry Vyukov wrote:
>> On Thu, Jan 28, 2016 at 11:51 AM, Kirill A. Shutemov
>> <kirill@...temov.name> wrote:
>> > On Thu, Jan 28, 2016 at 11:27:11AM +0100, Dmitry Vyukov wrote:
>> >> Hello,
>> >>
>> >> The following program triggers VM_BUG_ON_PAGE(PageTail(page)):
>> >>
>> >> // autogenerated by syzkaller (http://github.com/google/syzkaller)
>> >> #include <fcntl.h>
>> >> #include <numaif.h>
>> >> #include <sys/mman.h>
>> >> #include <unistd.h>
>> >>
>> >> int main()
>> >> {
>> >>   int fd;
>> >>
>> >>   mmap((void*)0x20000000, 4096, PROT_READ|PROT_WRITE,
>> >> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
>> >>   fd = open("/dev/sg1", O_RDONLY|O_SYNC|0x100000);
>> >>   mmap((void*)0x20001000, 0x4000, PROT_READ|PROT_WRITE,
>> >> MAP_PRIVATE|MAP_FIXED, fd, 0);
>> >>   mbind((void*)0x20000000, 0x4000, 0x8002, (void*)0x20002ff8, 3660,
>> >> MPOL_MF_STRICT|MPOL_MF_MOVE);
>> >>   return 0;
>> >> }
>> >
>> > I don't have sg1 in my VM. I changed it to sg0 and it doesn't trigger an
>> > issue: mbind() returns -EINVAL as it supposed to. Hm..
>>
>> I've attached my config, and here is how I start qemu:
>>
>> qemu-system-x86_64 -hda wheezy.img -net
>> user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel
>> arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug
>> earlyprintk=serial slub_debug=UZ" -enable-kvm -pidfile vm_pid -m 2G
>> -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp
>> sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet
>> -soundhw all
>
> Still no luck. :-/
>
> Could you try patch below. I want to see what vm_flags are.
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 27d135408a22..93edf181f88a 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -548,8 +548,10 @@ retry:
>                         goto retry;
>                 }
>
> -               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
> +               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
> +                       VM_BUG_ON_VMA(PageTail(page), vma);
>                         migrate_page_add(page, qp->pagelist, flags);
> +               }
>         }
>         pte_unmap_unlock(pte - 1, ptl);
>         cond_resched();



Hmm... now I cannot reproduce it with the original program either.
But I reproduced it with another program. Please try the one below.
I've updated to 26cd83670f2f5a3d5b5514a1f7d96567cdb9558b and have a few
pending fixes to mm (see below) including your VM_BUG_ON_VMA change
above.

This report contains vm_flags:


vma ffff880062082450 start 0000000020001000 end 0000000020005000
next ffff880030460a60 prev ffff880062083910 mm ffff88002f691380
prot 8000000000000025 anon_vma ffff880062a55180 vm_ops ffffffff86d5aac0
pgoff 0 file ffff880060b1f0c0 private_data ffff880061240000
flags: 0x4144073(read|write|mayread|maywrite|mayexec|io|dontexpand|account)
------------[ cut here ]------------
kernel BUG at mm/mempolicy.c:552!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN
Modules linked in:
CPU: 3 PID: 11434 Comm: tail Not tainted 4.5.0-rc1+ #301
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff88002ecddf00 ti: ffff880033ed0000 task.ti: ffff880033ed0000
RIP: 0010:[<ffffffff817502db>]  [<ffffffff817502db>]
queue_pages_pte_range+0x8ab/0x10f0
RSP: 0018:ffff880033ed7a98  EFLAGS: 00010296
RAX: 0000000000000001 RBX: ffff880061e77018 RCX: 0000000000000000
RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffffed00067daf29
RBP: ffff880033ed7b10 R08: 0000000000000001 R09: 0000000000000000
R10: 1ffff1000c41048a R11: 0000000000000001 R12: 0000000020003000
R13: dffffc0000000000 R14: ffffea0001b4c280 R15: 0000000020004000
FS:  00007f670c480700(0000) GS:ffff88006d700000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000020003000 CR3: 00000000004e7000 CR4: 00000000000006e0
Stack:
 ffffea0000000001 ffff880033ed7c70 ffffea0001b4c200 ffff880062020840
 ffffed00067daf8e ffffea0001b4c2a0 0000000000000023 ffff880062082450
 ffff880033ed7c60 ffff880032747800 ffffffff8174fa30 dffffc0000000000
Call Trace:
 [<     inline     >] walk_pmd_range mm/pagewalk.c:50
 [<     inline     >] walk_pud_range mm/pagewalk.c:90
 [<     inline     >] walk_pgd_range mm/pagewalk.c:116
 [<ffffffff817209b3>] __walk_page_range+0x653/0xcd0 mm/pagewalk.c:204
 [<ffffffff81721164>] walk_page_range+0x134/0x300 mm/pagewalk.c:281
 [<ffffffff8174cd8b>] queue_pages_range+0xfb/0x130 mm/mempolicy.c:689
 [<ffffffff81755611>] do_mbind+0x2c1/0xdc0 mm/mempolicy.c:1241
 [<     inline     >] SYSC_mbind mm/mempolicy.c:1353
 [<ffffffff8175646d>] SyS_mbind+0x13d/0x150 mm/mempolicy.c:1335
 [<ffffffff86653276>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
Code: 55 98 48 8d 42 ff e9 ce fa ff ff e8 00 9f e1 ff 4c 89 f7 e8 18
fc f2 ff e9 92 fe ff ff e8 ee 9e e1 ff 48 8b 7d c0 e8 85 87 f8 ff <0f>
0b e8 de 9e e1 ff 48 89 df 48 89 f8 0f 1f 40 00 48 89 c3 48
RIP  [<ffffffff817502db>] queue_pages_pte_range+0x8ab/0x10f0 mm/mempolicy.c:552
 RSP <ffff880033ed7a98>
---[ end trace 3f5635b07e2902a8 ]---
BUG: sleeping function called from invalid context at include/linux/sched.h:2805
in_atomic(): 1, irqs_disabled(): 0, pid: 11434, name: tail
INFO: lockdep is turned off.
CPU: 3 PID: 11434 Comm: tail Tainted: G      D         4.5.0-rc1+ #301
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 00000000ffffffff ffff880033ed7588 ffffffff82be11cd ffff88002ecddf00
 0000000000002caa 0000000000000000 ffff880033ed75b0 ffffffff813cb8cb
 ffff88002ecddf00 ffffffff867387a0 0000000000000af5 ffff880033ed75f0
Call Trace:
 [<     inline     >] __dump_stack lib/dump_stack.c:15
 [<ffffffff82be11cd>] dump_stack+0x6f/0xa2 lib/dump_stack.c:50
 [<ffffffff813cb8cb>] ___might_sleep+0x27b/0x3a0 kernel/sched/core.c:7703
 [<ffffffff813cba80>] __might_sleep+0x90/0x1a0 kernel/sched/core.c:7665
 [<     inline     >] threadgroup_change_begin include/linux/sched.h:2805
 [<ffffffff813830d1>] exit_signals+0x81/0x430 kernel/signal.c:2392
 [<ffffffff8135c3dc>] do_exit+0x23c/0x2cb0 kernel/exit.c:701
 [<ffffffff811aa28f>] oops_end+0x9f/0xd0 arch/x86/kernel/dumpstack.c:250
 [<ffffffff811aa686>] die+0x46/0x60 arch/x86/kernel/dumpstack.c:316
 [<     inline     >] do_trap_no_signal arch/x86/kernel/traps.c:205
 [<ffffffff811a3b9f>] do_trap+0x18f/0x380 arch/x86/kernel/traps.c:251
 [<ffffffff811a400e>] do_error_trap+0x11e/0x280 arch/x86/kernel/traps.c:290
 [<ffffffff811a527b>] do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:303
 [<ffffffff86654f8e>] invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:830
 [<     inline     >] walk_pmd_range mm/pagewalk.c:50
 [<     inline     >] walk_pud_range mm/pagewalk.c:90
 [<     inline     >] walk_pgd_range mm/pagewalk.c:116
 [<ffffffff817209b3>] __walk_page_range+0x653/0xcd0 mm/pagewalk.c:204
 [<ffffffff81721164>] walk_page_range+0x134/0x300 mm/pagewalk.c:281
 [<ffffffff8174cd8b>] queue_pages_range+0xfb/0x130 mm/mempolicy.c:689
 [<ffffffff81755611>] do_mbind+0x2c1/0xdc0 mm/mempolicy.c:1241
 [<     inline     >] SYSC_mbind mm/mempolicy.c:1353
 [<ffffffff8175646d>] SyS_mbind+0x13d/0x150 mm/mempolicy.c:1335
 [<ffffffff86653276>] entry_SYSCALL_64_fastpath+0x16/0x7a
arch/x86/entry/entry_64.S:185
note: tail[11434] exited with preempt_count 1





// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Syscall results, indexed by generator-assigned slot. Written by the worker
   threads without any locking; main() presets every slot to -1. */
long r[12];

/*
 * Thread body: performs one step of the reproducer, selected by the small
 * integer passed in through arg (0..6). Each step stores its syscall result
 * in the global r[] array; step 2 reads r[2] and step 6 reads r[10], which
 * are written by steps 1 and 5. Any other value of arg does nothing.
 * No return values are checked. Always returns 0.
 */
void* thr(void* arg)
{
  switch ((long)arg) {
  case 0:
    /* Map one page at the fixed address 0x20000000 with fd = -1.
       NOTE(review): prot 0x3 / flags 0x32 are presumably
       PROT_READ|PROT_WRITE and MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on
       Linux -- confirm against <sys/mman.h>. */
    r[0] = syscall(SYS_mmap, 0x20000000ul, 0x1000ul, 0x3ul, 0x32ul,
                   0xfffffffffffffffful, 0x0ul);
    break;
  case 1:
    /* Open the SCSI generic device; the fd is kept in r[2] for step 2.
       NOTE(review): 0x101000 is presumably O_SYNC on Linux x86-64 --
       confirm. */
    r[2] = syscall(SYS_open, "/dev/sg1", 0x101000ul, 0);
    break;
  case 2:
    /* Map 0x4000 bytes of the fd from r[2] at 0x20001000, i.e. directly
       after the page from step 0. If step 1 has not run yet r[2] is still
       -1. NOTE(review): flags 0x12 are presumably MAP_PRIVATE|MAP_FIXED. */
    r[3] = syscall(SYS_mmap, 0x20001000ul, 0x4000ul, 0x3ul, 0x12ul,
                   r[2], 0x0ul);
    break;
  case 3:
    /* Store a 64-bit value at 0x20002ff8 (inside the range requested in
       step 2) and pass that address as the 4th mbind argument. The mbind
       range 0x20000000..0x20004000 covers the step-0 page plus the first
       three pages of the step-2 mapping. NOTE(review): 4th/5th arguments
       are presumably nodemask/maxnode, and flags 0x3 presumably
       MPOL_MF_STRICT|MPOL_MF_MOVE -- confirm against mbind(2). */
    *(uint64_t*)0x20002ff8 = (uint64_t)0xffffffff;
    r[5] = syscall(SYS_mbind, 0x20000000ul, 0x4000ul, 0x8002ul,
                   0x20002ff8ul, 0xe4cul, 0x3ul);
    break;
  case 4:
    /* Map one more page at 0x20005000, right after the step-2 range, with
       the same prot/flags as step 0. */
    r[6] = syscall(SYS_mmap, 0x20005000ul, 0x1000ul, 0x3ul, 0x32ul,
                   0xfffffffffffffffful, 0x0ul);
    break;
  case 5:
    /* Write three strings into the mapped pages, then pass their addresses
       to request_key. The first is "{:" plus NUL, ending on the last byte
       of the step-4 page. */
    memcpy((void*)0x20005ffd, "\x7b\x3a\x00", 3);
    /* 38 bytes: "posix_acl_accessppp1keyringselfeth0", then '/', a
       backslash and a NUL. */
    memcpy((void*)0x200053e8, "\x70\x6f\x73\x69\x78\x5f\x61\x63\x6c\x5f"
                              "\x61\x63\x63\x65\x73\x73\x70\x70\x70\x31"
                              "\x6b\x65\x79\x72\x69\x6e\x67\x73\x65\x6c"
                              "\x66\x65\x74\x68\x30\x2f\x5c\x00",
           38);
    /* "security" plus NUL, 9 bytes starting at 0x20000ffe: the copy
       straddles the boundary between the step-0 page and the step-2
       mapping at 0x20001000. */
    memcpy((void*)0x20000ffe, "\x73\x65\x63\x75\x72\x69\x74\x79\x00",
           9);
    /* NOTE(review): arguments are presumably (type, description,
       callout_info, dest_keyring) -- confirm against request_key(2). */
    r[10] = syscall(SYS_request_key, 0x20005ffdul, 0x200053e8ul,
                    0x20000ffeul, 0xfffffffffffffffful, 0, 0);
    break;
  case 6:
    /* keyctl command 0x11 on the value step 5 left in r[10], with the
       address 0x20003fba (inside the step-2 range) and the length 0xa9.
       NOTE(review): 0x11 is presumably KEYCTL_GET_SECURITY -- confirm
       against <linux/keyctl.h>. */
    r[11] =
        syscall(SYS_keyctl, 0x11ul, r[10], 0x20003fbaul, 0xa9ul, 0, 0);
    break;
  }
  return 0;
}

/*
 * Driver: runs the seven steps of thr() twice, each step in its own thread.
 * pthread_create results are not checked and no thread is ever joined; the
 * spacing between thread starts is controlled only by the usleep() calls.
 */
int main()
{
  long i;
  pthread_t th[7];

  /* Set every byte of r[] to 0xff, so each slot reads as -1 until a step
     overwrites it. */
  memset(r, -1, sizeof(r));
  /* First pass: start steps 0..6 with a 10 ms pause after each one. */
  for (i = 0; i < 7; i++) {
    pthread_create(&th[i], 0, thr, (void*)i);
    usleep(10000);
  }
  /* Second pass: start the same steps again, pausing only after the
     even-numbered ones, so each odd step is started back-to-back with the
     even step that follows it. The handles in th[] from the first pass are
     overwritten here. */
  for (i = 0; i < 7; i++) {
    pthread_create(&th[i], 0, thr, (void*)i);
    if (i % 2 == 0)
      usleep(10000);
  }
  /* Wait 100 ms before returning; threads are not joined. */
  usleep(100000);
  return 0;
}



diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 27d1354..93edf18 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -548,8 +548,10 @@ retry:
                        goto retry;
                }

-               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+               if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+                       VM_BUG_ON_VMA(PageTail(page), vma);
                        migrate_page_add(page, qp->pagelist, flags);
+               }
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
diff --git a/mm/mmap.c b/mm/mmap.c
index 84b1262..082b8a9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -387,8 +387,9 @@ static long vma_compute_subtree_gap(struct
vm_area_struct *vma)
 }

 #ifdef CONFIG_DEBUG_VM_RB
-static int browse_rb(struct rb_root *root)
+static int browse_rb(struct mm_struct *mm)
 {
+       struct rb_root *root = &mm->mm_rb;
        int i = 0, j, bug = 0;
        struct rb_node *nd, *pn = NULL;
        unsigned long prev = 0, pend = 0;
@@ -411,12 +412,14 @@ static int browse_rb(struct rb_root *root)
                                  vma->vm_start, vma->vm_end);
                        bug = 1;
                }
+               spin_lock(&mm->page_table_lock);
                if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
                        pr_emerg("free gap %lx, correct %lx\n",
                               vma->rb_subtree_gap,
                               vma_compute_subtree_gap(vma));
                        bug = 1;
                }
+               spin_unlock(&mm->page_table_lock);
                i++;
                pn = nd;
                prev = vma->vm_start;
@@ -453,12 +456,16 @@ static void validate_mm(struct mm_struct *mm)
        struct vm_area_struct *vma = mm->mmap;

        while (vma) {
+               struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;

-               vma_lock_anon_vma(vma);
-               list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                       anon_vma_interval_tree_verify(avc);
-               vma_unlock_anon_vma(vma);
+               if (anon_vma) {
+                       anon_vma_lock_read(anon_vma);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
+                       anon_vma_unlock_read(anon_vma);
+               }
+
                highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
@@ -472,7 +479,7 @@ static void validate_mm(struct mm_struct *mm)
                          mm->highest_vm_end, highest_address);
                bug = 1;
        }
-       i = browse_rb(&mm->mm_rb);
+       i = browse_rb(mm);
        if (i != mm->map_count) {
                if (i != -1)
                        pr_emerg("map_count %d rb %d\n", mm->map_count, i);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ