[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b5cfb010-c892-49d6-a278-fae0e1a0b0fc@lucifer.local>
Date: Thu, 17 Oct 2024 10:47:38 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Jann Horn <jannh@...gle.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH fix 6.12] mm: mark mas allocation in
vms_abort_munmap_vmas as __GFP_NOFAIL
On Wed, Oct 16, 2024 at 05:07:53PM +0200, Jann Horn wrote:
> vms_abort_munmap_vmas() is a recovery path where, on entry, some VMAs
> have already been torn down halfway (in a way we can't undo) but are
> still present in the maple tree.
>
> At this point, we *must* remove the VMAs from the VMA tree, otherwise
> we get UAF.
>
> Because removing VMA tree nodes can require memory allocation, the
> existing code has an error path which tries to handle this by
> reattaching the VMAs; but that can't be done safely.
>
> A nicer way to fix it would probably be to preallocate enough maple
> tree nodes for the removal before the point of no return, or something
> like that; but for now, fix it the easy and kinda ugly way, by marking
> this allocation __GFP_NOFAIL.
>
> Fixes: 4f87153e82c4 ("mm: change failure of MAP_FIXED to restoring the gap on failure")
> Signed-off-by: Jann Horn <jannh@...gle.com>
I kind of question whether this is real-world achievable (yes I realise you
included a repro, but one prodding /sys/kernel/debug bits :>) but to be
honest at this point I think I feel a lot safer just clearing this here for
sure. So:
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> This can be tested with the following reproducer (on a kernel built with
> CONFIG_KASAN=y, CONFIG_FAILSLAB=y, CONFIG_FAULT_INJECTION_DEBUG_FS=y,
> with the reproducer running as root):
>
> ```
>
> typeof(x) __res = (x); \
> if (__res == (typeof(x))-1) \
> err(1, "SYSCHK(" #x ")"); \
> __res; \
> })
>
> static void write_file(char *name, char *buf) {
> int fd = open(name, O_WRONLY);
> if (fd == -1)
> err(1, "unable to open for writing: %s", name);
> if (SYSCHK(write(fd, buf, strlen(buf))) != strlen(buf))
> errx(1, "write %s", name);
> SYSCHK(close(fd));
> }
>
> int main(void) {
> // make a large area with a bunch of VMAs
> char *area = SYSCHK(mmap(NULL, AREA_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
> for (int off=0; off<AREA_SIZE; off += 0x2000)
> SYSCHK(mmap(area+off, 0x1000, PROT_READ, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
>
> // open a file whose mappings use usbdev_vm_ops, and map it in part of this area
> int map_fd = SYSCHK(open("/dev/bus/usb/001/001", O_RDONLY));
> void *map = SYSCHK(mmap(area, 0x1000, PROT_READ, MAP_SHARED|MAP_FIXED, map_fd, 0));
> close(map_fd);
>
> // make RWX mapping request fail late
> SYSCHK(prctl(65/*PR_SET_MDWE*/, 1/*PR_MDWE_REFUSE_EXEC_GAIN*/, 0, 0, 0));
>
> // some random other file
> int fd = SYSCHK(open("/etc/passwd", O_RDONLY));
>
> /* disarm for now */
> write_file("/sys/kernel/debug/failslab/probability", "0");
>
> /* fail allocations of maple_node... */
> write_file("/sys/kernel/debug/failslab/cache-filter", "Y");
> write_file("/sys/kernel/slab/maple_node/failslab", "1");
> /* ... even though it's a sleepable allocation... */
> write_file("/sys/kernel/debug/failslab/ignore-gfp-wait", "N");
> /* ... in this task... */
> write_file("/sys/kernel/debug/failslab/task-filter", "Y");
> write_file("/proc/self/make-it-fail", "1");
> /* ... every time... */
> write_file("/sys/kernel/debug/failslab/times", "-1");
> /* ... after 8 successful allocations (value chosen experimentally)... */
> write_file("/sys/kernel/debug/failslab/space", "2048"); // one object is 256
> /* ... and print where the allocations actually failed... */
> write_file("/sys/kernel/debug/failslab/verbose", "2");
> /* ... and that's it, arm it */
> write_file("/sys/kernel/debug/failslab/probability", "100");
>
> // This mmap request will fail late due to MDWE.
> // The error recovery path will attempt to clear out the VMA pointers with a
> // spanning_store (which will be blocked by error injection).
> mmap(area, AREA_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, fd, 0);
>
> /* disarm */
> write_file("/sys/kernel/debug/failslab/probability", "0");
>
> SYSCHK(munmap(map, 0x1000)); // UAF expected here
> system("cat /proc/$PPID/maps");
> }
> ```
>
> Result:
> ```
> FAULT_INJECTION: forcing a failure.
> name failslab, interval 1, probability 100, space 256, times -1
> CPU: 3 UID: 0 PID: 607 Comm: unmap-fail Not tainted 6.12.0-rc3-00013-geca631b8fe80 #518
> [...]
> Call Trace:
> <TASK>
> dump_stack_lvl+0x80/0xa0
> should_fail_ex+0x4d3/0x5c0
> [...]
> should_failslab+0xc7/0x130
> kmem_cache_alloc_noprof+0x73/0x3a0
> [...]
> mas_alloc_nodes+0x3a3/0x690
> mas_nomem+0xaa/0x1d0
> mas_store_gfp+0x515/0xa80
> [...]
> mmap_region+0xa96/0x2590
> [...]
> do_mmap+0x71e/0xfe0
> [...]
> vm_mmap_pgoff+0x17a/0x2f0
> [...]
> ksys_mmap_pgoff+0x2ee/0x460
> do_syscall_64+0x68/0x140
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [...]
> </TASK>
> mmap: unmap-fail: (607) Unable to abort munmap() operation
> ==================================================================
> BUG: KASAN: slab-use-after-free in dec_usb_memory_use_count+0x365/0x430
> Read of size 8 at addr ffff88810e9ba8b8 by task unmap-fail/607
>
> CPU: 3 UID: 0 PID: 607 Comm: unmap-fail Not tainted 6.12.0-rc3-00013-geca631b8fe80 #518
> [...]
> Call Trace:
> <TASK>
> dump_stack_lvl+0x66/0xa0
> print_report+0xce/0x670
> [...]
> kasan_report+0xf7/0x130
> [...]
> dec_usb_memory_use_count+0x365/0x430
> remove_vma+0x76/0x120
> vms_complete_munmap_vmas+0x447/0x750
> do_vmi_align_munmap+0x4b9/0x700
> [...]
> do_vmi_munmap+0x164/0x2e0
> __vm_munmap+0x128/0x2a0
> [...]
> __x64_sys_munmap+0x59/0x80
> do_syscall_64+0x68/0x140
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [...]
> </TASK>
>
> Allocated by task 607:
> kasan_save_stack+0x33/0x60
> kasan_save_track+0x14/0x30
> __kasan_kmalloc+0xaa/0xb0
> usbdev_mmap+0x1a0/0xaf0
> mmap_region+0xf6e/0x2590
> do_mmap+0x71e/0xfe0
> vm_mmap_pgoff+0x17a/0x2f0
> ksys_mmap_pgoff+0x2ee/0x460
> do_syscall_64+0x68/0x140
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Freed by task 607:
> kasan_save_stack+0x33/0x60
> kasan_save_track+0x14/0x30
> kasan_save_free_info+0x3b/0x60
> __kasan_slab_free+0x4f/0x70
> kfree+0x148/0x450
> vms_clean_up_area+0x188/0x220
> mmap_region+0xf1b/0x2590
> do_mmap+0x71e/0xfe0
> vm_mmap_pgoff+0x17a/0x2f0
> ksys_mmap_pgoff+0x2ee/0x460
> do_syscall_64+0x68/0x140
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [...]
> ==================================================================
> ```
> ---
> mm/vma.h | 12 +++---------
> 1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/mm/vma.h b/mm/vma.h
> index 819f994cf727..ebd78f1577f3 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -241,15 +241,9 @@ static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
> * failure method of leaving a gap where the MAP_FIXED mapping failed.
> */
> mas_set_range(mas, vms->start, vms->end - 1);
> - if (unlikely(mas_store_gfp(mas, NULL, GFP_KERNEL))) {
> - pr_warn_once("%s: (%d) Unable to abort munmap() operation\n",
> - current->comm, current->pid);
> - /* Leaving vmas detached and in-tree may hamper recovery */
> - reattach_vmas(mas_detach);
> - } else {
> - /* Clean up the insertion of the unfortunate gap */
> - vms_complete_munmap_vmas(vms, mas_detach);
> - }
> + mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
> + /* Clean up the insertion of the unfortunate gap */
> + vms_complete_munmap_vmas(vms, mas_detach);
> }
>
> int
>
> ---
> base-commit: eca631b8fe808748d7585059c4307005ca5c5820
> change-id: 20241016-fix-munmap-abort-2330b61332aa
> --
> Jann Horn <jannh@...gle.com>
>
Powered by blists - more mailing lists