lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJfpegu8qTARQBftZSaiif0E6dbRcbBvZvW7dQf8sf_ymoogCA@mail.gmail.com>
Date: Fri, 22 Mar 2024 14:50:00 +0100
From: Miklos Szeredi <miklos@...redi.hu>
To: xingwei lee <xrivendell7@...il.com>
Cc: linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org, 
	samsun1006219@...il.com, syzkaller-bugs@...glegroups.com, 
	linux-mm <linux-mm@...ck.org>, Mike Rapoport <rppt@...nel.org>
Subject: Re: BUG: unable to handle kernel paging request in fuse_copy_do

[MM list + secretmem author CC-d]

On Thu, 21 Mar 2024 at 08:52, xingwei lee <xrivendell7@...il.com> wrote:
>
> Hello, I found a bug titled "BUG: unable to handle kernel paging
> request in fuse_copy_do" with a modified syzkaller, and it may be
> related to fs/fuse.
> I also confirmed it on the latest upstream.
>
> If you fix this issue, please add the following tag to the commit:
> Reported-by: xingwei lee <xrivendell7@...il.com>
> Reported-by: yue sun <samsun1006219@...il.com>

Thanks for the report.   This looks like a secretmem vs get_user_pages issue.

I reduced the syz reproducer to a minimal one that isn't dependent on fuse:

=== repro.c ===
#define _GNU_SOURCE

#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/socket.h>

/*
 * Minimal reproducer for the secretmem vs. get_user_pages issue discussed
 * in this message; it does not depend on fuse.
 *
 * Sequence: map a memfd_secret() region, hand that address to vmsplice()
 * so the pipe references its pages, then splice() the pipe into a regular
 * file.
 *
 * Returns 0 once the triggering syscalls have been issued, or 1 if a
 * required setup step (memfd_secret, mmap, pipe, open) fails.
 */
int main(void)
{
        int fd1, fd2, fd3;
        int pip[2];
        struct iovec iov;
        void *addr;

        fd1 = syscall(__NR_memfd_secret, 0);
        if (fd1 < 0)
                return 1;       /* secretmem unavailable: nothing to reproduce */

        addr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
        if (addr == MAP_FAILED)
                return 1;
        ftruncate(fd1, 7);

        /*
         * addr is passed in the optlen position; the call's result is
         * deliberately ignored. Presumably this is only here to make the
         * kernel access the secretmem mapping -- kept exactly as in the
         * reduced reproducer.
         */
        fd2 = socket(AF_INET, SOCK_DGRAM, 0);
        getsockopt(fd2, 0, 0, NULL, addr);

        if (pipe(pip) < 0)
                return 1;
        iov.iov_base = addr;
        iov.iov_len = 0x50;
        vmsplice(pip[1], &iov, 1, 0);

        /*
         * Mode is octal 0600 (owner read/write). The hex value 0x600 would
         * set setgid|sticky with no read/write bits, so an unprivileged
         * second run could not reopen the file with O_RDWR.
         */
        fd3 = open("/tmp/repro-secretmem.test", O_RDWR | O_CREAT, 0600);
        if (fd3 < 0)
                return 1;
        splice(pip[0], NULL, fd3, NULL, 0x50, 0);

        return 0;
}
=======

Thanks,
Miklos

>
> kernel: upstream 23956900041d968f9ad0f30db6dede4daccd7aa9
> kernel config: https://syzkaller.appspot.com/text?tag=KernelConfig&x=9f47e8dfa53b0b11
> with KASAN enabled
> compiler: gcc (Debian 12.2.0-14) 12.2.0
>
> BUG: unable to handle kernel paging request in fuse_copy_do
> UDPLite: UDP-Lite is deprecated and scheduled to be removed in 2025,
> please contact the netdev mailing list
> BUG: unable to handle page fault for address: ffff88802c29c000
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 13001067 P4D 13001067 PUD 13002067 PMD 24c8d063 PTE 800fffffd3d63060
> Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
> CPU: 1 PID: 8221 Comm: 1e9 Not tainted 6.8.0-05202-g9187210eee7d-dirty #21
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> 1.16.2-1.fc38 04/01/2014
> RIP: 0010:memcpy+0xc/0x20 arch/x86/lib/memcpy_64.S:38
> Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90
> 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 48 89 f80
> RSP: 0018:ffffc9001065f9c8 EFLAGS: 00010246
> RAX: ffffc9001065fb10 RBX: ffffc9001065fc78 RCX: 0000000000000010
> RDX: 0000000000000010 RSI: ffff88802c29c000 RDI: ffffc9001065fb10
> RBP: 0000000000000010 R08: ffff88802c29c000 R09: 0000000000000001
> R10: ffffffff8ea82ed7 R11: ffffc9001065fd98 R12: ffffc9001065fac0
> R13: 0000000000000010 R14: ffffc9001065faf0 R15: ffffc9001065fcbc
> FS: 000000000f82d480(0000) GS:ffff88823bc00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffff88802c29c000 CR3: 000000002dd7c000 CR4: 0000000000750ef0
> PKRU: 55555554
> Call Trace:
> <TASK>
> fuse_copy_do+0x152/0x340 fs/fuse/dev.c:758
> fuse_copy_one fs/fuse/dev.c:1007 [inline]
> fuse_dev_do_write+0x1df/0x26a0 fs/fuse/dev.c:1863
> fuse_dev_write+0x129/0x1b0 fs/fuse/dev.c:1960
> call_write_iter include/linux/fs.h:2108 [inline]
> new_sync_write fs/read_write.c:497 [inline]
> vfs_write+0x62e/0x10a0 fs/read_write.c:590
> ksys_write+0xf6/0x1d0 fs/read_write.c:643
> do_syscall_x64 arch/x86/entry/common.c:52 [inline]
> do_syscall_64+0x7c/0x1d0 arch/x86/entry/common.c:83
> entry_SYSCALL_64_after_hwframe+0x6c/0x74
>
> =* repro.c =*
> #define _GNU_SOURCE
>
> #include <dirent.h>
> #include <endian.h>
> #include <errno.h>
> #include <fcntl.h>
> #include <setjmp.h>
> #include <signal.h>
> #include <stdarg.h>
> #include <stdbool.h>
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/prctl.h>
> #include <sys/stat.h>
> #include <sys/syscall.h>
> #include <sys/types.h>
> #include <sys/wait.h>
> #include <time.h>
> #include <unistd.h>
>
> #ifndef __NR_memfd_secret
> #define __NR_memfd_secret 447
> #endif
>
> static __thread int clone_ongoing;
> static __thread int skip_segv;
> static __thread jmp_buf segv_env;
>
> static void segv_handler(int sig, siginfo_t* info, void* ctx) {
>  if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0) {
>    exit(sig);
>  }
>  uintptr_t addr = (uintptr_t)info->si_addr;
>  const uintptr_t prog_start = 1 << 20;
>  const uintptr_t prog_end = 100 << 20;
>  int skip = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0;
>  int valid = addr < prog_start || addr > prog_end;
>  if (skip && valid) {
>    _longjmp(segv_env, 1);
>  }
>  exit(sig);
> }
>
> static void install_segv_handler(void) {
>  struct sigaction sa;
>  memset(&sa, 0, sizeof(sa));
>  sa.sa_handler = SIG_IGN;
>  syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
>  syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
>  memset(&sa, 0, sizeof(sa));
>  sa.sa_sigaction = segv_handler;
>  sa.sa_flags = SA_NODEFER | SA_SIGINFO;
>  sigaction(SIGSEGV, &sa, NULL);
>  sigaction(SIGBUS, &sa, NULL);
> }
>
> #define NONFAILING(...)                                  \
>  ({                                                     \
>    int ok = 1;                                          \
>    __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
>    if (_setjmp(segv_env) == 0) {                        \
>      __VA_ARGS__;                                       \
>    } else                                               \
>      ok = 0;                                            \
>    __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
>    ok;                                                  \
>  })
>
> static void sleep_ms(uint64_t ms) {
>  usleep(ms * 1000);
> }
>
> static uint64_t current_time_ms(void) {
>  struct timespec ts;
>  if (clock_gettime(CLOCK_MONOTONIC, &ts))
>    exit(1);
>  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
> }
>
> static bool write_file(const char* file, const char* what, ...) {
>  char buf[1024];
>  va_list args;
>  va_start(args, what);
>  vsnprintf(buf, sizeof(buf), what, args);
>  va_end(args);
>  buf[sizeof(buf) - 1] = 0;
>  int len = strlen(buf);
>  int fd = open(file, O_WRONLY | O_CLOEXEC);
>  if (fd == -1)
>    return false;
>  if (write(fd, buf, len) != len) {
>    int err = errno;
>    close(fd);
>    errno = err;
>    return false;
>  }
>  close(fd);
>  return true;
> }
>
> static void kill_and_wait(int pid, int* status) {
>  kill(-pid, SIGKILL);
>  kill(pid, SIGKILL);
>  for (int i = 0; i < 100; i++) {
>    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
>      return;
>    usleep(1000);
>  }
>  DIR* dir = opendir("/sys/fs/fuse/connections");
>  if (dir) {
>    for (;;) {
>      struct dirent* ent = readdir(dir);
>      if (!ent)
>        break;
>      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
>        continue;
>      char abort[300];
>      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
>               ent->d_name);
>      int fd = open(abort, O_WRONLY);
>      if (fd == -1) {
>        continue;
>      }
>      if (write(fd, abort, 1) < 0) {
>      }
>      close(fd);
>    }
>    closedir(dir);
>  } else {
>  }
>  while (waitpid(-1, status, __WALL) != pid) {
>  }
> }
>
> static void setup_test() {
>  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
>  setpgrp();
>  write_file("/proc/self/oom_score_adj", "1000");
> }
>
> static void execute_one(void);
>
> #define WAIT_FLAGS __WALL
>
> static void loop(void) {
>  int iter = 0;
>  for (;; iter++) {
>    int pid = fork();
>    if (pid < 0)
>      exit(1);
>    if (pid == 0) {
>      setup_test();
>      execute_one();
>      exit(0);
>    }
>    int status = 0;
>    uint64_t start = current_time_ms();
>    for (;;) {
>      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
>        break;
>      sleep_ms(1);
>      if (current_time_ms() - start < 5000)
>        continue;
>      kill_and_wait(pid, &status);
>      break;
>    }
>  }
> }
>
> uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};
>
> void execute_one(void) {
>  intptr_t res = 0;
>  NONFAILING(memcpy((void*)0x20002040, "./file0\000", 8));
>  syscall(__NR_mkdirat, /*fd=*/0xffffff9c, /*path=*/0x20002040ul, /*mode=*/0ul);
>  NONFAILING(memcpy((void*)0x20002080, "/dev/fuse\000", 10));
>  res = syscall(__NR_openat, /*fd=*/0xffffffffffffff9cul, /*file=*/0x20002080ul,
>                /*flags=*/2ul, /*mode=*/0ul);
>  if (res != -1)
>    r[0] = res;
>  NONFAILING(memcpy((void*)0x200020c0, "./file0\000", 8));
>  NONFAILING(memcpy((void*)0x20002100, "fuse\000", 5));
>  NONFAILING(memcpy((void*)0x20002140, "fd", 2));
>  NONFAILING(*(uint8_t*)0x20002142 = 0x3d);
>  NONFAILING(sprintf((char*)0x20002143, "0x%016llx", (long long)r[0]));
>  NONFAILING(*(uint8_t*)0x20002155 = 0x2c);
>  NONFAILING(memcpy((void*)0x20002156, "rootmode", 8));
>  NONFAILING(*(uint8_t*)0x2000215e = 0x3d);
>  NONFAILING(sprintf((char*)0x2000215f, "%023llo", (long long)0x4000));
>  NONFAILING(*(uint8_t*)0x20002176 = 0x2c);
>  NONFAILING(memcpy((void*)0x20002177, "user_id", 7));
>  NONFAILING(*(uint8_t*)0x2000217e = 0x3d);
>  NONFAILING(sprintf((char*)0x2000217f, "%020llu", (long long)0));
>  NONFAILING(*(uint8_t*)0x20002193 = 0x2c);
>  NONFAILING(memcpy((void*)0x20002194, "group_id", 8));
>  NONFAILING(*(uint8_t*)0x2000219c = 0x3d);
>  NONFAILING(sprintf((char*)0x2000219d, "%020llu", (long long)0));
>  NONFAILING(*(uint8_t*)0x200021b1 = 0x2c);
>  NONFAILING(*(uint8_t*)0x200021b2 = 0);
>  syscall(__NR_mount, /*src=*/0ul, /*dst=*/0x200020c0ul, /*type=*/0x20002100ul,
>          /*flags=*/0ul, /*opts=*/0x20002140ul);
>  res = syscall(__NR_memfd_secret, /*flags=*/0ul);
>  if (res != -1)
>    r[1] = res;
>  syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0xb36000ul,
>          /*prot=PROT_GROWSUP|PROT_READ*/ 0x2000001ul,
>          /*flags=MAP_STACK|MAP_POPULATE|MAP_FIXED|MAP_SHARED*/ 0x28011ul,
>          /*fd=*/r[1], /*offset=*/0ul);
>  syscall(__NR_ftruncate, /*fd=*/r[1], /*len=*/7ul);
>  res = syscall(__NR_socket, /*domain=*/2ul, /*type=*/2ul, /*proto=*/0x88);
>  if (res != -1)
>    r[2] = res;
>  NONFAILING(*(uint32_t*)0x20000280 = 0);
>  syscall(__NR_getsockopt, /*fd=*/r[2], /*level=*/1, /*optname=*/0x11,
>          /*optval=*/0ul, /*optlen=*/0x20000280ul);
>  NONFAILING(*(uint32_t*)0x20000000 = 0x50);
>  NONFAILING(*(uint32_t*)0x20000004 = 0);
>  NONFAILING(*(uint64_t*)0x20000008 = 0);
>  NONFAILING(*(uint32_t*)0x20000010 = 7);
>  NONFAILING(*(uint32_t*)0x20000014 = 0x27);
>  NONFAILING(*(uint32_t*)0x20000018 = 0);
>  NONFAILING(*(uint32_t*)0x2000001c = 0);
>  NONFAILING(*(uint16_t*)0x20000020 = 0);
>  NONFAILING(*(uint16_t*)0x20000022 = 0);
>  NONFAILING(*(uint32_t*)0x20000024 = 0);
>  NONFAILING(*(uint32_t*)0x20000028 = 0);
>  NONFAILING(*(uint16_t*)0x2000002c = 0);
>  NONFAILING(*(uint16_t*)0x2000002e = 0);
>  NONFAILING(memset((void*)0x20000030, 0, 32));
>  syscall(__NR_write, /*fd=*/r[0], /*arg=*/0x20000000ul, /*len=*/0x50ul);
> }
> int main(void) {
>  syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
>          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
>          /*offset=*/0ul);
>  syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
>          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
>          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
>          /*offset=*/0ul);
>  syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
>          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
>          /*offset=*/0ul);
>  install_segv_handler();
>  loop();
>  return 0;
> }
>
> =* repro.txt =*
> mkdirat(0xffffffffffffff9c, &(0x7f0000002040)='./file0\x00', 0x0)
> r0 = openat$fuse(0xffffffffffffff9c, &(0x7f0000002080), 0x2, 0x0)
> mount$fuse(0x0, &(0x7f00000020c0)='./file0\x00', &(0x7f0000002100),
> 0x0, &(0x7f0000002140)={{'fd', 0x3d, r0}, 0x2c, {'rootmode', 0x3d,
> 0x4000}})
> r1 = memfd_secret(0x0)
> mmap(&(0x7f0000000000/0xb36000)=nil, 0xb36000, 0x2000001, 0x28011, r1, 0x0)
> ftruncate(r1, 0x7)
> r2 = socket$inet_udplite(0x2, 0x2, 0x88)
> getsockopt$sock_cred(r2, 0x1, 0x11, 0x0, &(0x7f0000000280))
> write$FUSE_INIT(r0, &(0x7f0000000000)={0x50}, 0x50)
>
>
> see also https://gist.github.com/xrivendell7/961be96ae091c9671bb56efea902cec4.
>
> I hope it helps.
> best regards.
> xingwei Lee

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ