lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAMkWEXOvvbZFCXtH8Z99H1sGoSpfXetRqPtpG20VmPCoR1W88w@mail.gmail.com>
Date:	Tue, 6 Oct 2015 16:00:43 +0000
From:	Michael Tirado <mtirado418@...il.com>
To:	Kees Cook <keescook@...omium.org>
Cc:	Andy Lutomirski <luto@...capital.net>,
	LKML <linux-kernel@...r.kernel.org>,
	Will Drewry <wad@...omium.org>
Subject: Re: eBPF / seccomp globals?

On Tue, Sep 29, 2015 at 11:44 PM, Kees Cook <keescook@...omium.org> wrote:
> On Thu, Sep 10, 2015 at 2:55 PM, Michael Tirado <mtirado418@...il.com> wrote:
>> On Fri, Sep 4, 2015 at 8:37 PM, Kees Cook <keescook@...omium.org> wrote:
>>>
>> @@ -196,7 +197,12 @@ static u32 seccomp_run_filters(struct seccomp_data *sd)
>>       * value always takes priority (ignoring the DATA).
>>       */
>>      for (; f; f = f->prev) {
>> -        u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
>> +        u32 cur_ret;
>> +        if (unlikely(f->deferred)) {
>> +            --f->deferred;
>> +            continue;
>> +        }
>> +        cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
>
> I do like the idea of a deferred filters, but I wouldn't want them
> checked this way (it adds a fixed cost to all filters). I would rather
> that a separate list of filters exist, waiting for something like
> "exec" to trigger them getting added to the actual filter list. In
> fact, probably you'd want something like "prepare" and "cancel" too,
> then you could "prepare", fork, exec. Then the child would get the
> prepared filters added, and the parent could call "cancel" to drop the
> prepared filters from itself.
>
> Andy, Will, do you see issues with a class of "deferred" filters that
> would be attached after exec?
>


Thanks for the feedback.
Like Andy said, it's definitely not beautiful, but I decided anyway
to spend some time writing a more legitimate version. It seems that unless
we can make a new temporary mode for detecting execve, the check
on SECCOMP_RET_ALLOW return is unavoidable. The comments seem to forbid mode
changes, so I did not test if that approach would be more optimized. This new
version only checks once unlike the previous that checked for each filter.

Again, sorry about the formatting,  I can re-send this the proper way if
interested, or let me know of any other ideas for solving this problem.



Subject: [PATCH] seccomp: Add deferred filter flag

---
 include/asm-generic/seccomp.h                      |   2 +
 include/linux/seccomp.h                            |   4 +-
 include/uapi/linux/seccomp.h                       |   2 +-
 kernel/seccomp.c                                   |  52 +++++++++-
 tools/testing/selftests/seccomp/Makefile           |   1 +
 tools/testing/selftests/seccomp/seccomp_bpf.c      | 105 +++++++++++++++++++++
 .../testing/selftests/seccomp/seccomp_defer_test.c |  15 +++
 7 files changed, 176 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/seccomp/seccomp_defer_test.c

diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h
index c9ccafa..5dd7e7d 100644
--- a/include/asm-generic/seccomp.h
+++ b/include/asm-generic/seccomp.h
@@ -29,4 +29,6 @@
 #define __NR_seccomp_sigreturn        __NR_rt_sigreturn
 #endif

+#define __NR_seccomp_execve        __NR_execve
+
 #endif /* _ASM_GENERIC_SECCOMP_H */
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index a19ddac..8ab7a68 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@

 #include <uapi/linux/seccomp.h>

-#define SECCOMP_FILTER_FLAG_MASK    (SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK    (SECCOMP_FILTER_FLAG_TSYNC |    \
+                     SECCOMP_FILTER_FLAG_DEFER)

 #ifdef CONFIG_SECCOMP

@@ -25,6 +26,7 @@ struct seccomp_filter;
 struct seccomp {
     int mode;
     struct seccomp_filter *filter;
+    struct seccomp_filter *deferred;
 };

 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a4..01fab07 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -16,7 +16,7 @@

 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC    1
-
+#define SECCOMP_FILTER_FLAG_DEFER    2
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 245df6b..6d54066 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -176,11 +176,12 @@ static int seccomp_check_filter(struct
sock_filter *filter, unsigned int flen)
 static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
     struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
+    struct seccomp_filter *d = ACCESS_ONCE(current->seccomp.deferred);
     struct seccomp_data sd_local;
     u32 ret = SECCOMP_RET_ALLOW;

     /* Ensure unexpected behavior doesn't result in failing open. */
-    if (unlikely(WARN_ON(f == NULL)))
+    if (unlikely(WARN_ON(f == NULL && d == NULL)))
         return SECCOMP_RET_KILL;

     /* Make sure cross-thread synced filter points somewhere sane. */
@@ -447,8 +448,14 @@ static long seccomp_attach_filter(unsigned int flags,
      * If there is an existing filter, make it the prev and don't drop its
      * task reference.
      */
-    filter->prev = current->seccomp.filter;
-    current->seccomp.filter = filter;
+    if (flags & SECCOMP_FILTER_FLAG_DEFER) {
+        filter->prev = current->seccomp.deferred;
+        current->seccomp.deferred = filter;
+    }
+    else {
+        filter->prev = current->seccomp.filter;
+        current->seccomp.filter = filter;
+    }

     /* Now that the new filter is in place, synchronize to all threads. */
     if (flags & SECCOMP_FILTER_FLAG_TSYNC)
@@ -487,6 +494,35 @@ void put_seccomp_filter(struct task_struct *tsk)
     }
 }

+/* attach any filters waiting in deferred list */
+static long seccomp_attach_deferred(void)
+{
+    struct seccomp_filter *tmp;
+    int ret;
+
+    spin_lock_irq(&current->sighand->siglock);
+
+    while(current->seccomp.deferred) {
+        tmp = current->seccomp.deferred->prev;
+        ret = seccomp_attach_filter(0, current->seccomp.deferred);
+        if (ret)
+            goto out_free;
+        current->seccomp.deferred = tmp;
+    }
+
+    spin_unlock_irq(&current->sighand->siglock);
+    return 0;
+
+out_free:
+    while(current->seccomp.deferred) {
+        tmp = current->seccomp.deferred->prev;
+        seccomp_filter_free(current->seccomp.deferred);
+        current->seccomp.deferred = tmp;
+    }
+    spin_unlock_irq(&current->sighand->siglock);
+    return ret;
+}
+
 /**
  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
  * @syscall: syscall number to send to userland
@@ -605,6 +641,12 @@ static u32 __seccomp_phase1_filter(int
this_syscall, struct seccomp_data *sd)
         return filter_ret;  /* Save the rest for phase 2. */

     case SECCOMP_RET_ALLOW:
+        if (unlikely(current->seccomp.deferred) &&
+                this_syscall == __NR_seccomp_execve) {
+            if (seccomp_attach_deferred()) {
+                do_exit(SIGKILL);
+            }
+        }
         return SECCOMP_PHASE1_OK;

     case SECCOMP_RET_KILL:
@@ -815,6 +857,10 @@ static long do_seccomp(unsigned int op, unsigned int flags,
             return -EINVAL;
         return seccomp_set_mode_strict();
     case SECCOMP_SET_MODE_FILTER:
+        if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+            flags & SECCOMP_FILTER_FLAG_DEFER) {
+            return -EINVAL;
+        }
         return seccomp_set_mode_filter(flags, uargs);
     default:
         return -EINVAL;
diff --git a/tools/testing/selftests/seccomp/Makefile
b/tools/testing/selftests/seccomp/Makefile
index 8401e87..69bf673 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,4 +1,5 @@
 TEST_PROGS := seccomp_bpf
+TEST_PROGS += seccomp_defer_test
 CFLAGS += -Wl,-no-as-needed -Wall
 LDFLAGS += -lpthread

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c
b/tools/testing/selftests/seccomp/seccomp_bpf.c
index c5abe7f..946060a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2095,6 +2095,111 @@ TEST(syscall_restart)
         _metadata->passed = 0;
 }

+TEST(seccomp_deferred)
+{
+    struct sock_filter filter[] = {
+        BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+            offsetof(struct seccomp_data, nr)),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mmap2, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mprotect, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_brk, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_open, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_close, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_munmap, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigaction, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigprocmask, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_fstat64, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_set_thread_area, 0, 1),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | ENOSYS),
+    };
+    struct sock_fprog prog = {
+        .len = (unsigned short)ARRAY_SIZE(filter),
+        .filter = filter,
+    };
+    long ret;
+    uid_t uid;
+    int status;
+    pid_t pid;
+    int i;
+
+    ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+    ASSERT_EQ(0, ret) {
+        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+    }
+
+    /* fail if tsync is set */
+    ret = seccomp(SECCOMP_SET_MODE_FILTER,
+              SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_DEFER,
+              &prog);
+    EXPECT_NE(0, ret) {
+        TH_LOG("tsync + defer not implemented");
+    }
+
+    pid = fork();
+    if (pid == 0) { /* exec thread */
+        char *env[] = {NULL};
+        char *arg[] = {NULL};
+
+        ret = seccomp(SECCOMP_SET_MODE_FILTER,
+                  SECCOMP_FILTER_FLAG_DEFER,
+                  &prog);
+        EXPECT_EQ(0, ret) {
+            TH_LOG("seccomp syscall failed %s", strerror(errno));
+        }
+
+        /* should allow any system call, try one! */
+        EXPECT_NE(-1, (uid = getuid())) {
+            TH_LOG("getuid error: %s\n", strerror(errno));
+        }
+
+        /*exec'd program should return 57 when it fails write syscall*/
+        if (execve("seccomp_defer_test", arg, env)) {
+            ASSERT_EQ(0, 1) {
+                printf("exec failed: %s\n", strerror(errno));
+            }
+        }
+        abort();
+    }
+    EXPECT_NE(-1, pid) {
+        printf("fork error: %s\n", strerror(errno));
+    }
+
+    for (i = 0; i < 20; ++i) {
+        ret = waitpid(pid, &status, WNOHANG);
+        EXPECT_NE(-1, ret) {
+            TH_LOG("waitpid error: %s\n", strerror(errno));
+        }
+        if (ret == pid) {
+            EXPECT_NE(0,  WIFEXITED(status)) {
+                printf("process did not exit normally\n");
+            }
+            EXPECT_EQ(57, WEXITSTATUS(status)) {
+                printf("unexpected exit status: %d\n",
+                        WEXITSTATUS(status));
+            }
+            break;
+        }
+        usleep(25000);
+    }
+    EXPECT_NE(i, 20) {
+        printf("process timed out\n");
+    }
+}
+
 /*
  * TODO:
  * - add microbenchmarks
diff --git a/tools/testing/selftests/seccomp/seccomp_defer_test.c
b/tools/testing/selftests/seccomp/seccomp_defer_test.c
new file mode 100644
index 0000000..1c62581
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_defer_test.c
@@ -0,0 +1,15 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+int main(int argc, char *argv[])
+{
+    char str[] = "this should fail\n";
+    if (write(STDOUT_FILENO, str, sizeof(str)) == -1) {
+        if (errno == ENOSYS) {
+            return 57; /* expected behavior */
+        }
+    }
+    return -1;
+}
-- 
1.8.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ