linux-kernel - Re: [RFC PATCH v1 2/3] LSM/x86/sgx: Implement SGX specific hooks in SELinux

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b6f099cd-c0eb-d5cf-847d-27a15ac5ceaf@tycho.nsa.gov>
Date:   Tue, 11 Jun 2019 09:40:25 -0400
From:   Stephen Smalley <sds@...ho.nsa.gov>
To:     Cedric Xing <cedric.xing@...el.com>,
        linux-security-module@...r.kernel.org, selinux@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-sgx@...r.kernel.org
Cc:     jarkko.sakkinen@...ux.intel.com, luto@...nel.org,
        jmorris@...ei.org, serge@...lyn.com, paul@...l-moore.com,
        eparis@...isplace.org, jethro@...tanix.com, dave.hansen@...el.com,
        tglx@...utronix.de, torvalds@...ux-foundation.org,
        akpm@...ux-foundation.org, nhorman@...hat.com,
        pmccallum@...hat.com, serge.ayoun@...el.com,
        shay.katz-zamir@...el.com, haitao.huang@...el.com,
        andriy.shevchenko@...ux.intel.com, kai.svahn@...el.com,
        bp@...en8.de, josh@...htriplett.org, kai.huang@...el.com,
        rientjes@...gle.com, william.c.roberts@...el.com,
        philip.b.tricca@...el.com
Subject: Re: [RFC PATCH v1 2/3] LSM/x86/sgx: Implement SGX specific hooks in
 SELinux

On 6/10/19 3:03 AM, Cedric Xing wrote:
> In this patch, SELinux maintains two bits per enclave page, namely SGX__EXECUTE
> and SGX__EXECMOD.
> 
> SGX__EXECUTE is set initially (by selinux_enclave_load) for every enclave page
> that was loaded from a potentially executable source page. SGX__EXECMOD is set
> for every page that was loaded from a file that has FILE__EXECMOD.
> 
> At runtime, on every protection change (resulted in a call to
> selinux_file_mprotect), SGX__EXECUTE is cleared for a page if VM_WRITE is
> requested, unless SGX__EXECMOD is set.
> 
> To track enclave page protection changes, SELinux has been changed in four
> different places.
> 
> Firstly, storage is required for storing per page SGX__EXECUTE and SGX__EXECMOD
> bits. Given every enclave instance is uniquely tied to an open file (i.e.
> struct file), the storage is allocated by extending `file_security_struct`.
> More precisely, a new field `esec` has been added, initially zero, to point to
> the data structure for tracking per page protection. `esec` will be
> allocated/initialized at the first invocation of selinux_enclave_load().
> 
> Then, selinux_enclave_load() initializes those 2 bits for every new enclave as
> described above. One more detail worth noting, is that selinux_enclave_load()
> sets SGX__EXECUTE/SGX__EXECMOD for EAUG'ed pages (for upcoming SGX2) only if
> the calling process has FILE__EXECMOD on the sigstruct file.
> 
> Afterwards, every change on protection will go through selinux_file_mprotect()
> so will be noted. Please note that user space could munmap() then mmap() to
> work around mprotect(), but that "leak" could be "plugged" by SGX subsystem
> calling security_file_mprotect() explicitly whenever new mappings are created.
> 
> Finally, the storage for page protection tracking must be freed when the
> associated file is closed. Hence a new selinux_file_free_security() has been
> added.
> 
> Signed-off-by: Cedric Xing <cedric.xing@...el.com>
> ---
>   security/selinux/Makefile            |   2 +
>   security/selinux/hooks.c             |  77 ++++++-
>   security/selinux/include/intel_sgx.h |  18 ++
>   security/selinux/include/objsec.h    |   3 +
>   security/selinux/intel_sgx.c         | 292 +++++++++++++++++++++++++++
>   5 files changed, 391 insertions(+), 1 deletion(-)
>   create mode 100644 security/selinux/include/intel_sgx.h
>   create mode 100644 security/selinux/intel_sgx.c
> 
> diff --git a/security/selinux/Makefile b/security/selinux/Makefile
> index ccf950409384..58a05a9639e0 100644
> --- a/security/selinux/Makefile
> +++ b/security/selinux/Makefile
> @@ -14,6 +14,8 @@ selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o
>   
>   selinux-$(CONFIG_NETLABEL) += netlabel.o
>   
> +selinux-$(CONFIG_INTEL_SGX) += intel_sgx.o
> +
>   ccflags-y := -I$(srctree)/security/selinux -I$(srctree)/security/selinux/include
>   
>   $(addprefix $(obj)/,$(selinux-y)): $(obj)/flask.h
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 3ec702cf46ca..17f855871a41 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -103,6 +103,7 @@
>   #include "netlabel.h"
>   #include "audit.h"
>   #include "avc_ss.h"
> +#include "intel_sgx.h"
>   
>   struct selinux_state selinux_state;
>   
> @@ -3485,6 +3486,11 @@ static int selinux_file_alloc_security(struct file *file)
>   	return file_alloc_security(file);
>   }
>   
> +static void selinux_file_free_security(struct file *file)
> +{
> +	sgxsec_enclave_free(file);
> +}
> +
>   /*
>    * Check whether a task has the ioctl permission and cmd
>    * operation to an inode.
> @@ -3656,6 +3662,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
>   				 unsigned long reqprot,
>   				 unsigned long prot)
>   {
> +	int rc;
>   	const struct cred *cred = current_cred();
>   	u32 sid = cred_sid(cred);
>   
> @@ -3664,7 +3671,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
>   
>   	if (default_noexec &&
>   	    (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
> -		int rc = 0;
> +		rc = 0;
>   		if (vma->vm_start >= vma->vm_mm->start_brk &&
>   		    vma->vm_end <= vma->vm_mm->brk) {
>   			rc = avc_has_perm(&selinux_state,
> @@ -3691,6 +3698,12 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
>   			return rc;
>   	}
>   
> +#ifdef CONFIG_INTEL_SGX
> +	rc = sgxsec_mprotect(vma, prot);
> +	if (rc <= 0)
> +		return rc;

Why are you skipping the file_map_prot_check() call when rc == 0?
What would SELinux check if you didn't do so - 
FILE__READ|FILE__WRITE|FILE__EXECUTE to /dev/sgx/enclave?  Is it a 
problem to let SELinux proceed with that check?

> +#endif
> +
>   	return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
>   }
>   
> @@ -6726,6 +6739,62 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
>   }
>   #endif
>   
> +#ifdef CONFIG_INTEL_SGX
> +
> +static int selinux_enclave_load(struct file *encl, unsigned long addr,
> +				unsigned long size, unsigned long prot,
> +				struct vm_area_struct *source)
> +{
> +	if (source) {
> +		/**
> +		 * Adding page from source => EADD request
> +		 */
> +		int rc = selinux_file_mprotect(source, prot, prot);
> +		if (rc)
> +			return rc;
> +
> +		if (!(prot & VM_EXEC) &&
> +		    selinux_file_mprotect(source, VM_EXEC, VM_EXEC))

I wouldn't conflate VM_EXEC with PROT_EXEC even if they happen to be 
defined with the same values currently.  Elsewhere the kernel appears to 
explicitly translate them ala calc_vm_prot_bits().

Also, this will mean that we will always perform an execute check on all 
sources, thereby triggering audit denial messages for any EADD sources 
that are only intended to be data.  Depending on the source, this could 
trigger PROCESS__EXECMEM or FILE__EXECMOD or FILE__EXECUTE.  In a world 
where users often just run any denials they see through audit2allow, 
they'll end up always allowing them all.  How can they tell whether it 
was needed? It would be preferable if we could only trigger execute 
checks when there is some probability that execute will be requested in 
the future.  Alternatives would be to silence the audit of these 
permission checks always via use of _noaudit() interfaces or to silence 
audit of these permissions via dontaudit rules in policy, but the latter 
would hide all denials of the permission by the process, not just those 
triggered from security_enclave_load().  And if we silence them, then we 
won't see them even if they were needed.

> +			prot = 0;
> +		else {
> +			prot = SGX__EXECUTE;
> +			if (source->vm_file &&
> +			    !file_has_perm(current_cred(), source->vm_file,
> +					   FILE__EXECMOD))
> +				prot |= SGX__EXECMOD;

Similarly, this means that we will always perform a FILE__EXECMOD check 
on all executable sources, triggering audit denial messages for any EADD 
source that is executable but to which EXECMOD is not allowed, and again 
the most common pattern will be that users will add EXECMOD to all 
executable sources to avoid this.

> +		}
> +		return sgxsec_eadd(encl, addr, size, prot);
> +	} else {
> +		/**
> +		  * Adding page from NULL => EAUG request
> +		  */
> +		return sgxsec_eaug(encl, addr, size, prot);
> +	}
> +}
> +
> +static int selinux_enclave_init(struct file *encl,
> +				const struct sgx_sigstruct *sigstruct,
> +				struct vm_area_struct *vma)
> +{
> +	int rc = 0;
> +
> +	if (!vma)
> +		rc = -EINVAL;

Is it ever valid to call this hook with a NULL vma?  If not, this should 
be handled/prevented by the caller.  If so, I'd just return -EINVAL 
immediately here.

> +
> +	if (!rc && !(vma->vm_flags & VM_EXEC))
> +		rc = selinux_file_mprotect(vma, VM_EXEC, VM_EXEC);

I had thought we were trying to avoid overloading FILE__EXECUTE (or 
whatever gets checked here, e.g. could be PROCESS__EXECMEM or 
FILE__EXECMOD) on the sigstruct file, since the caller isn't truly 
executing code from it.

I'd define new ENCLAVE__* permissions, including an up-front 
ENCLAVE__INIT permission that governs whether the sigstruct file can be 
used at all irrespective of memory protections.

Then you can also have ENCLAVE__EXECUTE, ENCLAVE__EXECMEM, 
ENCLAVE__EXECMOD for the execute-related checks.  Or you can use the 
/dev/sgx/enclave inode as the target for the execute checks and just 
reuse the file permissions there.

> +
> +	if (!rc) {
> +		if (vma->vm_file)
> +			rc = file_has_perm(current_cred(), vma->vm_file,
> +					   FILE__EXECMOD);

Similar issue here with always triggering EXECMOD audit denials even if 
never required.

> +		rc = sgxsec_einit(encl, sigstruct, !rc);
> +	}
> +	return rc;
> +}
> +
> +#endif
> +
>   struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
>   	.lbs_cred = sizeof(struct task_security_struct),
>   	.lbs_file = sizeof(struct file_security_struct),
> @@ -6808,6 +6877,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
>   
>   	LSM_HOOK_INIT(file_permission, selinux_file_permission),
>   	LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
> +	LSM_HOOK_INIT(file_free_security, selinux_file_free_security),
>   	LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
>   	LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
>   	LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
> @@ -6968,6 +7038,11 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
>   	LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
>   	LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
>   #endif
> +
> +#ifdef CONFIG_INTEL_SGX
> +	LSM_HOOK_INIT(enclave_load, selinux_enclave_load),
> +	LSM_HOOK_INIT(enclave_init, selinux_enclave_init),
> +#endif
>   };
>   
>   static __init int selinux_init(void)
> diff --git a/security/selinux/include/intel_sgx.h b/security/selinux/include/intel_sgx.h
> new file mode 100644
> index 000000000000..8f9c6c734921
> --- /dev/null
> +++ b/security/selinux/include/intel_sgx.h
> @@ -0,0 +1,18 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +// Copyright(c) 2016-18 Intel Corporation.
> +
> +#ifndef _SELINUX_SGXSEC_H_
> +#define _SELINUX_SGXSEC_H_
> +
> +#include <linux/lsm_hooks.h>
> +
> +#define SGX__EXECUTE	1
> +#define SGX__EXECMOD	2
> +
> +void sgxsec_enclave_free(struct file *);
> +int sgxsec_mprotect(struct vm_area_struct *, size_t);
> +int sgxsec_eadd(struct file *, size_t, size_t, size_t);
> +int sgxsec_eaug(struct file *, size_t, size_t, size_t);
> +int sgxsec_einit(struct file *, const struct sgx_sigstruct *, int);
> +
> +#endif
> diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
> index 231262d8eac9..0fb4da7e3a8a 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -71,6 +71,9 @@ struct file_security_struct {
>   	u32 fown_sid;		/* SID of file owner (for SIGIO) */
>   	u32 isid;		/* SID of inode at the time of file open */
>   	u32 pseqno;		/* Policy seqno at the time of file open */
> +#ifdef CONFIG_INTEL_SGX
> +	atomic_long_t esec;
> +#endif
>   };
>   
>   struct superblock_security_struct {
> diff --git a/security/selinux/intel_sgx.c b/security/selinux/intel_sgx.c
> new file mode 100644
> index 000000000000..37dacf5c295f
> --- /dev/null
> +++ b/security/selinux/intel_sgx.c
> @@ -0,0 +1,292 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +// Copyright(c) 2016-18 Intel Corporation.
> +
> +#include "objsec.h"
> +#include "intel_sgx.h"
> +
> +struct region {
> +	struct list_head	link;
> +	size_t			start;
> +	size_t			end;
> +	size_t			data;
> +};
> +
> +static inline struct region *region_new(void)
> +{
> +	struct region *n = kzalloc(sizeof(struct region), GFP_KERNEL);
> +	if (n)
> +		INIT_LIST_HEAD(&n->link);
> +	return n;
> +}
> +
> +static inline void region_free(struct region *r)
> +{
> +	list_del(&r->link);
> +	kfree(r);
> +}
> +
> +static struct list_head *
> +region_apply_to_range(struct list_head *rgs,
> +		      size_t start, size_t end,
> +		      struct list_head *(*cb)(struct region *,
> +					      size_t, size_t, size_t),
> +		      size_t arg)
> +{
> +	struct region *r, *n;
> +
> +	list_for_each_entry(r, rgs, link)
> +		if (start < r-> end)
> +			break;
> +
> +	if (&r->link == rgs || end <= r->start)
> +		return rgs;
> +
> +	do {
> +		struct list_head *ret;
> +		n = list_next_entry(r, link);
> +		ret = (*cb)(r, start, end, arg);
> +		if (ret)
> +			return ret;
> +		r = n;
> +	} while (&r->link != rgs && r->start < end);
> +	return &r->link;
> +}
> +
> +static struct list_head *
> +region_clear_cb(struct region *r, size_t start, size_t end, size_t arg)
> +{
> +	if (end < r->end) {
> +		if (start > r->start) {
> +			struct region *n = region_new();
> +			if (unlikely(!n))
> +				return ERR_PTR(-ENOMEM);
> +
> +			n->start = r->start;
> +			n->end = start;
> +			n->data = r->data;
> +			list_add_tail(&n->link, &r->link);
> +		}
> +		r->start = end;
> +		return &r->link;
> +	}
> +
> +	if (start > r->start)
> +		r->end = start;
> +	else
> +		region_free(r);
> +	return NULL;
> +}
> +
> +static inline struct list_head *
> +region_clear_range(struct list_head *rgs, size_t start, size_t end)
> +{
> +	return region_apply_to_range(rgs, start, end, region_clear_cb, 0);
> +}
> +
> +static struct list_head *
> +region_add_range(struct list_head *rgs, size_t start, size_t end, size_t data)
> +{
> +	struct region *r, *n;
> +
> +	n = list_entry(region_clear_range(rgs, start, end), typeof(*n), link);
> +	if (unlikely(IS_ERR_VALUE(&n->link)))
> +		return &n->link;
> +
> +	if (&n->link != rgs && end == n->start && data == n->data) {
> +		n->start = start;
> +		r = n;
> +	} else {
> +		r = region_new();
> +		if (unlikely(!r))
> +			return ERR_PTR(-ENOMEM);
> +
> +		r->start = start;
> +		r->end = end;
> +		r->data = data;
> +		list_add_tail(&r->link, &n->link);
> +	}
> +
> +	n = list_prev_entry(r, link);
> +	if (&n->link != rgs && start == n->end && data == n->data) {
> +		r->start = n->start;
> +		region_free(n);
> +	}
> +
> +	return &r->link;
> +}
> +
> +static inline int
> +enclave_add_pages(struct list_head *rgs, size_t start, size_t end, size_t flags)
> +{
> +	void *p = region_add_range(rgs, start, end, flags);
> +	return PTR_ERR_OR_ZERO(p);
> +}
> +
> +static inline int enclave_prot_allowed(size_t prot, size_t flags)
> +{
> +	return !(prot & VM_EXEC) || (flags & SGX__EXECUTE);
> +}
> +
> +static struct list_head *
> +enclave_prot_check_cb(struct region *r, size_t start, size_t end, size_t prot)
> +{
> +	if (!enclave_prot_allowed(prot, r->data))
> +		return ERR_PTR(-EACCES);
> +	return NULL;
> +}
> +
> +static struct list_head *
> +enclave_prot_set_cb(struct region *r, size_t start, size_t end, size_t prot)
> +{
> +	BUG_ON(!enclave_prot_allowed(prot, r->data));
> +
> +	if (!(prot & VM_WRITE) ||
> +	    (r->data & SGX__EXECMOD) ||
> +	    !(r->data & SGX__EXECUTE))
> +		return NULL;
> +
> +	if (end < r->end) {
> +		struct region *n = region_new();
> +		if (unlikely(!n))
> +			return ERR_PTR(-ENOMEM);
> +
> +		n->start = end;
> +		n->end = r->end;
> +		n->data = r->data;
> +		r->end = end;
> +		list_add(&n->link, &r->link);
> +	}
> +
> +	if (start > r->start) {
> +		struct region *n = region_new();
> +		if (unlikely(!n))
> +			return ERR_PTR(-ENOMEM);
> +
> +		n->start = r->start;
> +		n->end = start;
> +		n->data = r->data;
> +		r->start = start;
> +		list_add_tail(&n->link, &r->link);
> +	}
> +
> +	r->data &= ~SGX__EXECUTE;
> +	return NULL;
> +}
> +
> +static inline int
> +enclave_mprotect(struct list_head *rgs, size_t start, size_t end, size_t prot)
> +{
> +	void *ret;
> +
> +	ret = region_apply_to_range(rgs, start, end,
> +				    enclave_prot_check_cb, prot);
> +	if (!IS_ERR_VALUE(ret) && (prot & VM_WRITE))
> +		ret = region_apply_to_range(rgs, start, end,
> +					    enclave_prot_set_cb, prot);
> +	return PTR_ERR_OR_ZERO(ret);
> +}
> +
> +struct enclave_sec {
> +	struct rw_semaphore	sem;
> +	struct list_head	regions;
> +	size_t			eaug_perm;
> +};
> +
> +static inline struct enclave_sec *__esec(struct file_security_struct *fsec)
> +{
> +	return (struct enclave_sec *)atomic_long_read(&fsec->esec);
> +}
> +
> +static struct enclave_sec *encl_esec(struct file *encl)
> +{
> +	struct file_security_struct *fsec = selinux_file(encl);
> +	struct enclave_sec *esec = __esec(fsec);
> +
> +	if (unlikely(!esec)) {
> +		long n;
> +
> +		esec = kzalloc(sizeof(*esec), GFP_KERNEL);
> +		if (!esec)
> +			return NULL;
> +
> +		init_rwsem(&esec->sem);
> +		INIT_LIST_HEAD(&esec->regions);
> +
> +		n = atomic_long_cmpxchg(&fsec->esec, 0, (long)esec);
> +		if (n) {
> +			kfree(esec);
> +			esec = (typeof(esec))n;
> +		}
> +	}
> +
> +	return esec;
> +}
> +
> +void sgxsec_enclave_free(struct file *encl)
> +{
> +	struct enclave_sec *esec = __esec(selinux_file(encl));
> +
> +	if (esec) {
> +		struct region *r, *n;
> +
> +		BUG_ON(rwsem_is_locked(&esec->sem));
> +
> +		list_for_each_entry_safe(r, n, &esec->regions, link)
> +			region_free(r);
> +
> +		kfree(esec);
> +	}
> +}
> +
> +int sgxsec_mprotect(struct vm_area_struct *vma, size_t prot)
> +{
> +	struct enclave_sec *esec;
> +	int rc;
> +
> +	if (!vma->vm_file || !(esec = __esec(selinux_file(vma->vm_file)))) {
> +		/* Positive return value indicates non-enclave VMA */
> +		return 1;
> +	}
> +
> +	down_read(&esec->sem);
> +	rc = enclave_mprotect(&esec->regions, vma->vm_start, vma->vm_end, prot);

Why is it safe for this to only use down_read()? enclave_mprotect() can 
call enclave_prot_set_cb() which modifies the list?

> +	up_read(&esec->sem);
> +	return rc;
> +}
> +
> +int sgxsec_eadd(struct file *encl, size_t start, size_t size, size_t perm)
> +{
> +	struct enclave_sec *esec = encl_esec(encl);
> +	int rc;
> +
> +	if (down_write_killable(&esec->sem))
> +		return -EINTR;
> +	rc = enclave_add_pages(&esec->regions, start, start + size, perm);
> +	up_write(&esec->sem);
> +	return rc;
> +}
> +
> +int sgxsec_eaug(struct file *encl, size_t start, size_t size, size_t prot)
> +{
> +	struct enclave_sec *esec = encl_esec(encl);
> +	int rc = -EPERM;
> +
> +	if (down_write_killable(&esec->sem))
> +		return -EINTR;
> +	if (enclave_prot_allowed(prot, esec->eaug_perm))
> +		rc = enclave_add_pages(&esec->regions, start, start + size,
> +				       esec->eaug_perm);
> +	up_write(&esec->sem);
> +	return rc;
> +}
> +
> +int sgxsec_einit(struct file *encl, const struct sgx_sigstruct *sigstruct, int execmod)
> +{
> +	struct enclave_sec *esec = encl_esec(encl);
> +
> +	if (down_write_killable(&esec->sem))
> +		return -EINTR;
> +	esec->eaug_perm = execmod ? SGX__EXECUTE | SGX__EXECMOD : 0;
> +	up_write(&esec->sem);
> +	return 0;
> +}

I haven't looked at this code closely, but it feels like a lot of 
SGX-specific logic embedded into SELinux that will have to be repeated 
or reused for every security module.  Does SGX not track this state itself?