Message-ID: <1739984e-0010-2031-1561-809a0b6380bb@intel.com>
Date: Fri, 16 Oct 2020 14:25:50 -0700
From: Dave Hansen <dave.hansen@...el.com>
To: Jarkko Sakkinen <jarkko.sakkinen@...ux.intel.com>, x86@...nel.org,
linux-sgx@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, Jethro Beekman <jethro@...tanix.com>,
Haitao Huang <haitao.huang@...ux.intel.com>,
Chunyang Hui <sanqian.hcy@...fin.com>,
Jordan Hand <jorhand@...ux.microsoft.com>,
Nathaniel McCallum <npmccallum@...hat.com>,
Seth Moore <sethmo@...gle.com>,
Darren Kenny <darren.kenny@...cle.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Suresh Siddha <suresh.b.siddha@...el.com>,
akpm@...ux-foundation.org, andriy.shevchenko@...ux.intel.com,
asapek@...gle.com, bp@...en8.de, cedric.xing@...el.com,
chenalexchen@...gle.com, conradparker@...gle.com,
cyhanish@...gle.com, haitao.huang@...el.com, kai.huang@...el.com,
kai.svahn@...el.com, kmoy@...gle.com, ludloff@...gle.com,
luto@...nel.org, nhorman@...hat.com, puiterwijk@...hat.com,
rientjes@...gle.com, tglx@...utronix.de, yaozhangx@...gle.com,
mikko.ylinen@...el.com
Subject: Re: [PATCH v39 13/24] x86/sgx: Add SGX_IOC_ENCLAVE_ADD_PAGES
> +/**
> + * struct sgx_enclave_add_pages - parameter structure for the
> + * %SGX_IOC_ENCLAVE_ADD_PAGES ioctl
> + * @src: start address for the page data
> + * @offset: starting page offset
Is this the offset *within* the page? Might be nice to say that.
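From the code below (encl_page->desc = encl->base + offset) it looks
like the offset from the start of the enclave, so maybe spell it out,
e.g. something along the lines of:

 * @offset:	starting page offset (from the enclave base address)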
> + * @length: length of the data (multiple of the page size)
> + * @secinfo: address for the SECINFO data
> + * @flags: page control flags
> + * @count: number of bytes added (multiple of the page size)
> + */
> +struct sgx_enclave_add_pages {
> + __u64 src;
> + __u64 offset;
> + __u64 length;
> + __u64 secinfo;
> + __u64 flags;
> + __u64 count;
> +};
> +
> #endif /* _UAPI_ASM_X86_SGX_H */
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 9bb4694e57c1..e13e04737683 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -194,6 +194,302 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
> return ret;
> }
>
> +static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
> + unsigned long offset,
> + u64 secinfo_flags)
> +{
> + struct sgx_encl_page *encl_page;
> + unsigned long prot;
> +
> + encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
> + if (!encl_page)
> + return ERR_PTR(-ENOMEM);
> +
> + encl_page->desc = encl->base + offset;
> + encl_page->encl = encl;
Somewhere, we need an explanation of why we have 'sgx_epc_page' and
'sgx_encl_page'. I think they're 1:1 at least after
sgx_encl_page_alloc(), so I'm wondering why we need two.
> + prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
> + _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
> + _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
> +
> + /*
> + * TCS pages must always have RW set for CPU access, while the SECINFO
> + * permissions are *always* zero - the CPU ignores the user-provided
> + * values and silently overwrites them with zero permissions.
> + */
> + if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
> + prot |= PROT_READ | PROT_WRITE;
> +
> + /* Calculate maximum of the VM flags for the page. */
> + encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
> +
> + return encl_page;
> +}
> +
> +static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
> +{
> + u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
> + u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
I'd align the ='s up there ^^
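i.e. something like:

	u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
	u64 pt   = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;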
> +
> + if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
> + return -EINVAL;
> +
> + if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
> + return -EINVAL;
> +
> + /*
> + * The CPU will silently overwrite the permissions with zero, which
> + * means that we need to validate them ourselves.
> + */
> + if (pt == SGX_SECINFO_TCS && perm)
> + return -EINVAL;
> +
> + if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
> + return -EINVAL;
> +
> + if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static int __sgx_encl_add_page(struct sgx_encl *encl,
> + struct sgx_encl_page *encl_page,
> + struct sgx_epc_page *epc_page,
> + struct sgx_secinfo *secinfo, unsigned long src)
> +{
> + struct sgx_pageinfo pginfo;
> + struct vm_area_struct *vma;
> + struct page *src_page;
> + int ret;
> +
> + /* Deny noexec. */
> + vma = find_vma(current->mm, src);
> + if (!vma)
> + return -EFAULT;
> +
> + if (!(vma->vm_flags & VM_MAYEXEC))
> + return -EACCES;
> +
> + ret = get_user_pages(src, 1, 0, &src_page, NULL);
> + if (ret < 1)
> + return -EFAULT;
> +
> + pginfo.secs = (unsigned long)sgx_get_epc_addr(encl->secs.epc_page);
> + pginfo.addr = SGX_ENCL_PAGE_ADDR(encl_page);
> + pginfo.metadata = (unsigned long)secinfo;
> + pginfo.contents = (unsigned long)kmap_atomic(src_page);
> +
> + ret = __eadd(&pginfo, sgx_get_epc_addr(epc_page));
Could you convince me that EADD is not going to fault and make the
kmap_atomic() mad?
> + kunmap_atomic((void *)pginfo.contents);
All the casting is kinda nasty, but I guess you do it to ensure you can
use __u64 in the hardware structs.
> + put_page(src_page);
> +
> + return ret ? -EIO : 0;
> +}
> +
> +/*
> + * If the caller requires measurement of the page as proof of its contents,
> + * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this
> + * operation until the entire page is measured.
> + */
> +static int __sgx_encl_extend(struct sgx_encl *encl,
> + struct sgx_epc_page *epc_page)
> +{
> + int ret;
> + int i;
> +
> + for (i = 0; i < 16; i++) {
No magic numbers please.
#define SGX_EEXTEND_NR_BYTES 16 ??
> + ret = __eextend(sgx_get_epc_addr(encl->secs.epc_page),
> + sgx_get_epc_addr(epc_page) + (i * 0x100));
What's the 0x100 for?
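If it is the 256-byte chunk that a single EEXTEND measures, a named
constant for it would answer both of the questions above, e.g. (just a
sketch, name made up):

#define SGX_EEXTEND_BLOCK_SIZE	256

	for (i = 0; i < PAGE_SIZE / SGX_EEXTEND_BLOCK_SIZE; i++) {
		ret = __eextend(sgx_get_epc_addr(encl->secs.epc_page),
				sgx_get_epc_addr(epc_page) +
				(i * SGX_EEXTEND_BLOCK_SIZE));
		...
	}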
> + if (ret) {
> + if (encls_failed(ret))
> + ENCLS_WARN(ret, "EEXTEND");
> + return -EIO;
How frequent should we expect these to be? Can users cause them? You
should *probably* call it ENCLS_WARN_ONCE() if it's implemented that way.
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> + unsigned long offset, struct sgx_secinfo *secinfo,
> + unsigned long flags)
> +{
> + struct sgx_encl_page *encl_page;
> + struct sgx_epc_page *epc_page;
> + int ret;
> +
> + encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
> + if (IS_ERR(encl_page))
> + return PTR_ERR(encl_page);
> +
> + epc_page = __sgx_alloc_epc_page();
> + if (IS_ERR(epc_page)) {
> + kfree(encl_page);
> + return PTR_ERR(epc_page);
> + }
Looking at these, I'm forgetting why we need to allocate both an
encl_page and an epc_page. Comments might remind me. So would better
names.
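If the split is there because the EPC page backing an enclave page can
change or go away later (reclaim), a comment at the allocation site
would help, e.g. (assuming that is in fact the reason):

	/*
	 * sgx_encl_page is the driver's per-page bookkeeping and lives
	 * for the lifetime of the enclave; sgx_epc_page tracks the
	 * physical EPC slot that currently backs it.
	 */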
> + mmap_read_lock(current->mm);
> + mutex_lock(&encl->lock);
> +
> + /*
> + * Insert prior to EADD in case of OOM.
I wouldn't say OOM. Maybe:
xa_insert() and EADD can both fail. But xa_insert() is easier
to unwind so do it first.
> EADD modifies MRENCLAVE, i.e.
What is MRENCLAVE?
> + * can't be gracefully unwound, while failure on EADD/EXTEND is limited
> + * to userspace errors (or kernel/hardware bugs).
> + */
> + ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
> + encl_page, GFP_KERNEL);
> + if (ret)
> + goto err_out_unlock;
> +
> + ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
> + src);
> + if (ret)
> + goto err_out;
> +
> + /*
> + * Complete the "add" before doing the "extend" so that the "add"
> + * isn't in a half-baked state in the extremely unlikely scenario
> + * the enclave will be destroyed in response to EEXTEND failure.
> + */
> + encl_page->encl = encl;
> + encl_page->epc_page = epc_page;
> + encl->secs_child_cnt++;
> +
> + if (flags & SGX_PAGE_MEASURE) {
> + ret = __sgx_encl_extend(encl, epc_page);
> + if (ret)
> + goto err_out;
> + }
Why would we ever *not* measure an added page?
> + mutex_unlock(&encl->lock);
> + mmap_read_unlock(current->mm);
> + return ret;
> +
> +err_out:
> + xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
> +
> +err_out_unlock:
> + mutex_unlock(&encl->lock);
> + mmap_read_unlock(current->mm);
> +
> + sgx_free_epc_page(epc_page);
> + kfree(encl_page);
> +
> + return ret;
> +}
> +
> +/**
> + * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
> + * @encl: an enclave pointer
> + * @arg: a user pointer to a struct sgx_enclave_add_pages instance
> + *
> + * Add one or more pages to an uninitialized enclave, and optionally extend the
> + * measurement with the contents of the page. The SECINFO and measurement mask
> + * are applied to all pages.
> + *
> + * A SECINFO for a TCS is required to always contain zero permissions because
> + * the CPU silently zeros them. Allowing anything else would cause a mismatch in
> + * the measurement.
> + *
> + * mmap()'s protection bits are capped by the page permissions. For each page
> + * address, the maximum protection bits are computed with the following
> + * heuristics:
> + *
> + * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions.
> + * 2. A TCS page: PROT_R | PROT_W.
> + *
> + * mmap() is not allowed to surpass the minimum of the maximum protection bits
> + * within the given address range.
> + *
> + * The function deinitializes kernel data structures for the enclave and returns
> + * -EIO in any of the following conditions:
> + *
> + * - Enclave Page Cache (EPC), the physical memory holding enclaves, has
> + * been invalidated. This will cause EADD and EEXTEND to fail.
> + * - The source address is somehow corrupted when executing EADD.
> + *
> + * Return:
> + * length of the data processed on success,
> + * -EACCES if an executable source page is located in a noexec partition,
> + * -ENOMEM if the system is out of EPC pages,
> + * -EINTR if the call was interrupted before any data was processed,
> + * -EIO if the enclave was lost,
> + * -errno otherwise
> + */
> +static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
> +{
> + struct sgx_enclave_add_pages addp;
> + struct sgx_secinfo secinfo;
> + unsigned long c;
> + int ret;
> +
> + if ((atomic_read(&encl->flags) & SGX_ENCL_INITIALIZED) ||
> + !(atomic_read(&encl->flags) & SGX_ENCL_CREATED))
> + return -EINVAL;
There should be a nice state machine documented somewhere. Is there?
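Even a short comment next to the flag definitions would go a long way,
something like (assuming this is the intended flow):

	/*
	 * Enclave lifecycle: ECREATE sets SGX_ENCL_CREATED, pages may
	 * then be added with EADD/EEXTEND, and EINIT sets
	 * SGX_ENCL_INITIALIZED, after which no more pages can be added.
	 */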
> + if (copy_from_user(&addp, arg, sizeof(addp)))
> + return -EFAULT;
> +
> + if (!IS_ALIGNED(addp.offset, PAGE_SIZE) ||
> + !IS_ALIGNED(addp.src, PAGE_SIZE))
> + return -EINVAL;
> +
> + if (!(access_ok(addp.src, PAGE_SIZE)))
> + return -EFAULT;
This worries me. You're doing an access_ok() check on addp.src because
you evidently don't trust it. But, below, it looks to be accessed
directly with an offset, bound by addp.length, which I think can
be >PAGE_SIZE.
I'd feel a lot better if addp.src's value was being passed around as a
__user pointer.
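Or, at the very least, check the whole range up front instead of just
the first page, e.g. (sketch):

	if (!access_ok(addp.src, addp.length))
		return -EFAULT;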
> + if (addp.length & (PAGE_SIZE - 1))
> + return -EINVAL;
> +
> + if (addp.offset + addp.length - PAGE_SIZE >= encl->size)
> + return -EINVAL;
> +
> + if (copy_from_user(&secinfo, (void __user *)addp.secinfo,
> + sizeof(secinfo)))
> + return -EFAULT;
> +
> + if (sgx_validate_secinfo(&secinfo))
> + return -EINVAL;
> +
> + for (c = 0; c < addp.length; c += PAGE_SIZE) {
> + if (signal_pending(current)) {
> + if (!c)
> + ret = -ERESTARTSYS;
> +
> + break;
> + }
> +
> + if (c == SGX_MAX_ADD_PAGES_LENGTH)
> + break;
> +
> + if (need_resched())
> + cond_resched();
> +
> + ret = sgx_encl_add_page(encl, addp.src + c, addp.offset + c,
> + &secinfo, addp.flags);
Yeah... Don't we need to do another access_ok() check here, if we
needed one above, since we are moving away from addp.src?
> + if (ret)
> + break;
> + }
> +
> + addp.count = c;
> +
> + if (copy_to_user(arg, &addp, sizeof(addp)))
> + return -EFAULT;
> +
> + /*
> + * If the enclave was lost, deinitialize the internal data structures
> + * for the enclave.
> + */
> + if (ret == -EIO) {
> + mutex_lock(&encl->lock);
> + sgx_encl_destroy(encl);
> + mutex_unlock(&encl->lock);
> + }
> +
> + return ret;
> +}
> +
> long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> {
> struct sgx_encl *encl = filep->private_data;
> @@ -212,6 +508,9 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> case SGX_IOC_ENCLAVE_CREATE:
> ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
> break;
> + case SGX_IOC_ENCLAVE_ADD_PAGES:
> + ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg);
> + break;
> default:
> ret = -ENOIOCTLCMD;
> break;
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index fce756c3434b..8d126070db1e 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -34,6 +34,7 @@ struct sgx_epc_section {
>
> #define SGX_EPC_SECTION_MASK GENMASK(7, 0)
> #define SGX_MAX_EPC_SECTIONS (SGX_EPC_SECTION_MASK + 1)
> +#define SGX_MAX_ADD_PAGES_LENGTH 0x100000
>
> extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
>
>