Message-Id: <20190508144422.13171-47-kirill.shutemov@linux.intel.com>
Date: Wed, 8 May 2019 17:44:06 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: Andrew Morton <akpm@...ux-foundation.org>, x86@...nel.org,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>, Borislav Petkov <bp@...en8.de>,
Peter Zijlstra <peterz@...radead.org>,
Andy Lutomirski <luto@...capital.net>,
David Howells <dhowells@...hat.com>
Cc: Kees Cook <keescook@...omium.org>,
Dave Hansen <dave.hansen@...el.com>,
Kai Huang <kai.huang@...ux.intel.com>,
Jacob Pan <jacob.jun.pan@...ux.intel.com>,
Alison Schofield <alison.schofield@...el.com>,
linux-mm@...ck.org, kvm@...r.kernel.org, keyrings@...r.kernel.org,
linux-kernel@...r.kernel.org,
"Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCH, RFC 46/62] x86/mm: Keep reference counts on encrypted VMAs for MKTME
From: Alison Schofield <alison.schofield@...el.com>
The MKTME (Multi-Key Total Memory Encryption) Key Service needs
a reference count on encrypted VMAs. This reference count is used
to determine when a hardware encryption KeyID is no longer in use
and can be freed and reassigned to another Userspace Key.
The MKTME Key Service does the percpu_ref_init() and percpu_ref_kill(),
so these gets/puts on encrypted VMAs can be considered the
intermediaries in the lifetime of the key.
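
The percpu_ref_init()/percpu_ref_kill() side belongs to the key
service and is not part of this patch. As a rough sketch of that
lifecycle (the callback and helper names here, such as
mktme_encrypt_count_release() and mktme_free_keyid(), are
illustrative, not taken from the series):

	static void mktme_encrypt_count_release(struct percpu_ref *ref)
	{
		int keyid = ref - encrypt_count; /* index into per-KeyID array */

		/* Nothing uses the KeyID anymore; it can be reprogrammed. */
		mktme_free_keyid(keyid);
	}

	/* On key allocation: */
	static int mktme_encrypt_count_init(int keyid)
	{
		return percpu_ref_init(&encrypt_count[keyid],
				       mktme_encrypt_count_release,
				       0, GFP_KERNEL);
	}

	/* On key revocation: drop the initial ref; release fires at zero. */
	static void mktme_encrypt_count_kill(int keyid)
	{
		percpu_ref_kill(&encrypt_count[keyid]);
	}
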
Increment/decrement the reference count during the encrypt_mprotect()
system call when encryption on a VMA is first set or updated.
Piggyback on the vm_area_dup()/vm_area_free() helpers: if the VMA
being duplicated or freed is encrypted, adjust the reference count.
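
As an illustration of how the helpers pair up (this function is not
part of the patch), duplicating and then freeing an encrypted VMA
leaves the KeyID's count unchanged:

	/* Toy example: dup then free is reference-neutral. */
	static void mktme_ref_pairing_example(struct vm_area_struct *vma)
	{
		struct vm_area_struct *copy;

		copy = vm_area_dup(vma);	/* takes a ref on vma's KeyID */
		if (copy)
			vm_area_free(copy);	/* drops that same ref */
	}
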
Signed-off-by: Alison Schofield <alison.schofield@...el.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
 arch/x86/include/asm/mktme.h |  5 +++++
 arch/x86/mm/mktme.c          | 37 +++++++++++++++++++++++++++++++++++--
 include/linux/mm.h           |  2 ++
 kernel/fork.c                |  2 ++
 4 files changed, 44 insertions(+), 2 deletions(-)
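
One note on the page-side accounting added in mktme.c below:
__prep_encrypted_page() takes one reference per 4K page, so the
count stays balanced even if a higher-order page is split before it
is freed. A toy sketch of the arithmetic (illustration only; in the
real flow these hooks are invoked from the page allocator rather
than called directly, and returning the pages to the allocator is
elided here):

	static void mktme_split_accounting_example(int keyid)
	{
		struct page *page = alloc_pages(GFP_KERNEL, 2); /* four 4K pages */
		int i;

		if (!page)
			return;

		__prep_encrypted_page(page, 2, keyid, true); /* get_many(..., 1 << 2) */
		split_page(page, 2);			     /* four order-0 pages */

		for (i = 0; i < 4; i++)
			free_encrypted_page(page + i, 0);    /* each puts one ref */
	}
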
diff --git a/arch/x86/include/asm/mktme.h b/arch/x86/include/asm/mktme.h
index 0e6df07f1921..14da002d2e85 100644
--- a/arch/x86/include/asm/mktme.h
+++ b/arch/x86/include/asm/mktme.h
@@ -16,6 +16,11 @@ extern int mktme_keyid_shift;
extern void mprotect_set_encrypt(struct vm_area_struct *vma, int newkeyid,
unsigned long start, unsigned long end);
+/* MKTME encrypt_count for VMAs */
+extern struct percpu_ref *encrypt_count;
+extern void vma_get_encrypt_ref(struct vm_area_struct *vma);
+extern void vma_put_encrypt_ref(struct vm_area_struct *vma);
+
DECLARE_STATIC_KEY_FALSE(mktme_enabled_key);
static inline bool mktme_enabled(void)
{
diff --git a/arch/x86/mm/mktme.c b/arch/x86/mm/mktme.c
index 91b49e88ca3f..df70651816a1 100644
--- a/arch/x86/mm/mktme.c
+++ b/arch/x86/mm/mktme.c
@@ -66,11 +66,12 @@ void mprotect_set_encrypt(struct vm_area_struct *vma, int newkeyid,
if (oldkeyid == newkeyid)
return;
-
+ vma_put_encrypt_ref(vma);
newprot = pgprot_val(vma->vm_page_prot);
newprot &= ~mktme_keyid_mask;
newprot |= (unsigned long)newkeyid << mktme_keyid_shift;
vma->vm_page_prot = __pgprot(newprot);
+ vma_get_encrypt_ref(vma);
/*
* The VMA doesn't have any inherited pages.
@@ -79,6 +80,18 @@ void mprotect_set_encrypt(struct vm_area_struct *vma, int newkeyid,
unlink_anon_vmas(vma);
}
+void vma_get_encrypt_ref(struct vm_area_struct *vma)
+{
+ if (vma_keyid(vma))
+ percpu_ref_get(&encrypt_count[vma_keyid(vma)]);
+}
+
+void vma_put_encrypt_ref(struct vm_area_struct *vma)
+{
+ if (vma_keyid(vma))
+ percpu_ref_put(&encrypt_count[vma_keyid(vma)]);
+}
+
/* Prepare page to be used for encryption. Called from page allocator. */
void __prep_encrypted_page(struct page *page, int order, int keyid, bool zero)
{
@@ -102,6 +115,22 @@ void __prep_encrypted_page(struct page *page, int order, int keyid, bool zero)
page++;
}
+
+ /*
+ * Make sure the KeyID cannot be freed until the last page that
+ * uses the KeyID is gone.
+ *
+ * This is required because the page may live longer than the VMA
+ * it is mapped into (e.g. in the get_user_pages() case), so
+ * refcounting per-VMA is not enough.
+ *
+ * Taking a reference per 4K page helps in case the page gets
+ * split after allocation. free_encrypted_page() will balance
+ * out the refcount even if the page was split and freed as a
+ * bunch of 4K pages.
+ */
+
+ percpu_ref_get_many(&encrypt_count[keyid], 1 << order);
}
/*
@@ -110,7 +139,9 @@ void __prep_encrypted_page(struct page *page, int order, int keyid, bool zero)
*/
void free_encrypted_page(struct page *page, int order)
{
- int i;
+ int i, keyid;
+
+ keyid = page_keyid(page);
/*
* The hardware/CPU does not enforce coherency between mappings
@@ -125,6 +156,8 @@ void free_encrypted_page(struct page *page, int order)
lookup_page_ext(page)->keyid = 0;
page++;
}
+
+ percpu_ref_put_many(&encrypt_count[keyid], 1 << order);
}
static int sync_direct_mapping_pte(unsigned long keyid,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7f52d053826..00c0fd70816b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2831,6 +2831,8 @@ static inline void mprotect_set_encrypt(struct vm_area_struct *vma,
int newkeyid,
unsigned long start,
unsigned long end) {}
+static inline void vma_get_encrypt_ref(struct vm_area_struct *vma) {}
+static inline void vma_put_encrypt_ref(struct vm_area_struct *vma) {}
#endif /* CONFIG_X86_INTEL_MKTME */
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 9dcd18aa210b..f0e35ed76f5a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -342,12 +342,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
if (new) {
*new = *orig;
INIT_LIST_HEAD(&new->anon_vma_chain);
+ vma_get_encrypt_ref(new);
}
return new;
}
void vm_area_free(struct vm_area_struct *vma)
{
+ vma_put_encrypt_ref(vma);
kmem_cache_free(vm_area_cachep, vma);
}
--
2.20.1