Message-ID: <f899be71-4bc0-d07b-f650-d85a335cdebb@suse.cz>
Date: Wed, 9 Oct 2019 15:13:13 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Ajay Kaher <akaher@...are.com>, gregkh@...uxfoundation.org
Cc: torvalds@...ux-foundation.org, punit.agrawal@....com,
akpm@...ux-foundation.org, kirill.shutemov@...ux.intel.com,
willy@...radead.org, will.deacon@....com, mszeredi@...hat.com,
stable@...r.kernel.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, srivatsab@...are.com,
srivatsa@...il.mit.edu, amakhalov@...are.com, srinidhir@...are.com,
bvikas@...are.com, anishs@...are.com, vsirnapalli@...are.com,
srostedt@...are.com, stable@...nel.org,
Ben Hutchings <ben@...adent.org.uk>
Subject: Re: [PATCH v2 6/8] mm: prevent get_user_pages() from overflowing page refcount

On 10/9/19 2:44 AM, Ajay Kaher wrote:
> From: Linus Torvalds <torvalds@...ux-foundation.org>
>
> commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream.
>
> If the page refcount wraps around past zero, it will be freed while
> there are still four billion references to it. One of the possible
> avenues for an attacker to try to make this happen is by doing direct IO
> on a page multiple times. This patch makes get_user_pages() refuse to
> take a new page reference if there are already more than two billion
> references to the page.
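
As an aside for backport readers: the "two billion" limit falls out of
page->_count being a signed 32-bit int.  A minimal sketch of the guard
follows; the helper name is made up for illustration, and the patch
itself open-codes the same check in try_get_compound_head() below:

/*
 * Once a signed 32-bit count passes INT_MAX (2^31 - 1 = 2147483647,
 * ~2.1 billion) it reads back as negative, long before the 2^32
 * (~4.3 billion) wrap to zero that would free a still-pinned page.
 * So "refuse above two billion references" is simply "refuse when
 * the count looks negative".
 */
static inline bool ref_overflow_guard(struct page *page)	/* illustrative name */
{
	struct page *head = compound_head(page);

	if (WARN_ON_ONCE(atomic_read(&head->_count) < 0))
		return false;		/* saturated: don't take another ref */
	atomic_inc(&head->_count);
	return true;
}
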
>
> Reported-by: Jann Horn <jannh@...gle.com>
> Acked-by: Matthew Wilcox <willy@...radead.org>
> Cc: stable@...nel.org
> Signed-off-by: Linus Torvalds <torvalds@...ux-foundation.org>
> [ 4.4.y backport notes:
> Ajay: Added local variable 'err' within follow_hugetlb_page()
> from 2be7cfed995e, to resolve compilation error
> Srivatsa: Replaced call to get_page_foll() with try_get_page_foll() ]
> Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@...il.mit.edu>
> Signed-off-by: Ajay Kaher <akaher@...are.com>
> ---
> mm/gup.c | 43 ++++++++++++++++++++++++++++++++-----------
> mm/hugetlb.c | 16 +++++++++++++++-
> 2 files changed, 47 insertions(+), 12 deletions(-)
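
The backport notes above mention try_get_page_foll(); that helper is
added by an earlier patch in this series and is not quoted in this
mail.  Roughly, built on 4.4's get_page_foll(), it would have the
following shape (a sketch under that assumption -- the real helper in
the series may differ in detail):

static inline __must_check bool try_get_page_foll(struct page *page)
{
	/*
	 * Like get_page_foll(), but refuse instead of incrementing a
	 * refcount that is already zero or has gone negative (i.e.
	 * past ~2 billion references).  compound_head() makes the
	 * check cover tail pages as well.
	 */
	if (WARN_ON_ONCE(atomic_read(&compound_head(page)->_count) <= 0))
		return false;
	get_page_foll(page);
	return true;
}
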
This seems to have the same issue as the 4.9 stable version [1], in not
touching the arch-specific gup.c variants.
[1]
https://lore.kernel.org/lkml/6650323f-dbc9-f069-000b-f6b0f941a065@suse.cz/
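
For reference, the arch-private fast-GUP walkers in 4.4 (e.g.
arch/x86/mm/gup.c) still take their page references unconditionally,
so they need a guard of the same shape.  Purely as an illustration of
the missing change, not a hunk against any real tree, reusing the
try_get_page() that this series already adds:

 		page = pte_page(pte);
-		get_page(page);
+		if (unlikely(!try_get_page(page))) {
+			pte_unmap(ptep);
+			return 0;	/* fall back to the slow GUP path */
+		}
 		pages[*nr] = page;
 		(*nr)++;
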
> diff --git a/mm/gup.c b/mm/gup.c
> index fae4d1e..171b460 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -126,8 +126,12 @@ retry:
> }
> }
>
> - if (flags & FOLL_GET)
> - get_page_foll(page);
> + if (flags & FOLL_GET) {
> + if (unlikely(!try_get_page_foll(page))) {
> + page = ERR_PTR(-ENOMEM);
> + goto out;
> + }
> + }
> if (flags & FOLL_TOUCH) {
> if ((flags & FOLL_WRITE) &&
> !pte_dirty(pte) && !PageDirty(page))
> @@ -289,7 +293,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
> goto unmap;
> *page = pte_page(*pte);
> }
> - get_page(*page);
> + if (unlikely(!try_get_page(*page))) {
> + ret = -ENOMEM;
> + goto unmap;
> + }
> out:
> ret = 0;
> unmap:
> @@ -1053,6 +1060,20 @@ struct page *get_dump_page(unsigned long addr)
> */
> #ifdef CONFIG_HAVE_GENERIC_RCU_GUP
>
> +/*
> + * Return the compound head page with ref appropriately incremented,
> + * or NULL if that failed.
> + */
> +static inline struct page *try_get_compound_head(struct page *page, int refs)
> +{
> + struct page *head = compound_head(page);
> + if (WARN_ON_ONCE(atomic_read(&head->_count) < 0))
> + return NULL;
> + if (unlikely(!page_cache_add_speculative(head, refs)))
> + return NULL;
> + return head;
> +}
> +
> #ifdef __HAVE_ARCH_PTE_SPECIAL
> static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
> int write, struct page **pages, int *nr)
> @@ -1082,9 +1103,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
>
> VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> page = pte_page(pte);
> - head = compound_head(page);
>
> - if (!page_cache_get_speculative(head))
> + head = try_get_compound_head(page, 1);
> + if (!head)
> goto pte_unmap;
>
> if (unlikely(pte_val(pte) != pte_val(*ptep))) {
> @@ -1141,8 +1162,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
> refs++;
> } while (addr += PAGE_SIZE, addr != end);
>
> - head = compound_head(pmd_page(orig));
> - if (!page_cache_add_speculative(head, refs)) {
> + head = try_get_compound_head(pmd_page(orig), refs);
> + if (!head) {
> *nr -= refs;
> return 0;
> }
> @@ -1187,8 +1208,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
> refs++;
> } while (addr += PAGE_SIZE, addr != end);
>
> - head = compound_head(pud_page(orig));
> - if (!page_cache_add_speculative(head, refs)) {
> + head = try_get_compound_head(pud_page(orig), refs);
> + if (!head) {
> *nr -= refs;
> return 0;
> }
> @@ -1229,8 +1250,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
> refs++;
> } while (addr += PAGE_SIZE, addr != end);
>
> - head = compound_head(pgd_page(orig));
> - if (!page_cache_add_speculative(head, refs)) {
> + head = try_get_compound_head(pgd_page(orig), refs);
> + if (!head) {
> *nr -= refs;
> return 0;
> }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fd932e7..3a1501e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3886,6 +3886,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
> unsigned long vaddr = *position;
> unsigned long remainder = *nr_pages;
> struct hstate *h = hstate_vma(vma);
> + int err = -EFAULT;
>
> while (vaddr < vma->vm_end && remainder) {
> pte_t *pte;
> @@ -3957,6 +3958,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
>
> pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
> page = pte_page(huge_ptep_get(pte));
> +
> + /*
> + * Instead of doing 'try_get_page_foll()' below in the same_page
> + * loop, just check the count once here.
> + */
> + if (unlikely(page_count(page) <= 0)) {
> + if (pages) {
> + spin_unlock(ptl);
> + remainder = 0;
> + err = -ENOMEM;
> + break;
> + }
> + }
> same_page:
> if (pages) {
> pages[i] = mem_map_offset(page, pfn_offset);
> @@ -3983,7 +3997,7 @@ same_page:
> *nr_pages = remainder;
> *position = vaddr;
>
> - return i ? i : -EFAULT;
> + return i ? i : err;
> }
>
> unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>