lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPcyv4h7+Rgs83JefhJajHtitPWUFEKKgUt-_e-bqhQZM5L2FA@mail.gmail.com>
Date:   Tue, 25 Apr 2017 09:44:12 -0700
From:   Dan Williams <dan.j.williams@...el.com>
To:     "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Cc:     "Kirill A. Shutemov" <kirill@...temov.name>,
        Linux MM <linux-mm@...ck.org>,
        Catalin Marinas <catalin.marinas@....com>,
        "Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>,
        Steve Capper <steve.capper@...aro.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Peter Zijlstra <peterz@...radead.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Borislav Petkov <bp@...en8.de>, Rik van Riel <riel@...hat.com>,
        Dann Frazier <dann.frazier@...onical.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Michal Hocko <mhocko@...e.cz>,
        linux-tip-commits@...r.kernel.org
Subject: Re: get_zone_device_page() in get_page() and page_cache_get_speculative()

On Tue, Apr 25, 2017 at 6:19 AM, Kirill A. Shutemov
<kirill.shutemov@...ux.intel.com> wrote:
> On Mon, Apr 24, 2017 at 11:41:51AM -0700, Dan Williams wrote:
>> On Mon, Apr 24, 2017 at 11:25 AM, Kirill A. Shutemov
>> <kirill.shutemov@...ux.intel.com> wrote:
>> > On Mon, Apr 24, 2017 at 09:01:58PM +0300, Kirill A. Shutemov wrote:
>> >> On Mon, Apr 24, 2017 at 10:47:43AM -0700, Dan Williams wrote:
>> >> I think it's still better to do it on page_ref_* level.
>> >
>> > Something like patch below? What do you think?
>>
>> From a quick glance, I think this looks like the right way to go.
>
> Okay, but I still would like to remove manipulation with pgmap->ref from
> hot path.
>
> Can we just check that page_count() match our expectation on
> devm_memremap_pages_release() instead of this?
>
> I probably miss something in bigger picture, but would something like
> patch work too? It seems work for the test case.

No, unfortunately this is broken. It should be perfectly legal to
start the driver shutdown process while page references are still
outstanding. We use the percpu-ref infrastructure to wait for those
references to be dropped. With the approach below we'll just race and
crash.

>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a835edd2db34..695da2a19b4c 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -762,19 +762,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
>  }
>
>  #ifdef CONFIG_ZONE_DEVICE
> -void get_zone_device_page(struct page *page);
> -void put_zone_device_page(struct page *page);
>  static inline bool is_zone_device_page(const struct page *page)
>  {
>         return page_zonenum(page) == ZONE_DEVICE;
>  }
>  #else
> -static inline void get_zone_device_page(struct page *page)
> -{
> -}
> -static inline void put_zone_device_page(struct page *page)
> -{
> -}
>  static inline bool is_zone_device_page(const struct page *page)
>  {
>         return false;
> @@ -790,9 +782,6 @@ static inline void get_page(struct page *page)
>          */
>         VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
>         page_ref_inc(page);
> -
> -       if (unlikely(is_zone_device_page(page)))
> -               get_zone_device_page(page);
>  }
>
>  static inline void put_page(struct page *page)
> @@ -801,9 +790,6 @@ static inline void put_page(struct page *page)
>
>         if (put_page_testzero(page))
>                 __put_page(page);
> -
> -       if (unlikely(is_zone_device_page(page)))
> -               put_zone_device_page(page);
>  }
>
>  #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index 07e85e5229da..e542bb2f7ab0 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -182,18 +182,6 @@ struct page_map {
>         struct vmem_altmap altmap;
>  };
>
> -void get_zone_device_page(struct page *page)
> -{
> -       percpu_ref_get(page->pgmap->ref);
> -}
> -EXPORT_SYMBOL(get_zone_device_page);
> -
> -void put_zone_device_page(struct page *page)
> -{
> -       put_dev_pagemap(page->pgmap);
> -}
> -EXPORT_SYMBOL(put_zone_device_page);
> -
>  static void pgmap_radix_release(struct resource *res)
>  {
>         resource_size_t key, align_start, align_size, align_end;
> @@ -237,12 +225,21 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
>         struct resource *res = &page_map->res;
>         resource_size_t align_start, align_size;
>         struct dev_pagemap *pgmap = &page_map->pgmap;
> +       unsigned long pfn;
>
>         if (percpu_ref_tryget_live(pgmap->ref)) {
>                 dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
>                 percpu_ref_put(pgmap->ref);
>         }
>
> +       for_each_device_pfn(pfn, page_map) {
> +               struct page *page = pfn_to_page(pfn);
> +
> +               dev_WARN_ONCE(dev, page_count(page) != 1,
> +                               "%s: unexpected page count: %d!\n",
> +                               __func__, page_count(page));
> +       }
> +
>         /* pages are dead and unused, undo the arch mapping */
>         align_start = res->start & ~(SECTION_SIZE - 1);
>         align_size = ALIGN(resource_size(res), SECTION_SIZE);
> --
>  Kirill A. Shutemov

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ