lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 7 Dec 2010 14:44:30 +0900
From:	Minchan Kim <minchan.kim@...il.com>
To:	Hugh Dickins <hughd@...gle.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Rik van Riel <riel@...hat.com>,
	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
	linux-mm <linux-mm@...ck.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Johannes Weiner <hannes@...xchg.org>,
	Nick Piggin <npiggin@...nel.dk>, Mel Gorman <mel@....ul.ie>,
	Wu Fengguang <fengguang.wu@...el.com>
Subject: Re: [PATCH v4 7/7] Prevent activation of page in madvise_dontneed

On Tue, Dec 7, 2010 at 1:48 PM, Hugh Dickins <hughd@...gle.com> wrote:
> On Mon, 6 Dec 2010, Minchan Kim wrote:
>
>> Now zap_pte_range alwayas activates pages which are pte_young &&
>> !VM_SequentialReadHint(vma). But in case of calling MADV_DONTNEED,
>> it's unnecessary since the page wouldn't use any more.
>>
>> Signed-off-by: Minchan Kim <minchan.kim@...il.com>
>> Acked-by: Rik van Riel <riel@...hat.com>
>> Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
>> Cc: KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>
>> Cc: Johannes Weiner <hannes@...xchg.org>
>> Cc: Nick Piggin <npiggin@...nel.dk>
>> Cc: Mel Gorman <mel@....ul.ie>
>> Cc: Wu Fengguang <fengguang.wu@...el.com>
>> Cc: Hugh Dickins <hughd@...gle.com>
>>
>> Changelog since v3:
>>  - Change variable name - suggested by Johannes
>>  - Union ignore_references with zap_details - suggested by Hugh
>>
>> Changelog since v2:
>>  - remove unnecessary description
>>
>> Changelog since v1:
>>  - change word from promote to activate
>>  - add activate argument to zap_pte_range and family function
>> ---
>>  include/linux/mm.h |    4 +++-
>>  mm/madvise.c       |    6 +++---
>>  mm/memory.c        |    5 ++++-
>>  3 files changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 6522ae4..e57190f 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -771,12 +771,14 @@ struct zap_details {
>>       pgoff_t last_index;                     /* Highest page->index to unmap */
>>       spinlock_t *i_mmap_lock;                /* For unmap_mapping_range: */
>>       unsigned long truncate_count;           /* Compare vm_truncate_count */
>> +     bool ignore_references;                 /* For page activation */
>>  };
>>
>>  #define __ZAP_DETAILS_INITIALIZER(name) \
>>                  { .nonlinear_vma = NULL \
>>               , .check_mapping = NULL \
>> -             , .i_mmap_lock = NULL }
>> +             , .i_mmap_lock = NULL   \
>> +             , .ignore_references = false }
>
> Okay.
>
>>
>>  #define DEFINE_ZAP_DETAILS(name)             \
>>       struct zap_details name = __ZAP_DETAILS_INITIALIZER(name)
>> diff --git a/mm/madvise.c b/mm/madvise.c
>> index bfa17aa..8e7aba3 100644
>> --- a/mm/madvise.c
>> +++ b/mm/madvise.c
>> @@ -163,6 +163,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
>>                            unsigned long start, unsigned long end)
>>  {
>>       DEFINE_ZAP_DETAILS(details);
>> +     details.ignore_references = true;
>>
>>       *prev = vma;
>>       if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
>> @@ -173,10 +174,9 @@ static long madvise_dontneed(struct vm_area_struct * vma,
>>               details.last_index = ULONG_MAX;
>>
>>               zap_page_range(vma, start, end - start, &details);
>> -     } else {
>> -
>> +     } else
>>               zap_page_range(vma, start, end - start, &details);
>> -     }
>> +
>
> As in the previous patch, you have the same in the if {} and the else.
>
>>       return 0;
>>  }
>>
>> diff --git a/mm/memory.c b/mm/memory.c
>> index c0879bb..44d87e1 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -897,6 +897,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>       pte_t *pte;
>>       spinlock_t *ptl;
>>       int rss[NR_MM_COUNTERS];
>> +     bool ignore_references = details->ignore_references;
>>
>>       init_rss_vec(rss);
>>
>> @@ -952,7 +953,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>>                               if (pte_dirty(ptent))
>>                                       set_page_dirty(page);
>>                               if (pte_young(ptent) &&
>> -                                 likely(!VM_SequentialReadHint(vma)))
>> +                                 likely(!VM_SequentialReadHint(vma)) &&
>> +                                     !ignore_references)
>
> I think ignore_references is about as likely as VM_SequentialReadHint:
> I'd probably just omit that "likely()" nowadays, but you might prefer
> to put your "|| !ignore_references" inside.
>
> Hmm, actually it would probably be better to say something like
>
>        mark_accessed = true;
>        if (VM_SequentialReadHint(vma) ||
>            (details && details->ignore_references))
>                mark_accessed = false;
>
> on entry to zap_pte_range().
>
>>                                       mark_page_accessed(page);
>>                               rss[MM_FILEPAGES]--;
>>                       }
>> @@ -1218,6 +1220,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
>>               unsigned long size)
>>  {
>>       DEFINE_ZAP_DETAILS(details);
>> +     details.ignore_references = true;
>>       if (address < vma->vm_start || address + size > vma->vm_end ||
>>                       !(vma->vm_flags & VM_PFNMAP))
>>               return -1;
>> --
>
> Unnecessary here (would make more sense in the truncation case,
> but not necessary there either): zap_vma_ptes() is only being used on
> GRU's un-cowable VM_PFNMAP area, so vm_normal_page() won't even give
> you a non-NULL page to mark.

Thanks for the notice.

How about this? Although it doesn't remove null dependency, it meet my
goal without big overhead.
It's just quick patch. If you agree, I will resend this version as formal patch.
(If you suffered from seeing below word-wrapped source, see the
attachment. I asked to google two time to support text-plain mode in
gmail web but I can't receive any response until now. ;(. Lots of
kernel developer in google. Please support this mode for us who can't
use SMTP although it's a very small VOC)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e097df6..14ae918 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -771,6 +771,7 @@ struct zap_details {
        pgoff_t last_index;                     /* Highest page->index
to unmap */
        spinlock_t *i_mmap_lock;                /* For unmap_mapping_range: */
        unsigned long truncate_count;           /* Compare vm_truncate_count */
+       int ignore_reference;
 };

 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/madvise.c b/mm/madvise.c
index 319528b..fdb0253 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -162,18 +162,22 @@ static long madvise_dontneed(struct vm_area_struct * vma,
                             struct vm_area_struct ** prev,
                             unsigned long start, unsigned long end)
 {
+       struct zap_details details ;
+
        *prev = vma;
        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
                return -EINVAL;

        if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-               struct zap_details details = {
-                       .nonlinear_vma = vma,
-                       .last_index = ULONG_MAX,
-               };
-               zap_page_range(vma, start, end - start, &details);
-       } else
-               zap_page_range(vma, start, end - start, NULL);
+               details.nonlinear_vma = vma;
+               details.last_index = ULONG_MAX;
+       } else {
+               details.nonlinear_vma = NULL;
+               details.last_index = NULL;
+       }
+
+       details.ignore_references = true;
+       zap_page_range(vma, start, end - start, &details);
        return 0;
 }

diff --git a/mm/memory.c b/mm/memory.c
index ebfeedf..d46ac42 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -897,9 +897,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
        pte_t *pte;
        spinlock_t *ptl;
        int rss[NR_MM_COUNTERS];
-
+       bool ignore_reference = false;
        init_rss_vec(rss);

+       if (details && ((!details->check_mapping && !details->nonlinear_vma)
+                                        || !details->ignore_reference))
+               details = NULL;
+
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
@@ -949,7 +955,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                if (pte_dirty(ptent))
                                        set_page_dirty(page);
                                if (pte_young(ptent) &&
-                                   likely(!VM_SequentialReadHint(vma)))
+                                   likely(!VM_SequentialReadHint(vma)) &&
+                                   likely(!ignore_reference))
                                        mark_page_accessed(page);
                                rss[MM_FILEPAGES]--;
                        }
@@ -1038,8 +1045,6 @@ static unsigned long unmap_page_range(struct
mmu_gather *tlb,
        pgd_t *pgd;
        unsigned long next;

-       if (details && !details->check_mapping && !details->nonlinear_vma)
-               details = NULL;

        BUG_ON(addr >= end);
        mem_cgroup_uncharge_start();
@@ -1102,7 +1107,8 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
        unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
        int tlb_start_valid = 0;
        unsigned long start = start_addr;
-       spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
+       spinlock_t *i_mmap_lock = details ?
+               (detais->check_mapping ? details->i_mmap_lock: NULL) : NULL;
        int fullmm = (*tlbp)->fullmm;
        struct mm_struct *mm = vma->vm_mm;



>
> Hugh
>



-- 
Kind regards,
Minchan Kim

View attachment "madvise.patch" of type "text/x-diff" (2952 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ