Date:	Mon, 04 May 2009 12:23:55 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Wu Fengguang <fengguang.wu@...el.com>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Rik van Riel <riel@...hat.com>, elladan@...imo.com,
	linux-kernel@...r.kernel.org, tytso@....edu,
	kosaki.motohiro@...fujitsu.com, linux-mm@...ck.org
Subject: Re: [PATCH] vmscan: evict use-once pages first (v2)

On Sun, 2009-05-03 at 11:15 +0800, Wu Fengguang wrote:
> On Fri, May 01, 2009 at 12:35:41PM -0700, Andrew Morton wrote:
> > On Fri, 01 May 2009 10:05:53 -0400
> > Rik van Riel <riel@...hat.com> wrote:
> > 
> > > Andrew Morton wrote:
> > > 
> > > >> When we implement working set protection, we might as well
> > > >> do it for frequently accessed unmapped pages too.  There is
> > > >> no reason to restrict this protection to mapped pages.
> > > > 
> > > > Well.  Except for empirical observation, which tells us that biasing
> > > > reclaim to prefer to retain mapped memory produces a better result.
> > > 
> > > That used to be the case because file-backed and
> > > swap-backed pages shared the same set of LRUs,
> > > while each following a different page reclaim
> > > heuristic!
> > 
> > No, I think it still _is_ the case.  When reclaim is treating mapped
> > and non-mapped pages equally, the end result sucks.  Applications get
> > all laggy and humans get irritated.  It may be that the system was
> > optimised from an overall throughput POV, but the result was
> > *irritating*.
> > 
> > Which led us to prefer to retain mapped pages.  This had nothing at all
> > to do with internal implementation details - it was a design objective
> > based upon empirical observation of system behaviour.
> 
> Heartily agreed. We should try hard to protect running applications.
> 
> Commit 7e9cd484204f ("vmscan: fix pagecache reclaim referenced bit check")
> tries to address the scalability problem that arises when every page gets
> mapped and referenced, so that logic (which lowered the priority of mapped
> pages) is only enabled under conditions like (priority < DEF_PRIORITY).
> 
> Or preferably we can explicitly protect the mapped executables,
> as illustrated by this patch (a quick prototype).

Ah, nice, this reinstates the young bit for PROT_EXEC pages.
I very much like this.
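
For reference, the new protection keys off mappings created with PROT_EXEC.
Below is a minimal userspace sketch (not part of the patch; the library path
is only an example) of the kind of mapping whose file pages would now stay on
the active list while they keep getting referenced:

/*
 * Sketch: mapping a file with PROT_EXEC sets VM_EXEC on the vma; with
 * this patch, the mmap path then also sets AS_EXEC on the file's
 * f_mapping, so its referenced page-cache pages are kept active.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/lib/libc.so.6";	/* example; any executable file */
	struct stat st;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(path);
		return 1;
	}
	/* PROT_EXEC is what makes the kernel tag the backing mapping */
	void *text = mmap(NULL, st.st_size, PROT_READ | PROT_EXEC,
			  MAP_PRIVATE, fd, 0);
	if (text == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("mapped %s at %p (%lld bytes, PROT_EXEC)\n",
	       path, text, (long long)st.st_size);
	munmap(text, st.st_size);
	close(fd);
	return 0;
}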


> Thanks,
> Fengguang
> ---
>  include/linux/pagemap.h |    1 +
>  mm/mmap.c               |    2 ++
>  mm/nommu.c              |    2 ++
>  mm/vmscan.c             |   37 +++++++++++++++++++++++++++++++++++--
>  4 files changed, 40 insertions(+), 2 deletions(-)
> 
> --- linux.orig/include/linux/pagemap.h
> +++ linux/include/linux/pagemap.h
> @@ -25,6 +25,7 @@ enum mapping_flags {
>  #ifdef CONFIG_UNEVICTABLE_LRU
>  	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
>  #endif
> +	AS_EXEC		= __GFP_BITS_SHIFT + 4,	/* mapped PROT_EXEC somewhere */
>  };
>  
>  static inline void mapping_set_error(struct address_space *mapping, int error)
> --- linux.orig/mm/mmap.c
> +++ linux/mm/mmap.c
> @@ -1198,6 +1198,8 @@ munmap_back:
>  			goto unmap_and_free_vma;
>  		if (vm_flags & VM_EXECUTABLE)
>  			added_exe_file_vma(mm);
> +		if (vm_flags & VM_EXEC)
> +			set_bit(AS_EXEC, &file->f_mapping->flags);
>  	} else if (vm_flags & VM_SHARED) {
>  		error = shmem_zero_setup(vma);
>  		if (error)
> --- linux.orig/mm/vmscan.c
> +++ linux/mm/vmscan.c
> @@ -1220,6 +1220,7 @@ static void shrink_active_list(unsigned 
>  	int pgdeactivate = 0;
>  	unsigned long pgscanned;
>  	LIST_HEAD(l_hold);	/* The pages which were snipped off */
> +	LIST_HEAD(l_active);
>  	LIST_HEAD(l_inactive);
>  	struct page *page;
>  	struct pagevec pvec;
> @@ -1259,8 +1260,15 @@ static void shrink_active_list(unsigned 
>  
>  		/* page_referenced clears PageReferenced */
>  		if (page_mapping_inuse(page) &&
> -		    page_referenced(page, 0, sc->mem_cgroup))
> +		    page_referenced(page, 0, sc->mem_cgroup)) {
> +			struct address_space *mapping = page_mapping(page);
> +
>  			pgmoved++;
> +			if (mapping && test_bit(AS_EXEC, &mapping->flags)) {
> +				list_add(&page->lru, &l_active);
> +				continue;
> +			}
> +		}
>  
>  		list_add(&page->lru, &l_inactive);
>  	}
> @@ -1269,7 +1277,6 @@ static void shrink_active_list(unsigned 
>  	 * Move the pages to the [file or anon] inactive list.
>  	 */
>  	pagevec_init(&pvec, 1);
> -	lru = LRU_BASE + file * LRU_FILE;
>  
>  	spin_lock_irq(&zone->lru_lock);
>  	/*
> @@ -1281,6 +1288,7 @@ static void shrink_active_list(unsigned 
>  	reclaim_stat->recent_rotated[!!file] += pgmoved;
>  
>  	pgmoved = 0;
> +	lru = LRU_BASE + file * LRU_FILE;
>  	while (!list_empty(&l_inactive)) {
>  		page = lru_to_page(&l_inactive);
>  		prefetchw_prev_lru_page(page, &l_inactive, flags);
> @@ -1305,6 +1313,31 @@ static void shrink_active_list(unsigned 
>  	}
>  	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
>  	pgdeactivate += pgmoved;
> +
> +	pgmoved = 0;
> +	lru = LRU_ACTIVE + file * LRU_FILE;
> +	while (!list_empty(&l_active)) {
> +		page = lru_to_page(&l_active);
> +		prefetchw_prev_lru_page(page, &l_active, flags);
> +		VM_BUG_ON(PageLRU(page));
> +		SetPageLRU(page);
> +		VM_BUG_ON(!PageActive(page));
> +
> +		list_move(&page->lru, &zone->lru[lru].list);
> +		mem_cgroup_add_lru_list(page, lru);
> +		pgmoved++;
> +		if (!pagevec_add(&pvec, page)) {
> +			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
> +			pgmoved = 0;
> +			spin_unlock_irq(&zone->lru_lock);
> +			if (buffer_heads_over_limit)
> +				pagevec_strip(&pvec);
> +			__pagevec_release(&pvec);
> +			spin_lock_irq(&zone->lru_lock);
> +		}
> +	}
> +	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
> +
>  	__count_zone_vm_events(PGREFILL, zone, pgscanned);
>  	__count_vm_events(PGDEACTIVATE, pgdeactivate);
>  	spin_unlock_irq(&zone->lru_lock);
> --- linux.orig/mm/nommu.c
> +++ linux/mm/nommu.c
> @@ -1220,6 +1220,8 @@ unsigned long do_mmap_pgoff(struct file 
>  			added_exe_file_vma(current->mm);
>  			vma->vm_mm = current->mm;
>  		}
> +		if (vm_flags & VM_EXEC)
> +			set_bit(AS_EXEC, &file->f_mapping->flags);
>  	}
>  
>  	down_write(&nommu_region_sem);
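
And, to summarize the behavioural change, here is an illustrative model of
the new active-list decision (not kernel code; the names are invented for
clarity):

/*
 * Model of the shrink_active_list() change above: referenced pages
 * whose backing mapping was ever mapped PROT_EXEC stay on the active
 * list; everything else is deactivated as before, so use-once pages
 * are still evicted first.
 */
enum lru_disposition { KEEP_ACTIVE, DEACTIVATE };

enum lru_disposition page_disposition(int mapping_in_use, int referenced,
				      int mapping_has_as_exec)
{
	if (mapping_in_use && referenced && mapping_has_as_exec)
		return KEEP_ACTIVE;	/* protect executable text */
	return DEACTIVATE;		/* candidate for eviction */
}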