linux-kernel - Re: Found the commit that causes the OOMs

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20090629091741.ab815ae7.minchan.kim@barrios-desktop>
Date:	Mon, 29 Jun 2009 09:17:41 +0900
From:	Minchan Kim <minchan.kim@...il.com>
To:	Wu Fengguang <fengguang.wu@...el.com>
Cc:	KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
	Minchan Kim <minchan.kim@...il.com>,
	Johannes Weiner <hannes@...xchg.org>,
	David Howells <dhowells@...hat.com>,
	"riel@...hat.com" <riel@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Christoph Lameter <cl@...ux-foundation.org>,
	"peterz@...radead.org" <peterz@...radead.org>,
	"tytso@....edu" <tytso@....edu>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"elladan@...imo.com" <elladan@...imo.com>,
	"npiggin@...e.de" <npiggin@...e.de>,
	"Barnes, Jesse" <jesse.barnes@...el.com>
Subject: Re: Found the commit that causes the OOMs

On Sun, 28 Jun 2009 23:10:26 +0800
Wu Fengguang <fengguang.wu@...el.com> wrote:

> On Sun, Jun 28, 2009 at 11:01:40PM +0800, KOSAKI Motohiro wrote:
> > > Yes, smaller inactive_anon means smaller (pointless) nr_scanned,
> > > and therefore less slab scans. Strictly speaking, it's not the fault
> > > of your patch. It indicates that the slab scan ratio algorithm should
> > > be updated too :)
> > 
> > I don't think this patch is related to minchan's patch.
> > but I think this patch is good.
> 
> OK.
> 
> > 
> > > We could refine the estimation of "reclaimable" pages like this:
> > 
> > hmhm, reasonable idea.
> 
> Thank you.
> 
> > >
> > > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> > > index 416f748..e9c5b0e 100644
> > > --- a/include/linux/vmstat.h
> > > +++ b/include/linux/vmstat.h
> > > @@ -167,14 +167,7 @@ static inline unsigned long zone_page_state(struct zone *zone,
> > >  }
> > >
> > >  extern unsigned long global_lru_pages(void);
> > > -
> > > -static inline unsigned long zone_lru_pages(struct zone *zone)
> > > -{
> > > -       return (zone_page_state(zone, NR_ACTIVE_ANON)
> > > -               + zone_page_state(zone, NR_ACTIVE_FILE)
> > > -               + zone_page_state(zone, NR_INACTIVE_ANON)
> > > -               + zone_page_state(zone, NR_INACTIVE_FILE));
> > > -}
> > > +extern unsigned long zone_lru_pages(void);
> > >
> > >  #ifdef CONFIG_NUMA
> > >  /*
> > > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > > index 026f452..4281c6f 100644
> > > --- a/mm/vmscan.c
> > > +++ b/mm/vmscan.c
> > > @@ -2123,10 +2123,31 @@ void wakeup_kswapd(struct zone *zone, int order)
> > >
> > >  unsigned long global_lru_pages(void)
> > >  {
> > > -       return global_page_state(NR_ACTIVE_ANON)
> > > -               + global_page_state(NR_ACTIVE_FILE)
> > > -               + global_page_state(NR_INACTIVE_ANON)
> > > -               + global_page_state(NR_INACTIVE_FILE);
> > > +       int nr;
> > > +
> > > +       nr = global_page_state(zone, NR_ACTIVE_FILE) +
> > > +            global_page_state(zone, NR_INACTIVE_FILE);
> > > +
> > > +       if (total_swap_pages)
> > > +               nr += global_page_state(zone, NR_ACTIVE_ANON) +
> > > +                     global_page_state(zone, NR_INACTIVE_ANON);
> > > +
> > > +       return nr;
> > > +}
> > 
> > Please change function name too.
> > Now, this function only accounts for reclaimable pages.
> 
> Good suggestion - I did consider renaming them to *_reclaimable_pages.
> 
> > Plus, total_swap_pages is bad. if we need to concern "reclaimable
> > pages", we should use nr_swap_pages.
> 
> > I mean, a full swap also makes anon pages unreclaimable, although the
> > system has some swap device.
>  
> Right, changed to (nr_swap_pages > 0).
> 
> Thanks,
> Fengguang
> ---
> 
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 416f748..8d8aa20 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -166,15 +166,8 @@ static inline unsigned long zone_page_state(struct zone *zone,
>  	return x;
>  }
>  
> -extern unsigned long global_lru_pages(void);
> -
> -static inline unsigned long zone_lru_pages(struct zone *zone)
> -{
> -	return (zone_page_state(zone, NR_ACTIVE_ANON)
> -		+ zone_page_state(zone, NR_ACTIVE_FILE)
> -		+ zone_page_state(zone, NR_INACTIVE_ANON)
> -		+ zone_page_state(zone, NR_INACTIVE_FILE));
> -}
> +extern unsigned long global_reclaimable_pages(void);
> +extern unsigned long zone_reclaimable_pages(void);
>  
>  #ifdef CONFIG_NUMA
>  /*
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index a91b870..74c3067 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -394,7 +394,8 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
>  		struct zone *z =
>  			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
>  
> -		x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z);
> +		x += zone_page_state(z, NR_FREE_PAGES) +
> +		     zone_reclaimable_pages(z);
>  	}
>  	/*
>  	 * Make sure that the number of highmem pages is never larger
> @@ -418,7 +419,7 @@ unsigned long determine_dirtyable_memory(void)
>  {
>  	unsigned long x;
>  
> -	x = global_page_state(NR_FREE_PAGES) + global_lru_pages();
> +	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
>  
>  	if (!vm_highmem_is_dirtyable)
>  		x -= highmem_dirtyable_memory(x);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 026f452..3768332 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1693,7 +1693,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
>  			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
>  				continue;
>  
> -			lru_pages += zone_lru_pages(zone);
> +			lru_pages += zone_reclaimable_pages(zone);
>  		}
>  	}
>  
> @@ -1910,7 +1910,7 @@ loop_again:
>  		for (i = 0; i <= end_zone; i++) {
>  			struct zone *zone = pgdat->node_zones + i;
>  
> -			lru_pages += zone_lru_pages(zone);
> +			lru_pages += zone_reclaimable_pages(zone);
>  		}
>  
>  		/*
> @@ -1954,7 +1954,7 @@ loop_again:
>  			if (zone_is_all_unreclaimable(zone))
>  				continue;
>  			if (nr_slab == 0 && zone->pages_scanned >=
> -						(zone_lru_pages(zone) * 6))
> +					(zone_reclaimable_pages(zone) * 6))
>  					zone_set_flag(zone,
>  						      ZONE_ALL_UNRECLAIMABLE);
>  			/*
> @@ -2121,12 +2121,33 @@ void wakeup_kswapd(struct zone *zone, int order)
>  	wake_up_interruptible(&pgdat->kswapd_wait);
>  }
>  
> -unsigned long global_lru_pages(void)
> +unsigned long global_reclaimable_pages(void)
>  {
> -	return global_page_state(NR_ACTIVE_ANON)
> -		+ global_page_state(NR_ACTIVE_FILE)
> -		+ global_page_state(NR_INACTIVE_ANON)
> -		+ global_page_state(NR_INACTIVE_FILE);
> +	int nr;
> +
> +	nr = global_page_state(zone, NR_ACTIVE_FILE) +
> +	     global_page_state(zone, NR_INACTIVE_FILE);
> +
> +	if (total_swap_pages)


Don't we have to change from total_swap_pages to nr_swap_pages here, too?

> +		nr += global_page_state(zone, NR_ACTIVE_ANON) +
> +		      global_page_state(zone, NR_INACTIVE_ANON);
> +
> +	return nr;
> +}
> +
> +
> +unsigned long zone_reclaimable_pages(struct zone *zone)
> +{
> +	int nr;
> +
> +	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
> +	     zone_page_state(zone, NR_INACTIVE_FILE);
> +
> +	if (nr_swap_pages > 0)
> +		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
> +		      zone_page_state(zone, NR_INACTIVE_ANON);
> +
> +	return nr;
>  }
>  
>  #ifdef CONFIG_HIBERNATION
> @@ -2198,7 +2219,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
>  
>  	current->reclaim_state = &reclaim_state;
>  
> -	lru_pages = global_lru_pages();
> +	lru_pages = global_reclaimable_pages();
>  	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
>  	/* If slab caches are huge, it's better to hit them first */
>  	while (nr_slab >= lru_pages) {
> @@ -2240,7 +2261,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
>  
>  			reclaim_state.reclaimed_slab = 0;
>  			shrink_slab(sc.nr_scanned, sc.gfp_mask,
> -					global_lru_pages());
> +				    global_reclaimable_pages());
>  			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
>  			if (sc.nr_reclaimed >= nr_pages)
>  				goto out;
> @@ -2257,7 +2278,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
>  	if (!sc.nr_reclaimed) {
>  		do {
>  			reclaim_state.reclaimed_slab = 0;
> -			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
> +			shrink_slab(nr_pages, sc.gfp_mask,
> +				    global_reclaimable_pages());
>  			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
>  		} while (sc.nr_reclaimed < nr_pages &&
>  				reclaim_state.reclaimed_slab > 0);


-- 
Kind Regards
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/