lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20080220053306.6a1f5600.akpm@linux-foundation.org>
Date:	Wed, 20 Feb 2008 05:33:06 -0800
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:	Larry Woodman <lwoodman@...hat.com>, linux-kernel@...r.kernel.org,
	Nick Piggin <nickpiggin@...oo.com.au>
Subject: Re: Problem with /proc/sys/vm/lowmem_reserve_ratio

On Wed, 20 Feb 2008 13:53:14 +0100 Peter Zijlstra <a.p.zijlstra@...llo.nl> wrote:

> 
> On Tue, 2008-02-19 at 15:55 -0800, Andrew Morton wrote:
> > On Tue, 19 Feb 2008 16:35:49 -0500 Larry Woodman <lwoodman@...hat.com> wrote:
> > 
> > > balance_pgdat() calls zone_watermark_ok() three times; the first call
> > > passes zero (0) as the 4th argument.  This 4th argument is the
> > > classzone_idx, which is used as the index into the zone->lowmem_reserve[]
> > > array.  Since setup_per_zone_lowmem_reserve()
> > > always sets zone->lowmem_reserve[0] = 0 (because there is nothing
> > > below the DMA zone), zone_watermark_ok() will not consider the
> > > lowmem_reserve pages when zero is passed as the 4th arg.  The
> > > 4th argument must be "i", or balance_pgdat() won't even get into the
> > > main loop when lowmem_reserve_ratio is lowered.
> > > 
> > > -------------------------------------------------------------------------
> > > --- linux-2.6.24.noarch/mm/vmscan.c.orig        2008-02-13 11:14:55.000000000 -0500
> > > +++ linux-2.6.24.noarch/mm/vmscan.c     2008-02-13 11:15:02.000000000 -0500
> > > @@ -1375,7 +1375,7 @@ loop_again:
> > >                                continue;
> > > 
> > >                        if (!zone_watermark_ok(zone, order, zone->pages_high,
> > > -                                              0, 0)) {
> > > +                                              i, 0)) {
> > >                                end_zone = i;
> > >                                break;
> > 
> > Yes, thanks, this is in my things-to-worry-about-when-i-get-home bucket. 
> > We should find the changeset which added this and work out if for some
> > reason it was intentional.
> 
> 
> commit e0e1723229b6f96922d10bb932f94d899132b462

Thanks.

> Author: nickpiggin <nickpiggin>
> Date:   Tue Jan 4 04:14:42 2005 +0000
> 
>     [PATCH] mm: teach kswapd about higher order areas
>     
>     Teach kswapd to free memory on behalf of higher order allocators.  This
>     could be important for higher order atomic allocations because they
>     otherwise have no means to free the memory themselves.
>     
>     Signed-off-by: Nick Piggin <nickpiggin@...oo.com.au>
>     Signed-off-by: Andrew Morton <akpm@...l.org>
>     Signed-off-by: Linus Torvalds <torvalds@...l.org>
>     
>     BKrev: 41da1832E5flzqtNXq5m70WxihpcMw
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 2fd19fa..e048bbc 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -264,8 +264,9 @@ typedef struct pglist_data {
>  					     range, including holes */
>  	int node_id;
>  	struct pglist_data *pgdat_next;
> -	wait_queue_head_t       kswapd_wait;
> +	wait_queue_head_t kswapd_wait;
>  	struct task_struct *kswapd;
> +	int kswapd_max_order;
>  } pg_data_t;
>  
>  #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
> @@ -279,7 +280,7 @@ void __get_zone_counts(unsigned long *active, unsigned long *inactive,
>  void get_zone_counts(unsigned long *active, unsigned long *inactive,
>  			unsigned long *free);
>  void build_all_zonelists(void);
> -void wakeup_kswapd(struct zone *zone);
> +void wakeup_kswapd(struct zone *zone, int order);
>  int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
>  		int alloc_type, int can_try_harder, int gfp_high);
>  
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index bb11a6d..1f264ba 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -677,7 +677,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
>  	}
>  
>  	for (i = 0; (z = zones[i]) != NULL; i++)
> -		wakeup_kswapd(z);
> +		wakeup_kswapd(z, order);
>  
>  	/*
>  	 * Go through the zonelist again. Let __GFP_HIGH and allocations
> @@ -1516,6 +1516,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
>  
>  	pgdat->nr_zones = 0;
>  	init_waitqueue_head(&pgdat->kswapd_wait);
> +	pgdat->kswapd_max_order = 0;
>  	
>  	for (j = 0; j < MAX_NR_ZONES; j++) {
>  		struct zone *zone = pgdat->node_zones + j;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index aa074e5..1062a30 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -968,7 +968,7 @@ out:
>   * the page allocator fallback scheme to ensure that aging of pages is balanced
>   * across the zones.
>   */
> -static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
> +static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
>  {
>  	int to_free = nr_pages;
>  	int all_zones_ok;
> @@ -1014,7 +1014,8 @@ loop_again:
>  						priority != DEF_PRIORITY)
>  					continue;
>  
> -				if (zone->free_pages <= zone->pages_high) {
> +				if (!zone_watermark_ok(zone, order,
> +						zone->pages_high, 0, 0, 0)) {
>  					end_zone = i;
>  					goto scan;
>  				}

No, it doesn't look like there was a deeper purpose here.  Just a thinko?

> @@ -1049,7 +1050,8 @@ scan:
>  				continue;
>  
>  			if (nr_pages == 0) {	/* Not software suspend */
> -				if (zone->free_pages <= zone->pages_high)
> +				if (!zone_watermark_ok(zone, order,
> +						zone->pages_high, end_zone, 0, 0))
>  					all_zones_ok = 0;
>  			}
>  			zone->temp_priority = priority;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ