[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100324121045.GI21147@csn.ul.ie>
Date: Wed, 24 Mar 2010 12:10:45 +0000
From: Mel Gorman <mel@....ul.ie>
To: Minchan Kim <minchan.kim@...il.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Andrea Arcangeli <aarcange@...hat.com>,
Christoph Lameter <cl@...ux-foundation.org>,
Adam Litke <agl@...ibm.com>, Avi Kivity <avi@...hat.com>,
David Rientjes <rientjes@...gle.com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
Rik van Riel <riel@...hat.com>, linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Subject: Re: [PATCH 10/11] Direct compact when a high-order allocation fails
On Wed, Mar 24, 2010 at 09:06:51PM +0900, Minchan Kim wrote:
> On Wed, Mar 24, 2010 at 8:59 PM, Minchan Kim <minchan.kim@...il.com> wrote:
> > On Wed, Mar 24, 2010 at 8:11 PM, Mel Gorman <mel@....ul.ie> wrote:
> >> On Wed, Mar 24, 2010 at 08:10:40AM +0900, Minchan Kim wrote:
> >>> Hi, Mel.
> >>>
> >>> On Tue, Mar 23, 2010 at 9:25 PM, Mel Gorman <mel@....ul.ie> wrote:
> >>> > Ordinarily when a high-order allocation fails, direct reclaim is entered to
> >>> > free pages to satisfy the allocation. With this patch, it is determined if
> >>> > an allocation failed due to external fragmentation instead of low memory
> >>> > and if so, the calling process will compact until a suitable page is
> >>> > freed. Compaction by moving pages in memory is considerably cheaper than
> >>> > paging out to disk and works where there are locked pages or no swap. If
> >>> > compaction fails to free a page of a suitable size, then reclaim will
> >>> > still occur.
> >>> >
> >>> > Direct compaction returns as soon as possible. As each block is compacted,
> >>> > it is checked if a suitable page has been freed and if so, it returns.
> >>> >
> >>> > Signed-off-by: Mel Gorman <mel@....ul.ie>
> >>> > Acked-by: Rik van Riel <riel@...hat.com>
> >>> > ---
> >>> > include/linux/compaction.h | 16 +++++-
> >>> > include/linux/vmstat.h | 1 +
> >>> > mm/compaction.c | 118 ++++++++++++++++++++++++++++++++++++++++++++
> >>> > mm/page_alloc.c | 26 ++++++++++
> >>> > mm/vmstat.c | 15 +++++-
> >>> > 5 files changed, 172 insertions(+), 4 deletions(-)
> >>> >
> >>> > diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> >>> > index c94890b..b851428 100644
> >>> > --- a/include/linux/compaction.h
> >>> > +++ b/include/linux/compaction.h
> >>> > @@ -1,14 +1,26 @@
> >>> > #ifndef _LINUX_COMPACTION_H
> >>> > #define _LINUX_COMPACTION_H
> >>> >
> >>> > -/* Return values for compact_zone() */
> >>> > +/* Return values for compact_zone() and try_to_compact_pages() */
> >>> > #define COMPACT_INCOMPLETE 0
> >>> > -#define COMPACT_COMPLETE 1
> >>> > +#define COMPACT_PARTIAL 1
> >>> > +#define COMPACT_COMPLETE 2
> >>> >
> >>> > #ifdef CONFIG_COMPACTION
> >>> > extern int sysctl_compact_memory;
> >>> > extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> >>> > void __user *buffer, size_t *length, loff_t *ppos);
> >>> > +
> >>> > +extern int fragmentation_index(struct zone *zone, unsigned int order);
> >>> > +extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
> >>> > + int order, gfp_t gfp_mask, nodemask_t *mask);
> >>> > +#else
> >>> > +static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
> >>> > + int order, gfp_t gfp_mask, nodemask_t *nodemask)
> >>> > +{
> >>> > + return COMPACT_INCOMPLETE;
> >>> > +}
> >>> > +
> >>> > #endif /* CONFIG_COMPACTION */
> >>> >
> >>> > #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
> >>> > diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> >>> > index 56e4b44..b4b4d34 100644
> >>> > --- a/include/linux/vmstat.h
> >>> > +++ b/include/linux/vmstat.h
> >>> > @@ -44,6 +44,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
> >>> > KSWAPD_SKIP_CONGESTION_WAIT,
> >>> > PAGEOUTRUN, ALLOCSTALL, PGROTATED,
> >>> > COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
> >>> > + COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
> >>> > #ifdef CONFIG_HUGETLB_PAGE
> >>> > HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
> >>> > #endif
> >>> > diff --git a/mm/compaction.c b/mm/compaction.c
> >>> > index 8df6e3d..6688700 100644
> >>> > --- a/mm/compaction.c
> >>> > +++ b/mm/compaction.c
> >>> > @@ -34,6 +34,8 @@ struct compact_control {
> >>> > unsigned long nr_anon;
> >>> > unsigned long nr_file;
> >>> >
> >>> > + unsigned int order; /* order a direct compactor needs */
> >>> > + int migratetype; /* MOVABLE, RECLAIMABLE etc */
> >>> > struct zone *zone;
> >>> > };
> >>> >
> >>> > @@ -301,10 +303,31 @@ static void update_nr_listpages(struct compact_control *cc)
> >>> > static inline int compact_finished(struct zone *zone,
> >>> > struct compact_control *cc)
> >>> > {
> >>> > + unsigned int order;
> >>> > + unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
> >>> > +
> >>> > /* Compaction run completes if the migrate and free scanner meet */
> >>> > if (cc->free_pfn <= cc->migrate_pfn)
> >>> > return COMPACT_COMPLETE;
> >>> >
> >>> > + /* Compaction run is not finished if the watermark is not met */
> >>> > + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
> >>> > + return COMPACT_INCOMPLETE;
> >>> > +
> >>> > + if (cc->order == -1)
> >>> > + return COMPACT_INCOMPLETE;
> >>> > +
> >>> > + /* Direct compactor: Is a suitable page free? */
> >>> > + for (order = cc->order; order < MAX_ORDER; order++) {
> >>> > + /* Job done if page is free of the right migratetype */
> >>> > + if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
> >>> > + return COMPACT_PARTIAL;
> >>> > +
> >>> > + /* Job done if allocation would set block type */
> >>> > + if (order >= pageblock_order && zone->free_area[order].nr_free)
> >>> > + return COMPACT_PARTIAL;
> >>> > + }
> >>> > +
> >>> > return COMPACT_INCOMPLETE;
> >>> > }
> >>> >
> >>> > @@ -348,6 +371,101 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
> >>> > return ret;
> >>> > }
> >>> >
> >>> > +static inline unsigned long compact_zone_order(struct zone *zone,
> >>> > + int order, gfp_t gfp_mask)
> >>> > +{
> >>> > + struct compact_control cc = {
> >>> > + .nr_freepages = 0,
> >>> > + .nr_migratepages = 0,
> >>> > + .order = order,
> >>> > + .migratetype = allocflags_to_migratetype(gfp_mask),
> >>> > + .zone = zone,
> >>> > + };
> >>> > + INIT_LIST_HEAD(&cc.freepages);
> >>> > + INIT_LIST_HEAD(&cc.migratepages);
> >>> > +
> >>> > + return compact_zone(zone, &cc);
> >>> > +}
> >>> > +
> >>> > +/**
> >>> > + * try_to_compact_pages - Direct compact to satisfy a high-order allocation
> >>> > + * @zonelist: The zonelist used for the current allocation
> >>> > + * @order: The order of the current allocation
> >>> > + * @gfp_mask: The GFP mask of the current allocation
> >>> > + * @nodemask: The allowed nodes to allocate from
> >>> > + *
> >>> > + * This is the main entry point for direct page compaction.
> >>> > + */
> >>> > +unsigned long try_to_compact_pages(struct zonelist *zonelist,
> >>> > + int order, gfp_t gfp_mask, nodemask_t *nodemask)
> >>> > +{
> >>> > + enum zone_type high_zoneidx = gfp_zone(gfp_mask);
> >>> > + int may_enter_fs = gfp_mask & __GFP_FS;
> >>> > + int may_perform_io = gfp_mask & __GFP_IO;
> >>> > + unsigned long watermark;
> >>> > + struct zoneref *z;
> >>> > + struct zone *zone;
> >>> > + int rc = COMPACT_INCOMPLETE;
> >>> > +
> >>> > + /* Check whether it is worth even starting compaction */
> >>> > + if (order == 0 || !may_enter_fs || !may_perform_io)
> >>> > + return rc;
> >>> > +
> >>> > + /*
> >>> > + * We will not stall if the necessary conditions are not met for
> >>> > + * migration but direct reclaim seems to account stalls similarly
> >>> > + */
>
> Then let's remove this comment.
>
Yes, it hinders more than it helps in this case. It's deleted now.
--
Mel Gorman
Part-time PhD Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists