[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <afe668f90701110005ya2e8187pc6604c5aad24cc84@mail.gmail.com>
Date: Thu, 11 Jan 2007 16:05:16 +0800
From: "Roy Huang" <royhuang9@...il.com>
To: Aubrey <aubreylee@...il.com>
Cc: "Nick Piggin" <nickpiggin@...oo.com.au>,
"Andrew Morton" <akpm@...l.org>,
"Linus Torvalds" <torvalds@...l.org>,
"Hua Zhong" <hzhong@...il.com>, "Hugh Dickins" <hugh@...itas.com>,
linux-kernel@...r.kernel.org, hch@...radead.org,
kenneth.w.chen@...el.com, mjt@....msk.ru
Subject: Re: O_DIRECT question
On a embedded systerm, limiting page cache can relieve memory
fragmentation. There is a patch against 2.6.19, which limit every
opened file page cache and total pagecache. When the limit reach, it
will release the page cache overrun the limit.
Index: include/linux/pagemap.h
===================================================================
--- include/linux/pagemap.h (revision 2628)
+++ include/linux/pagemap.h (working copy)
@@ -12,6 +12,7 @@
#include <asm/uaccess.h>
#include <linux/gfp.h>
+extern int total_pagecache_limit;
/*
* Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
* allocation mode flags.
Index: include/linux/fs.h
===================================================================
--- include/linux/fs.h (revision 2628)
+++ include/linux/fs.h (working copy)
@@ -444,6 +444,10 @@
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+#ifdef CONFIG_LIMIT_PAGECACHE
+ unsigned long pages_limit;
+ struct list_head page_head;
+#endif
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
Index: include/linux/mm.h
===================================================================
--- include/linux/mm.h (revision 2628)
+++ include/linux/mm.h (working copy)
@@ -231,6 +231,9 @@
#else
#define VM_BUG_ON(condition) do { } while(0)
#endif
+#ifdef CONFIG_LIMIT_PAGECACHE
+ struct list_head page_list;
+#endif
/*
* Methods to modify the page usage count.
@@ -1030,7 +1033,21 @@
/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
+/* possible outcome of pageout() */
+typedef enum {
+ /* failed to write page out, page is locked */
+ PAGE_KEEP,
+ /* move page to the active list, page is locked */
+ PAGE_ACTIVATE,
+ /* page has been sent to the disk successfully, page is unlocked */
+ PAGE_SUCCESS,
+ /* page is clean and locked */
+ PAGE_CLEAN,
+} pageout_t;
+
+pageout_t pageout(struct page *page, struct address_space *mapping);
+
/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */
Index: init/Kconfig
===================================================================
--- init/Kconfig (revision 2628)
+++ init/Kconfig (working copy)
@@ -419,6 +419,19 @@
option replaces shmem and tmpfs with the much simpler ramfs code,
which may be appropriate on small systems without swap.
+config LIMIT_PAGECACHE
+ bool "Limit page caches" if EMBEDDED
+
+config PAGECACHE_LIMIT
+ int "Page cache limit for every file in page unit"
+ depends on LIMIT_PAGECACHE
+ default 32
+
+config PAGECACHE_LIMIT_TOTAL
+ int "Total page cache limit in MB unit"
+ depends on LIMIT_PAGECACHE
+ default 10
+
choice
prompt "Page frame management algorithm"
default BUDDY
Index: fs/inode.c
===================================================================
--- fs/inode.c (revision 2628)
+++ fs/inode.c (working copy)
@@ -205,6 +205,10 @@
INIT_LIST_HEAD(&inode->inotify_watches);
mutex_init(&inode->inotify_mutex);
#endif
+#ifdef CONFIG_LIMIT_PAGECACHE
+ INIT_LIST_HEAD(&inode->i_data.page_head);
+ inode->i_data.pages_limit = CONFIG_PAGECACHE_LIMIT;
+#endif
}
EXPORT_SYMBOL(inode_init_once);
Index: mm/filemap.c
===================================================================
--- mm/filemap.c (revision 2628)
+++ mm/filemap.c (working copy)
@@ -18,6 +18,7 @@
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
@@ -30,6 +31,9 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/cpuset.h>
+#include <linux/rmap.h>
+#include <linux/buffer_head.h>
+#include <linux/page-flags.h>
#include "filemap.h"
#include "internal.h"
@@ -119,6 +123,9 @@
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
+#ifdef CONFIG_LIMIT_PAGECACHE
+ list_del_init(&page->page_list);
+#endif
__dec_zone_page_state(page, NR_FILE_PAGES);
}
@@ -169,6 +176,96 @@
return 0;
}
+#ifdef CONFIG_LIMIT_PAGECACHE
+static void balance_cache(struct address_space *mapping)
+{
+ /* Release half of the pages */
+ int count ;
+ int nr_released = 0;
+ struct page *page;
+ struct zone *zone= NULL;
+ struct pagevec freed_pvec;
+ struct list_head ret_list;
+
+ count = mapping->nrpages /2;
+ pagevec_init(&freed_pvec, 0);
+ INIT_LIST_HEAD(&ret_list);
+ lru_add_drain();
+ while(count-->0) {
+ page = list_entry(mapping->page_head.prev, struct page, page_list);
+ zone = page_zone(page);
+ TestClearPageLRU(page);
+ if (PageActive(page))
+ del_page_from_active_list(zone, page);
+ else
+ del_page_from_inactive_list(zone, page);
+
+ list_del_init(&page->page_list); /* Remove from current process's
page list */
+ get_page(page);
+
+ if (TestSetPageLocked(page))
+ goto __keep;
+ if (PageWriteback(page))
+ goto __keep_locked;
+ if (page_referenced(page, 1))
+ goto __keep_locked;
+ if (PageDirty(page)) {
+ switch(pageout(page, mapping)) {
+ case PAGE_KEEP:
+ case PAGE_ACTIVATE:
+ goto __keep_locked;
+ case PAGE_SUCCESS:
+ if (PageWriteback(page) || PageDirty(page))
+ goto __keep;
+ if (TestSetPageLocked(page))
+ goto __keep;
+ if (PageDirty(page) || PageWriteback(page))
+ goto __keep_locked;
+ case PAGE_CLEAN:
+ ;
+ }
+ }
+
+ if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+ goto __keep_locked;
+ if (!remove_mapping(mapping, page))
+ goto __keep_locked;
+
+ unlock_page(page);
+ nr_released++;
+ /* This page maybe in Active LRU */
+ ClearPageActive(page);
+ ClearPageUptodate(page);
+ if (!pagevec_add(&freed_pvec, page))
+ __pagevec_release_nonlru(&freed_pvec);
+ continue;
+__keep_locked:
+ unlock_page(page);
+__keep:
+ SetPageLRU(page);
+ if (PageActive(page)) {
+ add_page_to_active_list(zone, page);
+ } else {
+ add_page_to_inactive_list(zone, page);
+ }
+
+ list_add(&page->page_list, &ret_list);
+ }
+ while(!list_empty(&ret_list)) {
+ page = list_entry(ret_list.prev, struct page, page_list);
+ list_move_tail(&page->page_list, &mapping->page_head);
+ put_page(page);
+ }
+ if (pagevec_count(&freed_pvec))
+ __pagevec_release_nonlru(&freed_pvec);
+
+ if (global_page_state(NR_FILE_PAGES) > total_pagecache_limit)
+ if (zone) {
+ wakeup_kswapd(zone, 0);
+ }
+}
+#endif
+
/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
@@ -448,6 +545,10 @@
page->mapping = mapping;
page->index = offset;
mapping->nrpages++;
+#ifdef CONFIG_LIMIT_PAGECACHE
+ list_add(&page->page_list, &mapping->page_head);
+#endif
+
__inc_zone_page_state(page, NR_FILE_PAGES);
}
write_unlock_irq(&mapping->tree_lock);
@@ -1085,6 +1186,10 @@
page_cache_release(cached_page);
if (filp)
file_accessed(filp);
+#ifdef CONFIG_LIMIT_PAGECACHE
+ if (mapping->nrpages >= mapping->pages_limit)
+ balance_cache(mapping);
+#endif
}
EXPORT_SYMBOL(do_generic_mapping_read);
@@ -2195,6 +2300,11 @@
if (cached_page)
page_cache_release(cached_page);
+#ifdef CONFIG_LIMIT_PAGECACHE
+ if (mapping->nrpages >= mapping->pages_limit)
+ balance_cache(mapping);
+#endif
+
/*
* For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
*/
Index: mm/vmscan.c
===================================================================
--- mm/vmscan.c (revision 2628)
+++ mm/vmscan.c (working copy)
@@ -116,6 +116,7 @@
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
+int total_pagecache_limit = CONFIG_PAGECACHE_LIMIT_TOTAL * 1024 / 4;
/*
* Add a shrinker callback to be called from the vm
@@ -292,23 +293,11 @@
unlock_page(page);
}
-/* possible outcome of pageout() */
-typedef enum {
- /* failed to write page out, page is locked */
- PAGE_KEEP,
- /* move page to the active list, page is locked */
- PAGE_ACTIVATE,
- /* page has been sent to the disk successfully, page is unlocked */
- PAGE_SUCCESS,
- /* page is clean and locked */
- PAGE_CLEAN,
-} pageout_t;
-
/*
* pageout is called by shrink_page_list() for each dirty page.
* Calls ->writepage().
*/
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+pageout_t pageout(struct page *page, struct address_space *mapping)
{
/*
* If the page is dirty, only perform writeback if that write
@@ -1328,7 +1317,11 @@
order = pgdat->kswapd_max_order;
}
finish_wait(&pgdat->kswapd_wait, &wait);
- balance_pgdat(pgdat, order);
+ if (global_page_state(NR_FILE_PAGES) >= total_pagecache_limit)
+ balance_pgdat(pgdat, (global_page_state(NR_FILE_PAGES) \
+ - total_pagecache_limit), order);
+ else
+ balance_pgdat(pgdat, order);
}
return 0;
}
@@ -1344,8 +1337,10 @@
return;
pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
- return;
+ if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) {
+ if (global_page_state(NR_FILE_PAGES) < total_pagecache_limit)
+ return;
+ }
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists