[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Pine.LNX.4.64.0809241451320.18811@hs20-bc2-1.build.redhat.com>
Date: Wed, 24 Sep 2008 14:52:18 -0400 (EDT)
From: Mikulas Patocka <mpatocka@...hat.com>
To: Andrew Morton <akpm@...ux-foundation.org>
cc: linux-kernel@...r.kernel.org, linux-mm@...r.kernel.org,
agk@...hat.com, mbroz@...hat.com, chris@...chsys.com
Subject: [PATCH 2/3] Memory management livelock
Avoid starvation when walking an address space.
Signed-off-by: Mikulas Patocka <mpatocka@...hat.com>
---
include/linux/pagemap.h | 1 +
mm/filemap.c | 20 ++++++++++++++++++++
mm/page-writeback.c | 37 ++++++++++++++++++++++++++++++++++++-
mm/truncate.c | 24 +++++++++++++++++++++++-
4 files changed, 80 insertions(+), 2 deletions(-)
Index: linux-2.6.27-rc7-devel/include/linux/pagemap.h
===================================================================
--- linux-2.6.27-rc7-devel.orig/include/linux/pagemap.h 2008-09-24 02:57:37.000000000 +0200
+++ linux-2.6.27-rc7-devel/include/linux/pagemap.h 2008-09-24 02:59:04.000000000 +0200
@@ -21,6 +21,7 @@
#define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */
#define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */
#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */
+#define AS_STARVATION (__GFP_BITS_SHIFT + 3) /* an anti-starvation barrier */
static inline void mapping_set_error(struct address_space *mapping, int error)
{
Index: linux-2.6.27-rc7-devel/mm/filemap.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/filemap.c 2008-09-24 02:59:33.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/filemap.c 2008-09-24 03:13:47.000000000 +0200
@@ -269,10 +269,19 @@ int wait_on_page_writeback_range(struct
int nr_pages;
int ret = 0;
pgoff_t index;
+ long pages_to_process;
if (end < start)
return 0;
+ /*
+ * Estimate the number of pages to process. If we process significantly
+ * more than this, someone is making writeback pages under us.
+ * We must pull the anti-starvation plug.
+ */
+ pages_to_process = bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
+ pages_to_process += (pages_to_process >> 3) + 16;
+
pagevec_init(&pvec, 0);
index = start;
while ((index <= end) &&
@@ -288,6 +297,10 @@ int wait_on_page_writeback_range(struct
if (page->index > end)
continue;
+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
wait_on_page_writeback(page);
if (PageError(page))
ret = -EIO;
@@ -296,6 +309,13 @@ int wait_on_page_writeback_range(struct
cond_resched();
}
+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
/* Check for outstanding write errors */
if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
ret = -ENOSPC;
Index: linux-2.6.27-rc7-devel/mm/page-writeback.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/page-writeback.c 2008-09-24 03:10:34.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/page-writeback.c 2008-09-24 03:20:24.000000000 +0200
@@ -435,6 +435,18 @@ static void balance_dirty_pages(struct a
struct backing_dev_info *bdi = mapping->backing_dev_info;
+ /*
+ * If there is a sync() starving on this address space, block
+ * writers until it finishes.
+ */
+ if (unlikely(test_bit(AS_STARVATION, &mapping->flags))) {
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
for (;;) {
struct writeback_control wbc = {
.bdi = bdi,
@@ -876,12 +888,21 @@ int write_cache_pages(struct address_spa
pgoff_t end; /* Inclusive */
int scanned = 0;
int range_whole = 0;
+ long pages_to_process;
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
return 0;
}
+ /*
+ * Estimate the number of pages to process. If we process significantly
+ * more than this, someone is making dirty pages under us.
+ * Pull the anti-starvation plug to stop him.
+ */
+ pages_to_process = bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ pages_to_process += (pages_to_process >> 3) + 16;
+
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
@@ -902,7 +923,13 @@ retry:
scanned = 1;
for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ struct page *page;
+
+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
+ page = pvec.pages[i];
/*
* At this point we hold neither mapping->tree_lock nor
@@ -949,6 +976,14 @@ retry:
pagevec_release(&pvec);
cond_resched();
}
+
+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
if (!scanned && !done) {
/*
* We hit the last page and there is more work to be done: wrap
Index: linux-2.6.27-rc7-devel/mm/truncate.c
===================================================================
--- linux-2.6.27-rc7-devel.orig/mm/truncate.c 2008-09-24 03:16:15.000000000 +0200
+++ linux-2.6.27-rc7-devel/mm/truncate.c 2008-09-24 03:18:00.000000000 +0200
@@ -392,6 +392,14 @@ int invalidate_inode_pages2_range(struct
int ret2 = 0;
int did_range_unmap = 0;
int wrapped = 0;
+ long pages_to_process;
+
+ /*
+ * Estimate the number of pages to process. If we process more, someone
+ * is making pages under us.
+ */
+ pages_to_process = mapping->nrpages;
+ pages_to_process += (pages_to_process >> 3) + 16;
pagevec_init(&pvec, 0);
next = start;
@@ -399,9 +407,15 @@ int invalidate_inode_pages2_range(struct
pagevec_lookup(&pvec, mapping, next,
min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
+ struct page *page;
pgoff_t page_index;
+ if (pages_to_process >= 0)
+ if (!pages_to_process--)
+ wait_on_bit_lock(&mapping->flags, AS_STARVATION, wait_action_schedule, TASK_UNINTERRUPTIBLE);
+
+ page = pvec.pages[i];
+
lock_page(page);
if (page->mapping != mapping) {
unlock_page(page);
@@ -449,6 +463,14 @@ int invalidate_inode_pages2_range(struct
pagevec_release(&pvec);
cond_resched();
}
+
+ if (pages_to_process < 0) {
+ smp_mb__before_clear_bit();
+ clear_bit(AS_STARVATION, &mapping->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&mapping->flags, AS_STARVATION);
+ }
+
return ret;
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists