lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100121054734.GC24236@localhost>
Date:	Thu, 21 Jan 2010 13:47:34 +0800
From:	Wu Fengguang <fengguang.wu@...el.com>
To:	Chris Frost <frost@...ucla.edu>
Cc:	Andrew Morton <akpm@...ux-foundation.org>,
	Steve Dickson <steved@...hat.com>,
	David Howells <dhowells@...hat.com>,
	Xu Chenfeng <xcf@...c.edu.cn>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Steve VanDeBogart <vandebo-lkml@...dbox.net>
Subject: Re: [PATCH] mm/readahead.c: update the LRU positions of in-core
	pages, too

Hi Chris,

On Wed, Jan 20, 2010 at 01:55:36PM -0800, Chris Frost wrote:
> This patch changes readahead to move pages that are already in memory and
> in the inactive list to the top of the list. This mirrors the behavior
> of non-in-core pages. The position of pages already in the active list
> remains unchanged.
 
This is good in general. 

> @@ -170,19 +201,24 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  		rcu_read_lock();
>  		page = radix_tree_lookup(&mapping->page_tree, page_offset);
>  		rcu_read_unlock();
> -		if (page)
> -			continue;
> -
> -		page = page_cache_alloc_cold(mapping);
> -		if (!page)
> -			break;
> -		page->index = page_offset;
> -		list_add(&page->lru, &page_pool);
> -		if (page_idx == nr_to_read - lookahead_size)
> -			SetPageReadahead(page);
> -		ret++;
> +		if (page) {
> +			page_cache_get(page);

This is racy - the page may have already been freed and possibly reused
by others in the meantime.

If you do page_cache_get() on a random page, it may trigger bad_page()
in the buddy page allocator, or the VM_BUG_ON() in put_page_testzero().

> +			if (!pagevec_add(&retain_vec, page))
> +				retain_pages(&retain_vec);
> +		} else {
> +			page = page_cache_alloc_cold(mapping);
> +			if (!page)
> +				break;
> +			page->index = page_offset;
> +			list_add(&page->lru, &page_pool);
> +			if (page_idx == nr_to_read - lookahead_size)
> +				SetPageReadahead(page);
> +			ret++;
> +		}

Years ago I wrote a similar function, which can be called for both
in-kernel-readahead (when it decides not to bring in new pages, but
only retain existing pages) and fadvise-readahead (where it wants to
read new pages as well as retain existing pages).

For better chance of code reuse, would you rebase the patch on it?
(You'll have to do some cleanups first.)

+/*
+ * Move pages in danger (of thrashing) to the head of inactive_list.
+ * Not expected to happen frequently.
+ */
+static unsigned long rescue_pages(struct address_space *mapping,
+				  struct file_ra_state *ra,
+				  pgoff_t index, unsigned long nr_pages)
+{
+	struct page *grabbed_page;
+	struct page *page;
+	struct zone *zone;
+	int pgrescue = 0;
+
+	dprintk("rescue_pages(ino=%lu, index=%lu, nr=%lu)\n",
+			mapping->host->i_ino, index, nr_pages);
+
+	for(; nr_pages;) {
+		grabbed_page = page = find_get_page(mapping, index);
+		if (!page) {
+			index++;
+			nr_pages--;
+			continue;
+		}
+
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+
+		if (!PageLRU(page)) {
+			index++;
+			nr_pages--;
+			goto next_unlock;
+		}
+
+		do {
+			struct page *the_page = page;
+			page = list_entry((page)->lru.prev, struct page, lru);
+			index++;
+			nr_pages--;
+			ClearPageReadahead(the_page);
+			if (!PageActive(the_page) &&
+					!PageLocked(the_page) &&
+					page_count(the_page) == 1) {
+				list_move(&the_page->lru, &zone->inactive_list);
+				pgrescue++;
+			}
+		} while (nr_pages &&
+				page_mapping(page) == mapping &&
+				page_index(page) == index);
+
+next_unlock:
+		spin_unlock_irq(&zone->lru_lock);
+		page_cache_release(grabbed_page);
+		cond_resched();
+	}
+
+	ra_account(ra, RA_EVENT_READAHEAD_RESCUE, pgrescue);
+	return pgrescue;
+}

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ