linux-kernel - Re: regression caused by cgroups optimization in 3.17-rc2

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20140905144723.GB13392@cmpxchg.org>
Date:	Fri, 5 Sep 2014 10:47:23 -0400
From:	Johannes Weiner <hannes@...xchg.org>
To:	Michal Hocko <mhocko@...e.cz>
Cc:	Dave Hansen <dave@...1.net>, Hugh Dickins <hughd@...gle.com>,
	Dave Hansen <dave.hansen@...el.com>, Tejun Heo <tj@...nel.org>,
	Linux-MM <linux-mm@...ck.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Vladimir Davydov <vdavydov@...allels.com>,
	LKML <linux-kernel@...r.kernel.org>
Subject: Re: regression caused by cgroups optimization in 3.17-rc2

On Fri, Sep 05, 2014 at 11:25:37AM +0200, Michal Hocko wrote:
> @@ -900,10 +900,10 @@ void lru_add_drain_all(void)
>   * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
>   * will free it.
>   */
> -void release_pages(struct page **pages, int nr, bool cold)
> +static void release_lru_pages(struct page **pages, int nr,
> +			      struct list_head *pages_to_free)
>  {
>  	int i;
> -	LIST_HEAD(pages_to_free);
>  	struct zone *zone = NULL;
>  	struct lruvec *lruvec;
>  	unsigned long uninitialized_var(flags);
> @@ -943,11 +943,26 @@ void release_pages(struct page **pages, int nr, bool cold)
>  		/* Clear Active bit in case of parallel mark_page_accessed */
>  		__ClearPageActive(page);
>  
> -		list_add(&page->lru, &pages_to_free);
> +		list_add(&page->lru, pages_to_free);
>  	}
>  	if (zone)
>  		spin_unlock_irqrestore(&zone->lru_lock, flags);
> +}
> +/*
> + * Batched page_cache_release(). Frees and uncharges all given pages
> + * for which the reference count drops to 0.
> + */
> +void release_pages(struct page **pages, int nr, bool cold)
> +{
> +	LIST_HEAD(pages_to_free);
>  
> +	while (nr) {
> +		int batch = min(nr, PAGEVEC_SIZE);
> +
> +		release_lru_pages(pages, batch, &pages_to_free);
> +		pages += batch;
> +		nr -= batch;
> +	}

We might be able to process a lot more pages in one go if nobody else
needs the lock or the CPU.  Can't we just cycle the lock or reschedule
if necessary?

diff --git a/mm/swap.c b/mm/swap.c
index 6b2dc3897cd5..ee0cf21dd521 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -944,6 +944,15 @@ void release_pages(struct page **pages, int nr, bool cold)
 		__ClearPageActive(page);
 
 		list_add(&page->lru, &pages_to_free);
+
+		if (should_resched() ||
+		    (zone && spin_needbreak(&zone->lru_lock))) {
+			if (zone) {
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
+				zone = NULL;
+			}
+			cond_resched();
+		}
 	}
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3e0ec83d000c..c487ca4682a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -262,19 +262,12 @@ void free_page_and_swap_cache(struct page *page)
  */
 void free_pages_and_swap_cache(struct page **pages, int nr)
 {
-	struct page **pagep = pages;
+	int i;
 
 	lru_add_drain();
-	while (nr) {
-		int todo = min(nr, PAGEVEC_SIZE);
-		int i;
-
-		for (i = 0; i < todo; i++)
-			free_swap_cache(pagep[i]);
-		release_pages(pagep, todo, false);
-		pagep += todo;
-		nr -= todo;
-	}
+	for (i = 0; i < nr; i++)
+		free_swap_cache(pages[i]);
+	release_pages(pages, nr, false);
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/