lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 18 Apr 2021 20:00:31 -0400
From:   Waiman Long <longman@...hat.com>
To:     Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...nel.org>,
        Vladimir Davydov <vdavydov.dev@...il.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Tejun Heo <tj@...nel.org>, Christoph Lameter <cl@...ux.com>,
        Pekka Enberg <penberg@...nel.org>,
        David Rientjes <rientjes@...gle.com>,
        Joonsoo Kim <iamjoonsoo.kim@....com>,
        Vlastimil Babka <vbabka@...e.cz>, Roman Gushchin <guro@...com>
Cc:     linux-kernel@...r.kernel.org, cgroups@...r.kernel.org,
        linux-mm@...ck.org, Shakeel Butt <shakeelb@...gle.com>,
        Muchun Song <songmuchun@...edance.com>,
        Alex Shi <alex.shi@...ux.alibaba.com>,
        Chris Down <chris@...isdown.name>,
        Yafang Shao <laoar.shao@...il.com>,
        Wei Yang <richard.weiyang@...il.com>,
        Masayoshi Mizuma <msys.mizuma@...il.com>,
        Xing Zhengjun <zhengjun.xing@...ux.intel.com>,
        Matthew Wilcox <willy@...radead.org>,
        Waiman Long <longman@...hat.com>
Subject: [PATCH v4 4/5] mm/memcg: Save both reclaimable & unreclaimable bytes in object stock

Currently, the object stock structure caches either reclaimable vmstat
bytes or unreclaimable vmstat bytes in its object stock structure. The
hit rate can be improved if both types of vmstat data can be cached
especially for single-node system.

This patch supports the cacheing of both type of vmstat data, though
at the expense of a slightly increased complexity in the caching code.
For large object (>= PAGE_SIZE), vmstat array is done directly without
going through the stock caching step.

On a 2-socket Cascade Lake server with instrumentation enabled, the
miss rates are shown in the table below.

  Initial bootup:

  Kernel       __mod_objcg_state    mod_objcg_state    %age
  ------       -----------------    ---------------    ----
  Before patch      634400              3243830        19.6%
  After patch       419810              3182424        13.2%

  Parallel kernel build:

  Kernel       __mod_objcg_state    mod_objcg_state    %age
  ------       -----------------    ---------------    ----
  Before patch      24329265           142512465       17.1%
  After patch       24051721           142445825       16.9%

There was a decrease of miss rate after initial system bootup. However,
the miss rate for parallel kernel build remained about the same probably
because most of the touched kmemcache objects were reclaimable inodes
and dentries.

Signed-off-by: Waiman Long <longman@...hat.com>
---
 mm/memcontrol.c | 79 +++++++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c13502eab282..a6dd18f6d8a8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2212,8 +2212,8 @@ struct obj_stock {
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
-	int vmstat_idx;
-	int vmstat_bytes;
+	int reclaimable_bytes;		/* NR_SLAB_RECLAIMABLE_B */
+	int unreclaimable_bytes;	/* NR_SLAB_UNRECLAIMABLE_B */
 #else
 	int dummy[0];
 #endif
@@ -3217,40 +3217,56 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 		     enum node_stat_item idx, int nr)
 {
 	unsigned long flags;
-	struct obj_stock *stock = get_obj_stock(&flags);
+	struct obj_stock *stock;
+	int *bytes, *alt_bytes, alt_idx;
+
+	/*
+	 * Directly update vmstat array for big object.
+	 */
+	if (unlikely(abs(nr) >= PAGE_SIZE))
+		goto update_vmstat;
+
+	stock = get_obj_stock(&flags);
+	if (idx == NR_SLAB_RECLAIMABLE_B) {
+		bytes = &stock->reclaimable_bytes;
+		alt_bytes = &stock->unreclaimable_bytes;
+		alt_idx = NR_SLAB_UNRECLAIMABLE_B;
+	} else {
+		bytes = &stock->unreclaimable_bytes;
+		alt_bytes = &stock->reclaimable_bytes;
+		alt_idx = NR_SLAB_RECLAIMABLE_B;
+	}
 
 	/*
-	 * Save vmstat data in stock and skip vmstat array update unless
-	 * accumulating over a page of vmstat data or when pgdat or idx
+	 * Try to save vmstat data in stock and skip vmstat array update
+	 * unless accumulating over a page of vmstat data or when pgdat
 	 * changes.
 	 */
 	if (stock->cached_objcg != objcg) {
 		/* Output the current data as is */
-	} else if (!stock->vmstat_bytes) {
-		/* Save the current data */
-		stock->vmstat_bytes = nr;
-		stock->vmstat_idx = idx;
-		stock->cached_pgdat = pgdat;
-		nr = 0;
-	} else if ((stock->cached_pgdat != pgdat) ||
-		   (stock->vmstat_idx != idx)) {
-		/* Output the cached data & save the current data */
-		swap(nr, stock->vmstat_bytes);
-		swap(idx, stock->vmstat_idx);
+	} else if (stock->cached_pgdat != pgdat) {
+		/* Save the current data and output cached data, if any */
+		swap(nr, *bytes);
 		swap(pgdat, stock->cached_pgdat);
+		if (*alt_bytes) {
+			__mod_objcg_state(objcg, pgdat, alt_idx,
+					  *alt_bytes);
+			*alt_bytes = 0;
+		}
 	} else {
-		stock->vmstat_bytes += nr;
-		if (abs(stock->vmstat_bytes) > PAGE_SIZE) {
-			nr = stock->vmstat_bytes;
-			stock->vmstat_bytes = 0;
+		*bytes += nr;
+		if (abs(*bytes) > PAGE_SIZE) {
+			nr = *bytes;
+			*bytes = 0;
 		} else {
 			nr = 0;
 		}
 	}
-	if (nr)
-		__mod_objcg_state(objcg, pgdat, idx, nr);
-
 	put_obj_stock(flags);
+	if (!nr)
+		return;
+update_vmstat:
+	__mod_objcg_state(objcg, pgdat, idx, nr);
 }
 
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -3303,12 +3319,19 @@ static void drain_obj_stock(struct obj_stock *stock)
 	/*
 	 * Flush the vmstat data in current stock
 	 */
-	if (stock->vmstat_bytes) {
-		__mod_objcg_state(old, stock->cached_pgdat, stock->vmstat_idx,
-				  stock->vmstat_bytes);
+	if (stock->reclaimable_bytes || stock->unreclaimable_bytes) {
+		int bytes;
+
+		if ((bytes = stock->reclaimable_bytes))
+			__mod_objcg_state(old, stock->cached_pgdat,
+					  NR_SLAB_RECLAIMABLE_B, bytes);
+		if ((bytes = stock->unreclaimable_bytes))
+			__mod_objcg_state(old, stock->cached_pgdat,
+					  NR_SLAB_UNRECLAIMABLE_B, bytes);
+
 		stock->cached_pgdat = NULL;
-		stock->vmstat_bytes = 0;
-		stock->vmstat_idx = 0;
+		stock->reclaimable_bytes = 0;
+		stock->unreclaimable_bytes = 0;
 	}
 
 	obj_cgroup_put(old);
-- 
2.18.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ