lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 26 Sep 2013 04:52:10 +0530
From:	"Srivatsa S. Bhat" <srivatsa.bhat@...ux.vnet.ibm.com>
To:	akpm@...ux-foundation.org, mgorman@...e.de, dave@...1.net,
	hannes@...xchg.org, tony.luck@...el.com,
	matthew.garrett@...ula.com, riel@...hat.com, arjan@...ux.intel.com,
	srinivas.pandruvada@...ux.intel.com, willy@...ux.intel.com,
	kamezawa.hiroyu@...fujitsu.com, lenb@...nel.org, rjw@...k.pl
Cc:	gargankita@...il.com, paulmck@...ux.vnet.ibm.com,
	svaidy@...ux.vnet.ibm.com, andi@...stfloor.org,
	isimatu.yasuaki@...fujitsu.com, santosh.shilimkar@...com,
	kosaki.motohiro@...il.com, srivatsa.bhat@...ux.vnet.ibm.com,
	linux-pm@...r.kernel.org, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: [RFC PATCH v4 37/40] mm: Add a kthread to perform targeted compaction
 for memory power management

To further increase the opportunities for memory power savings, we can perform
targeted compaction to evacuate lightly-filled memory regions. For this
purpose, introduce a dedicated per-node kthread to perform the targeted
compaction work.

Our "kmempowerd" kthread uses the generic kthread-worker framework to do most
of the usual work all kthreads need to do. On top of that, this kthread has the
following infrastructure in place, to perform the region evacuation.

A work item is instantiated for every zone. Accessible to this work item is a
spin-lock protected bitmask, which helps us indicate which regions have to be
evacuated. The bits set in the bitmask represent the zone-memory-region number
within that zone that would benefit from evacuation.

The operation of the "kmempowerd" kthread is quite straight-forward: it makes a
local copy of the bitmask (which represents the work it is supposed to do), and
performs targeted region evacuation for each of the regions represented in
that bitmask. When its done, it updates the original bitmask by clearing those
bits, to indicate that the requested work was completed. While the kthread is
going about doing its duty, the original bitmask can be updated to indicate the
arrival of more work. So once the kthread finishes one round of processing, it
re-examines the original bitmask to see if any new work had arrived in the
meantime, and does the corresponding work if required. This process continues
until the original bitmask becomes empty (no bits set, so no more work to do).

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@...ux.vnet.ibm.com>
---

 include/linux/mmzone.h |   10 ++++++
 mm/compaction.c        |   80 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 49c8926..257afdf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -10,6 +10,7 @@
 #include <linux/bitops.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
+#include <linux/kthread-work.h>
 #include <linux/numa.h>
 #include <linux/init.h>
 #include <linux/seqlock.h>
@@ -128,6 +129,13 @@ struct region_allocator {
 	DECLARE_BITMAP(ralloc_mask, MAX_NR_ZONE_REGIONS);
 };
 
+struct mempower_work {
+	spinlock_t		lock;
+	DECLARE_BITMAP(mempower_mask, MAX_NR_ZONE_REGIONS);
+
+	struct kthread_work	work;
+};
+
 struct pglist_data;
 
 /*
@@ -460,6 +468,7 @@ struct zone {
 	 */
 	unsigned int inactive_ratio;
 
+	struct mempower_work	mempower_work;
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
@@ -830,6 +839,7 @@ typedef struct pglist_data {
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+	struct kthread_worker mempower_worker;
 #ifdef CONFIG_NUMA_BALANCING
 	/*
 	 * Lock serializing the per destination node AutoNUMA memory
diff --git a/mm/compaction.c b/mm/compaction.c
index 9449b7f..0511eae 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
 #include <linux/sysfs.h>
 #include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
+#include <linux/kthread.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -1267,6 +1268,85 @@ int evacuate_mem_region(struct zone *z, struct zone_mem_region *zmr)
 	return compact_range(&cc, &ac, &fc, start_pfn, end_pfn);
 }
 
+#define nr_zone_region_bits	MAX_NR_ZONE_REGIONS
+static DECLARE_BITMAP(mpwork_mask, nr_zone_region_bits);
+
+static void kmempowerd(struct kthread_work *work)
+{
+	struct mempower_work *mpwork;
+	struct zone *zone;
+	unsigned long flags;
+	int region_id;
+
+	mpwork = container_of(work, struct mempower_work, work);
+	zone = container_of(mpwork, struct zone, mempower_work);
+
+	spin_lock_irqsave(&mpwork->lock, flags);
+repeat:
+	bitmap_copy(mpwork_mask, mpwork->mempower_mask, nr_zone_region_bits);
+	spin_unlock_irqrestore(&mpwork->lock, flags);
+
+	if (bitmap_empty(mpwork_mask, nr_zone_region_bits))
+		return;
+
+	for_each_set_bit(region_id, mpwork_mask, nr_zone_region_bits)
+		evacuate_mem_region(zone, &zone->zone_regions[region_id]);
+
+	spin_lock_irqsave(&mpwork->lock, flags);
+
+	bitmap_andnot(mpwork->mempower_mask, mpwork->mempower_mask, mpwork_mask,
+		      nr_zone_region_bits);
+	if (!bitmap_empty(mpwork->mempower_mask, nr_zone_region_bits))
+		goto repeat; /* More work got added in the meanwhile */
+
+	spin_unlock_irqrestore(&mpwork->lock, flags);
+
+}
+
+static void kmempowerd_run(int nid)
+{
+	struct kthread_worker *worker;
+	struct mempower_work *mpwork;
+	struct pglist_data *pgdat;
+	struct task_struct *task;
+	unsigned long flags;
+	int i;
+
+	pgdat = NODE_DATA(nid);
+	worker = &pgdat->mempower_worker;
+
+	init_kthread_worker(worker);
+
+	task = kthread_create_on_node(kthread_worker_fn, worker, nid,
+				      "kmempowerd/%d", nid);
+	if (IS_ERR(task))
+		return;
+
+	for (i = 0; i < pgdat->nr_zones; i++) {
+		mpwork = &pgdat->node_zones[i].mempower_work;
+		init_kthread_work(&mpwork->work, kmempowerd);
+
+		spin_lock_init(&mpwork->lock);
+
+		/* Initialize bitmap to zero to indicate no-pending-work */
+		spin_lock_irqsave(&mpwork->lock, flags);
+		bitmap_zero(mpwork->mempower_mask, nr_zone_region_bits);
+		spin_unlock_irqrestore(&mpwork->lock, flags);
+	}
+
+	wake_up_process(task);
+}
+
+int kmempowerd_init(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY)
+		kmempowerd_run(nid);
+
+	return 0;
+}
+module_init(kmempowerd_init);
 
 /* Compact all zones within a node */
 static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ