[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1345647560-30387-8-git-send-email-aarcange@redhat.com>
Date: Wed, 22 Aug 2012 16:58:51 +0200
From: Andrea Arcangeli <aarcange@...hat.com>
To: linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc: Hillf Danton <dhillf@...il.com>, Dan Smith <danms@...ibm.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...e.hu>, Paul Turner <pjt@...gle.com>,
Suresh Siddha <suresh.b.siddha@...el.com>,
Mike Galbraith <efault@....de>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Lai Jiangshan <laijs@...fujitsu.com>,
Bharata B Rao <bharata.rao@...il.com>,
Lee Schermerhorn <Lee.Schermerhorn@...com>,
Rik van Riel <riel@...hat.com>,
Johannes Weiner <hannes@...xchg.org>,
Srivatsa Vaddagiri <vatsa@...ux.vnet.ibm.com>,
Christoph Lameter <cl@...ux.com>,
Alex Shi <alex.shi@...el.com>,
Mauricio Faria de Oliveira <mauricfo@...ux.vnet.ibm.com>,
Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
Don Morris <don.morris@...com>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>
Subject: [PATCH 07/36] autonuma: mm_autonuma and task_autonuma data structures
Define the two data structures that collect the per-process (in the
mm) and per-thread (in the task_struct) statistical information that
is the input of the "CPU follow memory" algorithms in the NUMA
scheduler.
Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
include/linux/autonuma_types.h | 107 ++++++++++++++++++++++++++++++++++++++++
1 files changed, 107 insertions(+), 0 deletions(-)
create mode 100644 include/linux/autonuma_types.h
diff --git a/include/linux/autonuma_types.h b/include/linux/autonuma_types.h
new file mode 100644
index 0000000..9673ce8
--- /dev/null
+++ b/include/linux/autonuma_types.h
@@ -0,0 +1,107 @@
+#ifndef _LINUX_AUTONUMA_TYPES_H
+#define _LINUX_AUTONUMA_TYPES_H
+
+#ifdef CONFIG_AUTONUMA
+
+#include <linux/numa.h>
+
+
+/*
+ * Per-mm (per-process) structure that contains the NUMA memory
+ * placement statistics generated by the knuma scan daemon. This
+ * structure is dynamically allocated only if AutoNUMA is possible on
+ * this system. They are linked together in a list headed within the
+ * knumad_scan structure.
+ */
+struct mm_autonuma {
+ /* link for knuma_scand's list of mm structures to scan */
+ struct list_head mm_node;
+ /* Pointer to associated mm structure */
+ struct mm_struct *mm;
+
+ /*
+ * Zeroed from here during allocation, check
+ * mm_autonuma_reset() if you alter the below.
+ */
+
+ /*
+ * Pass counter for this mm. This exists only to be able to
+ * tell when it's time to apply the exponential backoff on the
+ * task_autonuma statistics.
+ */
+ unsigned long mm_numa_fault_pass;
+ /* Total number of pages that will trigger NUMA faults for this mm */
+ unsigned long mm_numa_fault_tot;
+ /* Number of pages that will trigger NUMA faults for each [nid] */
+ unsigned long mm_numa_fault[];
+ /* flexible array member: must stay last, its size is dynamic */
+};
+
+int alloc_mm_autonuma(struct mm_struct *mm);
+void free_mm_autonuma(struct mm_struct *mm);
+void __init mm_autonuma_init(void);
+
+/*
+ * Per-task (thread) structure that contains the NUMA memory placement
+ * statistics generated by the knuma scan daemon. This structure is
+ * dynamically allocated only if AutoNUMA is possible on this system.
+ * NOTE(review): unlike mm_autonuma, no list linkage is declared in
+ * this structure, so instances do not appear to be chained in a list.
+ */
+struct task_autonuma {
+ /* node id the CPU scheduler should try to stick with (-1 if none) */
+ int task_selected_nid;
+
+ /*
+ * Zeroed from here during allocation, check the matching reset
+ * helper (presumably task_autonuma_reset()) if you alter the below.
+ */
+
+ /*
+ * Pass counter for this task. When the pass counter is found
+ * out of sync with the mm_numa_fault_pass we know it's time
+ * to apply the exponential backoff on the task_autonuma
+ * statistics, and then we synchronize it with
+ * mm_numa_fault_pass. This pass counter is needed because in
+ * knuma_scand we work on the mm and we've no visibility on
+ * the task_autonuma. Furthermore it would be detrimental to
+ * apply exponential backoff to all task_autonuma associated
+ * to a certain mm_autonuma (potentially zeroing out the trail
+ * of statistical data in task_autonuma) if the task is idle
+ * for a long period of time (i.e. several knuma_scand passes).
+ */
+ unsigned long task_numa_fault_pass;
+ /* Total number of eligible pages that triggered NUMA faults */
+ unsigned long task_numa_fault_tot;
+ /* Number of pages that triggered NUMA faults for each [nid] */
+ unsigned long task_numa_fault[];
+ /* flexible array member: must stay last, its size is dynamic */
+};
+
+/* Prototypes only; the definitions live outside this header. */
+int alloc_task_autonuma(struct task_struct *tsk, struct task_struct *orig,
+ int node);
+void __init task_autonuma_init(void);
+void free_task_autonuma(struct task_struct *tsk);
+
+#else /* CONFIG_AUTONUMA */
+
+/*
+ * !CONFIG_AUTONUMA: mm hooks do nothing; the allocation stub returns 0.
+ */
+static inline int alloc_mm_autonuma(struct mm_struct *mm) { return 0; }
+static inline void free_mm_autonuma(struct mm_struct *mm) { }
+static inline void mm_autonuma_init(void) { }
+
+/* !CONFIG_AUTONUMA: task hooks are no-ops; the allocation stub returns 0. */
+static inline int alloc_task_autonuma(struct task_struct *tsk,
+ struct task_struct *orig, int node)
+{
+ return 0;
+}
+static inline void task_autonuma_init(void) { }
+static inline void free_task_autonuma(struct task_struct *tsk) { }
+
+#endif /* CONFIG_AUTONUMA */
+
+#endif /* _LINUX_AUTONUMA_TYPES_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists