lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 26 Feb 2015 15:12:56 +0100
From:	Peter Zijlstra <peterz@...radead.org>
To:	Andi Kleen <ak@...ux.intel.com>
Cc:	Andi Kleen <andi@...stfloor.org>, x86@...nel.org,
	linux-kernel@...r.kernel.org, mathieu.desnoyers@...icios.com,
	oleg@...hat.com, paulmck@...ux.vnet.ibm.com, rusty@...tcorp.com.au,
	mingo@...nel.org
Subject: Re: [RFC][PATCH] module: Optimize __module_address() using a latched
 RB-tree

On Thu, Feb 26, 2015 at 12:43:09PM +0100, Peter Zijlstra wrote:

> +static struct module *__tree_find(struct rb_root *r, unsigned long addr)
> +{
> +	struct rb_node *n = r->rb_node;
> +
> +	while (n) {
> +		struct module *m = mod_entry(n);
> +		int idx = mod_node_idx(m, n);
> +
> +		if (idx & 2) {				/* core */
> +			if (addr < (unsigned long)m->module_core)
> +				n = n->rb_left;
> +			else if (addr >= (unsigned long)m->module_core + m->core_size)
> +				n = n->rb_right;
> +			else
> +				return m;
> +		} else {				/* init */
> +			if (addr < (unsigned long)m->module_init)
> +				n = n->rb_left;
> +			else if (addr >= (unsigned long)m->module_init + m->init_size)
> +				n = n->rb_right;
> +			else
> +				return m;
> +		}
> +	}
> +
> +	return NULL;
> +}

Assuming struct module is cacheline aligned, Rusty? from what I can find
its squirreled away in some data structure:

  mod = (void *)info->sechdrs[info->index.mod].sh_addr

And I can't seem to quickly find its alignment. If its not cacheline
aligned, can we make it so?

The below patch shuffles things around a bit and relies on the above
assumption.

#pahole -EC module ivb-ep-build/kernel/module.o

struct module {
	...

        /* --- cacheline 6 boundary (384 bytes) --- */
        void *                     module_init;                                          /*   384     8 */
        void *                     module_core;                                          /*   392     8 */
        unsigned int               init_size;                                            /*   400     4 */
        unsigned int               core_size;                                            /*   404     4 */
        unsigned int               init_text_size;                                       /*   408     4 */
        unsigned int               core_text_size;                                       /*   412     4 */
        struct module_node {
                struct module *    mod;                                                  /*   416     8 */
                struct rb_node {
                        long unsigned int __rb_parent_color;                             /*   424     8 */
                        struct rb_node * rb_right;                                       /*   432     8 */
                        struct rb_node * rb_left;                                        /*   440     8 */
                } node; /*   424    24 */
        } tree_node[4]; /*   416   128 */

	...
};

Which gets module_{init,core} and {init,core}_size on the same cacheline
as tree_node[0].

And given that module loading/unloading is rare, tree modifications will
be rare, and we'll normally always iterate tree latch0. Furthermore,
execution of init code is brief and by switching core to elements 0,2
we've insured to typically only hit tree_node[0].

Therefore the loads in __tree_find() will (typically) hit the same
cacheline and life is good.

---

Index: linux-2.6/include/linux/module.h
===================================================================
--- linux-2.6.orig/include/linux/module.h	2015-02-26 14:59:06.549177534 +0100
+++ linux-2.6/include/linux/module.h	2015-02-26 14:53:27.708162155 +0100
@@ -222,8 +222,6 @@
 	/* Member of list of modules */
 	struct list_head list;
 
-	struct module_node tree_node[4];
-
 	/* Unique handle for this module */
 	char name[MODULE_NAME_LEN];
 
@@ -278,7 +276,7 @@
 	int (*init)(void);
 
 	/* If this is non-NULL, vfree after init() returns */
-	void *module_init;
+	void *module_init	____cacheline_aligned;
 
 	/* Here is the actual code + data, vfree'd on unload. */
 	void *module_core;
@@ -289,6 +287,8 @@
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
+	struct module_node tree_node[4];
+
 	/* Size of RO sections of the module (text+rodata) */
 	unsigned int init_ro_size, core_ro_size;
 
Index: linux-2.6/kernel/module.c
===================================================================
--- linux-2.6.orig/kernel/module.c	2015-02-26 14:59:06.551177534 +0100
+++ linux-2.6/kernel/module.c	2015-02-26 14:52:04.094158360 +0100
@@ -125,10 +125,10 @@
  * module, but other than that its pretty straight forward.
  *
  * idx
- *  0	- latch 0, init
- *  1	- latch 1, init
- *  2	- latch 0, core
- *  3	- latch 1, core
+ *  0	- latch 0, core
+ *  1	- latch 1, core
+ *  2	- latch 0, init
+ *  3	- latch 1, init
  */
 
 struct latch_tree_root {
@@ -138,10 +138,10 @@
 
 static unsigned long mod_value(struct module *mod, int idx)
 {
-	if (idx & 2) {		/* core */
-		return (unsigned long)mod->module_core;
-	} else {		/* init */
+	if (idx & 2) {		/* init */
 		return (unsigned long)mod->module_init;
+	} else {		/* core */
+		return (unsigned long)mod->module_core;
 	}
 }
 
@@ -207,17 +207,17 @@
 		struct module *m = mod_entry(n);
 		int idx = mod_node_idx(m, n);
 
-		if (idx & 2) {				/* core */
-			if (addr < (unsigned long)m->module_core)
+		if (idx & 2) {				/* init */
+			if (addr < (unsigned long)m->module_init)
 				n = n->rb_left;
-			else if (addr >= (unsigned long)m->module_core + m->core_size)
+			else if (addr >= (unsigned long)m->module_init + m->init_size)
 				n = n->rb_right;
 			else
 				return m;
-		} else {				/* init */
-			if (addr < (unsigned long)m->module_init)
+		} else {				/* core */
+			if (addr < (unsigned long)m->module_core)
 				n = n->rb_left;
-			else if (addr >= (unsigned long)m->module_init + m->init_size)
+			else if (addr >= (unsigned long)m->module_core + m->core_size)
 				n = n->rb_right;
 			else
 				return m;
@@ -232,35 +232,35 @@
 static void mod_tree_insert(struct module *mod)
 {
 	raw_write_seqcount_latch(&mod_tree.seq);
+	__tree_insert(&mod_tree, mod, 0);		/* core */
 	if (mod->init_size)
-		__tree_insert(&mod_tree, mod, 0);	/* init */
-	__tree_insert(&mod_tree, mod, 2);		/* core */
+		__tree_insert(&mod_tree, mod, 2);	/* init */
 	raw_write_seqcount_latch(&mod_tree.seq);
+	__tree_insert(&mod_tree, mod, 1);		/* core */
 	if (mod->init_size)
-		__tree_insert(&mod_tree, mod, 1);	/* init */
-	__tree_insert(&mod_tree, mod, 3);		/* core */
+		__tree_insert(&mod_tree, mod, 3);	/* init */
 }
 
 static void mod_tree_remove_init(struct module *mod)
 {
 	raw_write_seqcount_latch(&mod_tree.seq);
 	if (mod->init_size)
-		__tree_remove(&mod_tree, mod, 0);	/* init */
+		__tree_remove(&mod_tree, mod, 2);	/* init */
 	raw_write_seqcount_latch(&mod_tree.seq);
 	if (mod->init_size)
-		__tree_remove(&mod_tree, mod, 1);	/* init */
+		__tree_remove(&mod_tree, mod, 3);	/* init */
 }
 
 static void mod_tree_remove(struct module *mod)
 {
 	raw_write_seqcount_latch(&mod_tree.seq);
+	__tree_remove(&mod_tree, mod, 0);		/* core */
 	if (mod->init_size)
-		__tree_remove(&mod_tree, mod, 0);	/* init */
-	__tree_remove(&mod_tree, mod, 2);		/* core */
+		__tree_remove(&mod_tree, mod, 2);	/* init */
 	raw_write_seqcount_latch(&mod_tree.seq);
+	__tree_remove(&mod_tree, mod, 1);		/* core */
 	if (mod->init_size)
-		__tree_remove(&mod_tree, mod, 1);	/* init */
-	__tree_remove(&mod_tree, mod, 3);		/* core */
+		__tree_remove(&mod_tree, mod, 3);	/* init */
 }
 
 static struct module *mod_tree_find(unsigned long addr)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ