Currently __module_address() is using a linear search through all
modules in order to find the module corresponding to the provided
address. With a lot of modules this can take a lot of time.

One of the users of this is kernel_text_address(), which is employed
by many stack unwinders; these in turn are used by perf-callchain and
ftrace (possibly from NMI context).

So by optimizing __module_address() we optimize many stack unwinders,
which are used by both perf and tracing in performance sensitive code.

Note that because module (un)loading is very rare, we will typically
iterate tree[0], which will contain only core.node[0] entries --
seeing how the init entries get removed once a module is completely
loaded. This is why we place the core.node[0] entry in the same
cacheline as the search key data.

Cc: Mathieu Desnoyers
Cc: Oleg Nesterov
Cc: "Paul E. McKenney"
Cc: Rusty Russell
Cc: Steven Rostedt
Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/module.h |   22 ++++++++--
 kernel/module.c        |  107 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 121 insertions(+), 8 deletions(-)

--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/rbtree_latch.h>
 #include <...>
 #include <...>
 
@@ -269,8 +270,15 @@ struct module {
 	/* Startup function. */
 	int (*init)(void);
 
-	/* If this is non-NULL, vfree after init() returns */
-	void *module_init;
+	/*
+	 * If this is non-NULL, vfree after init() returns.
+	 *
+	 * Cacheline align here, such that:
+	 *   module_init, module_core, init_size, core_size,
+	 *   init_text_size, core_text_size and ltn_core.node[0]
+	 * are on the same cacheline.
+	 */
+	void *module_init ____cacheline_aligned;
 
 	/* Here is the actual code + data, vfree'd on unload. */
 	void *module_core;
@@ -281,6 +289,14 @@ struct module {
 	/* The size of the executable code in each section. */
 	unsigned int init_text_size, core_text_size;
 
+	/*
+	 * We rely on the order of these two entries; not only do we want
+	 * core.node[0] to be in the same cacheline as the above entries,
+	 * we also assume ltn_init has a higher address than ltn_core.
+	 */
+	struct latch_tree_nodes ltn_core;
+	struct latch_tree_nodes ltn_init;
+
 	/* Size of RO sections of the module (text+rodata) */
 	unsigned int init_ro_size, core_ro_size;
 
@@ -361,7 +377,7 @@ struct module {
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
-};
+} ____cacheline_aligned;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -102,6 +102,99 @@
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_nodes entries. We use the order they appear in struct module to
+ * determine if we need to use the init or core values for the comparisons.
+ */
+
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
+{
+	struct module *mod = n->priv;
+
+	if (n >= mod->ltn_init.node)
+		return (unsigned long)mod->module_init;
+	else
+		return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+	struct module *mod = n->priv;
+
+	if (n >= mod->ltn_init.node)
+		return (unsigned long)mod->init_size;
+	else
+		return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+	return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	unsigned long start, end;
+
+	end = start = __mod_tree_val(n);
+	end += __mod_tree_size(n);
+
+	if (val < start)
+		return -1;
+
+	if (val >= end)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops mod_tree_ops = {
+	.less = mod_tree_less,
+	.comp = mod_tree_comp,
+};
+
+static struct latch_tree_root mod_tree __cacheline_aligned;
+
+/*
+ * These modifications (insert, remove_init and remove) are serialized
+ * by the module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+	latch_tree_insert(&mod->ltn_core, &mod_tree, mod, &mod_tree_ops);
+	if (mod->init_size)
+		latch_tree_insert(&mod->ltn_init, &mod_tree, mod, &mod_tree_ops);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+	if (mod->init_size)
+		latch_tree_erase(&mod->ltn_init, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+	latch_tree_erase(&mod->ltn_core, &mod_tree, &mod_tree_ops);
+	mod_tree_remove_init(mod);
+}
+
+static struct module *mod_tree_find(unsigned long addr)
+{
+	struct latch_tree_node *ltn;
+
+	ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops);
+
+	return ltn ? ltn->priv : NULL;
+}
+
 #ifdef CONFIG_KGDB_KDB
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
@@ -1876,6 +1969,7 @@ static void free_module(struct module *m
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	/* Remove this module from bug list, this uses list_del_rcu */
 	module_bug_cleanup(mod);
 	/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
@@ -3120,6 +3214,7 @@ static noinline int do_init_module(struc
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
+	mod_tree_remove_init(mod);
 	unset_module_init_ro_nx(mod);
 	module_arch_freeing_init(mod);
 	mod->module_init = NULL;
@@ -3190,6 +3285,7 @@ static int add_unformed_module(struct mo
 		goto out;
 	}
 	list_add_rcu(&mod->list, &modules);
+	mod_tree_insert(mod);
 	err = 0;
 
 out:
@@ -3389,6 +3485,7 @@ static int load_module(struct load_info
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	wake_up_all(&module_wq);
 	/* Wait for RCU-sched synchronizing before releasing mod->list. */
 	synchronize_sched();
@@ -3833,13 +3930,13 @@ struct module *__module_address(unsigned
 
 	module_assert_mutex_or_preempt();
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	mod = mod_tree_find(addr);
+	if (mod) {
+		BUG_ON(!within_module(addr, mod));
 		if (mod->state == MODULE_STATE_UNFORMED)
-			continue;
-		if (within_module(addr, mod))
-			return mod;
+			mod = NULL;
 	}
-	return NULL;
+	return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
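
For readers who have not yet looked at the rbtree_latch patch earlier
in this series: the lookup above works because the latch keeps two
copies of the tree, selected by a sequence count, so readers never
block on writers. Below is a minimal user-space sketch of that latch
idea. All names (latched_val, latched_flip, latched_store,
latched_load) are invented for this illustration, writers are assumed
to be serialized externally (as module_mutex serializes tree
modifications in the patch), and the C11 fences only approximate the
kernel's smp_wmb()/smp_rmb() pairing; this is not the kernel
implementation.

	/*
	 * latch_demo.c - user-space sketch of the latch pattern;
	 * build with e.g. "gcc -std=c11 latch_demo.c". Nothing here
	 * is kernel code; it only mirrors the shape of
	 * raw_write_seqcount_latch().
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	struct latched_val {
		atomic_uint seq;	/* odd while copy[0] is being updated */
		atomic_ulong copy[2];	/* two copies of the protected data */
	};

	/* Mirrors raw_write_seqcount_latch(): wmb, seq++, wmb. */
	static void latched_flip(struct latched_val *lv)
	{
		atomic_thread_fence(memory_order_release);	/* prior copy stores first */
		atomic_fetch_add_explicit(&lv->seq, 1, memory_order_relaxed);
		atomic_thread_fence(memory_order_release);	/* seq bump before later stores */
	}

	/* Writer side; callers must serialize writers among themselves. */
	static void latched_store(struct latched_val *lv, unsigned long val)
	{
		latched_flip(lv);	/* seq odd: readers are steered to copy[1] */
		atomic_store_explicit(&lv->copy[0], val, memory_order_relaxed);
		latched_flip(lv);	/* seq even: readers are steered to copy[0] */
		atomic_store_explicit(&lv->copy[1], val, memory_order_relaxed);
	}

	/* Reader side; lock-free, retries only if a writer flipped mid-read. */
	static unsigned long latched_load(struct latched_val *lv)
	{
		unsigned int seq;
		unsigned long val;

		do {
			seq = atomic_load_explicit(&lv->seq, memory_order_acquire);
			val = atomic_load_explicit(&lv->copy[seq & 1],
						   memory_order_relaxed);
			atomic_thread_fence(memory_order_acquire);
		} while (seq != atomic_load_explicit(&lv->seq, memory_order_relaxed));

		return val;
	}

	int main(void)
	{
		struct latched_val lv;

		atomic_init(&lv.seq, 0);
		atomic_init(&lv.copy[0], 0);
		atomic_init(&lv.copy[1], 0);

		latched_store(&lv, 42);
		printf("%lu\n", latched_load(&lv));
		return 0;
	}

The payoff is on the read side: latched_load() never spins waiting
for a writer, it is merely steered to the copy that is currently
stable and retries only if the sequence moved underneath it. That is
the property that makes mod_tree_find() usable from the NMI unwinding
paths mentioned in the changelog.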