lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 29 Jan 2013 10:04:52 -0800
From:	Kent Overstreet <koverstreet@...gle.com>
To:	Tejun Heo <tj@...nel.org>
Cc:	Oleg Nesterov <oleg@...hat.com>, srivatsa.bhat@...ux.vnet.ibm.com,
	rusty@...tcorp.com.au, linux-kernel@...r.kernel.org
Subject: [PATCH] module: Convert to generic percpu refcounts

I started screwing around just to see how hard a conversion would be and
what it'd look like. I _think_ this is complete, but there's enough
going on I undoubtedly missed something.

Completely untested - builds and that's it. I'm sure it's broken.

Deletes almost 100 lines of code though. I like how it looks for the
most part... MODULE_STATE_GOING is messy since the percpu ref has to
track that, and I didn't want to track that in two places - but I
couldn't just delete the enum since module notifiers use it. It's
currently vestigial and only used for the notifiers.

Similarly with the other places module->state and percpu_ref_dead() (via
module_is_live()) are checked... bit ugly but ah well.

---
 include/linux/module.h        |  28 +++----
 include/trace/events/module.h |   2 +-
 kernel/debug/kdb/kdb_main.c   |   8 +-
 kernel/module.c               | 165 +++++++++++++-----------------------------
 4 files changed, 60 insertions(+), 143 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index ead1b57..88f78f6 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/tracepoint.h>
 #include <linux/export.h>
+#include <linux/percpu-refcount.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -206,19 +207,7 @@ enum module_state {
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
-/**
- * struct module_ref - per cpu module reference counts
- * @incs: number of module get on this cpu
- * @decs: number of module put on this cpu
- *
- * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
- * put @incs/@...s in same cache line, with no extra memory cost,
- * since alloc_percpu() is fine grained.
- */
-struct module_ref {
-	unsigned long incs;
-	unsigned long decs;
-} __attribute((aligned(2 * sizeof(unsigned long))));
+const char *module_state(struct module *mod);
 
 struct module
 {
@@ -361,13 +350,10 @@ struct module
 	/* What modules do I depend on? */
 	struct list_head target_list;
 
-	/* Who is waiting for us to be unloaded */
-	struct task_struct *waiter;
-
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref __percpu *refptr;
+	struct percpu_ref ref;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
@@ -387,7 +373,7 @@ extern struct mutex module_mutex;
    (IDE & SCSI) require entry into the module during init.*/
 static inline int module_is_live(struct module *mod)
 {
-	return mod->state != MODULE_STATE_GOING;
+	return !percpu_ref_dead(&mod->ref);
 }
 
 struct module *__module_text_address(unsigned long addr);
@@ -451,7 +437,11 @@ extern void __module_put_and_exit(struct module *mod, long code)
 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code);
 
 #ifdef CONFIG_MODULE_UNLOAD
-unsigned long module_refcount(struct module *mod);
+static inline unsigned long module_refcount(struct module *mod)
+{
+	return percpu_ref_count(&mod->ref) - 1;
+}
+
 void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 1619327..3de2241 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -78,7 +78,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_fast_assign(
 		__entry->ip	= ip;
-		__entry->refcnt	= __this_cpu_read(mod->refptr->incs) + __this_cpu_read(mod->refptr->decs);
+		__entry->refcnt = module_refcount(mod);
 		__assign_str(name, mod->name);
 	),
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 8875254..813c0ed 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1978,13 +1978,7 @@ static int kdb_lsmod(int argc, const char **argv)
 #ifdef CONFIG_MODULE_UNLOAD
 		kdb_printf("%4ld ", module_refcount(mod));
 #endif
-		if (mod->state == MODULE_STATE_GOING)
-			kdb_printf(" (Unloading)");
-		else if (mod->state == MODULE_STATE_COMING)
-			kdb_printf(" (Loading)");
-		else
-			kdb_printf(" (Live)");
-		kdb_printf(" 0x%p", mod->module_core);
+		kdb_printf(" (%s) 0x%p", module_state(mod), mod->module_core);
 
 #ifdef CONFIG_MODULE_UNLOAD
 		{
diff --git a/kernel/module.c b/kernel/module.c
index 921bed4..08aa83a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -625,21 +625,15 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 EXPORT_TRACEPOINT_SYMBOL(module_get);
 
 /* Init the unload section of the module. */
-static int module_unload_init(struct module *mod)
+static void module_unload_init(struct module *mod)
 {
-	mod->refptr = alloc_percpu(struct module_ref);
-	if (!mod->refptr)
-		return -ENOMEM;
+	percpu_ref_init(&mod->ref);
 
 	INIT_LIST_HEAD(&mod->source_list);
 	INIT_LIST_HEAD(&mod->target_list);
 
 	/* Hold reference count during initialization. */
-	__this_cpu_write(mod->refptr->incs, 1);
-	/* Backwards compatibility macros put refcount during init. */
-	mod->waiter = current;
-
-	return 0;
+	__module_get(mod);
 }
 
 /* Does a already use b? */
@@ -719,8 +713,6 @@ static void module_unload_free(struct module *mod)
 		kfree(use);
 	}
 	mutex_unlock(&module_mutex);
-
-	free_percpu(mod->refptr);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -745,6 +737,15 @@ struct stopref
 	int *forced;
 };
 
+static void stop_module(struct module *mod)
+{
+	/* Mark it as dying, drop base ref */
+	if (percpu_ref_kill(&mod->ref))
+		module_put(mod);
+	else
+		WARN(1, "initial module ref already dropped");
+}
+
 /* Whole machine is stopped with interrupts off when this runs. */
 static int __try_stop_module(void *_sref)
 {
@@ -756,8 +757,7 @@ static int __try_stop_module(void *_sref)
 			return -EWOULDBLOCK;
 	}
 
-	/* Mark it as dying. */
-	sref->mod->state = MODULE_STATE_GOING;
+	stop_module(sref->mod);
 	return 0;
 }
 
@@ -769,57 +769,14 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 		return stop_machine(__try_stop_module, &sref, NULL);
 	} else {
 		/* We don't need to stop the machine for this. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
+		stop_module(mod);
 		return 0;
 	}
 }
 
-unsigned long module_refcount(struct module *mod)
-{
-	unsigned long incs = 0, decs = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		decs += per_cpu_ptr(mod->refptr, cpu)->decs;
-	/*
-	 * ensure the incs are added up after the decs.
-	 * module_put ensures incs are visible before decs with smp_wmb.
-	 *
-	 * This 2-count scheme avoids the situation where the refcount
-	 * for CPU0 is read, then CPU0 increments the module refcount,
-	 * then CPU1 drops that refcount, then the refcount for CPU1 is
-	 * read. We would record a decrement but not its corresponding
-	 * increment so we would see a low count (disaster).
-	 *
-	 * Rare situation? But module_refcount can be preempted, and we
-	 * might be tallying up 4096+ CPUs. So it is not impossible.
-	 */
-	smp_rmb();
-	for_each_possible_cpu(cpu)
-		incs += per_cpu_ptr(mod->refptr, cpu)->incs;
-	return incs - decs;
-}
-EXPORT_SYMBOL(module_refcount);
-
 /* This exists whether we can unload or not */
 static void free_module(struct module *mod);
 
-static void wait_for_zero_refcount(struct module *mod)
-{
-	/* Since we might sleep for some time, release the mutex first */
-	mutex_unlock(&module_mutex);
-	for (;;) {
-		pr_debug("Looking at refcount...\n");
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (module_refcount(mod) == 0)
-			break;
-		schedule();
-	}
-	current->state = TASK_RUNNING;
-	mutex_lock(&module_mutex);
-}
-
 SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		unsigned int, flags)
 {
@@ -850,7 +807,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 	}
 
 	/* Doing init or already dying? */
-	if (mod->state != MODULE_STATE_LIVE) {
+	if (mod->state != MODULE_STATE_LIVE || !module_is_live(mod)) {
 		/* FIXME: if (force), slam module count and wake up
                    waiter --RR */
 		pr_debug("%s already dying\n", mod->name);
@@ -868,19 +825,17 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		}
 	}
 
-	/* Set this up before setting mod->state */
-	mod->waiter = current;
-
 	/* Stop the machine so refcounts can't move and disable module. */
 	ret = try_stop_module(mod, flags, &forced);
 	if (ret != 0)
 		goto out;
 
+	mutex_unlock(&module_mutex);
+
 	/* Never wait if forced. */
-	if (!forced && module_refcount(mod) != 0)
-		wait_for_zero_refcount(mod);
+	if (!forced)
+		wait_event(module_wq, !percpu_ref_count(&mod->ref));
 
-	mutex_unlock(&module_mutex);
 	/* Final destruction now no one is using it. */
 	if (mod->exit != NULL)
 		mod->exit();
@@ -962,45 +917,29 @@ static struct module_attribute modinfo_refcnt =
 void __module_get(struct module *module)
 {
 	if (module) {
-		preempt_disable();
-		__this_cpu_inc(module->refptr->incs);
 		trace_module_get(module, _RET_IP_);
-		preempt_enable();
+		percpu_ref_get(&module->ref);
 	}
 }
 EXPORT_SYMBOL(__module_get);
 
 bool try_module_get(struct module *module)
 {
-	bool ret = true;
-
 	if (module) {
-		preempt_disable();
-
-		if (likely(module_is_live(module))) {
-			__this_cpu_inc(module->refptr->incs);
-			trace_module_get(module, _RET_IP_);
-		} else
-			ret = false;
-
-		preempt_enable();
-	}
-	return ret;
+		trace_module_get(module, _RET_IP_);
+		return percpu_ref_tryget(&module->ref);
+	} else
+		return true;
 }
 EXPORT_SYMBOL(try_module_get);
 
 void module_put(struct module *module)
 {
 	if (module) {
-		preempt_disable();
-		smp_wmb(); /* see comment in module_refcount */
-		__this_cpu_inc(module->refptr->decs);
-
 		trace_module_put(module, _RET_IP_);
-		/* Maybe they're waiting for us to drop reference? */
-		if (unlikely(!module_is_live(module)))
-			wake_up_process(module->waiter);
-		preempt_enable();
+
+		if (percpu_ref_put(&module->ref))
+			wake_up_all(&module_wq);
 	}
 }
 EXPORT_SYMBOL(module_put);
@@ -1048,25 +987,27 @@ static size_t module_flags_taint(struct module *mod, char *buf)
 	return l;
 }
 
-static ssize_t show_initstate(struct module_attribute *mattr,
-			      struct module_kobject *mk, char *buffer)
+const char *module_state(struct module *mod)
 {
-	const char *state = "unknown";
-
-	switch (mk->mod->state) {
+	switch (mod->state) {
 	case MODULE_STATE_LIVE:
-		state = "live";
-		break;
+		if (module_is_live(mod))
+			return "live";
+		else
+			return "going";
 	case MODULE_STATE_COMING:
-		state = "coming";
-		break;
-	case MODULE_STATE_GOING:
-		state = "going";
-		break;
+		return "coming";
+	case MODULE_STATE_UNFORMED:
+		return "unformed";
 	default:
-		BUG();
+		return "unknown";
 	}
-	return sprintf(buffer, "%s\n", state);
+}
+
+static ssize_t show_initstate(struct module_attribute *mattr,
+			      struct module_kobject *mk, char *buffer)
+{
+	return sprintf(buffer, "%s\n", module_state(mk->mod));
 }
 
 static struct module_attribute modinfo_initstate =
@@ -3022,8 +2963,7 @@ static bool finished_loading(const char *name)
 
 	mutex_lock(&module_mutex);
 	mod = find_module_all(name, true);
-	ret = !mod || mod->state == MODULE_STATE_LIVE
-		|| mod->state == MODULE_STATE_GOING;
+	ret = !mod || mod->state == MODULE_STATE_LIVE;
 	mutex_unlock(&module_mutex);
 
 	return ret;
@@ -3073,8 +3013,7 @@ static int do_init_module(struct module *mod)
 	if (ret < 0) {
 		/* Init routine failed: abort.  Try to protect us from
                    buggy refcounters. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
+		stop_module(mod);
 		module_put(mod);
 		blocking_notifier_call_chain(&module_notify_list,
 					     MODULE_STATE_GOING, mod);
@@ -3245,9 +3184,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 #endif
 
 	/* Now module is in final location, initialize linked lists, etc. */
-	err = module_unload_init(mod);
-	if (err)
-		goto unlink_mod;
+	module_unload_init(mod);
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
@@ -3323,7 +3260,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	free_modinfo(mod);
  free_unload:
 	module_unload_free(mod);
- unlink_mod:
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
@@ -3614,12 +3550,12 @@ static char *module_flags(struct module *mod, char *buf)
 
 	BUG_ON(mod->state == MODULE_STATE_UNFORMED);
 	if (mod->taints ||
-	    mod->state == MODULE_STATE_GOING ||
+	    !module_is_live(mod) ||
 	    mod->state == MODULE_STATE_COMING) {
 		buf[bx++] = '(';
 		bx += module_flags_taint(mod, buf + bx);
 		/* Show a - for module-is-being-unloaded */
-		if (mod->state == MODULE_STATE_GOING)
+		if (!module_is_live(mod))
 			buf[bx++] = '-';
 		/* Show a + for module-is-being-loaded */
 		if (mod->state == MODULE_STATE_COMING)
@@ -3663,10 +3599,7 @@ static int m_show(struct seq_file *m, void *p)
 	print_unload_info(m, mod);
 
 	/* Informative for users. */
-	seq_printf(m, " %s",
-		   mod->state == MODULE_STATE_GOING ? "Unloading":
-		   mod->state == MODULE_STATE_COMING ? "Loading":
-		   "Live");
+	seq_printf(m, " %s", module_state(mod));
 	/* Used by oprofile and other similar tools. */
 	seq_printf(m, " 0x%pK", mod->module_core);
 
-- 
1.7.12

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ