lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8638842.4sDhnmlMBm@hyperiorarchmachine>
Date:   Mon, 14 Jun 2021 21:31:14 +0300
From:   jarmo.tiitto@...il.com
To:     Jarmo Tiitto <jarmo.tiitto@...il.com>,
        Kees Cook <keescook@...omium.org>
Cc:     Sami Tolvanen <samitolvanen@...gle.com>,
        Bill Wendling <wcw@...gle.com>,
        Nathan Chancellor <nathan@...nel.org>,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        clang-built-linux@...glegroups.com, linux-kernel@...r.kernel.org,
        morbo@...gle.com, jeyu@...nel.org
Subject: Re: [RFC PATCH 4/5] pgo: Add module notifier machinery

Kees Cook wrote maanantaina 14. kesäkuuta 2021 19.00.34 EEST:
> On Sat, Jun 12, 2021 at 06:24:25AM +0300, Jarmo Tiitto wrote:
> > Add module notifier callback and register modules
> > into prf_list.
> > 
> > For each module that has __llvm_prf_{data,cnts,names,vnds} sections
> > The callback creates debugfs <module>.profraw entry and
> > links new prf_object into prf_list.
> > 
> > This enables profiling for all loaded modules.
> > 
> > * Moved rcu_read_lock() outside of allocate_node() in order
> > 
> >   to protect __llvm_profile_instrument_target() from module unload(s)
> > 
> > Signed-off-by: Jarmo Tiitto <jarmo.tiitto@...il.com>
> > ---
> > v2: Passed QEMU SMP boot test into minimal Arch Linux distro,
> > module loading and unloading work without warnings.
> > Module profile data looks ok. :-)
> > ---
> > 
> >  kernel/pgo/fs.c         | 111 ++++++++++++++++++++++++++++++++++++++++
> >  kernel/pgo/instrument.c |  19 ++++---
> >  2 files changed, 122 insertions(+), 8 deletions(-)
> > 
> > diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> > index 84b36e61758b..98b982245b58 100644
> > --- a/kernel/pgo/fs.c
> > +++ b/kernel/pgo/fs.c
> > @@ -419,6 +419,110 @@ static const struct file_operations prf_reset_fops = 
{
> > 
> >  	.llseek		= noop_llseek,
> >  
> >  };
> > 
> > +static void pgo_module_init(struct module *mod)
> > +{
> > +	struct prf_object *po;
> > +	char fname[MODULE_NAME_LEN + 9]; /* +strlen(".profraw") */
> > +	unsigned long flags;
> > +
> > +	/* add new prf_object entry for the module */
> > +	po = kzalloc(sizeof(*po), GFP_KERNEL);
> > +	if (!po)
> > +		goto out;
> > +
> > +	po->mod_name = mod->name;
> > +
> > +	fname[0] = 0;
> > +	snprintf(fname, sizeof(fname), "%s.profraw", po->mod_name);
> > +
> > +	/* setup prf_object sections */
> > +	po->data = mod->prf_data;
> > +	po->data_num = prf_get_count(mod->prf_data,
> > +		(char *)mod->prf_data + mod->prf_data_size, 
sizeof(po->data[0]));
> > +
> > +	po->cnts = mod->prf_cnts;
> > +	po->cnts_num = prf_get_count(mod->prf_cnts,
> > +		(char *)mod->prf_cnts + mod->prf_cnts_size, 
sizeof(po->cnts[0]));
> > +
> > +	po->names = mod->prf_names;
> > +	po->names_num = prf_get_count(mod->prf_names,
> > +		(char *)mod->prf_names + mod->prf_names_size, 
sizeof(po->names[0]));
> > +
> > +	po->vnds = mod->prf_vnds;
> > +	po->vnds_num = prf_get_count(mod->prf_vnds,
> > +		(char *)mod->prf_vnds + mod->prf_vnds_size, 
sizeof(po->vnds[0]));
> > +
> > +	/* create debugfs entry */
> > +	po->file = debugfs_create_file(fname, 0600, directory, po, 
&prf_fops);
> > +	if (!po->file) {
> > +		pr_err("Failed to setup module pgo: %s", fname);
> > +		kfree(po);
> > +		goto out;
> > +	}
> > +
> > +	/* finally enable profiling for the module */
> > +	flags = prf_list_lock();
> > +	list_add_tail_rcu(&po->link, &prf_list);
> > +	prf_list_unlock(flags);
> > +
> > +out:
> > +	return;
> > +}
> > +
> > +static int pgo_module_notifier(struct notifier_block *nb, unsigned long
> > event, +				void *pdata)
> > +{
> > +	struct module *mod = pdata;
> > +	struct prf_object *po;
> > +	unsigned long flags;
> > +
> > +	if (event == MODULE_STATE_LIVE) {
> > +		/* does the module have profiling info? */
> > +		if (mod->prf_data
> > +			&& mod->prf_cnts
> > +			&& mod->prf_names
> > +			&& mod->prf_vnds) {
> > +
> > +			/* setup module profiling */
> > +			pgo_module_init(mod);
> > +		}
> > +	}
> > +
> > +	if (event == MODULE_STATE_GOING) {
> > +		/* find the prf_object from the list */
> > +		rcu_read_lock();
> > +
> > +		list_for_each_entry_rcu(po, &prf_list, link) {
> > +			if (strcmp(po->mod_name, mod->name) == 
0)
> > +				goto out_unlock;
> > +		}
> > +		/* no such module */
> > +		po = NULL;
> > +
> > +out_unlock:
> > +		rcu_read_unlock();
> 
> Is it correct to do the unlock before the possible list_del_rcu()?
> 

Oh, list_del_rcu() should then propably go before unlocking. I'll fix that.

> > +
> > +		if (po) {
> > +			/* remove from profiled modules */
> > +			flags = prf_list_lock();
> > +			list_del_rcu(&po->link);
> > +			prf_list_unlock(flags);
> > +
> > +			debugfs_remove(po->file);
> > +			po->file = NULL;
> > +
> > +			/* cleanup memory */
> > +			kfree_rcu(po, rcu);
> > +		}
> 
> Though I thought module load/unload was already under a global lock, so
> maybe a race isn't possible here.
> 

I searched a bit and found out that module.c/module_mutex is not held during
the module notifier MODULE_STATE_GOING callbacks.
But the callback it only invoked only once per module un/load so I think it is 
okay.
If I remember correctly, the main reason I moved the tear down code after 
rcu_read_unlock() was that debugfs_remove() may sleep.


> For the next version of this series, please Cc the module subsystem
> maintainer as well:
> 	Jessica Yu <jeyu@...nel.org>
> 

OK, after all there is a lot of pointers to the kernel's module subsys.

> > +	}
> > +
> > +	return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block pgo_module_nb = {
> > +	.notifier_call = pgo_module_notifier
> > +};
> > +
> > 
> >  /* Create debugfs entries. */
> >  static int __init pgo_init(void)
> >  {
> > 
> > @@ -444,6 +548,7 @@ static int __init pgo_init(void)
> > 
> >  	/* enable profiling */
> >  	flags = prf_list_lock();
> > 
> > +	prf_vmlinux.mod_name = "vmlinux";
> > 
> >  	list_add_tail_rcu(&prf_vmlinux.link, &prf_list);
> >  	prf_list_unlock(flags);
> > 
> > @@ -460,6 +565,9 @@ static int __init pgo_init(void)
> > 
> >  				 &prf_reset_fops))
> >  		
> >  		goto err_remove;
> > 
> > +	/* register module notifer */
> > +	register_module_notifier(&pgo_module_nb);
> > +
> > 
> >  	/* show notice why the system slower: */
> >  	pr_notice("Clang PGO instrumentation is active.");
> > 
> > @@ -473,6 +581,9 @@ static int __init pgo_init(void)
> > 
> >  /* Remove debugfs entries. */
> >  static void __exit pgo_exit(void)
> >  {
> > 
> > +	/* unsubscribe the notifier and do cleanup. */
> > +	unregister_module_notifier(&pgo_module_nb);
> > +
> > 
> >  	debugfs_remove_recursive(directory);
> >  
> >  }
> > 
> > diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> > index e214c9d7a113..70bab7e4c153 100644
> > --- a/kernel/pgo/instrument.c
> > +++ b/kernel/pgo/instrument.c
> > @@ -33,7 +33,6 @@
> > 
> >   * ensures that we don't try to serialize data that's only partially
> >   updated.
> >   */
> >  
> >  static DEFINE_SPINLOCK(pgo_lock);
> > 
> > -static int current_node;
> > 
> >  unsigned long prf_lock(void)
> >  {
> > 
> > @@ -62,8 +61,6 @@ static struct llvm_prf_value_node *allocate_node(struct
> > llvm_prf_data *p,> 
> >  	struct llvm_prf_data *data_end;
> >  	int max_vnds;
> > 
> > -	rcu_read_lock();
> > -
> 
> Please move these rcu locks change into the patch that introduces them
> to avoid adding those changes here.
> 
> >  	list_for_each_entry_rcu(po, &prf_list, link) {
> >  	
> >  		/* get section limits */
> >  		max_vnds = prf_vnds_count(po);
> > 
> > @@ -76,7 +73,6 @@ static struct llvm_prf_value_node *allocate_node(struct
> > llvm_prf_data *p,> 
> >  		 */
> >  		
> >  		if (memory_contains(po->data, data_end, p, 
sizeof(*p))) {
> > 
> > -
> > 
> >  			if (WARN_ON_ONCE(po->current_node >= 
max_vnds))
> >  			
> >  				return NULL; /* Out of 
nodes */
> > 
> > @@ -87,7 +83,6 @@ static struct llvm_prf_value_node *allocate_node(struct
> > llvm_prf_data *p,> 
> >  	}
> >  
> >  out:
> > -	rcu_read_unlock();
> > 
> >  	return vnode;
> >  
> >  }
> > 
> > @@ -108,8 +103,14 @@ void __llvm_profile_instrument_target(u64 
target_value,
> > void *data, u32 index)> 
> >  	u8 values = 0;
> >  	unsigned long flags;
> > 
> > +	/*
> > +	 * lock the underlying prf_object(s) in place,
> > +	 * so they won't vanish while we are operating on it.
> > +	 */
> > +	rcu_read_lock();
> > +
> > 
> >  	if (!p || !p->values)
> > 
> > -		return;
> > +		goto rcu_unlock;
> > 
> >  	counters = (struct llvm_prf_value_node **)p->values;
> >  	curr = counters[index];
> > 
> > @@ -117,7 +118,7 @@ void __llvm_profile_instrument_target(u64 target_value,
> > void *data, u32 index)> 
> >  	while (curr) {
> >  	
> >  		if (target_value == curr->value) {
> >  		
> >  			curr->count++;
> > 
> > -			return;
> > +			goto rcu_unlock;
> > 
> >  		}
> >  		
> >  		if (curr->count < min_count) {
> > 
> > @@ -136,7 +137,7 @@ void __llvm_profile_instrument_target(u64 target_value,
> > void *data, u32 index)> 
> >  			curr->value = target_value;
> >  			curr->count++;
> >  		
> >  		}
> > 
> > -		return;
> > +		goto rcu_unlock;
> > 
> >  	}
> >  	
> >  	/* Lock when updating the value node structure. */
> > 
> > @@ -156,6 +157,8 @@ void __llvm_profile_instrument_target(u64 target_value,
> > void *data, u32 index)> 
> >  out:
> >  	prf_unlock(flags);
> > 
> > +rcu_unlock:
> > +	rcu_read_unlock();
> > 
> >  }
> >  EXPORT_SYMBOL(__llvm_profile_instrument_target);
> > 
> > --
> > 2.32.0
> 
> --
> Kees Cook




Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ