lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YLU0VAoWG7qa7u24@Ryzen-9-3900X.localdomain>
Date:   Mon, 31 May 2021 12:09:08 -0700
From:   Nathan Chancellor <nathan@...nel.org>
To:     Jarmo Tiitto <jarmo.tiitto@...il.com>
Cc:     samitolvanen@...gle.com, wcw@...gle.com, ndesaulniers@...gle.com,
        linux-kernel@...r.kernel.org, clang-built-linux@...glegroups.com
Subject: Re: [PATCH 3/6] pgo: modules Add module profile data export
 machinery.

On Fri, May 28, 2021 at 11:08:21PM +0300, Jarmo Tiitto wrote:
> PGO profile data is exported from the kernel by
> creating debugfs files: pgo/<module>.profraw for each module.

Again, I do not really have many comments on the actual code as I am not
super familiar with it.

However, fs_mod.c duplicates a lot of the functions in fs.c, which makes
maintaining this code even more difficult, especially against LLVM PGO
profile data format changes. I just want to make sure I understand this:
does PGO currently not work with modules? Or does this patch series just
make it so that each module has its own .profraw file so individual
modules can be optimized?

If it is the latter, what is the point? Why would you want to optimize
just a module and not the entire kernel, if you already have to go
through the profiling steps?

If it is the former, there has to be a better way to share more of the
machinery between fs.c and fs_mod.c than basically duplicating
everything because there are some parameters and logic that have to
change. I do not have a ton of time to outline exactly what that might
look like but for example, prf_fill_header and prf_module_fill_header
are basically the same thing aside from the mod parameter and the
prf_..._count() calls. It seems like if mod was NULL, you would call the
vmlinux versions of the functions.

> Modules are register into the profiler via module notifier callback
> similiar to gcov/base.c. Note that if module does not have __llvm_prf_xxx
> sections required the module ignored.
> 
> Also there is no "reset" support for yet for these files.
> 
> Signed-off-by: Jarmo Tiitto <jarmo.tiitto@...il.com>
> ---
>  kernel/pgo/Makefile     |   2 +-
>  kernel/pgo/fs.c         |  54 ++++--
>  kernel/pgo/fs_mod.c     | 415 ++++++++++++++++++++++++++++++++++++++++
>  kernel/pgo/instrument.c |  12 +-
>  kernel/pgo/pgo.h        |  11 +-
>  5 files changed, 466 insertions(+), 28 deletions(-)
>  create mode 100644 kernel/pgo/fs_mod.c
> 
> diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
> index 41e27cefd9a4..662b7dfdfbe9 100644
> --- a/kernel/pgo/Makefile
> +++ b/kernel/pgo/Makefile
> @@ -2,4 +2,4 @@
>  GCOV_PROFILE	:= n
>  PGO_PROFILE	:= n
>  
> -obj-y	+= fs.o instrument.o
> +obj-y	+= fs.o fs_mod.o instrument.o
> diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> index 575142735273..5471d270a5bb 100644
> --- a/kernel/pgo/fs.c
> +++ b/kernel/pgo/fs.c
> @@ -227,15 +227,15 @@ static unsigned long prf_buffer_size(void)
>   * Serialize the profiling data into a format LLVM's tools can understand.
>   * Note: caller *must* hold pgo_lock.
>   */
> -static int prf_serialize(struct prf_private_data *p)
> +static int prf_serialize(struct prf_private_data *p, unsigned long *buf_size)
>  {
>  	int err = 0;
>  	void *buffer;
>  
> -	p->size = prf_buffer_size();
> -	p->buffer = vzalloc(p->size);
> +	/* re-check buffer size */
> +	*buf_size = prf_buffer_size();
>  
> -	if (!p->buffer) {
> +	if (p->size < *buf_size || !p->buffer) {
>  		err = -ENOMEM;
>  		goto out;
>  	}
> @@ -262,7 +262,8 @@ static int prf_open(struct inode *inode, struct file *file)
>  {
>  	struct prf_private_data *data;
>  	unsigned long flags;
> -	int err;
> +	unsigned long buf_size;
> +	int err = 0;
>  
>  	data = kzalloc(sizeof(*data), GFP_KERNEL);
>  	if (!data) {
> @@ -270,18 +271,41 @@ static int prf_open(struct inode *inode, struct file *file)
>  		goto out;
>  	}
>  
> +	/* estimate amount of memory needed:
> +	 * can't vzalloc() while prf_lock() is held:
> +	 * CONFIG_DEBUG_ATOMIC_SLEEP complains.
> +	 * So first get buffer size, release the lock,
> +	 * vzalloc(), re-lock and try serialize.
> +	 */
>  	flags = prf_lock();
> +	buf_size = prf_buffer_size();
>  
> -	err = prf_serialize(data);
> -	if (unlikely(err)) {
> -		kfree(data);
> -		goto out_unlock;
> -	}
> +	do {
> +		prf_unlock(flags);
>  
> -	file->private_data = data;
> +		/* resize buffer */
> +		if (data->size < buf_size && data->buffer) {
> +			vfree(data->buffer);
> +			data->buffer = NULL;
> +		}
> +
> +		if (!data->buffer) {
> +			data->size = buf_size;
> +			data->buffer = vzalloc(data->size);
>  
> -out_unlock:
> +			if (!data->buffer) {
> +				err = -ENOMEM;
> +				goto out;
> +			}
> +		}
> +		/* try serialize */
> +		flags = prf_lock();
> +	} while (prf_serialize(data, &buf_size));
>  	prf_unlock(flags);
> +
> +	data->size = buf_size;
> +	file->private_data = data;
> +
>  out:
>  	return err;
>  }
> @@ -363,6 +387,8 @@ static const struct file_operations prf_reset_fops = {
>  /* Create debugfs entries. */
>  static int __init pgo_init(void)
>  {
> +	pr_notice("Clang PGO profile data available.");
> +
>  	directory = debugfs_create_dir("pgo", NULL);
>  	if (!directory)
>  		goto err_remove;
> @@ -375,6 +401,8 @@ static int __init pgo_init(void)
>  				 &prf_reset_fops))
>  		goto err_remove;
>  
> +	prf_modules_init();
> +
>  	return 0;
>  
>  err_remove:
> @@ -385,6 +413,8 @@ static int __init pgo_init(void)
>  /* Remove debugfs entries. */
>  static void __exit pgo_exit(void)
>  {
> +	prf_modules_exit();
> +
>  	debugfs_remove_recursive(directory);
>  }
>  
> diff --git a/kernel/pgo/fs_mod.c b/kernel/pgo/fs_mod.c
> new file mode 100644
> index 000000000000..0808d44227f1
> --- /dev/null
> +++ b/kernel/pgo/fs_mod.c
> @@ -0,0 +1,415 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Jarmo Tiitto
> + *
> + * Author:
> + *	Jarmo Tiitto <jarmo.tiitto@...il.com>
> + *
> + * Based on the clang PGO kernel patch by:
> + *  Sami Tolvanen <samitolvanen@...gle.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#define pr_fmt(fmt)	"pgo: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/debugfs.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include "pgo.h"
> +
> +/*
> + * PGO profile data reporting for modules:
> + * We maintain one debugfs pgo/<module>.profraw file per module.
> + */
> +
> +
> +DEFINE_MUTEX(prf_mod_lock);
> +LIST_HEAD(prf_mod_list);
> +
> +struct prf_mod_data {
> +	void *buffer;
> +	unsigned long size;
> +};
> +
> +/* these are trivial, but each one differs a bit */
> +static inline unsigned long prf_mod_data_size(struct module *mod)
> +{
> +	return mod->prf_data_size;
> +}
> +
> +static inline unsigned long prf_mod_data_count(struct module *mod)
> +{
> +	return mod->prf_data_size / sizeof(struct llvm_prf_data);
> +}
> +
> +static inline unsigned long prf_mod_cnts_size(struct module *mod)
> +{
> +	return mod->prf_cnts_num * sizeof(mod->prf_cnts[0]);
> +}
> +
> +static inline unsigned long prf_mod_cnts_count(struct module *mod)
> +{
> +	return mod->prf_cnts_num;
> +}
> +
> +static inline unsigned long prf_mod_names_size(struct module *mod)
> +{
> +	return mod->prf_names_num * sizeof(mod->prf_names[0]);
> +}
> +
> +static inline unsigned long prf_mod_names_count(struct module *mod)
> +{
> +	return mod->prf_names_num;
> +}
> +
> +static inline unsigned long prf_mod_vnds_size(struct module *mod)
> +{
> +	return mod->prf_vnds_size;
> +}
> +
> +static inline unsigned long prf_mod_vnds_count(struct module *mod)
> +{
> +	return mod->prf_vnds_size / sizeof(struct llvm_prf_value_node);
> +}
> +
> +/*
> + * Raw profile data format:
> + *
> + *	- llvm_prf_header
> + *	- __llvm_prf_data
> + *	- __llvm_prf_cnts
> + *	- __llvm_prf_names
> + *	- zero padding to 8 bytes
> + *	- for each llvm_prf_data in __llvm_prf_data:
> + *		- llvm_prf_value_data
> + *			- llvm_prf_value_record + site count array
> + *				- llvm_prf_value_node_data
> + *				...
> + *			...
> + *		...
> + */
> +
> +static void prf_module_fill_header(struct module *mod, void **buffer)
> +{
> +	struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
> +
> +#ifdef CONFIG_64BIT
> +	header->magic = LLVM_INSTR_PROF_RAW_MAGIC_64;
> +#else
> +	header->magic = LLVM_INSTR_PROF_RAW_MAGIC_32;
> +#endif
> +	header->version = LLVM_VARIANT_MASK_IR_PROF | LLVM_INSTR_PROF_RAW_VERSION;
> +	header->data_size = prf_mod_data_count(mod);
> +	header->padding_bytes_before_counters = 0;
> +	header->counters_size = prf_mod_cnts_count(mod);
> +	header->padding_bytes_after_counters = 0;
> +	header->names_size = prf_mod_names_count(mod);
> +	header->counters_delta = (u64)mod->prf_cnts;
> +	header->names_delta = (u64)mod->prf_names;
> +	header->value_kind_last = LLVM_INSTR_PROF_IPVK_LAST;
> +
> +	*buffer += sizeof(*header);
> +}
> +
> +/*
> + * Copy the source into the buffer, incrementing the pointer into buffer in the
> + * process.
> + */
> +static void prf_copy_to_buffer(void **buffer, const void *src, unsigned long size)
> +{
> +	memcpy(*buffer, src, size);
> +	*buffer += size;
> +}
> +
> +/* extern u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds) */
> +
> +static u32 prf_module_get_value_size(struct module *mod)
> +{
> +	u32 size = 0;
> +	struct llvm_prf_data *p;
> +	struct llvm_prf_data *start = mod->prf_data;
> +	struct llvm_prf_data *end = start + prf_mod_data_count(mod);
> +
> +	for (p = start; p < end; p++)
> +		size += __prf_get_value_size(p, NULL);
> +
> +	return size;
> +}
> +
> +/* Serialize the profiling's value.
> + * extern void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
> + */
> +
> +static void prf_module_serialize_values(struct module *mod, void **buffer)
> +{
> +	struct llvm_prf_data *p;
> +	struct llvm_prf_data *start = mod->prf_data;
> +	struct llvm_prf_data *end = start + prf_mod_data_count(mod);
> +
> +	for (p = start; p < end; p++)
> +		prf_serialize_value(p, buffer);
> +}
> +
> +static inline unsigned long prf_get_padding(unsigned long size)
> +{
> +	return 7 & (sizeof(u64) - size % sizeof(u64));
> +}
> +
> +static unsigned long prf_module_buffer_size(struct module *mod)
> +{
> +	return sizeof(struct llvm_prf_header) +
> +			prf_mod_data_size(mod)	+
> +			prf_mod_cnts_size(mod) +
> +			prf_mod_names_size(mod) +
> +			prf_get_padding(prf_mod_names_size(mod)) +
> +			prf_module_get_value_size(mod);
> +}
> +
> +/*
> + * Serialize the profiling data into a format LLVM's tools can understand.
> + * Note: caller *must* hold pgo_lock and hold reference to the module.
> + */
> +static int prf_module_serialize(struct module *mod, struct prf_mod_data *p, unsigned long *buf_size)
> +{
> +	int err = 0;
> +	void *buffer;
> +
> +	/* re-check buffer size */
> +	*buf_size = prf_module_buffer_size(mod);
> +
> +	if (p->size < *buf_size || !p->buffer) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	buffer = p->buffer;
> +
> +	prf_module_fill_header(mod, &buffer);
> +	prf_copy_to_buffer(&buffer, mod->prf_data,  prf_mod_data_size(mod));
> +	prf_copy_to_buffer(&buffer, mod->prf_cnts,  prf_mod_cnts_size(mod));
> +	prf_copy_to_buffer(&buffer, mod->prf_names, prf_mod_names_size(mod));
> +	buffer += prf_get_padding(prf_mod_names_size(mod));
> +
> +	prf_module_serialize_values(mod, &buffer);
> +
> +out:
> +	return err;
> +}
> +
> +/* open() implementation for module PGO. */
> +static int prf_module_open(struct inode *inode, struct file *file)
> +{
> +	struct prf_mod_private_data *data;
> +	struct prf_mod_data *pinfo;
> +	struct module *mod;
> +	unsigned long flags;
> +	unsigned long buf_size = 0;
> +	int err = 0;
> +
> +	mutex_lock(&prf_mod_lock);
> +	data = inode->i_private; /* see: pgo_module_notifier() */
> +
> +	BUG_ON(!data);
> +
> +	/* grab the module */
> +	mod = READ_ONCE(data->mod);
> +	if (mod && try_module_get(mod)) {
> +		// Is it live?
> +		if (mod->state != MODULE_STATE_LIVE) {
> +			err = -EAGAIN;
> +			goto put_unlock;
> +		}
> +
> +		pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL);
> +		if (!pinfo) {
> +			err = -ENOMEM;
> +			goto put_unlock;
> +		}
> +
> +		mutex_unlock(&prf_mod_lock);
> +
> +		/* estimate amount of memory needed:
> +		 * can't vzalloc() while prf_lock() is held
> +		 * and prf_module_buffer_size() only works while it is held..
> +		 */
> +		flags = prf_lock();
> +		buf_size = prf_module_buffer_size(mod);
> +		do {
> +			prf_unlock(flags);
> +
> +			/* resize buffer */
> +			if (pinfo->size < buf_size && pinfo->buffer) {
> +				vfree(pinfo->buffer);
> +				pinfo->buffer = NULL;
> +			}
> +
> +			if (!pinfo->buffer) {
> +				pinfo->size = buf_size;
> +				pinfo->buffer = vzalloc(pinfo->size);
> +
> +				if (!pinfo->buffer) {
> +					err = -ENOMEM;
> +					goto out;
> +				}
> +			}
> +
> +			/* try serialize */
> +			flags = prf_lock();
> +
> +		} while (prf_module_serialize(mod, pinfo, &buf_size));
> +
> +		prf_unlock(flags);
> +
> +		/* success! */
> +		pinfo->size = buf_size;
> +		file->private_data = pinfo;
> +
> +		module_put(mod);
> +		return err;
> +	}
> +
> +put_unlock:
> +	module_put(mod);
> +	mutex_unlock(&prf_mod_lock);
> +out:
> +	return err;
> +}
> +
> +/* read() implementation for PGO. */
> +static ssize_t prf_module_read(struct file *file, char __user *buf, size_t count,
> +			loff_t *ppos)
> +{
> +	struct prf_mod_data *pinfo = file->private_data;
> +
> +	BUG_ON(!pinfo);
> +
> +	return simple_read_from_buffer(buf, count, ppos, pinfo->buffer,
> +					   pinfo->size);
> +}
> +
> +/* release() implementation for PGO. Release resources allocated by open(). */
> +static int prf_module_release(struct inode *inode, struct file *file)
> +{
> +	struct prf_mod_data *pinfo = file->private_data;
> +
> +	if (pinfo) {
> +		vfree(pinfo->buffer);
> +		kfree(pinfo);
> +		file->private_data = 0;
> +	}
> +	return 0;
> +}
> +
> +static const struct file_operations prf_mod_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= prf_module_open,
> +	.read		= prf_module_read,
> +	.llseek		= default_llseek,
> +	.release	= prf_module_release
> +};
> +
> +static void prf_module_free(struct rcu_head *rcu)
> +{
> +	struct prf_mod_private_data *data;
> +
> +	data = container_of(rcu, struct prf_mod_private_data, rcu);
> +
> +	debugfs_remove(data->file);
> +
> +	kfree(data);
> +}
> +
> +static int pgo_module_notifier(struct notifier_block *nb, unsigned long event,
> +				void *pdata)
> +{
> +	struct module *mod = pdata;
> +	struct prf_mod_private_data *data;
> +	char fsname[MODULE_NAME_LEN + 9]; // +strlen(".profraw")
> +
> +	if (event == MODULE_STATE_LIVE) {
> +		/* does the module have profiling info? */
> +		if (mod->prf_data
> +			  && mod->prf_cnts
> +			  && mod->prf_names
> +			  && mod->prf_vnds) {
> +			/* add module prf_mod_private_data entry */
> +			data = kzalloc(sizeof(*data), GFP_KERNEL);
> +
> +			fsname[0] = 0;
> +			snprintf(fsname, sizeof(fsname), "%s.profraw", mod->name);
> +
> +			mutex_lock(&prf_mod_lock);
> +
> +			data->file = debugfs_create_file(fsname, 0600, directory, data, &prf_mod_fops);
> +			if (!data->file) {
> +				pr_err("Failed setup module pgo: %s", fsname);
> +				kfree(data);
> +				mutex_unlock(&prf_mod_lock);
> +				return NOTIFY_OK;
> +			}
> +
> +			WRITE_ONCE(data->mod, mod);
> +
> +			list_add_tail_rcu(&data->link, &prf_mod_list);
> +			mutex_unlock(&prf_mod_lock);
> +		}
> +	}
> +	if (event == MODULE_STATE_GOING) {
> +		/* remove module from the list */
> +		rcu_read_lock();
> +		list_for_each_entry_rcu(data, &prf_mod_list, link) {
> +			if (strcmp(data->mod->name, mod->name) == 0) {
> +
> +				mutex_lock(&prf_mod_lock);
> +				/* remofe from profiled modules */
> +				list_del_rcu(&data->link);
> +				/* mark it stale */
> +				WRITE_ONCE(data->mod, NULL);
> +				mutex_unlock(&prf_mod_lock);
> +				call_rcu(&data->rcu, prf_module_free);
> +				break;
> +			}
> +		}
> +		rcu_read_unlock();
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static struct notifier_block pgo_module_nb = {
> +	.notifier_call = pgo_module_notifier
> +};
> +
> +void prf_modules_init(void)
> +{
> +	register_module_notifier(&pgo_module_nb);
> +}
> +
> +void prf_modules_exit(void)
> +{
> +	struct prf_mod_private_data *p;
> +
> +	/* unsubscribe the notifier and do cleanup. */
> +	unregister_module_notifier(&pgo_module_nb);
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(p, &prf_mod_list, link) {
> +		/* delete nodes */
> +		list_del(&p->link);
> +		WRITE_ONCE(p->mod, NULL);
> +		call_rcu(&p->rcu, prf_module_free);
> +	}
> +	rcu_read_unlock();
> +}
> \ No newline at end of file
> diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> index 464b3bc77431..98cfa11a7b76 100644
> --- a/kernel/pgo/instrument.c
> +++ b/kernel/pgo/instrument.c
> @@ -139,11 +139,11 @@ EXPORT_SYMBOL(__llvm_profile_instrument_target);
>  
>  /* Counts the number of times a range of targets values are seen. */
>  void __llvm_profile_instrument_range(u64 target_value, void *data,
> -				     u32 index, s64 precise_start,
> -				     s64 precise_last, s64 large_value);
> +					 u32 index, s64 precise_start,
> +					 s64 precise_last, s64 large_value);
>  void __llvm_profile_instrument_range(u64 target_value, void *data,
> -				     u32 index, s64 precise_start,
> -				     s64 precise_last, s64 large_value)
> +					 u32 index, s64 precise_start,
> +					 s64 precise_last, s64 large_value)
>  {
>  	if (large_value != S64_MIN && (s64)target_value >= large_value)
>  		target_value = large_value;
> @@ -176,9 +176,9 @@ static u64 inst_prof_get_range_rep_value(u64 value)
>   * defined in compiler-rt/include/profile/InstrProfData.inc.
>   */
>  void __llvm_profile_instrument_memop(u64 target_value, void *data,
> -				     u32 counter_index);
> +					 u32 counter_index);
>  void __llvm_profile_instrument_memop(u64 target_value, void *data,
> -				     u32 counter_index)
> +					 u32 counter_index)
>  {
>  	u64 rep_value;
>  
> diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> index a9ff51abbfd5..2840da63c7cd 100644
> --- a/kernel/pgo/pgo.h
> +++ b/kernel/pgo/pgo.h
> @@ -212,17 +212,13 @@ struct prf_mod_private_data {
>  	struct list_head link;
>  	struct rcu_head rcu;
>  
> -	void *buffer;
> -	unsigned long size;
> -
> -	char mod_name[MODULE_NAME_LEN];
>  	struct module *mod;
>  	struct dentry *file;
>  
>  	int current_node;
>  };
>  
> -/* Mutex protecting the prf_mod_list and entries */
> +/* Mutex protecting the prf_mod_list */
>  extern struct mutex prf_mod_lock;
>  
>  /* List of modules profiled */
> @@ -231,10 +227,7 @@ extern struct list_head prf_mod_list;
>  extern void prf_modules_init(void);
>  extern void prf_modules_exit(void);
>  
> -/* Update each modules snapshot of the profiling data. */
> -extern int prf_modules_snapshot(void);
> -
> -/* below funcs are required by prf_modules_snapshot() */
> +/* below funcs are required by prf_modules */
>  extern u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds);
>  
>  extern void prf_serialize_value(struct llvm_prf_data *p, void **buffer);
> -- 
> 2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ