lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 4 Aug 2016 17:56:20 +0530
From:	Hari Bathini <hbathini@...ux.vnet.ibm.com>
To:	"Eric W. Biederman" <ebiederm@...ssion.com>
Cc:	daniel@...earbox.net, peterz@...radead.org,
	linux-kernel@...r.kernel.org, acme@...nel.org,
	alexander.shishkin@...ux.intel.com, mingo@...hat.com,
	paulus@...ba.org, kernel@...p.com, rostedt@...dmis.org,
	viro@...iv.linux.org.uk, aravinda@...ux.vnet.ibm.com,
	ananth@...ibm.com
Subject: Re: [RFC PATCH v2 3/3] tracefs: add 'newinstance' mount option

Hi Eric,


Thanks for the comments..


On Thursday 04 August 2016 08:24 AM, Eric W. Biederman wrote:
> Hari Bathini <hbathini@...ux.vnet.ibm.com> writes:
>
>> When tracefs is mounted inside a container, its files are visible to
>> all containers. This implies that a user from within a container can
>> list/delete uprobes registered elsewhere, leading to security issues
>> and/or denial of service (Eg. deleting a probe that is registered from
>> elsewhere). This patch addresses this problem by adding mount option
>> 'newinstance', allowing containers to have their own instance mounted
>> separately. Something like the below from within a container:
> newinstance is an anti-pattern in devpts and should not be copied.
> To fix some severe defects of devpts we had to always create new
> istances and the code and the testing to make that all work was

OK..

> not pleasant.  Please don't add another option that we will just have to
> make redundant later.

IIUC, you mean, implicitly create a new instance for tracefs mount
inside container without the need for a new option?

Thanks
Hari

> Eric
>
>
>>    $ mount -o newinstance -t tracefs tracefs /sys/kernel/tracing
>>    $
>>    $
>>    $ perf probe /lib/x86_64-linux-gnu/libc.so.6 malloc
>>    Added new event:
>>      probe_libc:malloc    (on malloc in /lib/x86_64-linux-gnu/libc.so.6)
>>
>>    You can now use it in all perf tools, such as:
>>
>>    	perf record -e probe_libc:malloc -aR sleep 1
>>
>>    $
>>    $
>>    $ perf probe --list
>>      probe_libc:malloc    (on __libc_malloc in /lib64/libc.so.6)
>>    $
>>
>> while another container/host has a completely different view:
>>
>>
>>    $ perf probe --list
>>      probe_libc:memset    (on __libc_memset in /lib64/libc.so.6)
>>    $
>>
>> This patch reuses the code that provides support to create new instances
>> under tracefs instances directory.
>>
>> Signed-off-by: Hari Bathini <hbathini@...ux.vnet.ibm.com>
>> ---
>>   fs/tracefs/inode.c      |  171 ++++++++++++++++++++++++++++++++++++++---------
>>   include/linux/tracefs.h |   11 ++-
>>   kernel/trace/trace.c    |   52 ++++++++++----
>>   3 files changed, 181 insertions(+), 53 deletions(-)
>>
>> diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
>> index 4a0e48f..2d6acda 100644
>> --- a/fs/tracefs/inode.c
>> +++ b/fs/tracefs/inode.c
>> @@ -51,9 +51,9 @@ static const struct file_operations tracefs_file_operations = {
>>   };
>>   
>>   static struct tracefs_dir_ops {
>> -	int (*mkdir)(const char *name);
>> -	int (*rmdir)(const char *name);
>> -} tracefs_ops;
>> +	int (*mkdir)(int instance_type, void *data);
>> +	int (*rmdir)(int instance_type, void *data);
>> +} tracefs_instance_ops;
>>   
>>   static char *get_dname(struct dentry *dentry)
>>   {
>> @@ -85,7 +85,7 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
>>   	 * mkdir routine to handle races.
>>   	 */
>>   	inode_unlock(inode);
>> -	ret = tracefs_ops.mkdir(name);
>> +	ret = tracefs_instance_ops.mkdir(INSTANCE_DIR, name);
>>   	inode_lock(inode);
>>   
>>   	kfree(name);
>> @@ -112,7 +112,7 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
>>   	inode_unlock(inode);
>>   	inode_unlock(dentry->d_inode);
>>   
>> -	ret = tracefs_ops.rmdir(name);
>> +	ret = tracefs_instance_ops.rmdir(INSTANCE_DIR, name);
>>   
>>   	inode_lock_nested(inode, I_MUTEX_PARENT);
>>   	inode_lock(dentry->d_inode);
>> @@ -142,12 +142,14 @@ struct tracefs_mount_opts {
>>   	kuid_t uid;
>>   	kgid_t gid;
>>   	umode_t mode;
>> +	int newinstance;
>>   };
>>   
>>   enum {
>>   	Opt_uid,
>>   	Opt_gid,
>>   	Opt_mode,
>> +	Opt_newinstance,
>>   	Opt_err
>>   };
>>   
>> @@ -155,14 +157,26 @@ static const match_table_t tokens = {
>>   	{Opt_uid, "uid=%u"},
>>   	{Opt_gid, "gid=%u"},
>>   	{Opt_mode, "mode=%o"},
>> +	{Opt_newinstance, "newinstance"},
>>   	{Opt_err, NULL}
>>   };
>>   
>>   struct tracefs_fs_info {
>>   	struct tracefs_mount_opts mount_opts;
>> +	struct super_block *sb;
>>   };
>>   
>> -static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
>> +static inline struct tracefs_fs_info *TRACEFS_SB(struct super_block *sb)
>> +{
>> +	return sb->s_fs_info;
>> +}
>> +
>> +#define PARSE_MOUNT		0
>> +#define PARSE_REMOUNT		1
>> +
>> +static int tracefs_parse_options(char *data,
>> +				 int op,
>> +				 struct tracefs_mount_opts *opts)
>>   {
>>   	substring_t args[MAX_OPT_ARGS];
>>   	int option;
>> @@ -173,6 +187,10 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
>>   
>>   	opts->mode = TRACEFS_DEFAULT_MODE;
>>   
>> +	/* newinstance makes sense only on initial mount */
>> +	if (op == PARSE_MOUNT)
>> +		opts->newinstance = 0;
>> +
>>   	while ((p = strsep(&data, ",")) != NULL) {
>>   		if (!*p)
>>   			continue;
>> @@ -200,6 +218,11 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
>>   				return -EINVAL;
>>   			opts->mode = option & S_IALLUGO;
>>   			break;
>> +		case Opt_newinstance:
>> +			/* newinstance makes sense only on initial mount */
>> +			if (op == PARSE_MOUNT)
>> +				opts->newinstance = 1;
>> +			break;
>>   		/*
>>   		 * We might like to report bad mount options here;
>>   		 * but traditionally tracefs has ignored all mount options
>> @@ -231,7 +254,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data)
>>   	struct tracefs_fs_info *fsi = sb->s_fs_info;
>>   
>>   	sync_filesystem(sb);
>> -	err = tracefs_parse_options(data, &fsi->mount_opts);
>> +	err = tracefs_parse_options(data, PARSE_REMOUNT, &fsi->mount_opts);
>>   	if (err)
>>   		goto fail;
>>   
>> @@ -254,6 +277,8 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
>>   			   from_kgid_munged(&init_user_ns, opts->gid));
>>   	if (opts->mode != TRACEFS_DEFAULT_MODE)
>>   		seq_printf(m, ",mode=%o", opts->mode);
>> +	if (opts->newinstance)
>> +		seq_puts(m, ",newinstance");
>>   
>>   	return 0;
>>   }
>> @@ -264,53 +289,130 @@ static const struct super_operations tracefs_super_operations = {
>>   	.show_options	= tracefs_show_options,
>>   };
>>   
>> -static int trace_fill_super(struct super_block *sb, void *data, int silent)
>> +static void *new_tracefs_fs_info(struct super_block *sb)
>>   {
>> -	static struct tree_descr trace_files[] = {{""}};
>>   	struct tracefs_fs_info *fsi;
>> -	int err;
>> -
>> -	save_mount_options(sb, data);
>>   
>>   	fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
>> -	sb->s_fs_info = fsi;
>> -	if (!fsi) {
>> -		err = -ENOMEM;
>> -		goto fail;
>> -	}
>> +	if (!fsi)
>> +		return NULL;
>>   
>> -	err = tracefs_parse_options(data, &fsi->mount_opts);
>> -	if (err)
>> +	fsi->mount_opts.mode = TRACEFS_DEFAULT_MODE;
>> +	fsi->sb = sb;
>> +
>> +	return fsi;
>> +}
>> +
>> +static int trace_fill_super(struct super_block *sb, void *data, int silent)
>> +{
>> +	struct inode *inode;
>> +
>> +	sb->s_blocksize = PAGE_SIZE;
>> +	sb->s_blocksize_bits = PAGE_SHIFT;
>> +	sb->s_magic = TRACEFS_MAGIC;
>> +	sb->s_op = &tracefs_super_operations;
>> +	sb->s_time_gran = 1;
>> +
>> +	sb->s_fs_info = new_tracefs_fs_info(sb);
>> +	if (!sb->s_fs_info)
>>   		goto fail;
>>   
>> -	err  =  simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
>> -	if (err)
>> +	inode = new_inode(sb);
>> +	if (!inode)
>>   		goto fail;
>> +	inode->i_ino = 1;
>> +	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
>> +	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
>> +	inode->i_op = &simple_dir_inode_operations;
>> +	inode->i_fop = &simple_dir_operations;
>> +	set_nlink(inode, 2);
>>   
>> -	sb->s_op = &tracefs_super_operations;
>> +	sb->s_root = d_make_root(inode);
>> +	if (sb->s_root)
>> +		return 0;
>>   
>> -	tracefs_apply_options(sb);
>> +	pr_err("get root dentry failed\n");
>>   
>>   	return 0;
>>   
>>   fail:
>> -	kfree(fsi);
>> -	sb->s_fs_info = NULL;
>> -	return err;
>> +	return -ENOMEM;
>> +}
>> +
>> +static int compare_init_tracefs_sb(struct super_block *s, void *p)
>> +{
>> +	if (tracefs_mount)
>> +		return tracefs_mount->mnt_sb == s;
>> +	return 0;
>>   }
>>   
>>   static struct dentry *trace_mount(struct file_system_type *fs_type,
>>   			int flags, const char *dev_name,
>>   			void *data)
>>   {
>> -	return mount_single(fs_type, flags, data, trace_fill_super);
>> +	int err;
>> +	struct tracefs_mount_opts opts;
>> +	struct super_block *s;
>> +
>> +	err = tracefs_parse_options(data, PARSE_MOUNT, &opts);
>> +	if (err)
>> +		return ERR_PTR(err);
>> +
>> +	/* Require newinstance for all user namespace mounts to ensure
>> +	 * the mount options are not changed.
>> +	 */
>> +	if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	if (opts.newinstance)
>> +		s = sget(fs_type, NULL, set_anon_super, flags, NULL);
>> +	else
>> +		s = sget(fs_type, compare_init_tracefs_sb, set_anon_super,
>> +			 flags, NULL);
>> +
>> +	if (IS_ERR(s))
>> +		return ERR_CAST(s);
>> +
>> +	if (!s->s_root) {
>> +		err = trace_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
>> +		if (err)
>> +			goto out_undo_sget;
>> +		s->s_flags |= MS_ACTIVE;
>> +	}
>> +
>> +	if (opts.newinstance) {
>> +		err = tracefs_instance_ops.mkdir(INSTANCE_MNT, s->s_root);
>> +		if (err)
>> +			goto out_undo_sget;
>> +	}
>> +
>> +	memcpy(&(TRACEFS_SB(s))->mount_opts, &opts, sizeof(opts));
>> +
>> +	tracefs_apply_options(s);
>> +
>> +	return dget(s->s_root);
>> +
>> +out_undo_sget:
>> +	deactivate_locked_super(s);
>> +	return ERR_PTR(err);
>> +}
>> +
>> +static void trace_kill_sb(struct super_block *sb)
>> +{
>> +	struct tracefs_fs_info *fsi = TRACEFS_SB(sb);
>> +
>> +	if (fsi->mount_opts.newinstance)
>> +		tracefs_instance_ops.rmdir(INSTANCE_MNT, sb->s_root);
>> +
>> +	kfree(fsi);
>> +	kill_litter_super(sb);
>>   }
>>   
>>   static struct file_system_type trace_fs_type = {
>>   	.owner =	THIS_MODULE,
>>   	.name =		"tracefs",
>>   	.mount =	trace_mount,
>> -	.kill_sb =	kill_litter_super,
>> +	.kill_sb =	trace_kill_sb,
>>   };
>>   MODULE_ALIAS_FS("tracefs");
>>   
>> @@ -480,22 +582,23 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
>>    *
>>    * Returns the dentry of the instances directory.
>>    */
>> -struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
>> -					  int (*mkdir)(const char *name),
>> -					  int (*rmdir)(const char *name))
>> +struct dentry *
>> +tracefs_create_instance_dir(const char *name, struct dentry *parent,
>> +			    int (*mkdir)(int instance_type, void *data),
>> +			    int (*rmdir)(int instance_type, void *data))
>>   {
>>   	struct dentry *dentry;
>>   
>>   	/* Only allow one instance of the instances directory. */
>> -	if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
>> +	if (WARN_ON(tracefs_instance_ops.mkdir || tracefs_instance_ops.rmdir))
>>   		return NULL;
>>   
>>   	dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
>>   	if (!dentry)
>>   		return NULL;
>>   
>> -	tracefs_ops.mkdir = mkdir;
>> -	tracefs_ops.rmdir = rmdir;
>> +	tracefs_instance_ops.mkdir = mkdir;
>> +	tracefs_instance_ops.rmdir = rmdir;
>>   
>>   	return dentry;
>>   }
>> diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
>> index 5b727a1..30d4e55 100644
>> --- a/include/linux/tracefs.h
>> +++ b/include/linux/tracefs.h
>> @@ -25,6 +25,10 @@ struct file_operations;
>>   
>>   #ifdef CONFIG_TRACING
>>   
>> +/* instance types */
>> +#define INSTANCE_DIR	0	/* created inside instances dir */
>> +#define INSTANCE_MNT	1	/* created with newinstance mount option */
>> +
>>   struct dentry *tracefs_create_file(const char *name, umode_t mode,
>>   				   struct dentry *parent, void *data,
>>   				   const struct file_operations *fops);
>> @@ -34,9 +38,10 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent);
>>   void tracefs_remove(struct dentry *dentry);
>>   void tracefs_remove_recursive(struct dentry *dentry);
>>   
>> -struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
>> -					   int (*mkdir)(const char *name),
>> -					   int (*rmdir)(const char *name));
>> +struct dentry *
>> +tracefs_create_instance_dir(const char *name, struct dentry *parent,
>> +			    int (*mkdir)(int instance_type, void *data),
>> +			    int (*rmdir)(int instance_type, void *data));
>>   
>>   bool tracefs_initialized(void);
>>   
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index 23a8111..a991e9d 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -6782,17 +6782,24 @@ static void update_tracer_options(struct trace_array *tr)
>>   	mutex_unlock(&trace_types_lock);
>>   }
>>   
>> -static int instance_mkdir(const char *name)
>> +static int instance_mkdir(int instance_type, void *data)
>>   {
>> +	const char *name =  "tracing";
>> +	struct dentry *mnt_root = NULL;
>>   	struct trace_array *tr;
>>   	int ret;
>>   
>>   	mutex_lock(&trace_types_lock);
>>   
>> -	ret = -EEXIST;
>> -	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
>> -		if (tr->name && strcmp(tr->name, name) == 0)
>> -			goto out_unlock;
>> +	if (instance_type == INSTANCE_MNT)
>> +		mnt_root = data;
>> +	else {
>> +		name = data;
>> +		ret = -EEXIST;
>> +		list_for_each_entry(tr, &ftrace_trace_arrays, list) {
>> +			if (tr->name && strcmp(tr->name, name) == 0)
>> +				goto out_unlock;
>> +		}
>>   	}
>>   
>>   	ret = -ENOMEM;
>> @@ -6823,9 +6830,14 @@ static int instance_mkdir(const char *name)
>>   	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
>>   		goto out_free_tr;
>>   
>> -	tr->dir = tracefs_create_dir(name, trace_instance_dir);
>> -	if (!tr->dir)
>> -		goto out_free_tr;
>> +	if (instance_type == INSTANCE_MNT) {
>> +		mnt_root->d_inode->i_private = tr;
>> +		tr->dir = mnt_root;
>> +	} else {
>> +		tr->dir = tracefs_create_dir(name, trace_instance_dir);
>> +		if (!tr->dir)
>> +			goto out_free_tr;
>> +	}
>>   
>>   	ret = event_trace_add_tracer(tr->dir, tr);
>>   	if (ret) {
>> @@ -6856,8 +6868,10 @@ static int instance_mkdir(const char *name)
>>   
>>   }
>>   
>> -static int instance_rmdir(const char *name)
>> +static int instance_rmdir(int instance_type, void *data)
>>   {
>> +	const char *name =  "tracing";
>> +	struct dentry *mnt_root = NULL;
>>   	struct trace_array *tr;
>>   	int found = 0;
>>   	int ret;
>> @@ -6865,15 +6879,21 @@ static int instance_rmdir(const char *name)
>>   
>>   	mutex_lock(&trace_types_lock);
>>   
>> -	ret = -ENODEV;
>> -	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
>> -		if (tr->name && strcmp(tr->name, name) == 0) {
>> -			found = 1;
>> -			break;
>> +	if (instance_type == INSTANCE_MNT) {
>> +		mnt_root = data;
>> +		tr = mnt_root->d_inode->i_private;
>> +	} else {
>> +		name = data;
>> +		ret = -ENODEV;
>> +		list_for_each_entry(tr, &ftrace_trace_arrays, list) {
>> +			if (tr->name && strcmp(tr->name, name) == 0) {
>> +				found = 1;
>> +				break;
>> +			}
>>   		}
>> +		if (!found)
>> +			goto out_unlock;
>>   	}
>> -	if (!found)
>> -		goto out_unlock;
>>   
>>   	ret = -EBUSY;
>>   	if (tr->ref || (tr->current_trace && tr->current_trace->ref))

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ