lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 8 Aug 2019 05:09:51 +0000
From:   Yonghong Song <yhs@...com>
To:     Carlos Antonio Neira Bustos <cneirabustos@...il.com>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC:     "ebiederm@...ssion.com" <ebiederm@...ssion.com>,
        "brouer@...hat.com" <brouer@...hat.com>,
        "quentin.monnet@...ronome.com" <quentin.monnet@...ronome.com>
Subject: Re: [PATCH v5 bpf-next] BPF: helpers: New helper to obtain namespace
 data from current task



On 8/7/19 6:22 PM, Carlos Antonio Neira Bustos wrote:
> The code has been modified to avoid syscalls that could sleep.
> Please let me know if any other modification is needed.
> 
>  From be0384c0fa209a78c1567936e8db4e35b9a7c0f8 Mon Sep 17 00:00:00 2001
> From: Carlos <cneirabustos@...il.com>
> Date: Wed, 7 Aug 2019 20:04:30 -0400
> Subject: [PATCH] [PATCH v5 bpf-next] BPF: New helper to obtain namespace data
>   from current task
> 
> This helper obtains the active namespace from current and returns pid, tgid,
> device and namespace id as seen from that namespace, allowing to instrument
> a process inside a container.
> Device is read from /proc/self/ns/pid, as in the future it's possible that
> different pid_ns files may belong to different devices, according
> to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> conference.
> Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> scripts but this helper returns the pid as seen by the root namespace which is
> fine when a bcc script is not executed inside a container.
> When the process of interest is inside a container, pid filtering will not work
> if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> returning the pid as it's seen by the current namespace where the script is
> executing.
> 
> This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> used to do pid filtering even inside a container.
> 
> For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
> 
>          u32 pid = bpf_get_current_pid_tgid() >> 32;
>          if (pid != <pid_arg_passed_in>)
>                  return 0;
> Could be modified to use bpf_get_current_pidns_info() as follows:
> 
>          struct bpf_pidns pidns;
>          bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
>          u32 pid = pidns.tgid;
>          u32 nsid = pidns.nsid;
>          if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
>                  return 0;
> 
> To find out the name PID namespace id of a process, you could use this command:
> 
> $ ps -h -o pidns -p <pid_of_interest>
> 
> Or this other command:
> 
> $ ls -Li /proc/<pid_of_interest>/ns/pid
> 
> Signed-off-by: Carlos Neira <cneirabustos@...il.com>
> ---
>   fs/namei.c                                         |   2 +-
>   include/linux/bpf.h                                |   1 +
>   include/linux/namei.h                              |   4 +
>   include/uapi/linux/bpf.h                           |  29 ++++-
>   kernel/bpf/core.c                                  |   1 +
>   kernel/bpf/helpers.c                               |  78 ++++++++++++
>   kernel/trace/bpf_trace.c                           |   2 +
>   samples/bpf/Makefile                               |   3 +
>   samples/bpf/trace_ns_info_user.c                   |  35 ++++++
>   samples/bpf/trace_ns_info_user_kern.c              |  44 +++++++
>   tools/include/uapi/linux/bpf.h                     |  29 ++++-
>   tools/testing/selftests/bpf/Makefile               |   2 +-
>   tools/testing/selftests/bpf/bpf_helpers.h          |   3 +
>   .../testing/selftests/bpf/progs/test_pidns_kern.c  |  51 ++++++++
>   tools/testing/selftests/bpf/test_pidns.c           | 138 +++++++++++++++++++++
>   15 files changed, 418 insertions(+), 4 deletions(-)
>   create mode 100644 samples/bpf/trace_ns_info_user.c
>   create mode 100644 samples/bpf/trace_ns_info_user_kern.c
>   create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_kern.c
>   create mode 100644 tools/testing/selftests/bpf/test_pidns.c
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index 209c51a5226c..d1eca36972d2 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -19,7 +19,6 @@
>   #include <linux/export.h>
>   #include <linux/kernel.h>
>   #include <linux/slab.h>
> -#include <linux/fs.h>
>   #include <linux/namei.h>
>   #include <linux/pagemap.h>
>   #include <linux/fsnotify.h>
> @@ -2355,6 +2354,7 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
>   	putname(name);
>   	return retval;
>   }
> +EXPORT_SYMBOL(filename_lookup);

No need to export symbols. bpf uses it and bpf is in the core, not in 
modules.

>   
>   /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
>   static int path_parentat(struct nameidata *nd, unsigned flags,
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f9a506147c8a..e4adf5e05afd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1050,6 +1050,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
>   extern const struct bpf_func_proto bpf_strtol_proto;
>   extern const struct bpf_func_proto bpf_strtoul_proto;
>   extern const struct bpf_func_proto bpf_tcp_sock_proto;
> +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
>   
>   /* Shared helpers among cBPF and eBPF. */
>   void bpf_user_rnd_init_once(void);
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 9138b4471dbf..2c24e8c71d46 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -6,6 +6,7 @@
>   #include <linux/path.h>
>   #include <linux/fcntl.h>
>   #include <linux/errno.h>
> +#include <linux/fs.h>
>   
>   enum { MAX_NESTED_LINKS = 8 };
>   
> @@ -97,6 +98,9 @@ extern void unlock_rename(struct dentry *, struct dentry *);
>   
>   extern void nd_jump_link(struct path *path);
>   
> +extern int filename_lookup(int dfd, struct filename *name, unsigned int flags,
> +		    struct path *path, struct path *root);

The previous definition in fs/internal.h should be removed.

> +
>   static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
>   {
>   	((char *) name)[min(len, maxlen)] = '\0';
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4393bd4b2419..6f601f7106e2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2741,6 +2741,26 @@ union bpf_attr {
>    *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
>    *
>    *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
> + *
> + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> + *	Description
> + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> + *		current namespace and also device from /proc/self/ns/pid.
> + *		*size_of_pidns* must be the size of *pidns*
> + *
> + *		This helper is used when pid filtering is needed inside a
> + *		container as bpf_get_current_tgid() helper returns always the
> + *		pid id as seen by the root namespace.
> + *	Return
> + *		0 on success
> + *
> + *		**-EINVAL**  if unable to get ns, pid or tgid of current task.
> + *		Or if size_of_pidns is not valid.

Maybe reword by following the code sequence.
    if *size_of_pidns* is not valid or unable to get ns, pid or tgid of
    the current task.

> + *
> + *		**-ENOMEM**  if allocation fails.

Maybe some other error codes in filename_lookup() function?

> + *
> + *		If unable to get the inode from /proc/self/ns/pid an error code
> + *		will be returned.

You do not need this. The description of error code cases should cover this.

>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -2853,7 +2873,8 @@ union bpf_attr {
>   	FN(sk_storage_get),		\
>   	FN(sk_storage_delete),		\
>   	FN(send_signal),		\
> -	FN(tcp_gen_syncookie),
> +	FN(tcp_gen_syncookie),		\
> +	FN(get_current_pidns_info),
>   
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>    * function eBPF program intends to call
> @@ -3604,4 +3625,10 @@ struct bpf_sockopt {
>   	__s32	retval;
>   };
>   
> +struct bpf_pidns_info {
> +	__u32 dev;
> +	__u32 nsid;
> +	__u32 tgid;
> +	__u32 pid;
> +};
>   #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 8191a7db2777..3159f2a0188c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2038,6 +2038,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
>   const struct bpf_func_proto bpf_get_current_comm_proto __weak;
>   const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
>   const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
>   
>   const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
>   {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5e28718928ca..571f24077db2 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -11,6 +11,12 @@
>   #include <linux/uidgid.h>
>   #include <linux/filter.h>
>   #include <linux/ctype.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/major.h>
> +#include <linux/stat.h>
> +#include <linux/namei.h>
> +#include <linux/version.h>
> +
>   
>   #include "../../lib/kstrtox.h"
>   
> @@ -312,6 +318,78 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
>   	preempt_enable();
>   }
>   
> +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> +	 size)
> +{
> +	const char *name = "/proc/self/ns/pid";

maybe rename this variable to pidns_path?

> +	struct pid_namespace *pidns = NULL;
> +	struct filename *tmp = NULL;

Maybe rename this variable to name?

> +	int len = strlen(name) + 1;

We can delay this assignment later until it is needed.

> +	struct inode *inode;
> +	struct path kp;
> +	pid_t tgid = 0;
> +	pid_t pid = 0;
> +	int ret;
> +
> +	if (unlikely(size != sizeof(struct bpf_pidns_info)))
> +		return -EINVAL;
> +
> +	pidns = task_active_pid_ns(current);
> +

we can save an empty line here.

> +	if (unlikely(!pidns))
> +		goto clear;
> +
> +	pidns_info->nsid =  pidns->ns.inum;
> +	pid = task_pid_nr_ns(current, pidns);
> +

We can save an empty line here.

> +	if (unlikely(!pid))
> +		goto clear;
> +
> +	tgid = task_tgid_nr_ns(current, pidns);
> +
ditto. save an empty line.
> +	if (unlikely(!tgid))
> +		goto clear;
> +
> +	pidns_info->tgid = (u32) tgid;
> +	pidns_info->pid = (u32) pid;
> +
> +	tmp = kmem_cache_alloc(names_cachep, GFP_ATOMIC);
> +	if (unlikely(!tmp)) {
> +		memset((void *)pidns_info, 0, (size_t) size);
> +		return -ENOMEM;
> +	}
> +
> +	memcpy((char *)tmp->name, name, len);
> +	tmp->uptr = NULL;
> +	tmp->aname = NULL;
> +	tmp->refcnt = 1;
> +
ditto. save an empty line.
> +	ret = filename_lookup(AT_FDCWD, tmp, 0, &kp, NULL);
> +
ditto. save an empty line.
> +	if (ret) {
> +		memset((void *)pidns_info, 0, (size_t) size);
> +		return ret;
> +	}
> +
> +	inode = d_backing_inode(kp.dentry);
> +	pidns_info->dev = inode->i_sb->s_dev;
> +
> +	return 0;
> +
> +clear:
> +	memset((void *)pidns_info, 0, (size_t) size);
> +
save an empty line.
> +	return -EINVAL;
> +}
> +
> +const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
> +	.func	= bpf_get_current_pidns_info,
make the "= " aligned with others?
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
> +	.arg2_type	= ARG_CONST_SIZE,
> +};
> +
>   #ifdef CONFIG_CGROUPS
>   BPF_CALL_0(bpf_get_current_cgroup_id)
>   {
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index ca1255d14576..5e1dc22765a5 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -709,6 +709,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   #endif
>   	case BPF_FUNC_send_signal:
>   		return &bpf_send_signal_proto;
> +	case BPF_FUNC_get_current_pidns_info:
> +		return &bpf_get_current_pidns_info_proto;
>   	default:
>   		return NULL;
>   	}
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index 1d9be26b4edd..238453ff27d2 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -53,6 +53,7 @@ hostprogs-y += task_fd_query
>   hostprogs-y += xdp_sample_pkts
>   hostprogs-y += ibumad
>   hostprogs-y += hbm
> +hostprogs-y += trace_ns_info
[...]

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ