lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 8 May 2018 21:09:18 +0000
From:   Song Liu <songliubraving@...com>
To:     Martin Lau <kafai@...com>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Alexei Starovoitov <ast@...com>,
        Daniel Borkmann <daniel@...earbox.net>,
        Kernel Team <Kernel-team@...com>
Subject: Re: [PATCH bpf-next 2/6] bpf: btf: Introduce BTF ID



> On May 4, 2018, at 2:49 PM, Martin KaFai Lau <kafai@...com> wrote:
> 
> This patch gives an ID to each loaded BTF.  The ID is allocated by
> the idr like the existing prog-id and map-id.
> 
> The btf_put(map->btf) is moved to __bpf_map_put() so that the
> userspace can stop seeing the BTF ID ASAP when the last BTF
> refcnt is gone.
> 
> It also makes BTF accessible from userspace through the
> 1. new BPF_BTF_GET_FD_BY_ID command.  It is limited to CAP_SYS_ADMIN
>   which is in line with the BPF_BTF_LOAD cmd and the existing
>   BPF_[MAP|PROG]_GET_FD_BY_ID cmd.
> 2. new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info"
> 
> Once the BTF ID handler is accessible from userspace, freeing a BTF
> object has to go through a rcu period.  The BPF_BTF_GET_FD_BY_ID cmd
> can then be done under a rcu_read_lock() instead of taking
> spin_lock.
> [Note: A similar rcu usage can be done to the existing
>       bpf_prog_get_fd_by_id() in a follow up patch]
> 
> When processing the BPF_BTF_GET_FD_BY_ID cmd,
> refcount_inc_not_zero() is needed because the BTF object
> could already be in the rcu dead row.  btf_get() is
> removed since its usage is currently limited to btf.c
> alone.  refcount_inc() is used directly instead.
> 
> Signed-off-by: Martin KaFai Lau <kafai@...com>
> Acked-by: Alexei Starovoitov <ast@...com>
> ---
> include/linux/btf.h      |   2 +
> include/uapi/linux/bpf.h |   5 +++
> kernel/bpf/btf.c         | 108 ++++++++++++++++++++++++++++++++++++++++++-----
> kernel/bpf/syscall.c     |  24 ++++++++++-
> 4 files changed, 128 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index a966dc6d61ee..e076c4697049 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
> 					u32 *ret_size);
> void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
> 		       struct seq_file *m);
> +int btf_get_fd_by_id(u32 id);
> +u32 btf_id(const struct btf *btf);
> 
> #endif
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 93d5a4eeec2a..6106f23a9a8a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -96,6 +96,7 @@ enum bpf_cmd {
> 	BPF_PROG_QUERY,
> 	BPF_RAW_TRACEPOINT_OPEN,
> 	BPF_BTF_LOAD,
> +	BPF_BTF_GET_FD_BY_ID,
> };
> 
> enum bpf_map_type {
> @@ -344,6 +345,7 @@ union bpf_attr {
> 			__u32		start_id;
> 			__u32		prog_id;
> 			__u32		map_id;
> +			__u32		btf_id;
> 		};
> 		__u32		next_id;
> 		__u32		open_flags;
> @@ -2130,6 +2132,9 @@ struct bpf_map_info {
> 	__u32 ifindex;
> 	__u64 netns_dev;
> 	__u64 netns_ino;
> +	__u32 btf_id;
> +	__u32 btf_key_id;
> +	__u32 btf_value_id;
> } __attribute__((aligned(8)));
> 
> /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index fa0dce0452e7..40950b6bf395 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -11,6 +11,7 @@
> #include <linux/file.h>
> #include <linux/uaccess.h>
> #include <linux/kernel.h>
> +#include <linux/idr.h>
> #include <linux/bpf_verifier.h>
> #include <linux/btf.h>
> 
> @@ -179,6 +180,9 @@
> 	     i < btf_type_vlen(struct_type);				\
> 	     i++, member++)
> 
> +static DEFINE_IDR(btf_idr);
> +static DEFINE_SPINLOCK(btf_idr_lock);
> +
> struct btf {
> 	union {
> 		struct btf_header *hdr;
> @@ -193,6 +197,8 @@ struct btf {
> 	u32 types_size;
> 	u32 data_size;
> 	refcount_t refcnt;
> +	u32 id;
> +	struct rcu_head rcu;
> };
> 
> enum verifier_phase {
> @@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
> 	return 0;
> }
> 
> +static int btf_alloc_id(struct btf *btf)
> +{
> +	int id;
> +
> +	idr_preload(GFP_KERNEL);
> +	spin_lock_bh(&btf_idr_lock);
> +	id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
> +	if (id > 0)
> +		btf->id = id;
> +	spin_unlock_bh(&btf_idr_lock);
> +	idr_preload_end();
> +
> +	if (WARN_ON_ONCE(!id))
> +		return -ENOSPC;
> +
> +	return id > 0 ? 0 : id;
> +}
> +
> +static void btf_free_id(struct btf *btf)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * In map-in-map, calling map_delete_elem() on outer
> +	 * map will call bpf_map_put on the inner map.
> +	 * It will then eventually call btf_free_id()
> +	 * on the inner map.  Some of the map_delete_elem()
> +	 * implementation may have irq disabled, so
> +	 * we need to use the _irqsave() version instead
> +	 * of the _bh() version.
> +	 */
> +	spin_lock_irqsave(&btf_idr_lock, flags);
> +	idr_remove(&btf_idr, btf->id);
> +	spin_unlock_irqrestore(&btf_idr_lock, flags);
> +}
> +
> static void btf_free(struct btf *btf)
> {
> 	kvfree(btf->types);
> @@ -607,15 +649,19 @@ static void btf_free(struct btf *btf)
> 	kfree(btf);
> }
> 
> -static void btf_get(struct btf *btf)
> +static void btf_free_rcu(struct rcu_head *rcu)
> {
> -	refcount_inc(&btf->refcnt);
> +	struct btf *btf = container_of(rcu, struct btf, rcu);
> +
> +	btf_free(btf);
> }
> 
> void btf_put(struct btf *btf)
> {
> -	if (btf && refcount_dec_and_test(&btf->refcnt))
> -		btf_free(btf);
> +	if (btf && refcount_dec_and_test(&btf->refcnt)) {
> +		btf_free_id(btf);
> +		call_rcu(&btf->rcu, btf_free_rcu);
> +	}
> }
> 
> static int env_resolve_init(struct btf_verifier_env *env)
> @@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = {
> 	.release	= btf_release,
> };
> 
> +static int __btf_new_fd(struct btf *btf)
> +{
> +	return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
> +}
> +
> int btf_new_fd(const union bpf_attr *attr)
> {
> 	struct btf *btf;
> -	int fd;
> +	int ret;
> 
> 	btf = btf_parse(u64_to_user_ptr(attr->btf),
> 			attr->btf_size, attr->btf_log_level,
> @@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr)
> 	if (IS_ERR(btf))
> 		return PTR_ERR(btf);
> 
> -	fd = anon_inode_getfd("btf", &btf_fops, btf,
> -			      O_RDONLY | O_CLOEXEC);
> -	if (fd < 0)
> +	ret = btf_alloc_id(btf);
> +	if (ret) {
> +		btf_free(btf);
> +		return ret;
> +	}
> +
> +	/*
> +	 * The BTF ID is published to the userspace.
> +	 * All BTF free must go through call_rcu() from
> +	 * now on (i.e. free by calling btf_put()).
> +	 */
> +
> +	ret = __btf_new_fd(btf);
> +	if (ret < 0)
> 		btf_put(btf);
> 
> -	return fd;
> +	return ret;
> }
> 
> struct btf *btf_get_by_fd(int fd)
> @@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd)
> 	}
> 
> 	btf = f.file->private_data;
> -	btf_get(btf);
> +	refcount_inc(&btf->refcnt);
> 	fdput(f);
> 
> 	return btf;
> @@ -2062,3 +2124,29 @@ int btf_get_info_by_fd(const struct btf *btf,
> 
> 	return 0;
> }
> +
> +int btf_get_fd_by_id(u32 id)
> +{
> +	struct btf *btf;
> +	int fd;
> +
> +	rcu_read_lock();
> +	btf = idr_find(&btf_idr, id);
> +	if (!btf || !refcount_inc_not_zero(&btf->refcnt))
> +		btf = ERR_PTR(-ENOENT);
> +	rcu_read_unlock();
> +
> +	if (IS_ERR(btf))
> +		return PTR_ERR(btf);
> +
> +	fd = __btf_new_fd(btf);
> +	if (fd < 0)
> +		btf_put(btf);
> +
> +	return fd;
> +}
> +
> +u32 btf_id(const struct btf *btf)
> +{
> +	return btf->id;
> +}
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 263e13ede029..8b0a45d65454 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -252,7 +252,6 @@ static void bpf_map_free_deferred(struct work_struct *work)
> 
> 	bpf_map_uncharge_memlock(map);
> 	security_bpf_map_free(map);
> -	btf_put(map->btf);
> 	/* implementation dependent freeing */
> 	map->ops->map_free(map);
> }
> @@ -273,6 +272,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
> 	if (atomic_dec_and_test(&map->refcnt)) {
> 		/* bpf_map_free_id() must be called first */
> 		bpf_map_free_id(map, do_idr_lock);
> +		btf_put(map->btf);
> 		INIT_WORK(&map->work, bpf_map_free_deferred);
> 		schedule_work(&map->work);
> 	}
> @@ -2000,6 +2000,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
> 	info.map_flags = map->map_flags;
> 	memcpy(info.name, map->name, sizeof(map->name));
> 
> +	if (map->btf) {
> +		info.btf_id = btf_id(map->btf);
> +		info.btf_key_id = map->btf_key_id;
> +		info.btf_value_id = map->btf_value_id;
> +	}
> +
> 	if (bpf_map_is_dev_bound(map)) {
> 		err = bpf_map_offload_info_fill(&info, map);
> 		if (err)
> @@ -2057,6 +2063,19 @@ static int bpf_btf_load(const union bpf_attr *attr)
> 	return btf_new_fd(attr);
> }
> 
> +#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
> +
> +static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
> +{
> +	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
> +		return -EINVAL;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return btf_get_fd_by_id(attr->btf_id);
> +}
> +
> SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
> {
> 	union bpf_attr attr = {};
> @@ -2140,6 +2159,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
> 	case BPF_BTF_LOAD:
> 		err = bpf_btf_load(&attr);
> 		break;
> +	case BPF_BTF_GET_FD_BY_ID:
> +		err = bpf_btf_get_fd_by_id(&attr);
> +		break;
> 	default:
> 		err = -EINVAL;
> 		break;
> -- 
> 2.9.5
> 


Acked-by: Song Liu <songliubraving@...com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ