netdev - Re: [PATCH bpf-next 1/2] Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <YqNsWAH24bAIPjqy@google.com>
Date:   Fri, 10 Jun 2022 09:07:52 -0700
From:   sdf@...gle.com
To:     Quentin Monnet <quentin@...valent.com>
Cc:     Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Yafang Shao <laoar.shao@...il.com>,
        Harsh Modi <harshmodi@...gle.com>,
        Paul Chaignon <paul@...ium.io>, netdev@...r.kernel.org,
        bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next 1/2] Revert "bpftool: Use libbpf 1.0 API mode
 instead of RLIMIT_MEMLOCK"

On 06/10, Quentin Monnet wrote:
> This reverts commit a777e18f1bcd32528ff5dfd10a6629b655b05eb8.

> In commit a777e18f1bcd ("bpftool: Use libbpf 1.0 API mode instead of
> RLIMIT_MEMLOCK"), we removed the rlimit bump in bpftool, because the
> kernel has switched to memcg-based memory accounting. Thanks to the
> LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK, we attempted to keep compatibility
> with other systems and ask libbpf to raise the limit for us if
> necessary.

> How do we know if memcg-based accounting is supported? There is a probe
> in libbpf to check this. But this probe currently relies on the
> availability of a given BPF helper, bpf_ktime_get_coarse_ns(), which
> landed in the same kernel version as the memory accounting change. This
> works in the generic case, but it may fail, for example, if the helper
> function has been backported to an older kernel. This has been observed
> for Google Cloud's Container-Optimized OS (COS), where the helper is
> available but rlimit is still in use. The probe succeeds, the rlimit is
> not raised, and probing features with bpftool, for example, fails.

> A patch was submitted [0] to update this probe in libbpf, based on what
> the cilium/ebpf Go library does [1]. It would lower the soft rlimit to
> 0, attempt to load a BPF object, and reset the rlimit. But it may induce
> some hard-to-debug flakiness if another process starts, or the current
> application is killed, while the rlimit is reduced, and the approach was
> discarded.

> As a workaround to ensure that the rlimit bump does not depend on the
> availability of a given helper, we restore the unconditional rlimit bump
> in bpftool for now.

> [0]  
> https://lore.kernel.org/bpf/20220609143614.97837-1-quentin@isovalent.com/
> [1] https://github.com/cilium/ebpf/blob/v0.9.0/rlimit/rlimit.go#L39

> Cc: Yafang Shao <laoar.shao@...il.com>
> Signed-off-by: Quentin Monnet <quentin@...valent.com>
> ---
>   tools/bpf/bpftool/common.c     | 8 ++++++++
>   tools/bpf/bpftool/feature.c    | 2 ++
>   tools/bpf/bpftool/main.c       | 6 +++---
>   tools/bpf/bpftool/main.h       | 2 ++
>   tools/bpf/bpftool/map.c        | 2 ++
>   tools/bpf/bpftool/pids.c       | 1 +
>   tools/bpf/bpftool/prog.c       | 3 +++
>   tools/bpf/bpftool/struct_ops.c | 2 ++
>   8 files changed, 23 insertions(+), 3 deletions(-)

> diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
> index a45b42ee8ab0..a0d4acd7c54a 100644
> --- a/tools/bpf/bpftool/common.c
> +++ b/tools/bpf/bpftool/common.c
> @@ -17,6 +17,7 @@
>   #include <linux/magic.h>
>   #include <net/if.h>
>   #include <sys/mount.h>
> +#include <sys/resource.h>
>   #include <sys/stat.h>
>   #include <sys/vfs.h>

> @@ -72,6 +73,13 @@ static bool is_bpffs(char *path)
>   	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
>   }

> +void set_max_rlimit(void)
> +{
> +	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
> +
> +	setrlimit(RLIMIT_MEMLOCK, &rinf);

Do you think it might make sense to print to stderr some warning if
we actually happen to adjust this limit?

if (getrlimit(MEMLOCK) != RLIM_INFINITY) {
	fprintf(stderr, "Warning: resetting MEMLOCK rlimit to
	infinity!\n");
	setrlimit(RLIMIT_MEMLOCK, &rinf);
}

?

Because while it's nice that we automatically do this, this might still
lead to surprises for some users. OTOH, not sure whether people
actually read those warnings? :-/

> +}
> +
>   static int
>   mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
>   {
> diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
> index cc9e4df8c58e..bac4ef428a02 100644
> --- a/tools/bpf/bpftool/feature.c
> +++ b/tools/bpf/bpftool/feature.c
> @@ -1167,6 +1167,8 @@ static int do_probe(int argc, char **argv)
>   	__u32 ifindex = 0;
>   	char *ifname;

> +	set_max_rlimit();
> +
>   	while (argc) {
>   		if (is_prefix(*argv, "kernel")) {
>   			if (target != COMPONENT_UNSPEC) {
> diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
> index 9062ef2b8767..e81227761f5d 100644
> --- a/tools/bpf/bpftool/main.c
> +++ b/tools/bpf/bpftool/main.c
> @@ -507,9 +507,9 @@ int main(int argc, char **argv)
>   		 * It will still be rejected if users use LIBBPF_STRICT_ALL
>   		 * mode for loading generated skeleton.
>   		 */
> -		libbpf_set_strict_mode(LIBBPF_STRICT_ALL &  
> ~LIBBPF_STRICT_MAP_DEFINITIONS);
> -	} else {
> -		libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);
> +		ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL &  
> ~LIBBPF_STRICT_MAP_DEFINITIONS);
> +		if (ret)
> +			p_err("failed to enable libbpf strict mode: %d", ret);
>   	}

>   	argc -= optind;
> diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
> index 6c311f47147e..589cb76b227a 100644
> --- a/tools/bpf/bpftool/main.h
> +++ b/tools/bpf/bpftool/main.h
> @@ -96,6 +96,8 @@ int detect_common_prefix(const char *arg, ...);
>   void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
>   void usage(void) __noreturn;

> +void set_max_rlimit(void);
> +
>   int mount_tracefs(const char *target);

>   struct obj_ref {
> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index 800834be1bcb..38b6bc9c26c3 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -1326,6 +1326,8 @@ static int do_create(int argc, char **argv)
>   		goto exit;
>   	}

> +	set_max_rlimit();
> +
>   	fd = bpf_map_create(map_type, map_name, key_size, value_size,  
> max_entries, &attr);
>   	if (fd < 0) {
>   		p_err("map create failed: %s", strerror(errno));
> diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
> index e2d00d3cd868..bb6c969a114a 100644
> --- a/tools/bpf/bpftool/pids.c
> +++ b/tools/bpf/bpftool/pids.c
> @@ -108,6 +108,7 @@ int build_obj_refs_table(struct hashmap **map, enum  
> bpf_obj_type type)
>   		p_err("failed to create hashmap for PID references");
>   		return -1;
>   	}
> +	set_max_rlimit();

>   	skel = pid_iter_bpf__open();
>   	if (!skel) {
> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
> index e71f0b2da50b..f081de398b60 100644
> --- a/tools/bpf/bpftool/prog.c
> +++ b/tools/bpf/bpftool/prog.c
> @@ -1590,6 +1590,8 @@ static int load_with_options(int argc, char **argv,  
> bool first_prog_only)
>   		}
>   	}

> +	set_max_rlimit();
> +
>   	if (verifier_logs)
>   		/* log_level1 + log_level2 + stats, but not stable UAPI */
>   		open_opts.kernel_log_level = 1 + 2 + 4;
> @@ -2287,6 +2289,7 @@ static int do_profile(int argc, char **argv)
>   		}
>   	}

> +	set_max_rlimit();
>   	err = profiler_bpf__load(profile_obj);
>   	if (err) {
>   		p_err("failed to load profile_obj");
> diff --git a/tools/bpf/bpftool/struct_ops.c  
> b/tools/bpf/bpftool/struct_ops.c
> index 2535f079ed67..e08a6ff2866c 100644
> --- a/tools/bpf/bpftool/struct_ops.c
> +++ b/tools/bpf/bpftool/struct_ops.c
> @@ -501,6 +501,8 @@ static int do_register(int argc, char **argv)
>   	if (libbpf_get_error(obj))
>   		return -1;

> +	set_max_rlimit();
> +
>   	if (bpf_object__load(obj)) {
>   		bpf_object__close(obj);
>   		return -1;
> --
> 2.34.1