netdev - Re: [PATCH bpf-next 1/2] libbpf: improve string handling for uprobe name-based attach

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAEf4BzZuz5NVzDa=srfvuMtMg6Jmy85bAaBkgSXiz8h2aTQ9Hw@mail.gmail.com>
Date:   Tue, 5 Apr 2022 17:06:30 -0700
From:   Andrii Nakryiko <andrii.nakryiko@...il.com>
To:     Alan Maguire <alan.maguire@...cle.com>
Cc:     Andrii Nakryiko <andrii@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Alexei Starovoitov <ast@...nel.org>, Martin Lau <kafai@...com>,
        Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
        john fastabend <john.fastabend@...il.com>,
        KP Singh <kpsingh@...nel.org>, bpf <bpf@...r.kernel.org>,
        Networking <netdev@...r.kernel.org>
Subject: Re: [PATCH bpf-next 1/2] libbpf: improve string handling for uprobe
 name-based attach

On Tue, Apr 5, 2022 at 2:46 PM Alan Maguire <alan.maguire@...cle.com> wrote:
>
> For uprobe attach, libraries are identified by matching a ".so"
> substring in the binary path.  This matches a lot of patterns that do
> not conform to library .so[.version] suffixes, so instead match a ".so"
> _suffix_, and if that fails match a ".so." substring for the versioned
> library case.
>

You are making two separate changes in one patch, let's split them.

> For uprobe auto-attach, the parsing can be simplified for the SEC()
> name to a single ssscanf(); the return value of the sscanf can then

too many sss :)

> be used to distinguish between sections that simply specify
> "u[ret]probe" (and thus cannot auto-attach), those that specify
> "u[ret]probe/binary_path:function+offset" etc.
>
> Suggested-by: Andrii Nakryiko <andrii@...nel.org>
> Signed-off-by: Alan Maguire <alan.maguire@...cle.com>
> ---
>  tools/lib/bpf/libbpf.c          | 77 ++++++++++++++++-------------------------
>  tools/lib/bpf/libbpf_internal.h |  5 +++
>  2 files changed, 35 insertions(+), 47 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 91ce94b..3f23e88 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10750,7 +10750,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
>         const char *search_paths[3] = {};
>         int i;
>
> -       if (strstr(file, ".so")) {
> +       if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
>                 search_paths[0] = getenv("LD_LIBRARY_PATH");
>                 search_paths[1] = "/usr/lib64:/usr/lib";
>                 search_paths[2] = arch_specific_lib_paths();
> @@ -10897,60 +10897,43 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
>  static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
>  {
>         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
> -       char *func, *probe_name, *func_end;
> -       char *func_name, binary_path[512];
> -       unsigned long long raw_offset;
> +       char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
> +       int n, ret = -EINVAL;
>         size_t offset = 0;
> -       int n;
>
>         *link = NULL;
>
> -       opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");
> -       if (opts.retprobe)
> -               probe_name = prog->sec_name + sizeof("uretprobe") - 1;
> -       else
> -               probe_name = prog->sec_name + sizeof("uprobe") - 1;
> -       if (probe_name[0] == '/')
> -               probe_name++;
> -
> -       /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
> -       if (strlen(probe_name) == 0)
> -               return 0;
> -
> -       snprintf(binary_path, sizeof(binary_path), "%s", probe_name);
> -       /* ':' should be prior to function+offset */
> -       func_name = strrchr(binary_path, ':');
> -       if (!func_name) {
> +       n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%zu",

Note that previously you were using %li for the offset, which accepts
both decimal and hexadecimal formats. I think that's convenient, so
let's keep allowing that.

> +                  &probe_type, &binary_path, &func_name, &offset);
> +       switch (n) {
> +       case 1:
> +               /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
> +               ret = 0;
> +               break;
> +       case 2:
>                 pr_warn("section '%s' missing ':function[+offset]' specification\n",
>                         prog->sec_name);

please use 'prog '%s': ' prefix in these attach_xxx() functions for consistency

> -               return -EINVAL;
> -       }
> -       func_name[0] = '\0';
> -       func_name++;
> -       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
> -       if (n < 1) {
> -               pr_warn("uprobe name '%s' is invalid\n", func_name);
> -               return -EINVAL;
> -       }
> -       if (opts.retprobe && offset != 0) {
> -               free(func);
> -               pr_warn("uretprobes do not support offset specification\n");
> -               return -EINVAL;
> -       }
> -
> -       /* Is func a raw address? */
> -       errno = 0;
> -       raw_offset = strtoull(func, &func_end, 0);
> -       if (!errno && !*func_end) {
> -               free(func);
> -               func = NULL;
> -               offset = (size_t)raw_offset;
> +               break;
> +       case 3:
> +       case 4:
> +               opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");

You just parsed probe_type; strcmp() against that instead, there is no
need for a prefix check.

> +               if (opts.retprobe && offset != 0) {
> +                       pr_warn("uretprobes do not support offset specification\n");
> +                       break;
> +               }
> +               opts.func_name = func_name;
> +               *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
> +               ret = libbpf_get_error(*link);
> +               break;
> +       default:
> +               pr_warn("uprobe name '%s' is invalid\n", prog->sec_name);

Add "prog '%s': " prefix. Also, the section name is not a uprobe
name. Maybe "prog '%s': invalid format of section definition '%s'\n"?

> +               break;
>         }
> -       opts.func_name = func;
> +       free(probe_type);
> +       free(binary_path);
> +       free(func_name);
>
> -       *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
> -       free(func);
> -       return libbpf_get_error(*link);
> +       return ret;
>  }
>
>  struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
> diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
> index b6247dc..155702a 100644
> --- a/tools/lib/bpf/libbpf_internal.h
> +++ b/tools/lib/bpf/libbpf_internal.h
> @@ -103,6 +103,11 @@
>  #define str_has_pfx(str, pfx) \
>         (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
>
> +/* similar for suffix */
> +#define str_has_sfx(str, sfx) \
> +       (strlen(sfx) <= strlen(str) ? \
> +        strncmp(str + strlen(str) - strlen(sfx), sfx, strlen(sfx)) == 0 : 0)
> +

So str_has_pfx() is a macro in order to avoid strlen() for string literals.
Here you don't do any optimization like that, and instead calculate
and recalculate strlen() multiple times. Just make this a static
inline helper function?

And you don't need strncmp() anymore; strcmp() is just as safe after all
the strlen() checks and calculations.



>  /* Symbol versioning is different between static and shared library.
>   * Properly versioned symbols are needed for shared library, but
>   * only the symbol of the new version is needed for static library.
> --
> 1.8.3.1
>