lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJ+HfNhuOegRbmi9=A50e_rxLttA7dDS_8GTQR6fneGgWmNsVw@mail.gmail.com>
Date:   Wed, 23 Jan 2019 15:24:50 +0100
From:   Björn Töpel <bjorn.topel@...il.com>
To:     Daniel Borkmann <daniel@...earbox.net>
Cc:     ast@...nel.org, Netdev <netdev@...r.kernel.org>,
        Björn Töpel <bjorn.topel@...el.com>,
        "Karlsson, Magnus" <magnus.karlsson@...el.com>,
        Magnus Karlsson <magnus.karlsson@...il.com>
Subject: Re: [PATCH bpf-next 3/3] xsk: add sock_diag interface for AF_XDP

Den ons 23 jan. 2019 kl 14:19 skrev Daniel Borkmann <daniel@...earbox.net>:
>
> On 01/18/2019 02:03 PM, bjorn.topel@...il.com wrote:
> > From: Björn Töpel <bjorn.topel@...el.com>
> >
> > This patch adds the sock_diag interface for querying sockets from user
> > space. Tools like iproute2 ss(8) can use this interface to list open
> > AF_XDP sockets.
> >
> > The user-space ABI is defined in linux/xdp_diag.h and includes netlink
> > request and response structs. The request can query sockets and the
> > response contains socket information about the rings, umems, inode and
> > more.
> >
> > Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
>
> Series looks good, few minor nits inline:
>
> > ---
> >  include/uapi/linux/xdp_diag.h |  72 +++++++++++++
> >  net/xdp/Kconfig               |   8 ++
> >  net/xdp/Makefile              |   1 +
> >  net/xdp/xsk.c                 |   6 +-
> >  net/xdp/xsk.h                 |  12 +++
> >  net/xdp/xsk_diag.c            | 192 ++++++++++++++++++++++++++++++++++
> >  6 files changed, 286 insertions(+), 5 deletions(-)
> >  create mode 100644 include/uapi/linux/xdp_diag.h
> >  create mode 100644 net/xdp/xsk.h
> >  create mode 100644 net/xdp/xsk_diag.c
> >
> > diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
> > new file mode 100644
> > index 000000000000..efe8ce281dce
> > --- /dev/null
> > +++ b/include/uapi/linux/xdp_diag.h
> > @@ -0,0 +1,72 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +/*
> > + * xdp_diag: interface for query/monitor XDP sockets
> > + * Copyright(c) 2019 Intel Corporation.
> > + */
> > +
> > +#ifndef _LINUX_XDP_DIAG_H
> > +#define _LINUX_XDP_DIAG_H
> > +
> > +#include <linux/types.h>
> > +
> > +struct xdp_diag_req {
> > +     __u8    sdiag_family;
> > +     __u8    sdiag_protocol;
> > +     __u16   pad;
>
> Presumably this one is for future use? Maybe better as '__u16 :16;' to
> avoid compile errors if someone tries to zero 'pad' member manually?
>

The "pad" was there simply to have an explicitly named structure hole.
I'm not following the bitfield argument. Why does that avoid compiler
errors?

> > +     __u32   xdiag_ino;
> > +     __u32   xdiag_show;
> > +     __u32   xdiag_cookie[2];
> > +};
> > +
> > +struct xdp_diag_msg {
> > +     __u8    xdiag_family;
> > +     __u8    xdiag_type;
> > +     __u32   xdiag_ino;
> > +     __u32   xdiag_cookie[2];
> > +};
> > +
> > +#define XDP_SHOW_INFO                (1 << 0) /* Basic information */
> > +#define XDP_SHOW_RING_CFG    (1 << 1)
> > +#define XDP_SHOW_UMEM                (1 << 2)
> > +#define XDP_SHOW_MEMINFO     (1 << 3)
> > +
> > +enum {
> > +     XDP_DIAG_NONE,
> > +     XDP_DIAG_INFO,
> > +     XDP_DIAG_UID,
> > +     XDP_DIAG_RX_RING,
> > +     XDP_DIAG_TX_RING,
> > +     XDP_DIAG_UMEM,
> > +     XDP_DIAG_UMEM_FILL_RING,
> > +     XDP_DIAG_UMEM_COMPLETION_RING,
> > +     XDP_DIAG_MEMINFO,
> > +     __XDP_DIAG_MAX,
> > +};
> > +
> > +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
> > +
> > +struct xdp_diag_info {
> > +     __u32   ifindex;
> > +     __u32   queue_id;
> > +};
> > +
> > +struct xdp_diag_ring {
> > +     __u32   entries; /*num descs */
> > +};
> > +
> > +#define XDP_DU_F_ZEROCOPY (1 << 0)
> > +
> > +struct xdp_diag_umem {
> > +     __u64   size;
> > +     __u32   id;
> > +     __u32   num_pages;
> > +     __u32   chunk_size;
> > +     __u32   headroom;
> > +     __u32   ifindex;
> > +     __u32   queue_id;
> > +     __u32   flags;
> > +     __u32   refs;
> > +};
> > +
> > +
>
> Nit: one newline too much
>

Good catch!

> > +#endif /* _LINUX_XDP_DIAG_H */
> > diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
> > index 90e4a7152854..0255b33cff4b 100644
> > --- a/net/xdp/Kconfig
> > +++ b/net/xdp/Kconfig
> > @@ -5,3 +5,11 @@ config XDP_SOCKETS
> >       help
> >         XDP sockets allows a channel between XDP programs and
> >         userspace applications.
> > +
> > +config XDP_SOCKETS_DIAG
> > +     tristate "XDP sockets: monitoring interface"
> > +     depends on XDP_SOCKETS
> > +     default n
> > +     help
> > +       Support for PF_XDP sockets monitoring interface used by the ss tool.
> > +       If unsure, say Y.
> > diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> > index 04f073146256..59dbfdf93dca 100644
> > --- a/net/xdp/Makefile
> > +++ b/net/xdp/Makefile
> > @@ -1 +1,2 @@
> >  obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
> > +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index 80ca48cefc42..949d3bbccb2f 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -27,14 +27,10 @@
> >
> >  #include "xsk_queue.h"
> >  #include "xdp_umem.h"
> > +#include "xsk.h"
> >
> >  #define TX_BATCH_SIZE 16
> >
> > -static struct xdp_sock *xdp_sk(struct sock *sk)
> > -{
> > -     return (struct xdp_sock *)sk;
> > -}
> > -
> >  bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
> >  {
> >       return READ_ONCE(xs->rx) &&  READ_ONCE(xs->umem) &&
> > diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
> > new file mode 100644
> > index 000000000000..ba8120610426
> > --- /dev/null
> > +++ b/net/xdp/xsk.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright(c) 2019 Intel Corporation. */
> > +
> > +#ifndef XSK_H_
> > +#define XSK_H_
> > +
> > +static inline struct xdp_sock *xdp_sk(struct sock *sk)
> > +{
> > +     return (struct xdp_sock *)sk;
> > +}
> > +
> > +#endif /* XSK_H_ */
> > diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
> > new file mode 100644
> > index 000000000000..2585d7a4ac2f
> > --- /dev/null
> > +++ b/net/xdp/xsk_diag.c
> > @@ -0,0 +1,192 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* XDP sockets monitoring support
> > + *
> > + * Copyright(c) 2019 Intel Corporation.
> > + *
> > + * Author: Björn Töpel <bjorn.topel@...el.com>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <net/xdp_sock.h>
> > +#include <linux/xdp_diag.h>
> > +#include <linux/sock_diag.h>
> > +
> > +#include "xsk_queue.h"
> > +#include "xsk.h"
> > +
> > +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
> > +{
> > +     struct xdp_diag_info di;
>
> Not a bug, but can we generally zero all the structs on stack such that once
> we extend them nothing gets potentially leaked.
>

Makes sense! Will fix here and...

> > +     di.ifindex = xs->dev ? xs->dev->ifindex : 0;
> > +     di.queue_id = xs->queue_id;
> > +     return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
> > +}
> > +
> > +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
> > +                          struct sk_buff *nlskb)
> > +{
> > +     struct xdp_diag_ring dr;
>
> Ditto.
>

...here...

> > +     dr.entries = queue->nentries;
> > +     return nla_put(nlskb, nl_type, sizeof(dr), &dr);
> > +}
> > +
> > +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
> > +                               struct sk_buff *nlskb)
> > +{
> > +     int err = 0;
> > +
> > +     if (xs->rx)
> > +             err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
> > +     if (!err && xs->tx)
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
> > +     return err;
> > +}
> > +
> > +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
> > +{
> > +     struct xdp_umem *umem = xs->umem;
> > +     struct xdp_diag_umem du;
>
> Ditto
>

...and here.

> > +     int err;
> > +
> > +     if (!umem)
> > +             return 0;
> > +
> > +     du.id = umem->id;
> > +     du.size = umem->size;
> > +     du.num_pages = umem->npgs;
> > +     du.chunk_size = (__u32)(~umem->chunk_mask + 1);
> > +     du.headroom = umem->headroom;
> > +     du.ifindex = umem->dev ? umem->dev->ifindex : 0;
> > +     du.queue_id = umem->queue_id;
> > +     du.flags = 0;
> > +     if (umem->zc)
> > +             du.flags |= XDP_DU_F_ZEROCOPY;
> > +     du.refs = refcount_read(&umem->users);
> > +
> > +     err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
> > +
> > +     if (!err && umem->fq)
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
> > +     if (!err && umem->cq) {
> > +             err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
> > +                                     nlskb);
> > +     }
> > +     return err;
> > +}
> > +
> > +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
> > +                      struct xdp_diag_req *req,
> > +                      struct user_namespace *user_ns,
> > +                      u32 portid, u32 seq, u32 flags, int sk_ino)
> > +{
> > +     struct xdp_sock *xs = xdp_sk(sk);
> > +     struct xdp_diag_msg *msg;
> > +     struct nlmsghdr *nlh;
> > +
> > +     nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
> > +                     flags);
> > +     if (!nlh)
> > +             return -EMSGSIZE;
> > +
> > +     msg = nlmsg_data(nlh);
> > +     msg->xdiag_family = AF_XDP;
> > +     msg->xdiag_type = sk->sk_type;
> > +     msg->xdiag_ino = sk_ino;
> > +     sock_diag_save_cookie(sk, msg->xdiag_cookie);
>
> Don't we have a hole in struct xdp_diag_msg after xdiag_type? Probably better
> to memset everything in the beginning as well.
>

You're right! Good catch. In general for uapi structures; Explicitly
named "holes" or not? The pad in req is exactly that. So, either add a
pad to _msg or remove in _req. What is preferred?

I'll spin up a v2.


Thanks for looking at the code!
Björn

> > +     if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_INFO) &&
> > +         nla_put_u32(nlskb, XDP_DIAG_UID,
> > +                     from_kuid_munged(user_ns, sock_i_uid(sk))))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
> > +         xsk_diag_put_rings_cfg(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_UMEM) &&
> > +         xsk_diag_put_umem(xs, nlskb))
> > +             goto out_nlmsg_trim;
> > +
> > +     if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
> > +         sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
> > +             goto out_nlmsg_trim;
> > +
> > +     nlmsg_end(nlskb, nlh);
> > +     return 0;
> > +
> > +out_nlmsg_trim:
> > +     nlmsg_cancel(nlskb, nlh);
> > +     return -EMSGSIZE;
> > +}
> > +
> > +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
> > +{
> > +     struct xdp_diag_req *req = nlmsg_data(cb->nlh);
> > +     struct net *net = sock_net(nlskb->sk);
> > +     int num = 0, s_num = cb->args[0];
> > +     struct sock *sk;
> > +
> > +     mutex_lock(&net->xdp.lock);
> > +
> > +     sk_for_each(sk, &net->xdp.list) {
> > +             if (!net_eq(sock_net(sk), net))
> > +                     continue;
> > +             if (num++ < s_num)
> > +                     continue;
> > +
> > +             if (xsk_diag_fill(sk, nlskb, req,
> > +                               sk_user_ns(NETLINK_CB(cb->skb).sk),
> > +                               NETLINK_CB(cb->skb).portid,
> > +                               cb->nlh->nlmsg_seq, NLM_F_MULTI,
> > +                               sock_i_ino(sk)) < 0) {
> > +                     num--;
> > +                     break;
> > +             }
> > +     }
> > +
> > +     mutex_unlock(&net->xdp.lock);
> > +     cb->args[0] = num;
> > +     return nlskb->len;
> > +}
> > +
> > +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
> > +{
> > +     struct netlink_dump_control c = { .dump = xsk_diag_dump };
> > +     int hdrlen = sizeof(struct xdp_diag_req);
> > +     struct net *net = sock_net(nlskb->sk);
> > +     struct xdp_diag_req *req;
> > +
> > +     if (nlmsg_len(hdr) < hdrlen)
> > +             return -EINVAL;
> > +
> > +     if (!(hdr->nlmsg_flags & NLM_F_DUMP))
> > +             return -EOPNOTSUPP;
> > +
> > +     req = nlmsg_data(hdr);
> > +     return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
> > +}
> > +
> > +static const struct sock_diag_handler xsk_diag_handler = {
> > +     .family = AF_XDP,
> > +     .dump = xsk_diag_handler_dump,
> > +};
> > +
> > +static int __init xsk_diag_init(void)
> > +{
> > +     return sock_diag_register(&xsk_diag_handler);
> > +}
> > +
> > +static void __exit xsk_diag_exit(void)
> > +{
> > +     sock_diag_unregister(&xsk_diag_handler);
> > +}
> > +
> > +module_init(xsk_diag_init);
> > +module_exit(xsk_diag_exit);
> > +MODULE_LICENSE("GPL");
> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
> >
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ