netdev - Re: [PATCH bpf-next 2/2] nfp: bpf: add simple map op cache

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPhsuW5ExXPXYi5D2MND5JREh8EKNHUvSNoBEJ7L3-XK3GD9mA@mail.gmail.com>
Date:   Thu, 29 Aug 2019 14:29:44 -0700
From:   Song Liu <liu.song.a23@...il.com>
To:     Jakub Kicinski <jakub.kicinski@...ronome.com>
Cc:     Alexei Starovoitov <alexei.starovoitov@...il.com>,
        Daniel Borkmann <daniel@...earbox.net>,
        Networking <netdev@...r.kernel.org>, oss-drivers@...ronome.com,
        jaco.gericke@...ronome.com,
        Quentin Monnet <quentin.monnet@...ronome.com>
Subject: Re: [PATCH bpf-next 2/2] nfp: bpf: add simple map op cache

On Tue, Aug 27, 2019 at 10:40 PM Jakub Kicinski
<jakub.kicinski@...ronome.com> wrote:
>
> Each get_next and lookup call requires a round trip to the device.
> However, the device is capable of giving us a few entries back,
> instead of just one.
>
> In this patch we ask for a small yet reasonable number of entries
> (4) on every get_next call, and on subsequent get_next/lookup calls
> check this little cache for a hit. The cache is only kept for 250us,
> and is invalidated on every operation which may modify the map
> (e.g. delete or update call). Note that operations may be performed
> simultaneously, so we have to keep track of operations in flight.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
> ---
>  drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 179 +++++++++++++++++-
>  drivers/net/ethernet/netronome/nfp/bpf/fw.h   |   1 +
>  drivers/net/ethernet/netronome/nfp/bpf/main.c |  18 ++
>  drivers/net/ethernet/netronome/nfp/bpf/main.h |  23 +++
>  .../net/ethernet/netronome/nfp/bpf/offload.c  |   3 +
>  5 files changed, 215 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> index fcf880c82f3f..0e2db6ea79e9 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> @@ -6,6 +6,7 @@
>  #include <linux/bug.h>
>  #include <linux/jiffies.h>
>  #include <linux/skbuff.h>
> +#include <linux/timekeeping.h>
>
>  #include "../ccm.h"
>  #include "../nfp_app.h"
> @@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
>         return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
>  }
>
> +static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
> +{
> +       return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
> +              op == NFP_CCM_TYPE_BPF_MAP_DELETE;
> +}
> +
> +static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
> +{
> +       return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
> +              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> +}
> +
> +static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
> +{
> +       return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
> +              op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> +}
> +
> +static unsigned int
> +nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
> +                         const u8 *key, u8 *out_key, u8 *out_value,
> +                         u32 *cache_gen)
> +{
> +       struct bpf_map *map = &nfp_map->offmap->map;
> +       struct nfp_app_bpf *bpf = nfp_map->bpf;
> +       unsigned int i, count, n_entries;
> +       struct cmsg_reply_map_op *reply;
> +
> +       n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
> +
> +       spin_lock(&nfp_map->cache_lock);
> +       *cache_gen = nfp_map->cache_gen;
> +       if (nfp_map->cache_blockers)
> +               n_entries = 1;
> +
> +       if (nfp_bpf_ctrl_op_cache_invalidate(op))
> +               goto exit_block;
> +       if (!nfp_bpf_ctrl_op_cache_capable(op))
> +               goto exit_unlock;
> +
> +       if (!nfp_map->cache)
> +               goto exit_unlock;
> +       if (nfp_map->cache_to < ktime_get_ns())
> +               goto exit_invalidate;
> +
> +       reply = (void *)nfp_map->cache->data;
> +       count = be32_to_cpu(reply->count);

Do we need to check whether count is too big (e.g. because of a firmware
bug), so that the loop below cannot read past the end of the cached reply?

> +
> +       for (i = 0; i < count; i++) {
> +               void *cached_key;
> +
> +               cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
> +               if (memcmp(cached_key, key, map->key_size))
> +                       continue;
> +
> +               if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
> +                       memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
> +                              map->value_size);
> +               if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
> +                       if (i + 1 == count)
> +                               break;
> +
> +                       memcpy(out_key,
> +                              nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
> +                              map->key_size);
> +               }
> +
> +               n_entries = 0;
> +               goto exit_unlock;
> +       }
> +       goto exit_unlock;
> +
> +exit_block:
> +       nfp_map->cache_blockers++;
> +exit_invalidate:
> +       dev_consume_skb_any(nfp_map->cache);
> +       nfp_map->cache = NULL;
> +exit_unlock:
> +       spin_unlock(&nfp_map->cache_lock);
> +       return n_entries;
> +}
> +
> +static void
> +nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
> +                         struct sk_buff *skb, u32 cache_gen)
> +{
> +       bool blocker, filler;
> +
> +       blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
> +       filler = nfp_bpf_ctrl_op_cache_fill(op);
> +       if (blocker || filler) {
> +               u64 to = 0;
> +
> +               if (filler)
> +                       to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
> +
> +               spin_lock(&nfp_map->cache_lock);
> +               if (blocker) {
> +                       nfp_map->cache_blockers--;
> +                       nfp_map->cache_gen++;
> +               }
> +               if (filler && !nfp_map->cache_blockers &&
> +                   nfp_map->cache_gen == cache_gen) {
> +                       nfp_map->cache_to = to;
> +                       swap(nfp_map->cache, skb);
> +               }
> +               spin_unlock(&nfp_map->cache_lock);
> +       }
> +
> +       dev_consume_skb_any(skb);
> +}
> +
>  static int
>  nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
>                       u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
>  {
>         struct nfp_bpf_map *nfp_map = offmap->dev_priv;
> +       unsigned int n_entries, reply_entries, count;
>         struct nfp_app_bpf *bpf = nfp_map->bpf;
>         struct bpf_map *map = &offmap->map;
>         struct cmsg_reply_map_op *reply;
>         struct cmsg_req_map_op *req;
>         struct sk_buff *skb;
> +       u32 cache_gen;
>         int err;
>
>         /* FW messages have no space for more than 32 bits of flags */
>         if (flags >> 32)
>                 return -EOPNOTSUPP;
>
> +       /* Handle op cache */
> +       n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
> +                                             out_value, &cache_gen);
> +       if (!n_entries)
> +               return 0;
> +
>         skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
> -       if (!skb)
> -               return -ENOMEM;
> +       if (!skb) {
> +               err = -ENOMEM;
> +               goto err_cache_put;
> +       }
>
>         req = (void *)skb->data;
>         req->tid = cpu_to_be32(nfp_map->tid);
> -       req->count = cpu_to_be32(1);
> +       req->count = cpu_to_be32(n_entries);
>         req->flags = cpu_to_be32(flags);
>
>         /* Copy inputs */
> @@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
>                 memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
>                        map->value_size);
>
> -       skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
> -                                 nfp_bpf_cmsg_map_reply_size(bpf, 1));
> -       if (IS_ERR(skb))
> -               return PTR_ERR(skb);
> +       skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
> +       if (IS_ERR(skb)) {
> +               err = PTR_ERR(skb);
> +               goto err_cache_put;
> +       }
> +
> +       if (skb->len < sizeof(*reply)) {
> +               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
> +                         op, skb->len);
> +               err = -EIO;
> +               goto err_free;
> +       }
>
>         reply = (void *)skb->data;
> +       count = be32_to_cpu(reply->count);
>         err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
> +       /* FW responds with message sized to hold the good entries,
> +        * plus one extra entry if there was an error.
> +        */
> +       reply_entries = count + !!err;
> +       if (n_entries > 1 && count)
> +               err = 0;
>         if (err)
>                 goto err_free;
>
> +       if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
> +               cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
> +                         op, skb->len, reply_entries);
> +               err = -EIO;
> +               goto err_free;
> +       }
> +
>         /* Copy outputs */
>         if (out_key)
>                 memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
> @@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
>                 memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
>                        map->value_size);
>
> -       dev_consume_skb_any(skb);
> +       nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
>
>         return 0;
>  err_free:
>         dev_kfree_skb_any(skb);
> +err_cache_put:
> +       nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
>         return err;
>  }
>
> @@ -275,7 +422,21 @@ unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
>
>  unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
>  {
> -       return max(NFP_NET_DEFAULT_MTU, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
> +       return max3(NFP_NET_DEFAULT_MTU,
> +                   nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
> +                   nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
> +}
> +
> +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
> +{
> +       unsigned int mtu, req_max, reply_max, entry_sz;
> +
> +       mtu = bpf->app->ctrl->dp.mtu;
> +       entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
> +       req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
> +       reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
> +
> +       return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
>  }
>
>  void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> index 06c4286bd79e..a83a0ad5e27d 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> @@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
>         NFP_BPF_CAP_TYPE_QUEUE_SELECT   = 5,
>         NFP_BPF_CAP_TYPE_ADJUST_TAIL    = 6,
>         NFP_BPF_CAP_TYPE_ABI_VERSION    = 7,
> +       NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
>  };
>
>  struct nfp_bpf_cap_tlv_func {
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> index 2b1773ed3de9..8f732771d3fa 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> @@ -299,6 +299,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
>         return 0;
>  }
>
> +static int
> +nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
> +                                u32 length)
> +{
> +       bpf->cmsg_multi_ent = true;
> +       return 0;
> +}
> +
>  static int
>  nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
>                               u32 length)
> @@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
>                                                           length))
>                                 goto err_release_free;
>                         break;
> +               case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
> +                       if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
> +                                                            length))

Do we plan to extend nfp_bpf_parse_cap_cmsg_multi_ent() to return
non-zero in the future?

> +                               goto err_release_free;
> +                       break;
>                 default:
>                         nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
>                         break;
> @@ -426,6 +439,11 @@ static int nfp_bpf_start(struct nfp_app *app)
>                 return -EINVAL;
>         }
>
> +       if (bpf->cmsg_multi_ent)
> +               bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
> +       else
> +               bpf->cmsg_cache_cnt = 1;
> +
>         return 0;
>  }
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> index f4802036eb42..fac9c6f9e197 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> @@ -99,6 +99,7 @@ enum pkt_vec {
>   * @maps_neutral:      hash table of offload-neutral maps (on pointer)
>   *
>   * @abi_version:       global BPF ABI version
> + * @cmsg_cache_cnt:    number of entries to read for caching
>   *
>   * @adjust_head:       adjust head capability
>   * @adjust_head.flags:         extra flags for adjust head
> @@ -124,6 +125,7 @@ enum pkt_vec {
>   * @pseudo_random:     FW initialized the pseudo-random machinery (CSRs)
>   * @queue_select:      BPF can set the RX queue ID in packet vector
>   * @adjust_tail:       BPF can simply trunc packet size for adjust tail
> + * @cmsg_multi_ent:    FW can pack multiple map entries in a single cmsg
>   */
>  struct nfp_app_bpf {
>         struct nfp_app *app;
> @@ -134,6 +136,8 @@ struct nfp_app_bpf {
>         unsigned int cmsg_key_sz;
>         unsigned int cmsg_val_sz;
>
> +       unsigned int cmsg_cache_cnt;
> +
>         struct list_head map_list;
>         unsigned int maps_in_use;
>         unsigned int map_elems_in_use;
> @@ -169,6 +173,7 @@ struct nfp_app_bpf {
>         bool pseudo_random;
>         bool queue_select;
>         bool adjust_tail;
> +       bool cmsg_multi_ent;
>  };
>
>  enum nfp_bpf_map_use {
> @@ -183,11 +188,21 @@ struct nfp_bpf_map_word {
>         unsigned char non_zero_update   :1;
>  };
>
> +#define NFP_BPF_MAP_CACHE_CNT          4U
> +#define NFP_BPF_MAP_CACHE_TIME_NS      (250 * 1000)
> +
>  /**
>   * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
>   * @offmap:    pointer to the offloaded BPF map
>   * @bpf:       back pointer to bpf app private structure
>   * @tid:       table id identifying map on datapath
> + *
> + * @cache_lock:        protects @cache_blockers, @cache_to, @cache
> + * @cache_blockers:    number of ops in flight which block caching
> + * @cache_gen: counter incremented by every blocker on exit
> + * @cache_to:  time when cache will no longer be valid (ns)
> + * @cache:     skb with cached response
> + *
>   * @l:         link on the nfp_app_bpf->map_list list
>   * @use_map:   map of how the value is used (in 4B chunks)
>   */
> @@ -195,6 +210,13 @@ struct nfp_bpf_map {
>         struct bpf_offloaded_map *offmap;
>         struct nfp_app_bpf *bpf;
>         u32 tid;
> +
> +       spinlock_t cache_lock;
> +       u32 cache_blockers;
> +       u32 cache_gen;
> +       u64 cache_to;
> +       struct sk_buff *cache;
> +
>         struct list_head l;
>         struct nfp_bpf_map_word use_map[];
>  };
> @@ -566,6 +588,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
>
>  unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
>  unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
> +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
>  long long int
>  nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
>  void
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> index 39c9fec222b4..88fab6a82acf 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> @@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
>         offmap->dev_priv = nfp_map;
>         nfp_map->offmap = offmap;
>         nfp_map->bpf = bpf;
> +       spin_lock_init(&nfp_map->cache_lock);
>
>         res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
>         if (res < 0) {
> @@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
>         struct nfp_bpf_map *nfp_map = offmap->dev_priv;
>
>         nfp_bpf_ctrl_free_map(bpf, nfp_map);
> +       dev_consume_skb_any(nfp_map->cache);
> +       WARN_ON_ONCE(nfp_map->cache_blockers);
>         list_del_init(&nfp_map->l);
>         bpf->map_elems_in_use -= offmap->map.max_entries;
>         bpf->maps_in_use--;
> --
> 2.21.0
>