Message-ID: <20190829143643.7cdd8669@cakuba.netronome.com>
Date: Thu, 29 Aug 2019 14:36:43 -0700
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
To: Song Liu <liu.song.a23@...il.com>
Cc: Alexei Starovoitov <alexei.starovoitov@...il.com>,
Daniel Borkmann <daniel@...earbox.net>,
Networking <netdev@...r.kernel.org>, oss-drivers@...ronome.com,
jaco.gericke@...ronome.com,
Quentin Monnet <quentin.monnet@...ronome.com>
Subject: Re: [PATCH bpf-next 2/2] nfp: bpf: add simple map op cache
On Thu, 29 Aug 2019 14:29:44 -0700, Song Liu wrote:
> On Tue, Aug 27, 2019 at 10:40 PM Jakub Kicinski
> <jakub.kicinski@...ronome.com> wrote:
> >
> > Each get_next and lookup call requires a round trip to the device.
> > However, the device is capable of giving us a few entries back,
> > instead of just one.
> >
> > In this patch we ask for a small yet reasonable number of entries
> > (4) on every get_next call, and on subsequent get_next/lookup calls
> > check this little cache for a hit. The cache is only kept for 250us,
> > and is invalidated on every operation which may modify the map
> > (e.g. delete or update call). Note that operations may be performed
> > simultaneously, so we have to keep track of operations in flight.
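
For context, the pattern which benefits here is a map dump: user space
alternates get_next and lookup calls, and each of those used to cost a
full round trip to the device. A minimal sketch of such a dump loop
using the standard libbpf helpers (map_fd, KEY_SZ, VAL_SZ and
print_entry() are assumptions for the sketch):

	#include <bpf/bpf.h>	/* bpf_map_get_next_key(), bpf_map_lookup_elem() */

	void dump_map(int map_fd)
	{
		char key[KEY_SZ], value[VAL_SZ];
		void *cur = NULL;	/* NULL requests the first key */

		while (!bpf_map_get_next_key(map_fd, cur, key)) {
			if (!bpf_map_lookup_elem(map_fd, key, value))
				print_entry(key, value);
			cur = key;	/* continue from the key just seen */
		}
	}

With the cache, the get_next which fetches a key also brings back the
next few entries, so the following lookup and get_next calls can be
answered from the host without touching the device.
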
> >
> > Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
> > Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
> > ---
> > drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 179 +++++++++++++++++-
> > drivers/net/ethernet/netronome/nfp/bpf/fw.h | 1 +
> > drivers/net/ethernet/netronome/nfp/bpf/main.c | 18 ++
> > drivers/net/ethernet/netronome/nfp/bpf/main.h | 23 +++
> > .../net/ethernet/netronome/nfp/bpf/offload.c | 3 +
> > 5 files changed, 215 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> > index fcf880c82f3f..0e2db6ea79e9 100644
> > --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> > +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> > @@ -6,6 +6,7 @@
> > #include <linux/bug.h>
> > #include <linux/jiffies.h>
> > #include <linux/skbuff.h>
> > +#include <linux/timekeeping.h>
> >
> > #include "../ccm.h"
> > #include "../nfp_app.h"
> > @@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
> > return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
> > }
> >
> > +static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
> > +{
> > + return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
> > + op == NFP_CCM_TYPE_BPF_MAP_DELETE;
> > +}
> > +
> > +static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
> > +{
> > + return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
> > + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> > +}
> > +
> > +static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
> > +{
> > + return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
> > + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> > +}
> > +
> > +static unsigned int
> > +nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
> > + const u8 *key, u8 *out_key, u8 *out_value,
> > + u32 *cache_gen)
> > +{
> > + struct bpf_map *map = &nfp_map->offmap->map;
> > + struct nfp_app_bpf *bpf = nfp_map->bpf;
> > + unsigned int i, count, n_entries;
> > + struct cmsg_reply_map_op *reply;
> > +
> > + n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
> > +
> > + spin_lock(&nfp_map->cache_lock);
> > + *cache_gen = nfp_map->cache_gen;
> > + if (nfp_map->cache_blockers)
> > + n_entries = 1;
> > +
> > + if (nfp_bpf_ctrl_op_cache_invalidate(op))
> > + goto exit_block;
> > + if (!nfp_bpf_ctrl_op_cache_capable(op))
> > + goto exit_unlock;
> > +
> > + if (!nfp_map->cache)
> > + goto exit_unlock;
> > + if (nfp_map->cache_to < ktime_get_ns())
> > + goto exit_invalidate;
> > +
> > + reply = (void *)nfp_map->cache->data;
> > + count = be32_to_cpu(reply->count);
>
> Do we need to check whether count is too big (e.g. from a firmware bug)?
It's validated below, once the reply skb is received (see my "here"
note further down).
> > +
> > + for (i = 0; i < count; i++) {
> > + void *cached_key;
> > +
> > + cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
> > + if (memcmp(cached_key, key, map->key_size))
> > + continue;
> > +
> > + if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
> > + memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
> > + map->value_size);
> > + if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
> > + if (i + 1 == count)
> > + break;
> > +
> > + memcpy(out_key,
> > + nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
> > + map->key_size);
> > + }
> > +
> > + n_entries = 0;
> > + goto exit_unlock;
> > + }
> > + goto exit_unlock;
> > +
> > +exit_block:
> > + nfp_map->cache_blockers++;
> > +exit_invalidate:
> > + dev_consume_skb_any(nfp_map->cache);
> > + nfp_map->cache = NULL;
> > +exit_unlock:
> > + spin_unlock(&nfp_map->cache_lock);
> > + return n_entries;
> > +}
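
A note on the concurrency scheme above: map-modifying ops bump
cache_blockers under cache_lock, so a get_next/lookup racing with them
falls back to a single-entry request and the existing cache is freed.
The cache_gen snapshot taken here is checked again when the reply
comes back, so a reply which raced with an invalidating op is not
installed as a stale cache (that half lives in
nfp_bpf_ctrl_op_cache_put(), not quoted in this mail).
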
> > static int
> > nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> > u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
> > {
> > struct nfp_bpf_map *nfp_map = offmap->dev_priv;
> > + unsigned int n_entries, reply_entries, count;
> > struct nfp_app_bpf *bpf = nfp_map->bpf;
> > struct bpf_map *map = &offmap->map;
> > struct cmsg_reply_map_op *reply;
> > struct cmsg_req_map_op *req;
> > struct sk_buff *skb;
> > + u32 cache_gen;
> > int err;
> >
> > /* FW messages have no space for more than 32 bits of flags */
> > if (flags >> 32)
> > return -EOPNOTSUPP;
> >
> > + /* Handle op cache */
> > + n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
> > + out_value, &cache_gen);
> > + if (!n_entries)
> > + return 0;
> > +
> > skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
> > - if (!skb)
> > - return -ENOMEM;
> > + if (!skb) {
> > + err = -ENOMEM;
> > + goto err_cache_put;
> > + }
> >
> > req = (void *)skb->data;
> > req->tid = cpu_to_be32(nfp_map->tid);
> > - req->count = cpu_to_be32(1);
> > + req->count = cpu_to_be32(n_entries);
> > req->flags = cpu_to_be32(flags);
> >
> > /* Copy inputs */
> > @@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> > memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
> > map->value_size);
> >
> > - skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
> > - nfp_bpf_cmsg_map_reply_size(bpf, 1));
> > - if (IS_ERR(skb))
> > - return PTR_ERR(skb);
> > + skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
> > + if (IS_ERR(skb)) {
> > + err = PTR_ERR(skb);
> > + goto err_cache_put;
> > + }
> > +
> > + if (skb->len < sizeof(*reply)) {
> > + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
> > + op, skb->len);
> > + err = -EIO;
> > + goto err_free;
> > + }
> >
> > reply = (void *)skb->data;
> > + count = be32_to_cpu(reply->count);
> > err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
> > + /* FW responds with message sized to hold the good entries,
> > + * plus one extra entry if there was an error.
> > + */
> > + reply_entries = count + !!err;
> > + if (n_entries > 1 && count)
> > + err = 0;
> > if (err)
> > goto err_free;
> >
> > + if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
here, reply_entries is derived directly from reply->count
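To make the arithmetic concrete: a get_next asking for 4 entries near
the end of the map may come back with, say, count == 2 good entries
plus one error slot for running off the end. reply_entries is then 3,
the check below expects a reply sized for 3 entries, err is cleared
because count is non-zero on a multi-entry request, entry 0 is copied
out as usual and the skb with both good entries goes to
nfp_bpf_ctrl_op_cache_put() for future hits.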
> > + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
> > + op, skb->len, reply_entries);
> > + err = -EIO;
> > + goto err_free;
> > + }
> > +
> > /* Copy outputs */
> > if (out_key)
> > memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
> > @@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> > memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
> > map->value_size);
> >
> > - dev_consume_skb_any(skb);
> > + nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
> >
> > return 0;
> > err_free:
> > dev_kfree_skb_any(skb);
> > +err_cache_put:
> > + nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
> > return err;
> > }
> >
> > @@ -275,7 +422,21 @@ unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
> >
> > unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
> > {
> > - return max(NFP_NET_DEFAULT_MTU, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
> > + return max3(NFP_NET_DEFAULT_MTU,
> > + nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
> > + nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
> > +}
> > +
> > +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
> > +{
> > + unsigned int mtu, req_max, reply_max, entry_sz;
> > +
> > + mtu = bpf->app->ctrl->dp.mtu;
> > + entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
> > + req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
> > + reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
> > +
> > + return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
> > }
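
With purely illustrative numbers (a 1500 byte control MTU, 64 byte
keys and values so entry_sz = 128, and request/reply headers in the
tens of bytes), req_max and reply_max both come out around 11, so the
min3() is decided by NFP_BPF_MAP_CACHE_CNT and the cache runs at the
full 4 entries mentioned in the commit message; only a very small
control MTU would shrink it below that.
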
> >
> > void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
> > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> > index 06c4286bd79e..a83a0ad5e27d 100644
> > --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> > +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> > @@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
> > NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
> > NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
> > NFP_BPF_CAP_TYPE_ABI_VERSION = 7,
> > + NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
> > };
> >
> > struct nfp_bpf_cap_tlv_func {
> > diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> > index 2b1773ed3de9..8f732771d3fa 100644
> > --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
> > +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> > @@ -299,6 +299,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
> > return 0;
> > }
> >
> > +static int
> > +nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
> > + u32 length)
> > +{
> > + bpf->cmsg_multi_ent = true;
> > + return 0;
> > +}
> > +
> > static int
> > nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
> > u32 length)
> > @@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
> > length))
> > goto err_release_free;
> > break;
> > + case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
> > + if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
> > + length))
>
> Do we plan to extend nfp_bpf_parse_cap_cmsg_multi_ent() to return
> non-zero in the future?
Yes, the TLV format allows the entry to be extended, and parsing may
then fail. Mostly, though, it's just the pattern the BPF TLV parsing
follows.
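
To sketch that pattern (everything past the length check below is
hypothetical, the current TLV carries no payload): a future revision
could grow the entry, at which point a truncated length becomes a
parse failure:

	static int
	nfp_bpf_parse_cap_cmsg_multi_ent_v2(struct nfp_app_bpf *bpf,
					    void __iomem *value, u32 length)
	{
		/* Hypothetical extension: a v2 TLV carrying a 32-bit
		 * cap on the number of cached entries.
		 */
		if (length < sizeof(u32))
			return -EINVAL;

		bpf->cmsg_cache_cnt_max = readl(value);	/* made-up field */
		bpf->cmsg_multi_ent = true;
		return 0;
	}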