[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPhsuW5ExXPXYi5D2MND5JREh8EKNHUvSNoBEJ7L3-XK3GD9mA@mail.gmail.com>
Date: Thu, 29 Aug 2019 14:29:44 -0700
From: Song Liu <liu.song.a23@...il.com>
To: Jakub Kicinski <jakub.kicinski@...ronome.com>
Cc: Alexei Starovoitov <alexei.starovoitov@...il.com>,
Daniel Borkmann <daniel@...earbox.net>,
Networking <netdev@...r.kernel.org>, oss-drivers@...ronome.com,
jaco.gericke@...ronome.com,
Quentin Monnet <quentin.monnet@...ronome.com>
Subject: Re: [PATCH bpf-next 2/2] nfp: bpf: add simple map op cache
On Tue, Aug 27, 2019 at 10:40 PM Jakub Kicinski
<jakub.kicinski@...ronome.com> wrote:
>
> Each get_next and lookup call requires a round trip to the device.
> However, the device is capable of giving us a few entries back,
> instead of just one.
>
> In this patch we ask for a small yet reasonable number of entries
> (4) on every get_next call, and on subsequent get_next/lookup calls
> check this little cache for a hit. The cache is only kept for 250us,
> and is invalidated on every operation which may modify the map
> (e.g. delete or update call). Note that operations may be performed
> simultaneously, so we have to keep track of operations in flight.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@...ronome.com>
> ---
> drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 179 +++++++++++++++++-
> drivers/net/ethernet/netronome/nfp/bpf/fw.h | 1 +
> drivers/net/ethernet/netronome/nfp/bpf/main.c | 18 ++
> drivers/net/ethernet/netronome/nfp/bpf/main.h | 23 +++
> .../net/ethernet/netronome/nfp/bpf/offload.c | 3 +
> 5 files changed, 215 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> index fcf880c82f3f..0e2db6ea79e9 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
> @@ -6,6 +6,7 @@
> #include <linux/bug.h>
> #include <linux/jiffies.h>
> #include <linux/skbuff.h>
> +#include <linux/timekeeping.h>
>
> #include "../ccm.h"
> #include "../nfp_app.h"
> @@ -175,29 +176,151 @@ nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
> return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
> }
>
> +static bool nfp_bpf_ctrl_op_cache_invalidate(enum nfp_ccm_type op)
> +{
> + return op == NFP_CCM_TYPE_BPF_MAP_UPDATE ||
> + op == NFP_CCM_TYPE_BPF_MAP_DELETE;
> +}
> +
> +static bool nfp_bpf_ctrl_op_cache_capable(enum nfp_ccm_type op)
> +{
> + return op == NFP_CCM_TYPE_BPF_MAP_LOOKUP ||
> + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> +}
> +
> +static bool nfp_bpf_ctrl_op_cache_fill(enum nfp_ccm_type op)
> +{
> + return op == NFP_CCM_TYPE_BPF_MAP_GETFIRST ||
> + op == NFP_CCM_TYPE_BPF_MAP_GETNEXT;
> +}
> +
> +static unsigned int
> +nfp_bpf_ctrl_op_cache_get(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
> + const u8 *key, u8 *out_key, u8 *out_value,
> + u32 *cache_gen)
> +{
> + struct bpf_map *map = &nfp_map->offmap->map;
> + struct nfp_app_bpf *bpf = nfp_map->bpf;
> + unsigned int i, count, n_entries;
> + struct cmsg_reply_map_op *reply;
> +
> + n_entries = nfp_bpf_ctrl_op_cache_fill(op) ? bpf->cmsg_cache_cnt : 1;
> +
> + spin_lock(&nfp_map->cache_lock);
> + *cache_gen = nfp_map->cache_gen;
> + if (nfp_map->cache_blockers)
> + n_entries = 1;
> +
> + if (nfp_bpf_ctrl_op_cache_invalidate(op))
> + goto exit_block;
> + if (!nfp_bpf_ctrl_op_cache_capable(op))
> + goto exit_unlock;
> +
> + if (!nfp_map->cache)
> + goto exit_unlock;
> + if (nfp_map->cache_to < ktime_get_ns())
> + goto exit_invalidate;
> +
> + reply = (void *)nfp_map->cache->data;
> + count = be32_to_cpu(reply->count);
Do we need to check whether count is too big (e.g. because of a firmware bug)?
> +
> + for (i = 0; i < count; i++) {
> + void *cached_key;
> +
> + cached_key = nfp_bpf_ctrl_reply_key(bpf, reply, i);
> + if (memcmp(cached_key, key, map->key_size))
> + continue;
> +
> + if (op == NFP_CCM_TYPE_BPF_MAP_LOOKUP)
> + memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, i),
> + map->value_size);
> + if (op == NFP_CCM_TYPE_BPF_MAP_GETNEXT) {
> + if (i + 1 == count)
> + break;
> +
> + memcpy(out_key,
> + nfp_bpf_ctrl_reply_key(bpf, reply, i + 1),
> + map->key_size);
> + }
> +
> + n_entries = 0;
> + goto exit_unlock;
> + }
> + goto exit_unlock;
> +
> +exit_block:
> + nfp_map->cache_blockers++;
> +exit_invalidate:
> + dev_consume_skb_any(nfp_map->cache);
> + nfp_map->cache = NULL;
> +exit_unlock:
> + spin_unlock(&nfp_map->cache_lock);
> + return n_entries;
> +}
> +
> +static void
> +nfp_bpf_ctrl_op_cache_put(struct nfp_bpf_map *nfp_map, enum nfp_ccm_type op,
> + struct sk_buff *skb, u32 cache_gen)
> +{
> + bool blocker, filler;
> +
> + blocker = nfp_bpf_ctrl_op_cache_invalidate(op);
> + filler = nfp_bpf_ctrl_op_cache_fill(op);
> + if (blocker || filler) {
> + u64 to = 0;
> +
> + if (filler)
> + to = ktime_get_ns() + NFP_BPF_MAP_CACHE_TIME_NS;
> +
> + spin_lock(&nfp_map->cache_lock);
> + if (blocker) {
> + nfp_map->cache_blockers--;
> + nfp_map->cache_gen++;
> + }
> + if (filler && !nfp_map->cache_blockers &&
> + nfp_map->cache_gen == cache_gen) {
> + nfp_map->cache_to = to;
> + swap(nfp_map->cache, skb);
> + }
> + spin_unlock(&nfp_map->cache_lock);
> + }
> +
> + dev_consume_skb_any(skb);
> +}
> +
> static int
> nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
> {
> struct nfp_bpf_map *nfp_map = offmap->dev_priv;
> + unsigned int n_entries, reply_entries, count;
> struct nfp_app_bpf *bpf = nfp_map->bpf;
> struct bpf_map *map = &offmap->map;
> struct cmsg_reply_map_op *reply;
> struct cmsg_req_map_op *req;
> struct sk_buff *skb;
> + u32 cache_gen;
> int err;
>
> /* FW messages have no space for more than 32 bits of flags */
> if (flags >> 32)
> return -EOPNOTSUPP;
>
> + /* Handle op cache */
> + n_entries = nfp_bpf_ctrl_op_cache_get(nfp_map, op, key, out_key,
> + out_value, &cache_gen);
> + if (!n_entries)
> + return 0;
> +
> skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
> - if (!skb)
> - return -ENOMEM;
> + if (!skb) {
> + err = -ENOMEM;
> + goto err_cache_put;
> + }
>
> req = (void *)skb->data;
> req->tid = cpu_to_be32(nfp_map->tid);
> - req->count = cpu_to_be32(1);
> + req->count = cpu_to_be32(n_entries);
> req->flags = cpu_to_be32(flags);
>
> /* Copy inputs */
> @@ -207,16 +330,38 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
> map->value_size);
>
> - skb = nfp_ccm_communicate(&bpf->ccm, skb, op,
> - nfp_bpf_cmsg_map_reply_size(bpf, 1));
> - if (IS_ERR(skb))
> - return PTR_ERR(skb);
> + skb = nfp_ccm_communicate(&bpf->ccm, skb, op, 0);
> + if (IS_ERR(skb)) {
> + err = PTR_ERR(skb);
> + goto err_cache_put;
> + }
> +
> + if (skb->len < sizeof(*reply)) {
> + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d!\n",
> + op, skb->len);
> + err = -EIO;
> + goto err_free;
> + }
>
> reply = (void *)skb->data;
> + count = be32_to_cpu(reply->count);
> err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
> + /* FW responds with message sized to hold the good entries,
> + * plus one extra entry if there was an error.
> + */
> + reply_entries = count + !!err;
> + if (n_entries > 1 && count)
> + err = 0;
> if (err)
> goto err_free;
>
> + if (skb->len != nfp_bpf_cmsg_map_reply_size(bpf, reply_entries)) {
> + cmsg_warn(bpf, "cmsg drop - type 0x%02x too short %d for %d entries!\n",
> + op, skb->len, reply_entries);
> + err = -EIO;
> + goto err_free;
> + }
> +
> /* Copy outputs */
> if (out_key)
> memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
> @@ -225,11 +370,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap, enum nfp_ccm_type op,
> memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
> map->value_size);
>
> - dev_consume_skb_any(skb);
> + nfp_bpf_ctrl_op_cache_put(nfp_map, op, skb, cache_gen);
>
> return 0;
> err_free:
> dev_kfree_skb_any(skb);
> +err_cache_put:
> + nfp_bpf_ctrl_op_cache_put(nfp_map, op, NULL, cache_gen);
> return err;
> }
>
> @@ -275,7 +422,21 @@ unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf)
>
> unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
> {
> - return max(NFP_NET_DEFAULT_MTU, nfp_bpf_ctrl_cmsg_min_mtu(bpf));
> + return max3(NFP_NET_DEFAULT_MTU,
> + nfp_bpf_cmsg_map_req_size(bpf, NFP_BPF_MAP_CACHE_CNT),
> + nfp_bpf_cmsg_map_reply_size(bpf, NFP_BPF_MAP_CACHE_CNT));
> +}
> +
> +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf)
> +{
> + unsigned int mtu, req_max, reply_max, entry_sz;
> +
> + mtu = bpf->app->ctrl->dp.mtu;
> + entry_sz = bpf->cmsg_key_sz + bpf->cmsg_val_sz;
> + req_max = (mtu - sizeof(struct cmsg_req_map_op)) / entry_sz;
> + reply_max = (mtu - sizeof(struct cmsg_reply_map_op)) / entry_sz;
> +
> + return min3(req_max, reply_max, NFP_BPF_MAP_CACHE_CNT);
> }
>
> void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> index 06c4286bd79e..a83a0ad5e27d 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
> @@ -24,6 +24,7 @@ enum bpf_cap_tlv_type {
> NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
> NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
> NFP_BPF_CAP_TYPE_ABI_VERSION = 7,
> + NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT = 8,
> };
>
> struct nfp_bpf_cap_tlv_func {
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> index 2b1773ed3de9..8f732771d3fa 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
> @@ -299,6 +299,14 @@ nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
> return 0;
> }
>
> +static int
> +nfp_bpf_parse_cap_cmsg_multi_ent(struct nfp_app_bpf *bpf, void __iomem *value,
> + u32 length)
> +{
> + bpf->cmsg_multi_ent = true;
> + return 0;
> +}
> +
> static int
> nfp_bpf_parse_cap_abi_version(struct nfp_app_bpf *bpf, void __iomem *value,
> u32 length)
> @@ -375,6 +383,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
> length))
> goto err_release_free;
> break;
> + case NFP_BPF_CAP_TYPE_CMSG_MULTI_ENT:
> + if (nfp_bpf_parse_cap_cmsg_multi_ent(app->priv, value,
> + length))
Do we plan to extend nfp_bpf_parse_cap_cmsg_multi_ent() to return non-zero
in the future?
> + goto err_release_free;
> + break;
> default:
> nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
> break;
> @@ -426,6 +439,11 @@ static int nfp_bpf_start(struct nfp_app *app)
> return -EINVAL;
> }
>
> + if (bpf->cmsg_multi_ent)
> + bpf->cmsg_cache_cnt = nfp_bpf_ctrl_cmsg_cache_cnt(bpf);
> + else
> + bpf->cmsg_cache_cnt = 1;
> +
> return 0;
> }
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> index f4802036eb42..fac9c6f9e197 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> @@ -99,6 +99,7 @@ enum pkt_vec {
> * @maps_neutral: hash table of offload-neutral maps (on pointer)
> *
> * @abi_version: global BPF ABI version
> + * @cmsg_cache_cnt: number of entries to read for caching
> *
> * @adjust_head: adjust head capability
> * @adjust_head.flags: extra flags for adjust head
> @@ -124,6 +125,7 @@ enum pkt_vec {
> * @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
> * @queue_select: BPF can set the RX queue ID in packet vector
> * @adjust_tail: BPF can simply trunc packet size for adjust tail
> + * @cmsg_multi_ent: FW can pack multiple map entries in a single cmsg
> */
> struct nfp_app_bpf {
> struct nfp_app *app;
> @@ -134,6 +136,8 @@ struct nfp_app_bpf {
> unsigned int cmsg_key_sz;
> unsigned int cmsg_val_sz;
>
> + unsigned int cmsg_cache_cnt;
> +
> struct list_head map_list;
> unsigned int maps_in_use;
> unsigned int map_elems_in_use;
> @@ -169,6 +173,7 @@ struct nfp_app_bpf {
> bool pseudo_random;
> bool queue_select;
> bool adjust_tail;
> + bool cmsg_multi_ent;
> };
>
> enum nfp_bpf_map_use {
> @@ -183,11 +188,21 @@ struct nfp_bpf_map_word {
> unsigned char non_zero_update :1;
> };
>
> +#define NFP_BPF_MAP_CACHE_CNT 4U
> +#define NFP_BPF_MAP_CACHE_TIME_NS (250 * 1000)
> +
> /**
> * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
> * @offmap: pointer to the offloaded BPF map
> * @bpf: back pointer to bpf app private structure
> * @tid: table id identifying map on datapath
> + *
> + * @cache_lock: protects @cache_blockers, @cache_to, @cache
> + * @cache_blockers: number of ops in flight which block caching
> + * @cache_gen: counter incremented by every blocker on exit
> + * @cache_to: time when cache will no longer be valid (ns)
> + * @cache: skb with cached response
> + *
> * @l: link on the nfp_app_bpf->map_list list
> * @use_map: map of how the value is used (in 4B chunks)
> */
> @@ -195,6 +210,13 @@ struct nfp_bpf_map {
> struct bpf_offloaded_map *offmap;
> struct nfp_app_bpf *bpf;
> u32 tid;
> +
> + spinlock_t cache_lock;
> + u32 cache_blockers;
> + u32 cache_gen;
> + u64 cache_to;
> + struct sk_buff *cache;
> +
> struct list_head l;
> struct nfp_bpf_map_word use_map[];
> };
> @@ -566,6 +588,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
>
> unsigned int nfp_bpf_ctrl_cmsg_min_mtu(struct nfp_app_bpf *bpf);
> unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf);
> +unsigned int nfp_bpf_ctrl_cmsg_cache_cnt(struct nfp_app_bpf *bpf);
> long long int
> nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
> void
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> index 39c9fec222b4..88fab6a82acf 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> @@ -385,6 +385,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
> offmap->dev_priv = nfp_map;
> nfp_map->offmap = offmap;
> nfp_map->bpf = bpf;
> + spin_lock_init(&nfp_map->cache_lock);
>
> res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
> if (res < 0) {
> @@ -407,6 +408,8 @@ nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
> struct nfp_bpf_map *nfp_map = offmap->dev_priv;
>
> nfp_bpf_ctrl_free_map(bpf, nfp_map);
> + dev_consume_skb_any(nfp_map->cache);
> + WARN_ON_ONCE(nfp_map->cache_blockers);
> list_del_init(&nfp_map->l);
> bpf->map_elems_in_use -= offmap->map.max_entries;
> bpf->maps_in_use--;
> --
> 2.21.0
>
Powered by blists - more mailing lists