[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210415002350.247ni4rqjwzguu4j@kafai-mbp.dhcp.thefacebook.com>
Date: Wed, 14 Apr 2021 17:23:50 -0700
From: Martin KaFai Lau <kafai@...com>
To: Hangbin Liu <liuhangbin@...il.com>
CC: <bpf@...r.kernel.org>, <netdev@...r.kernel.org>,
Toke Høiland-Jørgensen <toke@...hat.com>,
Jiri Benc <jbenc@...hat.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Eelco Chaudron <echaudro@...hat.com>, <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Lorenzo Bianconi <lorenzo.bianconi@...hat.com>,
David Ahern <dsahern@...il.com>,
Andrii Nakryiko <andrii.nakryiko@...il.com>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
John Fastabend <john.fastabend@...il.com>,
Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
Björn Töpel <bjorn.topel@...il.com>
Subject: Re: [PATCHv7 bpf-next 2/4] xdp: extend xdp_redirect_map with broadcast support
On Wed, Apr 14, 2021 at 08:26:08PM +0800, Hangbin Liu wrote:
[ ... ]
> +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
> + u64 flags, u64 flag_mask,
> void *lookup_elem(struct bpf_map *map, u32 key))
> {
> struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>
> /* Lower bits of the flags are used as return code on lookup failure */
> - if (unlikely(flags > XDP_TX))
> + if (unlikely(flags & ~(BPF_F_ACTION_MASK | flag_mask)))
> return XDP_ABORTED;
>
> ri->tgt_value = lookup_elem(map, ifindex);
> - if (unlikely(!ri->tgt_value)) {
> + if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
> /* If the lookup fails we want to clear out the state in the
> * redirect_info struct completely, so that if an eBPF program
> * performs multiple lookups, the last one always takes
> @@ -1482,13 +1484,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
> */
> ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
> ri->map_type = BPF_MAP_TYPE_UNSPEC;
> - return flags;
> + return flags & BPF_F_ACTION_MASK;
> }
>
> ri->tgt_index = ifindex;
> ri->map_id = map->id;
> ri->map_type = map->map_type;
>
> + if (flags & BPF_F_BROADCAST) {
> + WRITE_ONCE(ri->map, map);
Why is WRITE_ONCE() used only for ri->map, while ri->flags is written with a plain store? Is it needed here at all?
> + ri->flags = flags;
> + } else {
> + WRITE_ONCE(ri->map, NULL);
> + ri->flags = 0;
> + }
> +
> return XDP_REDIRECT;
> }
>
[ ... ]
> +int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> + struct bpf_map *map, bool exclude_ingress)
> +{
> + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> + int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
> + struct bpf_dtab_netdev *dst, *last_dst = NULL;
> + struct hlist_head *head;
> + struct hlist_node *next;
> + struct xdp_frame *xdpf;
> + unsigned int i;
> + int err;
> +
> + xdpf = xdp_convert_buff_to_frame(xdp);
> + if (unlikely(!xdpf))
> + return -EOVERFLOW;
> +
> + if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> + for (i = 0; i < map->max_entries; i++) {
> + dst = READ_ONCE(dtab->netdev_map[i]);
> + if (!is_valid_dst(dst, xdp, exclude_ifindex))
> + continue;
> +
> + /* we only need n-1 clones; last_dst enqueued below */
> + if (!last_dst) {
> + last_dst = dst;
> + continue;
> + }
> +
> + err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
> + if (err)
> + return err;
> +
> + last_dst = dst;
> + }
> + } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
> + for (i = 0; i < dtab->n_buckets; i++) {
> + head = dev_map_index_hash(dtab, i);
> + hlist_for_each_entry_safe(dst, next, head, index_hlist) {
Hmm... should this be hlist_for_each_entry_rcu() instead of hlist_for_each_entry_safe()?
> + if (!is_valid_dst(dst, xdp, exclude_ifindex))
> + continue;
> +
> + /* we only need n-1 clones; last_dst enqueued below */
> + if (!last_dst) {
> + last_dst = dst;
> + continue;
> + }
> +
> + err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
> + if (err)
> + return err;
> +
> + last_dst = dst;
> + }
> + }
> + }
> +
> + /* consume the last copy of the frame */
> + if (last_dst)
> + bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
> + else
> + xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
> +
> + return 0;
> +}
> +
Powered by blists - more mailing lists