[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6f22e464-21a4-1d1e-7412-080e03328f6c@cumulusnetworks.com>
Date: Wed, 28 Mar 2018 12:23:41 -0600
From: David Ahern <dsa@...ulusnetworks.com>
To: Shrijeet Mukherjee <shrijeetoss@...il.com>, daniel@...earbox.net
Cc: netdev@...r.kernel.org, roopa@...ulusnetworks.com, ast@...com
Subject: Re: [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for
EBPF
On 3/28/18 10:27 AM, Shrijeet Mukherjee wrote:
> diff --git a/include/net/mpls.h b/include/net/mpls.h
> index 2583dbc689b8..3a5b8c00823d 100644
> --- a/include/net/mpls.h
> +++ b/include/net/mpls.h
> @@ -24,14 +24,50 @@ struct mpls_shim_hdr {
> __be32 label_stack_entry;
> };
>
> +struct mpls_entry_decoded {
> + u32 label;
> + u8 ttl;
> + u8 tc;
> + u8 bos;
> +};
> +
> static inline bool eth_p_mpls(__be16 eth_type)
> {
> return eth_type == htons(ETH_P_MPLS_UC) ||
> eth_type == htons(ETH_P_MPLS_MC);
> }
>
> -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
> +static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
> {
> return (struct mpls_shim_hdr *)skb_network_header(skb);
> }
> +
> +static inline struct mpls_shim_hdr
> +mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
> +{
> + struct mpls_shim_hdr result;
> +
> + result.label_stack_entry =
> + cpu_to_be32((label << MPLS_LS_LABEL_SHIFT)
> + | (tc << MPLS_LS_TC_SHIFT)
> + | (bos ? (1 << MPLS_LS_S_SHIFT) : 0)
> + | (ttl << MPLS_LS_TTL_SHIFT));
> +
> + return result;
> +}
> +
> +static inline
> +struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
> +{
> + struct mpls_entry_decoded result;
> + unsigned int entry = be32_to_cpu(hdr->label_stack_entry);
> +
> + result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
> + result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
> + result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
> + result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
> +
> + return result;
> +}
> +
> #endif
The above should be in patch 1, though without the mpls_hdr rename.
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 18b7c510c511..2278548e1f8f 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -439,6 +439,12 @@ union bpf_attr {
> * int bpf_skb_vlan_pop(skb)
> * Return: 0 on success or negative error
> *
> + * int bpf_skb_mpls_push(skb, num_lbls, lbls[])
> + * Return 0 on success or negative error
> + *
> + * int bpf_skb_mpls_pop(skb, num_lbls)
> + * Return number of popped labels, 0 is no-op, deliver packet to current dst
> + *
> * int bpf_skb_get_tunnel_key(skb, key, size, flags)
> * int bpf_skb_set_tunnel_key(skb, key, size, flags)
> * retrieve or populate tunnel metadata
> @@ -794,7 +800,10 @@ union bpf_attr {
> FN(msg_redirect_map), \
> FN(msg_apply_bytes), \
> FN(msg_cork_bytes), \
> - FN(msg_pull_data),
> + FN(msg_pull_data), \
> + FN(skb_mpls_push), \
> + FN(skb_mpls_pop),
> +
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 00f62fafc788..c96ae8ef423d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
> }
>
> BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
> - u32, mode, u64, flags)
> + u32, mode, u64, flags)
drop the above whitespace change
> {
> if (unlikely(flags))
> return -EINVAL;
> @@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
> .arg4_type = ARG_ANYTHING,
> };
>
> +static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
> +{
> + u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */
> + int ret;
> +
> + ret = skb_cow(skb, len_diff);
> + if (unlikely(ret < 0))
> + return ret;
> +
> + skb_set_inner_protocol(skb, skb->protocol);
> + skb_reset_inner_network_header(skb);
> +
> + ret = bpf_skb_generic_push(skb, off, len_diff);
> + if (unlikely(ret < 0))
> + return ret;
> +
> + skb_reset_mac_header(skb);
> + skb_set_network_header(skb, ETH_HLEN);
> + skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
> +
> + if (skb_is_gso(skb)) {
> +/* Due to header grow, MSS needs to be downgraded. */
> + skb_shinfo(skb)->gso_size -= len_diff;
> +/* Header must be checked, and gso_segs recomputed. */
> + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> + skb_shinfo(skb)->gso_segs = 0;
> + }
> +
> + bpf_compute_data_pointers(skb);
> + return 0;
> +}
The only thing MPLS specific is the protocol setting. Seems to me you
could leverage the existing bpf_skb_net_grow or even bpf_skb_adjust_net
and then set the protocols and headers in the caller of this function.
You already do something similar in bpf_skb_mpls_pop.
> +
> +BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
> +{
> + u32 i = 0;
> + struct mpls_shim_hdr *hdr;
> + unsigned char *cursor;
> + struct mpls_entry_decoded dec;
> +
> + if (num_lbls == 0)
> + return 0;
> +
> + cursor = skb_network_header(skb);
> + do {
> + hdr = (struct mpls_shim_hdr *)cursor;
> + dec = mpls_entry_decode(hdr);
> + i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
> + } while (dec.bos != 1 && i < num_lbls);
> +
> + bpf_push_mac_rcsum(skb);
> + skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
> + skb_reset_network_header(skb);
> +
> + skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
If I pop 1 label and there are more of them, then the protocol does not
need to be adjusted -- it was already set to MPLS.
If I pop all of the labels, then the protocol is not MPLS, but whatever
the inner packet is -- IPv4 or IPv6 or other.
> + bpf_pull_mac_rcsum(skb);
> + bpf_compute_data_pointers(skb);
> +
> + return i;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
> + .func = bpf_skb_mpls_pop,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_ANYTHING,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
> +
> +BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
> + __be32*, lbls, u32, num_lbls)
> +{
> + int ret, i;
> + unsigned int new_header_size = num_lbls * sizeof(__be32);
> + unsigned int ttl = 255;
> + struct dst_entry *dst = skb_dst(skb);
dst is not always set. e.g., any program on ingress prior to the layer 3
will not have it.
> + struct net_device *out_dev = dst->dev;
> + struct mpls_shim_hdr *hdr;
> + bool bos;
> +
> + /* Ensure there is enough space for the headers in the skb */
> + ret = bpf_skb_mpls_net_grow(skb, new_header_size);
> + if (ret < 0) {
> + trace_printk("COW was killed\n");
> + bpf_compute_data_pointers(skb);
> + return -ENOMEM;
> + }
> +
> + skb->dev = out_dev;
> +/* XXX this may need finesse to integrate with
> + * global TTL values for MPLS
> + */
> + if (dst->ops->family == AF_INET)
> + ttl = ip_hdr(skb)->ttl;
> + else if (dst->ops->family == AF_INET6)
> + ttl = ipv6_hdr(skb)->hop_limit;
> +
> + /* Push the new labels */
> + hdr = skb_mpls_hdr(skb);
> + bos = true;
> + for (i = num_lbls - 1; i >= 0; i--) {
> + hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
> + bos = false;
> + }
> +
> + bpf_compute_data_pointers(skb);
> + return 0;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_push_proto = {
> + .func = bpf_skb_mpls_push,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_PTR_TO_MEM,
> + .arg3_type = ARG_CONST_SIZE,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
> +
> static u32 __bpf_skb_min_len(const struct sk_buff *skb)
> {
> u32 min_len = skb_network_offset(skb);
> @@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
> {
> if (func == bpf_skb_vlan_push ||
> func == bpf_skb_vlan_pop ||
> + func == bpf_skb_mpls_push ||
> + func == bpf_skb_mpls_pop ||
> func == bpf_skb_store_bytes ||
> func == bpf_skb_change_proto ||
> func == bpf_skb_change_head ||
> @@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
> return &bpf_skb_vlan_push_proto;
> case BPF_FUNC_skb_vlan_pop:
> return &bpf_skb_vlan_pop_proto;
> + case BPF_FUNC_skb_mpls_push:
> + return &bpf_skb_mpls_push_proto;
> + case BPF_FUNC_skb_mpls_pop:
> + return &bpf_skb_mpls_pop_proto;
> case BPF_FUNC_skb_change_proto:
> return &bpf_skb_change_proto_proto;
> case BPF_FUNC_skb_change_type:
>
Powered by blists - more mailing lists