[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4d0e5f2b-d088-58f4-d86d-00aa444d77c0@huawei.com>
Date: Mon, 9 Jan 2023 17:19:54 +0800
From: "Ziyang Xuan (William)" <william.xuanziyang@...wei.com>
To: Willem de Bruijn <willemb@...gle.com>
CC: <ast@...nel.org>, <daniel@...earbox.net>, <andrii@...nel.org>,
<davem@...emloft.net>, <edumazet@...gle.com>, <kuba@...nel.org>,
<pabeni@...hat.com>, <bpf@...r.kernel.org>,
<netdev@...r.kernel.org>, <martin.lau@...ux.dev>,
<song@...nel.org>, <yhs@...com>, <john.fastabend@...il.com>,
<kpsingh@...nel.org>, <haoluo@...gle.com>, <jolsa@...nel.org>,
<sdf@...gle.com>
Subject: Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for
bpf_skb_adjust_room()
> On Fri, Jan 6, 2023 at 2:55 PM <sdf@...gle.com> wrote:
>>
>> On 01/06, Ziyang Xuan wrote:
>>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
>>> Main use case is for using cls_bpf on ingress hook to decapsulate
>>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
>>
>> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
>> There might be a lot of GRO/GSO context I'm missing.
>>
>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@...wei.com>
>>> ---
>>> net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
>>> 1 file changed, 32 insertions(+), 2 deletions(-)
>>
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 929358677183..73982fb4fe2e 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
>>> u32 off, u32 len_diff,
>>> static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>> u64 flags)
>>> {
>>> + union {
>>> + struct iphdr *v4;
>>> + struct ipv6hdr *v6;
>>> + unsigned char *hdr;
>>> + } ip;
>>> + __be16 proto;
>>> int ret;
>>
>>> if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
>>> *skb, u32 off, u32 len_diff,
>>> if (unlikely(ret < 0))
>>> return ret;
>>
>>> + ip.hdr = skb_inner_network_header(skb);
>>> + if (ip.v4->version == 4)
>>> + proto = htons(ETH_P_IP);
>>> + else
>>> + proto = htons(ETH_P_IPV6);
>>> +
>>> ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>>> if (unlikely(ret < 0))
>>> return ret;
>>
>>> + /* Match skb->protocol to new outer l3 protocol */
>>> + skb->protocol = proto;
>>> +
>>> if (skb_is_gso(skb)) {
>>> struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>> u32, mode, u64, flags)
>>> {
>>> u32 len_cur, len_diff_abs = abs(len_diff);
>>> - u32 len_min = bpf_skb_net_base_len(skb);
>>> - u32 len_max = BPF_SKB_MAX_LEN;
>>> + u32 len_min, len_max = BPF_SKB_MAX_LEN;
>>> __be16 proto = skb->protocol;
>>> bool shrink = len_diff < 0;
>>> + union {
>>> + struct iphdr *v4;
>>> + struct ipv6hdr *v6;
>>> + unsigned char *hdr;
>>> + } ip;
>>> u32 off;
>>> int ret;
>>
>>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>> proto != htons(ETH_P_IPV6)))
>>> return -ENOTSUPP;
>>
>>> + if (unlikely(shrink && !skb->encapsulation))
>>> + return -ENOTSUPP;
>>> +
>
> This new restriction might break existing users.
>
> There is no pre-existing requirement that shrink is used solely with
> packets encapsulated by the protocol stack.
>
> Indeed, skb->encapsulation is likely not set on packets arriving from
> the wire, even if encapsulated. Referring to your comment "Main use
> case is for using cls_bpf on ingress hook to decapsulate"
>
> Can a combination of the existing bpf_skb_adjust_room and
> bpf_skb_change_proto address your problem?
Hello Willem,
I think combination bpf_skb_adjust_room and bpf_skb_change_proto can not
address my problem.
Now, bpf_skb_adjust_room() would fail for "len_cur - len_diff_abs < len_min"
when decap ipip6 packet, because "len_min" should be sizeof(struct iphdr)
but not sizeof(struct ipv6hdr).
We can remove skb->encapsulation restriction and parse outer and inner IP
header to determine ipip6 and ip6ip packets. As following:
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ __be16 proto;
int ret;
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
@@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
+ ip.hdr = skb_network_header(skb);
+ if (ip.v4->version == 4) {
+ if (ip.v4->protocol == IPPROTO_IPV6)
+ proto = htons(ETH_P_IPV6);
+ } else {
+ struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+ if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4)
+ proto = htons(ETH_P_IP);
+ }
+
ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
+ /* Match skb->protocol to new outer l3 protocol */
+ skb->protocol = proto;
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32 len_max = BPF_SKB_MAX_LEN;
__be16 proto = skb->protocol;
bool shrink = len_diff < 0;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
u32 off;
int ret;
@@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
+ if (shrink) {
+ ip.hdr = skb_network_header(skb);
+ if (ip.v4->version == 4) {
+ if (ip.v4->protocol == IPPROTO_IPV6)
+ len_min = sizeof(struct ipv6hdr);
+ } else {
+ struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+ if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) {
+ len_min = sizeof(struct iphdr);
+ }
+ }
+ }
+
len_cur = skb->len - skb_network_offset(skb);
Look forward to your comments and suggestions.
Thank you!
>
>>> off = skb_mac_header_len(skb);
>>> switch (mode) {
>>> case BPF_ADJ_ROOM_NET:
>>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>> return -ENOTSUPP;
>>> }
>>
>>> + if (shrink) {
>>> + ip.hdr = skb_inner_network_header(skb);
>>> + if (ip.v4->version == 4)
>>> + len_min = sizeof(struct iphdr);
>>> + else
>>> + len_min = sizeof(struct ipv6hdr);
>>> + }
>>> +
>>> len_cur = skb->len - skb_network_offset(skb);
>>> if ((shrink && (len_diff_abs >= len_cur ||
>>> len_cur - len_diff_abs < len_min)) ||
>>> --
>>> 2.25.1
>>
> .
>
Powered by blists - more mailing lists