[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <189d7027-9148-f41e-1842-773f3f8e30a2@huawei.com>
Date: Wed, 11 Jan 2023 10:37:35 +0800
From: "Ziyang Xuan (William)" <william.xuanziyang@...wei.com>
To: Willem de Bruijn <willemb@...gle.com>
CC: <ast@...nel.org>, <daniel@...earbox.net>, <andrii@...nel.org>,
<davem@...emloft.net>, <edumazet@...gle.com>, <kuba@...nel.org>,
<pabeni@...hat.com>, <bpf@...r.kernel.org>,
<netdev@...r.kernel.org>, <martin.lau@...ux.dev>,
<song@...nel.org>, <yhs@...com>, <john.fastabend@...il.com>,
<kpsingh@...nel.org>, <haoluo@...gle.com>, <jolsa@...nel.org>,
<sdf@...gle.com>
Subject: Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for
bpf_skb_adjust_room()
>> I think you prefer like this:
>
> Yes, this looks good to me. A few comments inline.
>
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2644,6 +2644,11 @@ union bpf_attr {
>> * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>> * L2 type as Ethernet.
>> *
>> + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
>> + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
>> + * Indicates the new IP header version after decapsulate the
>> + * outer IP header.
>> + *
>> * A call to this helper is susceptible to change the underlying
>> * packet buffer. Therefore, at load time, all checks on pointers
>> * previously done by the verifier are invalidated and must be
>> @@ -5803,6 +5808,8 @@ enum {
>> BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
>> BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
>> BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
>> + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
>> + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
>> };
>>
>> enum {
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 43cc1fe58a2c..0bbe5e67337c 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
>> #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
>> BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
>>
>> +#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
>> + BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
>> +
>> #define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
>> BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
>> BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
>> BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
>> BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
>> BPF_F_ADJ_ROOM_ENCAP_L2( \
>> - BPF_ADJ_ROOM_ENCAP_L2_MASK))
>> + BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
>> + BPF_F_ADJ_ROOM_DECAP_L3_MASK)
>>
>> static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
>> u64 flags)
>> @@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>> int ret;
>>
>> if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>> + BPF_F_ADJ_ROOM_DECAP_L3_MASK |
>> BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
>> return -EINVAL;
>>
>> @@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>> if (unlikely(ret < 0))
>> return ret;
>>
>> + /* Match skb->protocol to new outer l3 protocol */
>> + if (skb->protocol == htons(ETH_P_IP) &&
>> + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
>> + skb->protocol = htons(ETH_P_IPV6);
>> + else if (skb->protocol == htons(ETH_P_IPV6) &&
>> + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
>> + skb->protocol = htons(ETH_P_IP);
>> +
>> if (skb_is_gso(skb)) {
>> struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>> @@ -3597,6 +3610,10 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>> proto != htons(ETH_P_IPV6)))
>> return -ENOTSUPP;
>>
>> + if (unlikely(shrink && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4 &&
>> + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6))
>> + return -EINVAL;
>> +
>
> parentheses and can use mask:
>
> if (shrink && (flags & .._MASK == .._MASK)
>
> also should fail if the flags are passed but shrink is false.
Thank you for your valuable comments!
>
>> off = skb_mac_header_len(skb);
>> switch (mode) {
>> case BPF_ADJ_ROOM_NET:
>> @@ -3608,6 +3625,16 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>> return -ENOTSUPP;
>> }
>>
>> + if (shrink) {
>> + if (proto == htons(ETH_P_IP) &&
>> + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) {
>> + len_min = sizeof(struct ipv6hdr);
>> + } else if (proto == htons(ETH_P_IPV6) &&
>> + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) {
>> + len_min = sizeof(struct iphdr);
>> + }
>> + }
>> +
>
> No need to test proto first?
Indeed, I will remove them in patch v2.
>
>> len_cur = skb->len - skb_network_offset(skb);
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index 464ca3f01fe7..041361bc6ccf 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -2644,6 +2644,11 @@ union bpf_attr {
>> * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
>> * L2 type as Ethernet.
>> *
>> + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
>> + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
>> + * Indicates the new IP header version after decapsulate the
>> + * outer IP header.
>> + *
>> * A call to this helper is susceptible to change the underlying
>> * packet buffer. Therefore, at load time, all checks on pointers
>> * previously done by the verifier are invalidated and must be
>> @@ -5803,6 +5808,8 @@ enum {
>> BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
>> BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
>> BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
>> + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
>> + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
>> };
>>
>> enum {
>>
> .
>
Powered by blists - more mailing lists