[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <da408a2f-270d-4c2d-b61c-7106170dbfe0@quicinc.com>
Date: Mon, 6 May 2024 12:57:54 -0700
From: "Abhishek Chauhan (ABC)" <quic_abchauha@...cinc.com>
To: Willem de Bruijn <willemdebruijn.kernel@...il.com>,
"David S. Miller"
<davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, Andrew Halaney <ahalaney@...hat.com>,
"Martin
KaFai Lau" <martin.lau@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Daniel Borkmann <daniel@...earbox.net>, bpf <bpf@...r.kernel.org>
CC: <kernel@...cinc.com>
Subject: Re: [RFC PATCH bpf-next v6 2/3] net: Add additional bit to support
clockid_t timestamp type
On 5/6/2024 12:00 PM, Willem de Bruijn wrote:
> Abhishek Chauhan wrote:
>> tstamp_type is now set based on actual clockid_t compressed
>> into 2 bits.
>>
>> To make the design scalable for future needs, this commit extends
>> tstamp_type:1 to tstamp_type:2 to support other clockid_t
>> timestamps.
>>
>> With this commit, tstamp_type supports CLOCK_TAI in addition to
>> the existing CLOCK_MONOTONIC and CLOCK_REALTIME.
>>
>> Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@linux.dev/
>> Signed-off-by: Abhishek Chauhan <quic_abchauha@...cinc.com>
>> ---
>> Changes since v5
>> - Took care of documentation comments of tstamp_type
>> in skbuff.h as mentioned by Willem.
>> - Use of complete words instead of abbreviations in
>> macro definitions as mentioned by Willem.
>> - Fixed indentation problems
>> - Removed BPF_SKB_TSTAMP_UNSPEC and marked it
>> Deprecated as documentation, and introduced
>> BPF_SKB_CLOCK_REALTIME instead.
>> - BUILD_BUG_ON for additional enums introduced.
>> - __ip_make_skb and ip6_make_skb now have
>> TCP checks to mark TCP packets as mono tstamp base.
>> - separated the selftests/bpf changes into another patch.
>> - Made changes as per Martin in selftest bpf code and
>> tool/include/uapi/linux/bpf.h
>>
>> Changes since v4
>> - Made changes to BPF code in filter.c as per
>> Martin's comments
>> - Minor fixes on comments given on documentation
>> from Willem in skbuff.h (removed obvious ones)
>> - Made changes to ctx_rewrite.c and test_tc_dtime.c
>> - test_tc_dtime.c: I am not really sure if I took care
>> of all the changes, as I am not too familiar with
>> the framework.
>> - Introduce common mask SKB_TSTAMP_TYPE_MASK instead
>> of multiple SKB mask.
>> - Optimisation on BPF code as suggested by Martin.
>> - Set default case to SKB_CLOCK_REALTIME.
>>
>> Changes since v3
>> - Carefully reviewed BPF APIs and made changes in
>> BPF code as well.
>> - Re-used actual clockid_t values since skbuff.h
>> indirectly includes uapi/linux/time.h
>> - Added CLOCK_TAI as part of the skb_set_delivery_time
>> handling instead of CLOCK_USER
>> - Added default in switch for unsupported and invalid
>> timestamp with a WARN_ONCE
>> - All of the above comments were given by Willem
>> - Made changes in filter.c as per Martin's comments
>> to handle invalid cases in bpf code with addition of
>> SKB_TAI_DELIVERY_TIME_MASK
>>
>> Changes since v2
>> - Minor changes to commit subject
>>
>> Changes since v1
>> - identified additional changes in BPF framework.
>> - Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
>> - Made changes in skb_set_delivery_time to keep changes similar to
>> previous code for mono_delivery_time and just setting tstamp_type
>> bit 1 for userspace timestamp.
>>
>>
>> include/linux/skbuff.h | 21 +++++++++++--------
>> include/uapi/linux/bpf.h | 15 +++++++++-----
>> net/core/filter.c | 44 +++++++++++++++++++++++-----------------
>> net/ipv4/ip_output.c | 5 ++++-
>> net/ipv4/raw.c | 2 +-
>> net/ipv6/ip6_output.c | 5 ++++-
>> net/ipv6/raw.c | 2 +-
>> net/packet/af_packet.c | 7 +++----
>> 8 files changed, 61 insertions(+), 40 deletions(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index de3915e2bfdb..fe7d8dbef77e 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t;
>> enum skb_tstamp_type {
>> SKB_CLOCK_REALTIME,
>> SKB_CLOCK_MONOTONIC,
>> + SKB_CLOCK_TAI,
>> + __SKB_CLOCK_MAX = SKB_CLOCK_TAI,
>> };
>>
>> /**
>> @@ -829,8 +831,7 @@ enum skb_tstamp_type {
>> * @decrypted: Decrypted SKB
>> * @slow_gro: state present at GRO time, slower prepare step required
>> * @tstamp_type: When set, skb->tstamp has the
>> - * delivery_time in mono clock base Otherwise, the
>> - * timestamp is considered real clock base.
>> + * delivery_time clock base of skb->tstamp.
>> * @napi_id: id of the NAPI struct this skb came from
>> * @sender_cpu: (aka @napi_id) source CPU in XPS
>> * @alloc_cpu: CPU which did the skb allocation.
>> @@ -958,7 +959,7 @@ struct sk_buff {
>> /* private: */
>> __u8 __mono_tc_offset[0];
>> /* public: */
>> - __u8 tstamp_type:1; /* See skb_tstamp_type */
>> + __u8 tstamp_type:2; /* See skb_tstamp_type */
>> #ifdef CONFIG_NET_XGRESS
>> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
>> __u8 tc_skip_classify:1;
>> @@ -1088,15 +1089,16 @@ struct sk_buff {
>> #endif
>> #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset)
>>
>> -/* if you move tc_at_ingress or mono_delivery_time
>> +/* if you move tc_at_ingress or tstamp_type
>> * around, you also must adapt these constants.
>> */
>> #ifdef __BIG_ENDIAN_BITFIELD
>> -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
>> -#define TC_AT_INGRESS_MASK (1 << 6)
>> +#define SKB_TSTAMP_TYPE_MASK (3 << 6)
>> +#define SKB_TSTAMP_TYPE_RSHIFT (6)
>> +#define TC_AT_INGRESS_MASK (1 << 5)
>> #else
>> -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
>> -#define TC_AT_INGRESS_MASK (1 << 1)
>> +#define SKB_TSTAMP_TYPE_MASK (3)
>> +#define TC_AT_INGRESS_MASK (1 << 2)
>> #endif
>> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>>
>> @@ -4213,6 +4215,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
>> case CLOCK_MONOTONIC:
>> tstamp_type = SKB_CLOCK_MONOTONIC;
>> break;
>> + case CLOCK_TAI:
>> + tstamp_type = SKB_CLOCK_TAI;
>> + break;
>> default:
>> WARN_ON_ONCE(1);
>> kt = 0;
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 90706a47f6ff..25ea393cf084 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -6207,12 +6207,17 @@ union { \
>> __u64 :64; \
>> } __attribute__((aligned(8)))
>>
>> +/* The enum used in skb->tstamp_type. It specifies the clock type
>> + * of the time stored in the skb->tstamp.
>> + */
>> enum {
>> - BPF_SKB_TSTAMP_UNSPEC,
>> - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
>> - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
>> - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
>> - * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
>> + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
>> + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
>> + BPF_SKB_CLOCK_REALTIME = 0,
>> + BPF_SKB_CLOCK_MONOTONIC = 1,
>> + BPF_SKB_CLOCK_TAI = 2,
>> + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
>> + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
>> */
>> };
>>
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index a3781a796da4..9f3df4a0d1ee 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -7726,16 +7726,20 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
>> return -EOPNOTSUPP;
>>
>> switch (tstamp_type) {
>> - case BPF_SKB_TSTAMP_DELIVERY_MONO:
>> + case BPF_SKB_CLOCK_MONOTONIC:
>> if (!tstamp)
>> return -EINVAL;
>> skb->tstamp = tstamp;
>> skb->tstamp_type = SKB_CLOCK_MONOTONIC;
>> break;
>> - case BPF_SKB_TSTAMP_UNSPEC:
>> - if (tstamp)
>> + case BPF_SKB_CLOCK_TAI:
>> + if (!tstamp)
>> return -EINVAL;
>> - skb->tstamp = 0;
>> + skb->tstamp = tstamp;
>> + skb->tstamp_type = SKB_CLOCK_TAI;
>> + break;
>> + case BPF_SKB_CLOCK_REALTIME:
>> + skb->tstamp = tstamp;
>> skb->tstamp_type = SKB_CLOCK_REALTIME;
>
> Only since there is another reason to respin.
>
> The previous code did not do this, but let's order cases by their enum
> value, starting with realtime.
>
> Also in anticipation with possible future expansions.
>
Noted, I will take care of this.
>
Powered by blists - more mailing lists