Message-ID: <508b6da5-a32e-465b-b18e-e794bf0f19ab@quicinc.com>
Date: Wed, 24 Apr 2024 11:03:34 -0700
From: "Abhishek Chauhan (ABC)" <quic_abchauha@...cinc.com>
To: Martin KaFai Lau <martin.lau@...ux.dev>,
	Willem de Bruijn <willemdebruijn.kernel@...il.com>
CC: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, Andrew Halaney <ahalaney@...hat.com>,
	"Martin KaFai Lau" <martin.lau@...nel.org>,
	Daniel Borkmann <daniel@...earbox.net>, bpf <bpf@...r.kernel.org>,
	<kernel@...cinc.com>
Subject: Re: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support
	clockid_t timestamp type

On 4/22/2024 11:46 AM, Martin KaFai Lau wrote:
> On 4/19/24 6:13 PM, Abhishek Chauhan (ABC) wrote:
>>
>>
>> On 4/18/2024 5:30 PM, Abhishek Chauhan (ABC) wrote:
>>>
>>>
>>> On 4/18/2024 2:57 PM, Martin KaFai Lau wrote:
>>>> On 4/18/24 1:10 PM, Abhishek Chauhan (ABC) wrote:
>>>>>>> #ifdef CONFIG_NET_XGRESS
>>>>>>> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
>>>>>>> __u8 tc_skip_classify:1;
>>>>>>> @@ -1096,10 +1100,12 @@ struct sk_buff {
>>>>>>> */
>>>>>>> #ifdef __BIG_ENDIAN_BITFIELD
>>>>>>> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
>>>>>>> -#define TC_AT_INGRESS_MASK (1 << 6)
>>>>>>> +#define SKB_TAI_DELIVERY_TIME_MASK (1 << 6)
>>>>>>
>>>>>> SKB_TSTAMP_TYPE_BIT2_MASK?
>>>>
>>>> nit. Shorten it to just SKB_TSTAMP_TYPE_MASK?
>>>>
>>> Okay, I will do the same. Noted!
>>>> #ifdef __BIG_ENDIAN_BITFIELD
>>>> #define SKB_TSTAMP_TYPE_MASK (3 << 6)
>>>> #define SKB_TSTAMP_TYPE_RSH (6) /* more on this later */
>>>> #else
>>>> #define SKB_TSTAMP_TYPE_MASK (3)
>>>> #endif
>>>>
>>>>>>
>>>>> I was thinking of keeping it as TAI, since a generic name might confuse developers. I hope that's okay.
>>>>
>>>> I think it is not very useful to distinguish each bit since it is an enum value now. It becomes more like the "pkt_type:3" and its PKT_TYPE_MAX.
>>> I see what you are saying.
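For my own reference, here is a rough sketch of the enum-based layout being discussed. This is a sketch only: SKB_CLOCK_MONO, SKB_CLOCK_TAI and __SKB_CLOCK_MAX come from this thread, while the default/real-time name is just a placeholder.

/* Sketch: skb->tstamp_type becomes a 2-bit enum-valued field instead of
 * individual mono/tai delivery-time bits, much like pkt_type:3/PKT_TYPE_MAX.
 */
enum skb_tstamp_type {
	SKB_CLOCK_REALTIME,		/* placeholder name for the default/rcv clock */
	SKB_CLOCK_MONO,			/* delivery time in CLOCK_MONOTONIC */
	SKB_CLOCK_TAI,			/* delivery time in CLOCK_TAI (new) */
	__SKB_CLOCK_MAX = SKB_CLOCK_TAI,
};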
>>>>>>> +#define TC_AT_INGRESS_MASK (1 << 5)
>>>>>>> #else
>>>>>>> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
>>>>>>> -#define TC_AT_INGRESS_MASK (1 << 1)
>>>>>>> +#define SKB_TAI_DELIVERY_TIME_MASK (1 << 1)
>>>>>>> +#define TC_AT_INGRESS_MASK (1 << 2)
>>>>>>> #endif
>>>>>>> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>>>>>>> @@ -4206,6 +4212,11 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
>>>>>>>  	case CLOCK_MONOTONIC:
>>>>>>>  		skb->tstamp_type = SKB_CLOCK_MONO;
>>>>>>>  		break;
>>>>>>> +	case CLOCK_TAI:
>>>>>>> +		skb->tstamp_type = SKB_CLOCK_TAI;
>>>>>>> +		break;
>>>>>>> +	default:
>>>>>>> +		WARN_ONCE(true, "clockid %d not supported", tstamp_type);
>>>>>>
>>>>>> and set to 0 and default tstamp_type?
>>>>> Actually, thinking about it, I feel that if it's unsupported, just falling back to the default is the correct thing. I will take care of this.
>>>>>>>  	}
>>>>>>>  }
>>>>>>
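Following up on the fallback question above, something along these lines is what I have in mind (sketch only, not the final patch; SKB_CLOCK_REALTIME is a placeholder name for the default/real-time type):

	/* Sketch: restructured switch in skb_set_delivery_time(), where an
	 * unsupported clockid warns once and falls back to the default
	 * (real-time) type instead of leaving tstamp_type in a bad state.
	 */
	switch (tstamp_type) {
	case CLOCK_REALTIME:
		skb->tstamp_type = SKB_CLOCK_REALTIME;	/* placeholder default */
		break;
	case CLOCK_MONOTONIC:
		skb->tstamp_type = SKB_CLOCK_MONO;
		break;
	case CLOCK_TAI:
		skb->tstamp_type = SKB_CLOCK_TAI;
		break;
	default:
		WARN_ONCE(true, "clockid %d not supported", tstamp_type);
		skb->tstamp_type = SKB_CLOCK_REALTIME;	/* fall back to default */
		break;
	}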
>>>>>>> @@ -9372,10 +9378,16 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
>>>>>>>  	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
>>>>>>>  			      SKB_BF_MONO_TC_OFFSET);
>>>>>>>  	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>>>>>> -				SKB_MONO_DELIVERY_TIME_MASK, 2);
>>>>>>> +				SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
>>>>>>> +	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>>>>>> +				SKB_MONO_DELIVERY_TIME_MASK, 3);
>>>>>>> +	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
>>>>>>> +				SKB_TAI_DELIVERY_TIME_MASK, 4);
>>>>>>>  	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
>>>>>>>  	*insn++ = BPF_JMP_A(1);
>>>>>>>  	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
>>>>>>> +	*insn++ = BPF_JMP_A(1);
>>>>>>> +	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);
>>>>
>>>> With SKB_TSTAMP_TYPE_MASK defined like above, this could be simplified like this (untested):
>>>>
>>> Let me think this through and raise it as part of the next rfc patch.
>>>> static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
>>>> 						      struct bpf_insn *insn)
>>>> {
>>>> 	__u8 value_reg = si->dst_reg;
>>>> 	__u8 skb_reg = si->src_reg;
>>>>
>>>> 	BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);
>>>> 	*insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
>>>> 	*insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
>>>> #ifdef __BIG_ENDIAN_BITFIELD
>>>> 	*insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSH);
>>>> #else
>>>> 	BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
>>>> #endif
>>>>
>>>> 	return insn;
>>>> }
>>>>
>>>>>>>  	return insn;
>>>>>>>  }
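Noting down for myself why the direct AND (plus the RSH on big-endian) is enough: the kernel-side values would line up one-to-one with the uapi values, roughly as below. BPF_SKB_TSTAMP_UNSPEC and BPF_SKB_TSTAMP_DELIVERY_MONO already exist in uapi bpf.h; BPF_SKB_TSTAMP_DELIVERY_TAI is the value this series adds.

/* Sketch of the value correspondence the BUILD_BUG_ON above relies on. */
enum {
	BPF_SKB_TSTAMP_UNSPEC,		/* 0: matches the default tstamp_type */
	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* 1: matches SKB_CLOCK_MONO */
	BPF_SKB_TSTAMP_DELIVERY_TAI,	/* 2: matches SKB_CLOCK_TAI (new) */
};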
>>>>>>> @@ -9418,10 +9430,26 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
>>>>>>>  		__u8 tmp_reg = BPF_REG_AX;
>>>>>>>  		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
>>>>>>> +		/* check if all three bits are set */
>>>>>>>  		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
>>>>>>> -					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
>>>>>>> -		*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
>>>>>>> -					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
>>>>>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
>>>>>>> +					SKB_TAI_DELIVERY_TIME_MASK);
>>>>>>> +		/* if all 3 bits are set jump 3 instructions and clear the register */
>>>>>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>>>>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
>>>>>>> +					SKB_TAI_DELIVERY_TIME_MASK, 4);
>>>>>>> +		/* Now check Mono is set with ingress mask if so clear */
>>>>>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>>>>>> +					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3);
>>>>>>> +		/* Now check tai is set with ingress mask if so clear */
>>>>>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>>>>>> +					TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
>>>>>>> +		/* Now check tai and mono are set if so clear */
>>>>>>> +		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
>>>>>>> +					SKB_MONO_DELIVERY_TIME_MASK |
>>>>>>> +					SKB_TAI_DELIVERY_TIME_MASK, 1);
>>>>
>>>> Same as the bpf_convert_tstamp_type_read, this could be simplified with SKB_TSTAMP_TYPE_MASK.
>>>>
>> Willem and Martin,
>> When do we clear the tstamp and make it 0 in bpf_convert_tstamp_read? Meaning, for which configuration?
>
> When the bpf prog does not check the skb->tstamp_type. It is
> the "if (!prog->tstamp_type_access)" in bpf_convert_tstamp_read().
>
> If bpf prog does not check the skb->tstamp_type and it is at ingress,
> bpf prog expects a recv tstamp (i.e. real clock), so it needs to clear
> out the tstamp (i.e. read as 0 tstamp).
>
>> I see that previously (in the current upstream code), if mono_delivery is set and tc_ingress_mask is set,
>> the upstream code used to set the tstamp as 0.
>>
>> Which means, with the addition of the tai mask, the new implementation should take care of the following cases (correct me if I am wrong):
>> 1. ( tai mask set + ingress mask set ) = Clear tstamp
>> 2. ( mono mask set + ingress mask set ) = Clear tstamp
>> 3. ( mono mask set + tai mask set + ingress mask set ) = Clear tstamp
>> 4. ( No mask set ) = Clear tstamp
>> 5. ( Tai mask set + mono mask set ) = Clear tstamp
>
> No need to check the individual mono and tai bit here. Check the
> tstamp_type as a whole. Like in pseudo C:
>
> 	if (skb->tc_at_ingress && skb->tstamp_type)
> 		value_reg = 0;
>
> untested code for tstamp_read() and tstamp_write():
>
> static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
> 						const struct bpf_insn *si,
> 						struct bpf_insn *insn)
> {
> 	__u8 value_reg = si->dst_reg;
> 	__u8 skb_reg = si->src_reg;
>
> #ifdef CONFIG_NET_XGRESS
> 	/* If the tstamp_type is read,
> 	 * the bpf prog is aware the tstamp could have delivery time.
> 	 * Thus, read skb->tstamp as is if tstamp_type_access is true.
> 	 */
> 	if (!prog->tstamp_type_access) {
> 		/* AX is needed because src_reg and dst_reg could be the same */
> 		__u8 tmp_reg = BPF_REG_AX;
>
> 		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
> 		*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
> 		/* goto <read> */
> 		*insn++ = BPF_JMP_A(4);
> 		*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1);
> 		/* goto <read> */
> 		*insn++ = BPF_JMP_A(2);
> 		/* skb->tc_at_ingress && skb->tstamp_type,
> 		 * read 0 as the (rcv) timestamp.
> 		 */
> 		*insn++ = BPF_MOV64_IMM(value_reg, 0);
> 		*insn++ = BPF_JMP_A(1);
> 	}
> #endif
>
> 	/* <read>: value_reg = skb->tstamp */
> 	*insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
> 			      offsetof(struct sk_buff, tstamp));
> 	return insn;
> }
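To convince myself the jump offsets line up, the emitted sequence for the !tstamp_type_access case works out as follows (my own annotation, not part of the patch):

/* insn 0: tmp = *(u8 *)(skb + SKB_BF_MONO_TC_OFFSET)
 * insn 1: if (tmp & TC_AT_INGRESS_MASK)   goto insn 3   (at ingress, check type)
 * insn 2: goto insn 7                                   (egress -> <read>)
 * insn 3: if (tmp & SKB_TSTAMP_TYPE_MASK) goto insn 5   (delivery time present)
 * insn 4: goto insn 7                                   (rcv tstamp -> <read>)
 * insn 5: value_reg = 0                                 (hide the delivery time)
 * insn 6: goto insn 8                                   (skip <read>)
 * insn 7: <read>  value_reg = skb->tstamp
 */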
>
> static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
> 						 const struct bpf_insn *si,
> 						 struct bpf_insn *insn)
> {
> 	__u8 value_reg = si->src_reg;
> 	__u8 skb_reg = si->dst_reg;
>
> #ifdef CONFIG_NET_XGRESS
> 	/* If the tstamp_type is read,
> 	 * the bpf prog is aware the tstamp could have delivery time.
> 	 * Thus, write skb->tstamp as is if tstamp_type_access is true.
> 	 * Otherwise, writing at ingress will have to clear the
> 	 * mono_delivery_time (skb->tstamp_type:1) bit also.
> 	 */
> 	if (!prog->tstamp_type_access) {
> 		__u8 tmp_reg = BPF_REG_AX;
>
> 		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
> 		/* Writing __sk_buff->tstamp as ingress, goto <clear> */
> 		*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
> 		/* goto <store> */
> 		*insn++ = BPF_JMP_A(2);
> 		/* <clear>: skb->tstamp_type */
> 		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
> 		*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
> 	}
> #endif
>
> 	/* <store>: skb->tstamp = tstamp */
> 	*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM,
> 			       skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
> 	return insn;
> }
>
Thanks Martin. I will raise the RFC patch v5 today. I have also made changes in test_tc_dtime.c which need a closer review.
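For context, the kind of check being added there looks roughly like the sketch below. This is a simplified sketch only, not the actual test_tc_dtime.c code; it just illustrates a tc program reading the new BPF_SKB_TSTAMP_DELIVERY_TAI value:

/* Simplified sketch: a tc program that reads __sk_buff->tstamp_type (which
 * marks the prog with tstamp_type_access) and expects a TAI delivery time
 * to be passed through unchanged together with a non-zero tstamp.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int check_tai_dtime(struct __sk_buff *skb)
{
	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_TAI && skb->tstamp)
		return TC_ACT_OK;

	return TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";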
>>
>> This leaves us with only two values which can be supported, which are 0x1 and 0x2.
>>
>> This means the tstamp_type should be either 0x1 (mono) or 0x2 (tai) for value_reg to be set with the tstamp.
>> Is my understanding correct ?
>>
>> Do you think the below simplified version looks okay ?
>>
>> static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
>> 						const struct bpf_insn *si,
>> 						struct bpf_insn *insn)
>> {
>> 	__u8 value_reg = si->dst_reg;
>> 	__u8 skb_reg = si->src_reg;
>>
>> 	BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);
>> #ifdef CONFIG_NET_XGRESS
>> 	/* If the tstamp_type is read,
>> 	 * the bpf prog is aware the tstamp could have delivery time.
>> 	 * Thus, read skb->tstamp as is if tstamp_type_access is true.
>> 	 */
>> 	if (!prog->tstamp_type_access) {
>> 		/* AX is needed because src_reg and dst_reg could be the same */
>> 		__u8 tmp_reg = BPF_REG_AX;
>>
>> 		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
>> 		/* mask out the tc_at_ingress and tstamp_type bits */
>> 		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
>> 					TC_AT_INGRESS_MASK | SKB_TSTAMP_TYPE_MASK);
>>
>> 		/* If the value of tmp_reg is 7, 6, 5, 4, 3 or 0, the
>> 		 * configuration is invalid and the tstamp is read as 0;
>> 		 * 0x1 and 0x2 are the valid configurations.
>> 		 */
>> #ifdef __BIG_ENDIAN_BITFIELD
>> 		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0x1 << SKB_TSTAMP_TYPE_RSH, 3);
>> 		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0x2 << SKB_TSTAMP_TYPE_RSH, 2);
>> #else
>> 		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0x1, 3);
>> 		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg, 0x2, 2);
>> #endif
>> 		/* skb->tc_at_ingress && skb->tstamp_type:2,
>> 		 * read 0 as the (rcv) timestamp.
>> 		 */
>> 		*insn++ = BPF_MOV64_IMM(value_reg, 0);
>> 		*insn++ = BPF_JMP_A(1);
>> 	}
>> #endif
>>
>> 	*insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
>> 			      offsetof(struct sk_buff, tstamp));
>> 	return insn;
>> }
>>
>>
>