lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <VI1PR10MB244671875D9A041C2F9018AFAB310@VI1PR10MB2446.EURPRD10.PROD.OUTLOOK.COM>
Date:   Fri, 2 Oct 2020 11:05:01 +0000
From:   "Geva, Erez" <erez.geva.ext@...mens.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Cong Wang <xiyou.wangcong@...il.com>,
        "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Jamal Hadi Salim <jhs@...atatu.com>,
        Jiri Pirko <jiri@...nulli.us>, Andrei Vagin <avagin@...il.com>,
        Dmitry Safonov <0x7f454c46@...il.com>,
        "Eric W . Biederman" <ebiederm@...ssion.com>,
        Ingo Molnar <mingo@...nel.org>,
        John Stultz <john.stultz@...aro.org>,
        Michal Kubecek <mkubecek@...e.cz>,
        Oleg Nesterov <oleg@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Richard Cochran <richardcochran@...il.com>,
        Stephen Boyd <sboyd@...nel.org>,
        Vladis Dronov <vdronov@...hat.com>,
        Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Frederic Weisbecker <frederic@...nel.org>,
        Eric Dumazet <edumazet@...gle.com>
CC:     Vinicius Costa Gomes <vinicius.gomes@...el.com>,
        Vedang Patel <vedang.patel@...el.com>,
        "Sudler, Simon" <simon.sudler@...mens.com>,
        "Meisinger, Andreas" <andreas.meisinger@...mens.com>,
        "Bucher, Andreas" <andreas.bucher@...mens.com>,
        "henning.schild@...mens.com" <henning.schild@...mens.com>,
        "jan.kiszka@...mens.com" <jan.kiszka@...mens.com>,
        "Zirkler, Andreas" <andreas.zirkler@...mens.com>,
        "Sakic, Ermin" <ermin.sakic@...mens.com>,
        "anninh.nguyen@...mens.com" <anninh.nguyen@...mens.com>,
        "Saenger, Michael" <michael.saenger@...mens.com>,
        "Maehringer, Bernd" <bernd.maehringer@...mens.com>,
        "gisela.greinert@...mens.com" <gisela.greinert@...mens.com>,
        Erez Geva <ErezGeva2@...il.com>
Subject: Re: [PATCH 7/7] TC-ETF support PTP clocks



On 02/10/2020 02:33, Thomas Gleixner wrote:
> On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:
>
>>    - Add support for using a POSIX dynamic clock with
>>      Traffic control Earliest TxTime First (ETF) Qdisc.
>
> ....
>
>> --- a/include/uapi/linux/net_tstamp.h
>> +++ b/include/uapi/linux/net_tstamp.h
>> @@ -167,6 +167,11 @@ enum txtime_flags {
>>      SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
>>                               SOF_TXTIME_FLAGS_LAST
>>   };
>> +/*
>> + * Clock ID to use with POSIX clocks
>> + * The ID must be u8 to fit in (struct sock)->sk_clockid
>> + */
>> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)
>
> Random number with a random name.
>
>>   struct sock_txtime {
>>      __kernel_clockid_t      clockid;/* reference clockid */
>> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
>> index c0de4c6f9299..8e3e0a61fa58 100644
>> --- a/net/sched/sch_etf.c
>> +++ b/net/sched/sch_etf.c
>> @@ -15,6 +15,7 @@
>>   #include <linux/rbtree.h>
>>   #include <linux/skbuff.h>
>>   #include <linux/posix-timers.h>
>> +#include <linux/posix-clock.h>
>>   #include <net/netlink.h>
>>   #include <net/sch_generic.h>
>>   #include <net/pkt_sched.h>
>> @@ -40,19 +41,40 @@ struct etf_sched_data {
>>      struct rb_root_cached head;
>>      struct qdisc_watchdog watchdog;
>>      ktime_t (*get_time)(void);
>> +#ifdef CONFIG_POSIX_TIMERS
>> +    struct posix_clock *pclock; /* pointer to a posix clock */
>
> Tail comments suck because they disturb the reading flow and this
> comment has absolute zero value.
>
> Comments are required to explain things which are not obvious...
>
>> +#endif /* CONFIG_POSIX_TIMERS */
>
> Also this #ifdeffery is bonkers. How is TSN supposed to work without
> POSIX_TIMERS in the first place?
>
>>   static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
>>      [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
>>   };
>>
>> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
>> +{
>> +#ifdef CONFIG_POSIX_TIMERS
>> +    if (IS_ERR_OR_NULL(q->get_time)) {
>> +            struct timespec64 ts;
>> +            int err = posix_clock_gettime(q->pclock, &ts);
>> +
>> +            if (err) {
>> +                    pr_warn("Clock is disabled (%d) for queue %d\n",
>> +                            err, q->queue);
>> +                    return 0;
>
> That's really useful error handling.
>
>> +            }
>> +            return timespec64_to_ktime(ts);
>> +    }
>> +#endif /* CONFIG_POSIX_TIMERS */
>> +    return q->get_time();
>> +}
>> +
>>   static inline int validate_input_params(struct tc_etf_qopt *qopt,
>>                                      struct netlink_ext_ack *extack)
>>   {
>>      /* Check if params comply to the following rules:
>>       *      * Clockid and delta must be valid.
>>       *
>> -     *      * Dynamic clockids are not supported.
>> +     *      * Dynamic CPU clockids are not supported.
>>       *
>>       *      * Delta must be a positive or zero integer.
>>       *
>> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,
>>       * expect that system clocks have been synchronized to PHC.
>>       */
>>      if (qopt->clockid < 0) {
>> +#ifdef CONFIG_POSIX_TIMERS
>> +            /**
>> +             * Use of PTP clock through a posix clock.
>> +             * The TC application must open the posix clock device file
>> +             * and use the dynamic clockid from the file description.
>
> What? How is the code which calls into this guaranteed to have a valid
> file descriptor open for a particular dynamic posix clock?
>
>> +             */
>> +            if (!is_clockid_fd_clock(qopt->clockid)) {
>> +                    NL_SET_ERR_MSG(extack,
>> +                                   "Dynamic CPU clockids are not supported");
>> +                    return -EOPNOTSUPP;
>> +            }
>> +#else /* CONFIG_POSIX_TIMERS */
>>              NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
>>              return -ENOTSUPP;
>> -    }
>> -
>> -    if (qopt->clockid != CLOCK_TAI) {
>> +#endif /* CONFIG_POSIX_TIMERS */
>> +    } else if (qopt->clockid != CLOCK_TAI) {
>>              NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
>>              return -EINVAL;
>>      }
>> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,
>>              return false;
>>
>>   skip:
>> -    now = q->get_time();
>> +    now = get_now(sch, q);
>
> Yuck.
>
> is_packet_valid() is invoked via:
>
>      __dev_queue_xmit()
>        __dev_xmit_skb()
>           etf_enqueue_timesortedlist()
>             is_packet_valid()
>
> __dev_queue_xmit() does
>
>         rcu_read_lock_bh();
>
> and your get_now() does
>
>      posix_clock_gettime()
>               down_read(&clk->rwsem);
>
>   ----> FAIL
>
> down_read() might sleep and cannot be called from a BH disabled
> region. This clearly has never been tested with any mandatory debug
> option enabled. Why am I not surprised?
>
> Aside of accessing PCH clock being slow at hell this cannot ever work
> and there is no way to make it work in any consistent form.
>
> If you have several NICs on several PCH domains then all of these
> domains should have one thing in common: CLOCK_TAI and the frequency.
>
> If that's not the case then the overall system design is just broken,
> but yes I'm aware of the fact that some industries decided to have their
> own definition of time and frequency just because they can.
>
> Handling different starting points of the domains interpretation of
> "TAI" is doable because that's just an offset, but having different
> frequencies is a nightmare.
>
> So if such a trainwreck is a valid use case which needs to be supported
> then just duct taping it into the code is not going to fly.
>
> The only way to make this work is to sit down and actually design a
> mechanism which allows to correlate the various notions of PCH time with
> the systems CLOCK_TAI, i.e. providing offset and frequency correction.
>
> Also you want to explain how user space applications should deal with
> these different time domains in a sane way.
>
> Thanks,
>
>          tglx
>

Thank you for your quick and depth feedback.

Erez

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ