[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <VI1PR10MB244671875D9A041C2F9018AFAB310@VI1PR10MB2446.EURPRD10.PROD.OUTLOOK.COM>
Date: Fri, 2 Oct 2020 11:05:01 +0000
From: "Geva, Erez" <erez.geva.ext@...mens.com>
To: Thomas Gleixner <tglx@...utronix.de>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Cong Wang <xiyou.wangcong@...il.com>,
"David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jamal Hadi Salim <jhs@...atatu.com>,
Jiri Pirko <jiri@...nulli.us>, Andrei Vagin <avagin@...il.com>,
Dmitry Safonov <0x7f454c46@...il.com>,
"Eric W . Biederman" <ebiederm@...ssion.com>,
Ingo Molnar <mingo@...nel.org>,
John Stultz <john.stultz@...aro.org>,
Michal Kubecek <mkubecek@...e.cz>,
Oleg Nesterov <oleg@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Richard Cochran <richardcochran@...il.com>,
Stephen Boyd <sboyd@...nel.org>,
Vladis Dronov <vdronov@...hat.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Frederic Weisbecker <frederic@...nel.org>,
Eric Dumazet <edumazet@...gle.com>
CC: Vinicius Costa Gomes <vinicius.gomes@...el.com>,
Vedang Patel <vedang.patel@...el.com>,
"Sudler, Simon" <simon.sudler@...mens.com>,
"Meisinger, Andreas" <andreas.meisinger@...mens.com>,
"Bucher, Andreas" <andreas.bucher@...mens.com>,
"henning.schild@...mens.com" <henning.schild@...mens.com>,
"jan.kiszka@...mens.com" <jan.kiszka@...mens.com>,
"Zirkler, Andreas" <andreas.zirkler@...mens.com>,
"Sakic, Ermin" <ermin.sakic@...mens.com>,
"anninh.nguyen@...mens.com" <anninh.nguyen@...mens.com>,
"Saenger, Michael" <michael.saenger@...mens.com>,
"Maehringer, Bernd" <bernd.maehringer@...mens.com>,
"gisela.greinert@...mens.com" <gisela.greinert@...mens.com>,
Erez Geva <ErezGeva2@...il.com>
Subject: Re: [PATCH 7/7] TC-ETF support PTP clocks
On 02/10/2020 02:33, Thomas Gleixner wrote:
> On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:
>
>> - Add support for using a POSIX dynamic clock with
>> Traffic control Earliest TxTime First (ETF) Qdisc.
>
> ....
>
>> --- a/include/uapi/linux/net_tstamp.h
>> +++ b/include/uapi/linux/net_tstamp.h
>> @@ -167,6 +167,11 @@ enum txtime_flags {
>> SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
>> SOF_TXTIME_FLAGS_LAST
>> };
>> +/*
>> + * Clock ID to use with POSIX clocks
>> + * The ID must be u8 to fit in (struct sock)->sk_clockid
>> + */
>> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)
>
> Random number with a random name.
>
>> struct sock_txtime {
>> __kernel_clockid_t clockid;/* reference clockid */
>> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
>> index c0de4c6f9299..8e3e0a61fa58 100644
>> --- a/net/sched/sch_etf.c
>> +++ b/net/sched/sch_etf.c
>> @@ -15,6 +15,7 @@
>> #include <linux/rbtree.h>
>> #include <linux/skbuff.h>
>> #include <linux/posix-timers.h>
>> +#include <linux/posix-clock.h>
>> #include <net/netlink.h>
>> #include <net/sch_generic.h>
>> #include <net/pkt_sched.h>
>> @@ -40,19 +41,40 @@ struct etf_sched_data {
>> struct rb_root_cached head;
>> struct qdisc_watchdog watchdog;
>> ktime_t (*get_time)(void);
>> +#ifdef CONFIG_POSIX_TIMERS
>> + struct posix_clock *pclock; /* pointer to a posix clock */
>
> Tail comments suck because they disturb the reading flow and this
> comment has absolute zero value.
>
> Comments are required to explain things which are not obvious...
>
>> +#endif /* CONFIG_POSIX_TIMERS */
>
> Also this #ifdeffery is bonkers. How is TSN supposed to work without
> POSIX_TIMERS in the first place?
>
>> static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
>> [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
>> };
>>
>> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
>> +{
>> +#ifdef CONFIG_POSIX_TIMERS
>> + if (IS_ERR_OR_NULL(q->get_time)) {
>> + struct timespec64 ts;
>> + int err = posix_clock_gettime(q->pclock, &ts);
>> +
>> + if (err) {
>> + pr_warn("Clock is disabled (%d) for queue %d\n",
>> + err, q->queue);
>> + return 0;
>
> That's really useful error handling.
>
>> + }
>> + return timespec64_to_ktime(ts);
>> + }
>> +#endif /* CONFIG_POSIX_TIMERS */
>> + return q->get_time();
>> +}
>> +
>> static inline int validate_input_params(struct tc_etf_qopt *qopt,
>> struct netlink_ext_ack *extack)
>> {
>> /* Check if params comply to the following rules:
>> * * Clockid and delta must be valid.
>> *
>> - * * Dynamic clockids are not supported.
>> + * * Dynamic CPU clockids are not supported.
>> *
>> * * Delta must be a positive or zero integer.
>> *
>> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,
>> * expect that system clocks have been synchronized to PHC.
>> */
>> if (qopt->clockid < 0) {
>> +#ifdef CONFIG_POSIX_TIMERS
>> + /**
>> + * Use of PTP clock through a posix clock.
>> + * The TC application must open the posix clock device file
>> + * and use the dynamic clockid from the file description.
>
> What? How is the code which calls into this guaranteed to have a valid
> file descriptor open for a particular dynamic posix clock?
>
>> + */
>> + if (!is_clockid_fd_clock(qopt->clockid)) {
>> + NL_SET_ERR_MSG(extack,
>> + "Dynamic CPU clockids are not supported");
>> + return -EOPNOTSUPP;
>> + }
>> +#else /* CONFIG_POSIX_TIMERS */
>> NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
>> return -ENOTSUPP;
>> - }
>> -
>> - if (qopt->clockid != CLOCK_TAI) {
>> +#endif /* CONFIG_POSIX_TIMERS */
>> + } else if (qopt->clockid != CLOCK_TAI) {
>> NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
>> return -EINVAL;
>> }
>> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,
>> return false;
>>
>> skip:
>> - now = q->get_time();
>> + now = get_now(sch, q);
>
> Yuck.
>
> is_packet_valid() is invoked via:
>
> __dev_queue_xmit()
> __dev_xmit_skb()
> etf_enqueue_timesortedlist()
> is_packet_valid()
>
> __dev_queue_xmit() does
>
> rcu_read_lock_bh();
>
> and your get_now() does
>
> posix_clock_gettime()
> down_read(&clk->rwsem);
>
> ----> FAIL
>
> down_read() might sleep and cannot be called from a BH disabled
> region. This clearly has never been tested with any mandatory debug
> option enabled. Why am I not surprised?
>
> Aside of accessing PCH clock being slow at hell this cannot ever work
> and there is no way to make it work in any consistent form.
>
> If you have several NICs on several PCH domains then all of these
> domains should have one thing in common: CLOCK_TAI and the frequency.
>
> If that's not the case then the overall system design is just broken,
> but yes I'm aware of the fact that some industries decided to have their
> own definition of time and frequency just because they can.
>
> Handling different starting points of the domains interpretation of
> "TAI" is doable because that's just an offset, but having different
> frequencies is a nightmare.
>
> So if such a trainwreck is a valid use case which needs to be supported
> then just duct taping it into the code is not going to fly.
>
> The only way to make this work is to sit down and actually design a
> mechanism which allows to correlate the various notions of PCH time with
> the systems CLOCK_TAI, i.e. providing offset and frequency correction.
>
> Also you want to explain how user space applications should deal with
> these different time domains in a sane way.
>
> Thanks,
>
> tglx
>
Thank you for your quick and depth feedback.
Erez
Powered by blists - more mailing lists