lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 7 Apr 2023 12:27:08 -0400
From:   Jamal Hadi Salim <jhs@...atatu.com>
To:     Vladimir Oltean <vladimir.oltean@....com>
Cc:     netdev@...r.kernel.org, "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Cong Wang <xiyou.wangcong@...il.com>,
        Jiri Pirko <jiri@...nulli.us>,
        Vinicius Costa Gomes <vinicius.gomes@...el.com>,
        Kurt Kanzenbach <kurt@...utronix.de>,
        Gerhard Engleder <gerhard@...leder-embedded.com>,
        Amritha Nambiar <amritha.nambiar@...el.com>,
        Ferenc Fejes <ferenc.fejes@...csson.com>,
        Xiaoliang Yang <xiaoliang.yang_1@....com>,
        Roger Quadros <rogerq@...nel.org>,
        Pranavi Somisetty <pranavi.somisetty@....com>,
        Harini Katakam <harini.katakam@....com>,
        Giuseppe Cavallaro <peppe.cavallaro@...com>,
        Alexandre Torgue <alexandre.torgue@...s.st.com>,
        Michael Sit Wei Hong <michael.wei.hong.sit@...el.com>,
        Mohammad Athari Bin Ismail <mohammad.athari.ismail@...el.com>,
        Oleksij Rempel <linux@...pel-privat.de>,
        Jacob Keller <jacob.e.keller@...el.com>,
        linux-kernel@...r.kernel.org, Ferenc Fejes <fejes@....elte.hu>,
        Simon Horman <simon.horman@...igine.com>
Subject: Re: [PATCH v4 net-next 7/9] net/sched: taprio: allow per-TC user
 input of FP adminStatus

On Mon, Apr 3, 2023 at 6:35 AM Vladimir Oltean <vladimir.oltean@....com> wrote:
>
> This is a duplication of the FP adminStatus logic introduced for
> tc-mqprio. Offloading is done through the tc_mqprio_qopt_offload
> structure embedded within tc_taprio_qopt_offload. So practically, if a
> device driver is written to treat the mqprio portion of taprio just like
> standalone mqprio, it gets unified handling of frame preemption.
>
> I would have reused more code with taprio, but this is mostly netlink
> attribute parsing, which is hard to transform into generic code without
> having something that stinks as a result. We have the same variables
> with the same semantics, just different nlattr type values
> (TCA_MQPRIO_TC_ENTRY=5 vs TCA_TAPRIO_ATTR_TC_ENTRY=12;
> TCA_MQPRIO_TC_ENTRY_FP=2 vs TCA_TAPRIO_TC_ENTRY_FP=3, etc) and
> consequently, different policies for the nest.
>
> Every time nla_parse_nested() is called, an on-stack table "tb" of
> nlattr pointers is allocated statically, up to the maximum understood
> nlattr type. That array size is hardcoded as a constant, but when
> transforming this into a common parsing function, it would become either
> a VLA (which the Linux kernel rightfully doesn't like) or a call to the
> allocator.
>
> Having FP adminStatus in tc-taprio can be seen as addressing the 802.1Q
> Annex S.3 "Scheduling and preemption used in combination, no HOLD/RELEASE"
> and S.4 "Scheduling and preemption used in combination with HOLD/RELEASE"
> use cases. HOLD and RELEASE events are emitted towards the underlying
> MAC Merge layer when the schedule hits a Set-And-Hold-MAC or a
> Set-And-Release-MAC gate operation. So within the tc-taprio UAPI space,
> one can distinguish between the 2 use cases by choosing whether to use
> the TC_TAPRIO_CMD_SET_AND_HOLD and TC_TAPRIO_CMD_SET_AND_RELEASE gate
> operations within the schedule, or just TC_TAPRIO_CMD_SET_GATES.
>
> A small part of the change is dedicated to refactoring the max_sdu
> nlattr parsing to put all logic under the "if" that tests for presence
> of that nlattr.
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
> Reviewed-by: Ferenc Fejes <fejes@....elte.hu>
> Reviewed-by: Simon Horman <simon.horman@...igine.com>
> ---
> v3->v4: none
> v2->v3: none
> v1->v2: slightly reword commit message
>
>  include/uapi/linux/pkt_sched.h |  1 +
>  net/sched/sch_taprio.c         | 65 +++++++++++++++++++++++++++-------
>  2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index b8d29be91b62..51a7addc56c6 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -1252,6 +1252,7 @@ enum {
>         TCA_TAPRIO_TC_ENTRY_UNSPEC,
>         TCA_TAPRIO_TC_ENTRY_INDEX,              /* u32 */
>         TCA_TAPRIO_TC_ENTRY_MAX_SDU,            /* u32 */
> +       TCA_TAPRIO_TC_ENTRY_FP,                 /* u32 */
>
>         /* add new constants above here */
>         __TCA_TAPRIO_TC_ENTRY_CNT,
> diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
> index cbad43019172..76db9a10ef50 100644
> --- a/net/sched/sch_taprio.c
> +++ b/net/sched/sch_taprio.c
> @@ -7,6 +7,7 @@
>   */
>
>  #include <linux/ethtool.h>
> +#include <linux/ethtool_netlink.h>
>  #include <linux/types.h>
>  #include <linux/slab.h>
>  #include <linux/kernel.h>
> @@ -96,6 +97,7 @@ struct taprio_sched {
>         struct list_head taprio_list;
>         int cur_txq[TC_MAX_QUEUE];
>         u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
> +       u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
>         u32 txtime_delay;
>  };
>
> @@ -1002,6 +1004,9 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
>  static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
>         [TCA_TAPRIO_TC_ENTRY_INDEX]        = { .type = NLA_U32 },
>         [TCA_TAPRIO_TC_ENTRY_MAX_SDU]      = { .type = NLA_U32 },
> +       [TCA_TAPRIO_TC_ENTRY_FP]           = NLA_POLICY_RANGE(NLA_U32,
> +                                                             TC_FP_EXPRESS,
> +                                                             TC_FP_PREEMPTIBLE),

Same comment applies as in patch 6 here..

cheers,
jamal

>
>  static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
> @@ -1524,6 +1529,7 @@ static int taprio_enable_offload(struct net_device *dev,
>         mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
>         offload->mqprio.extack = extack;
>         taprio_sched_to_offload(dev, sched, offload, &caps);
> +       mqprio_fp_to_offload(q->fp, &offload->mqprio);
>
>         for (tc = 0; tc < TC_MAX_QUEUE; tc++)
>                 offload->max_sdu[tc] = q->max_sdu[tc];
> @@ -1671,13 +1677,14 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
>  static int taprio_parse_tc_entry(struct Qdisc *sch,
>                                  struct nlattr *opt,
>                                  u32 max_sdu[TC_QOPT_MAX_QUEUE],
> +                                u32 fp[TC_QOPT_MAX_QUEUE],
>                                  unsigned long *seen_tcs,
>                                  struct netlink_ext_ack *extack)
>  {
>         struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
>         struct net_device *dev = qdisc_dev(sch);
> -       u32 val = 0;
>         int err, tc;
> +       u32 val;
>
>         err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
>                                taprio_tc_policy, extack);
> @@ -1702,15 +1709,18 @@ static int taprio_parse_tc_entry(struct Qdisc *sch,
>
>         *seen_tcs |= BIT(tc);
>
> -       if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
> +       if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
>                 val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
> +               if (val > dev->max_mtu) {
> +                       NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
> +                       return -ERANGE;
> +               }
>
> -       if (val > dev->max_mtu) {
> -               NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
> -               return -ERANGE;
> +               max_sdu[tc] = val;
>         }
>
> -       max_sdu[tc] = val;
> +       if (tb[TCA_TAPRIO_TC_ENTRY_FP])
> +               fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
>
>         return 0;
>  }
> @@ -1720,29 +1730,51 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
>                                    struct netlink_ext_ack *extack)
>  {
>         struct taprio_sched *q = qdisc_priv(sch);
> +       struct net_device *dev = qdisc_dev(sch);
>         u32 max_sdu[TC_QOPT_MAX_QUEUE];
> +       bool have_preemption = false;
>         unsigned long seen_tcs = 0;
> +       u32 fp[TC_QOPT_MAX_QUEUE];
>         struct nlattr *n;
>         int tc, rem;
>         int err = 0;
>
> -       for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> +       for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
>                 max_sdu[tc] = q->max_sdu[tc];
> +               fp[tc] = q->fp[tc];
> +       }
>
>         nla_for_each_nested(n, opt, rem) {
>                 if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
>                         continue;
>
> -               err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
> +               err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
>                                             extack);
>                 if (err)
> -                       goto out;
> +                       return err;
>         }
>
> -       for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> +       for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
>                 q->max_sdu[tc] = max_sdu[tc];
> +               q->fp[tc] = fp[tc];
> +               if (fp[tc] != TC_FP_EXPRESS)
> +                       have_preemption = true;
> +       }
> +
> +       if (have_preemption) {
> +               if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
> +                       NL_SET_ERR_MSG(extack,
> +                                      "Preemption only supported with full offload");
> +                       return -EOPNOTSUPP;
> +               }
> +
> +               if (!ethtool_dev_mm_supported(dev)) {
> +                       NL_SET_ERR_MSG(extack,
> +                                      "Device does not support preemption");
> +                       return -EOPNOTSUPP;
> +               }
> +       }
>
> -out:
>         return err;
>  }
>
> @@ -2023,7 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
>  {
>         struct taprio_sched *q = qdisc_priv(sch);
>         struct net_device *dev = qdisc_dev(sch);
> -       int i;
> +       int i, tc;
>
>         spin_lock_init(&q->current_entry_lock);
>
> @@ -2080,6 +2112,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
>                 q->qdiscs[i] = qdisc;
>         }
>
> +       for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> +               q->fp[tc] = TC_FP_EXPRESS;
> +
>         taprio_detect_broken_mqprio(q);
>
>         return taprio_change(sch, opt, extack);
> @@ -2223,6 +2258,7 @@ static int dump_schedule(struct sk_buff *msg,
>  }
>
>  static int taprio_dump_tc_entries(struct sk_buff *skb,
> +                                 struct taprio_sched *q,
>                                   struct sched_gate_list *sched)
>  {
>         struct nlattr *n;
> @@ -2240,6 +2276,9 @@ static int taprio_dump_tc_entries(struct sk_buff *skb,
>                                 sched->max_sdu[tc]))
>                         goto nla_put_failure;
>
> +               if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
> +                       goto nla_put_failure;
> +
>                 nla_nest_end(skb, n);
>         }
>
> @@ -2281,7 +2320,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
>             nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
>                 goto options_error;
>
> -       if (oper && taprio_dump_tc_entries(skb, oper))
> +       if (oper && taprio_dump_tc_entries(skb, q, oper))
>                 goto options_error;
>
>         if (oper && dump_schedule(skb, oper))
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ