lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <26afcaaf-abdf-42ad-1715-5af9c6f3c2ef@solarflare.com>
Date:   Wed, 24 Apr 2019 16:03:37 +0100
From:   Edward Cree <ecree@...arflare.com>
To:     Pablo Neira Ayuso <pablo@...filter.org>
CC:     Jamal Hadi Salim <jhs@...atatu.com>,
        netdev <netdev@...r.kernel.org>, "Jiri Pirko" <jiri@...nulli.us>,
        Cong Wang <xiyou.wangcong@...il.com>
Subject: Re: TC stats / hw offload question

On 24/04/2019 15:11, Pablo Neira Ayuso wrote:
> On Wed, Apr 24, 2019 at 03:05:05PM +0100, Edward Cree wrote:
>> So, I had this working for a while, by calling tcf_action_stats_update()
>>  directly on the correct element of tc->exts->actions[], instead of using
>>  tcf_exts_stats_update().  And this was fine, until I tried to port my
>>  code to the 5.1 kernel, with Pablo's flow action infrastructure.  On that
>>  it's not possible, because there is only flow_stats_update(), which takes
>>  a single bytes value for _all_ the actions in the rule.
> Then, we need to extend the API to allow to update counters per
> action, no driver in the tree has been supporting this so far.  May I
> have a look at your driver code to infer what we might need?
>
> Thanks.
Somewhat abridged and out-of-context, because this is for stuff that's a
 long way away from being ready to upstream (mumble unreleased hardware),
 here's the counter-handling part of it.

struct efx_tc_action_set {
    u16 vlan_push:2;
    u16 vlan_pop:2;
    /* other action flag bits */
    __be16 vlan_tci[2]; /* TCIs for vlan_push */
    __be16 vlan_proto[2]; /* Ethertypes for vlan_push */
    struct efx_tc_counter_index *count;
    int count_action_idx; /* Which tc_action uses the counter */
    int count_size_delta; /* bytes per packet change */
    /* other action metadata and gubbins */
};

static void calculate_count_delta(struct efx_tc_action_set *act)
{
    act->count_size_delta = 0;
    if (act->vlan_pop & 2)
        act->count_size_delta -= 4;
    if (act->vlan_pop & 1)
        act->count_size_delta -= 4;
    if (act->vlan_push & 1)
        act->count_size_delta += 4;
    if (act->vlan_push & 2)
        act->count_size_delta += 4;
}

static int efx_tc_flower_replace(struct efx_nic *efx,
                                 struct net_device *net_dev,
                                 struct tc_cls_flower_offload *tc)
{
    struct efx_tc_action_set *act;

    /* parse the match */

    tcf_exts_for_each_action(i, a, tc->exts) {
        if (a->ops && a->ops->stats_update) {
            /* act is the hw action we're building */
            act->count = allocate_a_counter();
            act->count_action_idx = i;
            calculate_count_delta(act);
        }
        /* handle the actual action */
        /* in the case of 'mirror' we'll stick 'act' on a list and
         * allocate a new one that starts out the same, to represent
         * the piped packet.
         */
    }

    /* insert the rule in hw*/
    return ...;
}

static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
                               struct tc_cls_flower_offload *tc)
{
    struct efx_tc_action_set *act;
    struct efx_tc_counter *cnt;
    u64 packets, bytes;

    rule = /* lookup the rule */;
    list_for_each_entry(act, &/*list of rule actions*/, list)
        if (act->count) {
            struct tc_action *a;
            u64 d_packets, d_bytes;

            cnt = /* get the counter */;
            /* Report only new pkts/bytes since last time TC asked */
            packets = cnt->packets;
            bytes = cnt->bytes;
            d_packets = packets - cnt->old_packets;
            /* Apply corrections for any VLAN pops/pushes before
             * our count action (but after the HW counter)
             */
            d_bytes = bytes - cnt->old_bytes +
                      act->count_size_delta * d_packets;
            a = tc->exts->actions[act->count_action_idx];
            tcf_action_stats_update(a, d_bytes, d_packets,
                                    cnt->touched, true);
            cnt->old_packets = packets;
            cnt->old_bytes = bytes;
        }
    return 0;
}

With this code, if for instance the rule is
tc match using flower blah \
action mirred egress mirror eth1 \
action vlan push blah \
action mirred egress redirect eth2

and a packed of length 100 bytes is matched, then tc stats show will report:
action mirred egress mirror eth1:
    1 packets, 100 bytes
action vlan push blah
    0 packets, 0 bytes /* Doesn't have an ops->stats_update */
action mirred egress redirect eth2
    1 packets, 104 bytes
which AIUI was the correct semantics, although no drivers upstream did it.

TC does have all the information it needs to make those corrections itself
 (it knows that the mirred eth2 comes after a vlan push), but currently it
 doesn't do so.
So one possible solution would be for flow_stats_update() to take bytes 'as
 matched', and make the corrections as it goes along.  Unfortunately, this
 doesn't quite work for decap (tunnel_key unset), because the length of the
 tunnel header is not statically known.  (For this reason, our hw counts
 bytes _after_ decap but _before_ other actions like VLAN push/pop.)  Since
 from TC's perspective tunnel_key is metadata rather than packet data, this
 works if we are willing to define the counters as not including tunnel
 headers — that's what I've done currently.  Then VLAN push/pop are the
 only TC actions that alter packet length.  It is however a bit unintuitive
 that in the case of
    tc filter add dev vxlan0 ... flower blah \
    action mirred egress mirror eth1
    action tunnel_key unset
    action mirred egress redirect eth2
 both mirreds will show the same (decapped) bytes count, yet eth1 will (at
 least as I've interpreted it for offload) receive the packet with the
 encap header still present.

So to implement this we could do something like (include/net/pkt_cls.h,
 written in mail client & not at all tested...)

static inline void
tcf_exts_stats_update(const struct tcf_exts *exts,
                      u64 bytes, u64 packets, u64 lastuse)
{
#ifdef CONFIG_NET_CLS_ACT
    int i, delta = 0;

    preempt_disable();

    for (i = 0; i < exts->nr_actions; i++) {
        struct tc_action *a = exts->actions[i];

        tcf_action_stats_update(a, bytes + packets * delta, packets,
                                lastuse, true);
        if (is_tcf_vlan(a))
            switch (tcf_vlan_action(a)) {
            case TCA_VLAN_ACT_POP:
                delta -= 4;
                break;
            case TCA_VLAN_ACT_PUSH:
                delta += 4;
                break;
            case TCA_VLAN_ACT_MODIFY:
            default:
                break;
            }
    }

    preempt_enable();
#endif
}

Does that seem reasonable?

-Ed

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ