[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181019085253.GF4558@nanopsycho.orion>
Date: Fri, 19 Oct 2018 10:52:53 +0200
From: Jiri Pirko <jiri@...nulli.us>
To: "Nambiar, Amritha" <amritha.nambiar@...el.com>
Cc: netdev@...r.kernel.org, davem@...emloft.net,
jakub.kicinski@...ronome.com, sridhar.samudrala@...el.com,
jhs@...atatu.com, xiyou.wangcong@...il.com
Subject: Re: [net-next PATCH] net: sched: cls_flower: Classify packets using
port ranges
Thu, Oct 18, 2018 at 08:24:44PM CEST, amritha.nambiar@...el.com wrote:
>On 10/18/2018 5:17 AM, Jiri Pirko wrote:
>> Fri, Oct 12, 2018 at 03:53:30PM CEST, amritha.nambiar@...el.com wrote:
>>> Added support in tc flower for filtering based on port ranges.
>>> This is a rework of the RFC patch at:
>>> https://patchwork.ozlabs.org/patch/969595/
>>>
>>> Example:
>>> 1. Match on a port range:
>>> -------------------------
>>> $ tc filter add dev enp4s0 protocol ip parent ffff:\
>>> prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\
>>> action drop
>>>
>>> $ tc -s filter show dev enp4s0 parent ffff:
>>> filter protocol ip pref 1 flower chain 0
>>> filter protocol ip pref 1 flower chain 0 handle 0x1
>>> eth_type ipv4
>>> ip_proto tcp
>>> dst_port_min 20
>>> dst_port_max 30
>>> skip_hw
>>> not_in_hw
>>> action order 1: gact action drop
>>> random type none pass val 0
>>> index 1 ref 1 bind 1 installed 181 sec used 5 sec
>>> Action statistics:
>>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
>>> backlog 0b 0p requeues 0
>>>
>>> 2. Match on IP address and port range:
>>> --------------------------------------
>>> $ tc filter add dev enp4s0 protocol ip parent ffff:\
>>> prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\
>>> skip_hw action drop
>>>
>>> $ tc -s filter show dev enp4s0 parent ffff:
>>> filter protocol ip pref 1 flower chain 0 handle 0x2
>>> eth_type ipv4
>>> ip_proto tcp
>>> dst_ip 192.168.1.1
>>> dst_port_min 100
>>> dst_port_max 200
>>> skip_hw
>>> not_in_hw
>>> action order 1: gact action drop
>>> random type none pass val 0
>>> index 2 ref 1 bind 1 installed 28 sec used 6 sec
>>> Action statistics:
>>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0)
>>> backlog 0b 0p requeues 0
>>>
>>> Signed-off-by: Amritha Nambiar <amritha.nambiar@...el.com>
>>> ---
>>> include/uapi/linux/pkt_cls.h | 5 ++
>>> net/sched/cls_flower.c | 134 ++++++++++++++++++++++++++++++++++++++++--
>>> 2 files changed, 132 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
>>> index 401d0c1..b569308 100644
>>> --- a/include/uapi/linux/pkt_cls.h
>>> +++ b/include/uapi/linux/pkt_cls.h
>>> @@ -405,6 +405,11 @@ enum {
>>> TCA_FLOWER_KEY_UDP_SRC, /* be16 */
>>> TCA_FLOWER_KEY_UDP_DST, /* be16 */
>>>
>>> + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */
>>> + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */
>>> + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */
>>> + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */
>>> +
>>> TCA_FLOWER_FLAGS,
>>> TCA_FLOWER_KEY_VLAN_ID, /* be16 */
>>> TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */
>>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
>>> index 9aada2d..5f135f0 100644
>>> --- a/net/sched/cls_flower.c
>>> +++ b/net/sched/cls_flower.c
>>> @@ -55,6 +55,9 @@ struct fl_flow_key {
>>> struct flow_dissector_key_ip ip;
>>> struct flow_dissector_key_ip enc_ip;
>>> struct flow_dissector_key_enc_opts enc_opts;
>>> +
>>> + struct flow_dissector_key_ports tp_min;
>>> + struct flow_dissector_key_ports tp_max;
>>> } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
>>>
>>> struct fl_flow_mask_range {
>>> @@ -103,6 +106,11 @@ struct cls_fl_filter {
>>> struct net_device *hw_dev;
>>> };
>>>
>>> +enum fl_endpoint {
>>> + FLOWER_ENDPOINT_DST,
>>> + FLOWER_ENDPOINT_SRC
>>> +};
>>> +
>>> static const struct rhashtable_params mask_ht_params = {
>>> .key_offset = offsetof(struct fl_flow_mask, key),
>>> .key_len = sizeof(struct fl_flow_key),
>>> @@ -179,11 +187,86 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
>>> memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
>>> }
>>>
>>> +static int fl_range_compare_params(struct cls_fl_filter *filter,
>>> + struct fl_flow_key *key,
>>> + struct fl_flow_key *mkey,
>>> + enum fl_endpoint endpoint)
>>> +{
>>> + __be16 min_mask, max_mask, min_val, max_val;
>>> +
>>> + if (endpoint == FLOWER_ENDPOINT_DST) {
>>> + min_mask = htons(filter->mask->key.tp_min.dst);
>>> + max_mask = htons(filter->mask->key.tp_max.dst);
>>> + min_val = htons(filter->key.tp_min.dst);
>>> + max_val = htons(filter->key.tp_max.dst);
>>> +
>>> + if (min_mask && max_mask) {
>>> + if (htons(key->tp.dst) < min_val ||
>>> + htons(key->tp.dst) > max_val)
>>> + return -1;
>>> +
>>> + /* skb does not have min and max values */
>>> + mkey->tp_min.dst = filter->mkey.tp_min.dst;
>>> + mkey->tp_max.dst = filter->mkey.tp_max.dst;
>>> + }
>>> + } else {
>>> + min_mask = htons(filter->mask->key.tp_min.src);
>>> + max_mask = htons(filter->mask->key.tp_max.src);
>>> + min_val = htons(filter->key.tp_min.src);
>>> + max_val = htons(filter->key.tp_max.src);
>>> +
>>> + if (min_mask && max_mask) {
>>> + if (htons(key->tp.src) < min_val ||
>>> + htons(key->tp.src) > max_val)
>>> + return -1;
>>> +
>>> + /* skb does not have min and max values */
>>> + mkey->tp_min.src = filter->mkey.tp_min.src;
>>> + mkey->tp_max.src = filter->mkey.tp_max.src;
>>> + }
>>
>> You basically have 2 functions in 1 here. Just have 2 functions:
>> fl_port_range_dst_cmp()
>> and
>> fl_port_range_src_cmp()
>>
>> And avoid the "endpoint enum.
>> Also, as you return -1 or 0, just make it bool.
>>
>
>Makes sense. Will do.
>
>>
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
>>> + struct fl_flow_key *mkey,
>>> + struct fl_flow_key *key)
>>> +{
>>> + struct cls_fl_filter *filter, *f;
>>> + int ret;
>>> +
>>> + list_for_each_entry_rcu(filter, &mask->filters, list) {
>>> + ret = fl_range_compare_params(filter, key, mkey,
>>> + FLOWER_ENDPOINT_DST);
>>> + if (ret < 0)
>>> + continue;
>>> +
>>> + ret = fl_range_compare_params(filter, key, mkey,
>>> + FLOWER_ENDPOINT_SRC);
>>> + if (ret < 0)
>>> + continue;
>>> +
>>> + f = rhashtable_lookup_fast(&mask->ht,
>>> + fl_key_get_start(mkey, mask),
>>> + mask->filter_ht_params);
>>> + if (f)
>>> + return f;
>>> + }
>>> + return NULL;
>>> +}
>>> +
>>> static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
>>> - struct fl_flow_key *mkey)
>>> + struct fl_flow_key *mkey,
>>> + struct fl_flow_key *key, bool is_skb)
>>> {
>>> - return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
>>> - mask->filter_ht_params);
>>> + if ((!(mask->key.tp_min.dst && mask->key.tp_max.dst) &&
>>> + !(mask->key.tp_min.src && mask->key.tp_max.src)) || !is_skb) {
>>
>> Would be probably good to have a dedicated bit to check for and decide
>> if you do normal/range lookup. This is fast path.
>>
>
>Will fix in v2.
>
>>
>>> + return rhashtable_lookup_fast(&mask->ht,
>>
>> Remove double space ^^
>>
>
>Will fix in v2.
>
>>
>>> + fl_key_get_start(mkey, mask),
>>> + mask->filter_ht_params);
>>> + }
>>> + /* Classify based on range */
>>> + return fl_lookup_range(mask, mkey, key);
>>> }
>>>
>>> static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>>> @@ -207,8 +290,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>>> skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
>>>
>>> fl_set_masked_key(&skb_mkey, &skb_key, mask);
>>> + f = fl_lookup(mask, &skb_mkey, &skb_key, true);
>>>
>>> - f = fl_lookup(mask, &skb_mkey);
>>> if (f && !tc_skip_sw(f->flags)) {
>>> *res = f->res;
>>> return tcf_exts_exec(skb, &f->exts, res);
>>> @@ -909,6 +992,23 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
>>> sizeof(key->arp.tha));
>>> }
>>>
>>> + if (key->basic.ip_proto == IPPROTO_TCP ||
>>> + key->basic.ip_proto == IPPROTO_UDP ||
>>> + key->basic.ip_proto == IPPROTO_SCTP) {
>>> + fl_set_key_val(tb, &key->tp_min.dst,
>>> + TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
>>> + fl_set_key_val(tb, &key->tp_max.dst,
>>> + TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
>>> + fl_set_key_val(tb, &key->tp_min.src,
>>> + TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
>>> + fl_set_key_val(tb, &key->tp_max.src,
>>> + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
>>> + }
>>> +
>>> if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
>>> tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
>>> key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>>> @@ -1026,8 +1126,7 @@ static void fl_init_dissector(struct flow_dissector *dissector,
>>> FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>>> FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
>>> - FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>>> - FLOW_DISSECTOR_KEY_PORTS, tp);
>>> + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>>> FLOW_DISSECTOR_KEY_IP, ip);
>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt,
>>> @@ -1227,7 +1326,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>>> goto errout_idr;
>>>
>>> if (!tc_skip_sw(fnew->flags)) {
>>> - if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
>>> + if (!fold && fl_lookup(fnew->mask, &fnew->mkey, NULL, false)) {
>>
>>
>> I don't undestand why do you need the "is_skb" arg here. Could you
>> please explain?
>>
>> Thanks!
>>
>
>The reason to keep the 'is_skb' arg is because, fl_lookup is called in
>two cases, one for skb classification and another for checking if a
>filter exists every-time a new filter is added. In case of skb
>classification, we need to go through the range-comparator to decide if
>the skb's port-value falls within the range-filter's min and max limits.
>In case of filter validation, the range-filter that we are trying to add
>will have min and max values, and we are validating it against other
>range-filters with min and max values. So, rhashtable lookup will
>suffice here and there is no need to go through the range-comparator in
>this case. In the above code, we are validating if a range-filter
>exists, so 'is_skb' is false.
Got it. In that case, please just have a helper:
static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
struct fl_flow_key *mkey)
{
return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
mask->filter_ht_params);
}
Call this helper from both fl_lookup() and fl_change()
>
>>
>>> err = -EEXIST;
>>> goto errout_mask;
>>> }
>>> @@ -1800,6 +1899,27 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
>>> sizeof(key->arp.tha))))
>>> goto nla_put_failure;
>>>
>>> + if ((key->basic.ip_proto == IPPROTO_TCP ||
>>> + key->basic.ip_proto == IPPROTO_UDP ||
>>> + key->basic.ip_proto == IPPROTO_SCTP) &&
>>> + (fl_dump_key_val(skb, &key->tp_min.dst,
>>> + TCA_FLOWER_KEY_PORT_DST_MIN,
>>> + &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
>>> + sizeof(key->tp_min.dst)) ||
>>> + fl_dump_key_val(skb, &key->tp_max.dst,
>>> + TCA_FLOWER_KEY_PORT_DST_MAX,
>>> + &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
>>> + sizeof(key->tp_max.dst)) ||
>>> + fl_dump_key_val(skb, &key->tp_min.src,
>>> + TCA_FLOWER_KEY_PORT_SRC_MIN,
>>> + &mask->tp_min.src, TCA_FLOWER_UNSPEC,
>>> + sizeof(key->tp_min.src)) ||
>>> + fl_dump_key_val(skb, &key->tp_max.src,
>>> + TCA_FLOWER_KEY_PORT_SRC_MAX,
>>> + &mask->tp_max.src, TCA_FLOWER_UNSPEC,
>>> + sizeof(key->tp_max.src))))
>>> + goto nla_put_failure;
>>> +
>>> if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
>>> (fl_dump_key_val(skb, &key->enc_ipv4.src,
>>> TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
>>>
Powered by blists - more mailing lists