lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CABKoBm2T6-BgecNr4RRkvtW2+Z4dXBrFo=Wn2TpsX=J+HTuXtg@mail.gmail.com>
Date:   Mon, 16 Oct 2017 00:05:03 -0700
From:   Andy Zhou <azhou@....org>
To:     Pravin Shelar <pshelar@....org>
Cc:     Linux Kernel Network Developers <netdev@...r.kernel.org>,
        Joe Stringer <joe@....org>, Greg Rose <gvrose8192@...il.com>
Subject: Re: [net-next RFC 3/4] openvswitch: Add meter infrastructure

On Fri, Oct 13, 2017 at 5:12 PM, Pravin Shelar <pshelar@....org> wrote:
> On Thu, Oct 12, 2017 at 3:38 PM, Andy Zhou <azhou@....org> wrote:
>> OVS kernel datapath so far does not support Openflow meter action.
>> This is the first stab at adding kernel datapath meter support.
>> This implementation supports only drop band type.
>>
>> Signed-off-by: Andy Zhou <azhou@....org>
>> ---
>>  net/openvswitch/Makefile   |   1 +
>>  net/openvswitch/datapath.c |  14 +-
>>  net/openvswitch/datapath.h |   3 +
>>  net/openvswitch/meter.c    | 611 +++++++++++++++++++++++++++++++++++++++++++++
>>  net/openvswitch/meter.h    |  54 ++++
>>  5 files changed, 681 insertions(+), 2 deletions(-)
>>  create mode 100644 net/openvswitch/meter.c
>>  create mode 100644 net/openvswitch/meter.h
>>
> ...
>
>> diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
>> new file mode 100644
>> index 000000000000..f24ebb5f7af4
>> --- /dev/null
>> +++ b/net/openvswitch/meter.c
>
> ....
> ....
>> +static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
>> +{
>> +       struct datapath *dp;
>> +       struct ovs_header *ovs_header = info->userhdr;
>> +       struct sk_buff *reply;
>> +       struct ovs_header *ovs_reply_header;
>> +       struct nlattr *nla, *band_nla;
>> +       int err;
>> +
>> +       /* Check that the datapath exists */
>> +       ovs_lock();
>> +       dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>> +       ovs_unlock();
>> +       if (!dp)
>> +               return -ENODEV;
>> +
> why dp check is required for this API?
Is it possible for another core delete the dp, before ovs_lock()
returns? Then, in theory, get_dp() can
return NULL, no?
>
>> +       reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
>> +                                         &ovs_reply_header);
>> +       if (!reply)
>> +               return PTR_ERR(reply);
>> +
>> +       if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
>> +           nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
>> +               goto nla_put_failure;
>> +
>> +       nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
>> +       if (!nla)
>> +               goto nla_put_failure;
>> +
>> +       band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
>> +       if (!band_nla)
>> +               goto nla_put_failure;
>> +       /* Currently only DROP band type is supported. */
>> +       if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
>> +               goto nla_put_failure;
>> +       nla_nest_end(reply, band_nla);
>> +       nla_nest_end(reply, nla);
>> +
>> +       genlmsg_end(reply, ovs_reply_header);
>> +       return genlmsg_reply(reply, info);
>> +
>> +nla_put_failure:
>> +       nlmsg_free(reply);
>> +       err = -EMSGSIZE;
>> +       return err;
>> +}
>> +
> ....
>
>> +static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
>> +{
>> +       struct nlattr **a = info->attrs;
>> +       struct dp_meter *meter, *old_meter;
>> +       struct sk_buff *reply;
>> +       struct ovs_header *ovs_reply_header;
>> +       struct ovs_header *ovs_header = info->userhdr;
>> +       struct datapath *dp;
>> +       int err;
>> +       u32 meter_id;
>> +       bool failed;
>> +
>> +       meter = dp_meter_create(a);
>> +       if (IS_ERR_OR_NULL(meter))
>> +               return PTR_ERR(meter);
>> +
>> +       reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
>> +                                         &ovs_reply_header);
>> +       if (IS_ERR(reply)) {
>> +               err = PTR_ERR(reply);
>> +               goto exit_free_meter;
>> +       }
>> +
>> +       ovs_lock();
>> +       dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
>> +       if (!dp) {
>> +               err = -ENODEV;
>> +               goto exit_unlock;
>> +       }
>> +
>> +       if (!a[OVS_METER_ATTR_ID]) {
>> +               err = -ENODEV;
>> +               goto exit_unlock;
>> +       }
>> +
>> +       meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
>> +
>> +       /* Cannot fail after this. */
>> +       old_meter = lookup_meter(dp, meter_id);
>> +       attach_meter(dp, meter);
>> +       ovs_unlock();
>> +
> After the unlock, it is not safe to keep the ref to old_meter. better
> to release lock at the end. we could optimize it later if required.

I see a problem here: the old_meter has not been removed from the list before
unlock. The code should have been:

old_meter = lookup_meter(dp, meter_id);
detch_meter(dp, old_meter);
attach_meter(dp, meter);
ovs_unlock();

Do you still see a problem w.r.t. unlock() here? Once detch_meter() is called,
another thread should not have access to 'old_meter' any more. right?
>
>> +       /* Build response with the meter_id and stats from
>> +        * the old meter, if any.
>> +        */
>> +       failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
>> +       WARN_ON(failed);
>> +       if (old_meter) {
>> +               spin_lock_bh(&old_meter->lock);
>> +               if (old_meter->keep_stats) {
>> +                       err = ovs_meter_cmd_reply_stats(reply, meter_id,
>> +                                                       old_meter);
>> +                       WARN_ON(err);
>> +               }
>> +               spin_unlock_bh(&old_meter->lock);
>> +               ovs_meter_free(old_meter);
>> +       }
>> +
>> +       genlmsg_end(reply, ovs_reply_header);
>> +       return genlmsg_reply(reply, info);
>> +
>> +exit_unlock:
>> +       ovs_unlock();
>> +       nlmsg_free(reply);
>> +exit_free_meter:
>> +       kfree(meter);
>> +       return err;
>> +}
>> +
> ....
>
>> +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
>> +                      struct sw_flow_key *key, u32 meter_id)
>> +{
>> +       struct dp_meter *meter;
>> +       struct dp_meter_band *band;
>> +       long long int now_ms = ktime_get_ns() / 1000 / 1000;
>> +       long long int long_delta_ms;
>> +       u32 delta_ms;
>> +       u32 cost;
>> +       int i, band_exceeded_max = -1;
>> +       u32 band_exceeded_rate = 0;
>> +
>> +       meter = lookup_meter(dp, meter_id);
>> +       /* Do not drop the packet when there is no meter. */
>> +       if (!meter)
>> +               return false;
>> +
>> +       /* Lock the meter while using it. */
>> +       spin_lock(&meter->lock);
>> +
>> +       long_delta_ms = (now_ms - meter->used); /* ms */
>> +
>> +       /* Make sure delta_ms will not be too large, so that bucket will not
>> +        * wrap around below.
>> +        */
>> +       delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
>> +                  ? meter->max_delta_t : (u32)long_delta_ms;
>> +
>> +       /* Update meter statistics.
>> +        */
>> +       meter->used = now_ms;
>> +       meter->stats.n_packets += 1;
>> +       meter->stats.n_bytes += skb->len;
>> +
>> +       /* Bucket rate is either in kilobits per second, or in packets per
>> +        * second.  We maintain the bucket in the units of either bits or
>> +        * 1/1000th of a packet, correspondingly.
>> +        * Then, when rate is multiplied with milliseconds, we get the
>> +        * bucket units:
>> +        * msec * kbps = bits, and
>> +        * msec * packets/sec = 1/1000 packets.
>> +        *
>> +        * 'cost' is the number of bucket units in this packet.
>> +        */
>> +       cost = (meter->kbps) ? skb->len * 8 : 1000;
>> +
>> +       /* Update all bands and find the one hit with the highest rate. */
>> +       for (i = 0; i < meter->n_bands; ++i) {
>> +               long long int max_bucket_size;
>> +
>> +               band = &meter->bands[i];
>> +               max_bucket_size = (band->burst_size + band->rate) * 1000;
>> +
>> +               band->bucket += delta_ms * band->rate;
>> +               if (band->bucket > max_bucket_size)
>> +                       band->bucket = max_bucket_size;
>> +
>> +               if (band->bucket >= cost) {
>> +                       band->bucket -= cost;
>> +               } else if (band->rate > band_exceeded_rate) {
>> +                       band_exceeded_rate = band->rate;
>> +                       band_exceeded_max = i;
>> +               }
>> +       }
>> +
>> +       spin_unlock(&meter->lock);
>> +
>> +       if (band_exceeded_max >= 0) {
>> +               /* Update band statistics. */
>> +               band = &meter->bands[band_exceeded_max];
>> +               band->stats.n_packets += 1;
>> +               band->stats.n_bytes += skb->len;
>> +
> Is it safe to do outside of the sipinlock?

Good catch, those should be covered by the same lock.  Will fix.
>
>> +               /* Drop band triggered, let the caller drop the 'skb'.  */
>> +               if (band->type == OVS_METER_BAND_TYPE_DROP)
>> +                       return true;
>> +       }
>> +
>> +       return false;
>> +}
>> +

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ