[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOrHB_Ape956tPpdMaRv-J2CdaWxkLf5ph57wxSL-6E7pUQ6vg@mail.gmail.com>
Date: Sun, 19 Apr 2020 10:29:31 -0700
From: Pravin Shelar <pravin.ovn@...il.com>
To: Tonghao Zhang <xiangxia.m.yue@...il.com>
Cc: Andy Zhou <azhou@....org>, Ben Pfaff <blp@....org>,
William Tu <u9012063@...il.com>,
Linux Kernel Network Developers <netdev@...r.kernel.org>,
ovs dev <dev@...nvswitch.org>
Subject: Re: [PATCH net-next v2 1/5] net: openvswitch: expand the meters
supported number
On Sat, Apr 18, 2020 at 10:25 AM <xiangxia.m.yue@...il.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@...il.com>
>
> In kernel datapath of Open vSwitch, there are only 1024
> buckets of meter in one dp. If installing more than 1024
> (e.g. 8192) meters, it may lead to the performance drop.
> But in some case, for example, Open vSwitch used as edge
> gateway, there should be 200,000+ at least, meters used for
> IP address bandwidth limitation.
>
> [Open vSwitch userspace datapath has this issue too.]
>
> For more scalable meter, this patch expands the buckets
> when necessary, so we can install more meters in the datapath.
> Introducing the struct *dp_meter_instance*, it's easy to
> expand meter though changing the *ti* point in the struct
> *dp_meter_table*.
>
> Cc: Pravin B Shelar <pshelar@....org>
> Cc: Andy Zhou <azhou@....org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@...il.com>
> ---
> net/openvswitch/datapath.h | 2 +-
> net/openvswitch/meter.c | 200 +++++++++++++++++++++++++++++--------
> net/openvswitch/meter.h | 15 ++-
> 3 files changed, 169 insertions(+), 48 deletions(-)
>
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index e239a46c2f94..785105578448 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -82,7 +82,7 @@ struct datapath {
> u32 max_headroom;
>
> /* Switch meters. */
> - struct hlist_head *meters;
> + struct dp_meter_table *meters;
lets define it as part of this struct to avoid indirection.
> };
>
> /**
> diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
> index 5010d1ddd4bd..494a0014ecd8 100644
> --- a/net/openvswitch/meter.c
> +++ b/net/openvswitch/meter.c
> @@ -19,8 +19,6 @@
> #include "datapath.h"
> #include "meter.h"
>
> -#define METER_HASH_BUCKETS 1024
> -
> static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
> [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
> [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
> @@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
> [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
> };
>
> +static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
> +{
> + return id % ti->n_meters;
> +}
> +
> static void ovs_meter_free(struct dp_meter *meter)
> {
> if (!meter)
> @@ -47,40 +50,141 @@ static void ovs_meter_free(struct dp_meter *meter)
> kfree_rcu(meter, rcu);
> }
>
> -static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
> - u32 meter_id)
> -{
> - return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
> -}
> -
> /* Call with ovs_mutex or RCU read lock. */
> -static struct dp_meter *lookup_meter(const struct datapath *dp,
> +static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
> u32 meter_id)
> {
> + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> + u32 hash = meter_hash(ti, meter_id);
> struct dp_meter *meter;
> - struct hlist_head *head;
>
> - head = meter_hash_bucket(dp, meter_id);
> - hlist_for_each_entry_rcu(meter, head, dp_hash_node,
> - lockdep_ovsl_is_held()) {
> - if (meter->id == meter_id)
> - return meter;
> - }
> + meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
> + if (meter && likely(meter->id == meter_id))
> + return meter;
> +
> return NULL;
> }
>
> -static void attach_meter(struct datapath *dp, struct dp_meter *meter)
> +static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
> +{
> + struct dp_meter_instance *ti;
> +
> + ti = kvzalloc(sizeof(*ti) +
> + sizeof(struct dp_meter *) * size,
> + GFP_KERNEL);
> + if (!ti)
> + return NULL;
Given this is a kernel space array we need to have hard limit inplace.
> +
> + ti->n_meters = size;
> +
> + return ti;
> +}
> +
> +static void dp_meter_instance_free(struct dp_meter_instance *ti)
> +{
> + kvfree(ti);
> +}
> +
> +static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
> +{
> + struct dp_meter_instance *ti;
> +
> + ti = container_of(rcu, struct dp_meter_instance, rcu);
> + kvfree(ti);
> +}
> +
> +static int
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
> +{
> + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> + int n_meters = min(size, ti->n_meters);
> + struct dp_meter_instance *new_ti;
> + int i;
> +
> + new_ti = dp_meter_instance_alloc(size);
> + if (!new_ti)
> + return -ENOMEM;
> +
> + for (i = 0; i < n_meters; i++)
> + new_ti->dp_meters[i] =
> + rcu_dereference_ovsl(ti->dp_meters[i]);
> +
> + rcu_assign_pointer(tbl->ti, new_ti);
> + call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
> +
> + return 0;
> +}
> +
> +static void dp_meter_instance_insert(struct dp_meter_instance *ti,
> + struct dp_meter *meter)
> +{
> + u32 hash;
> +
> + hash = meter_hash(ti, meter->id);
> + rcu_assign_pointer(ti->dp_meters[hash], meter);
> +}
> +
> +static void dp_meter_instance_remove(struct dp_meter_instance *ti,
> + struct dp_meter *meter)
> {
> - struct hlist_head *head = meter_hash_bucket(dp, meter->id);
> + u32 hash;
>
> - hlist_add_head_rcu(&meter->dp_hash_node, head);
> + hash = meter_hash(ti, meter->id);
> + RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
> }
>
> -static void detach_meter(struct dp_meter *meter)
> +static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> {
> + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> + u32 hash = meter_hash(ti, meter->id);
> +
> + /*
> + * In generally, slot selected should be empty, because
> + * OvS uses id-pool to fetch a available id.
> + */
> + if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
> + return -EINVAL;
we could return -EBUSY instead.
> +
> + dp_meter_instance_insert(ti, meter);
> +
> + /* That function is thread-safe. */
> + if (++tbl->count >= ti->n_meters)
> + if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
> + goto expand_err;
> +
> + return 0;
> +
> +expand_err:
> + dp_meter_instance_remove(ti, meter);
> + tbl->count--;
> + return -ENOMEM;
> +}
> +
> +static void detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
> +{
> + struct dp_meter_instance *ti;
> +
> ASSERT_OVSL();
> - if (meter)
> - hlist_del_rcu(&meter->dp_hash_node);
> + if (!meter)
> + return;
> +
> + ti = rcu_dereference_ovsl(tbl->ti);
> + dp_meter_instance_remove(ti, meter);
> +
> + tbl->count--;
> +
> + /* Shrink the meter array if necessary. */
> + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> + tbl->count <= (ti->n_meters / 4)) {
> + int half_size = ti->n_meters / 2;
> + int i;
> +
Lets add a comment about this.
> + for (i = half_size; i < ti->n_meters; i++)
> + if (rcu_dereference_ovsl(ti->dp_meters[i]))
> + return;
> +
> + dp_meter_instance_realloc(tbl, half_size);
> + }
> }
>
> static struct sk_buff *
> @@ -303,9 +407,13 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
> meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
>
> /* Cannot fail after this. */
> - old_meter = lookup_meter(dp, meter_id);
> - detach_meter(old_meter);
> - attach_meter(dp, meter);
> + old_meter = lookup_meter(dp->meters, meter_id);
in new scheme this can fail due to hash collision, lets check for NULL.
> + detach_meter(dp->meters, old_meter);
> +
> + err = attach_meter(dp->meters, meter);
> + if (err)
> + goto exit_unlock;
> +
> ovs_unlock();
>
> /* Build response with the meter_id and stats from
> @@ -365,7 +473,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
> }
>
> /* Locate meter, copy stats. */
> - meter = lookup_meter(dp, meter_id);
> + meter = lookup_meter(dp->meters, meter_id);
> if (!meter) {
> err = -ENOENT;
> goto exit_unlock;
> @@ -416,13 +524,13 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
> goto exit_unlock;
> }
>
> - old_meter = lookup_meter(dp, meter_id);
> + old_meter = lookup_meter(dp->meters, meter_id);
> if (old_meter) {
> spin_lock_bh(&old_meter->lock);
> err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
> WARN_ON(err);
> spin_unlock_bh(&old_meter->lock);
> - detach_meter(old_meter);
> + detach_meter(dp->meters, old_meter);
> }
> ovs_unlock();
> ovs_meter_free(old_meter);
> @@ -452,7 +560,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
> int i, band_exceeded_max = -1;
> u32 band_exceeded_rate = 0;
>
> - meter = lookup_meter(dp, meter_id);
> + meter = lookup_meter(dp->meters, meter_id);
> /* Do not drop the packet when there is no meter. */
> if (!meter)
> return false;
> @@ -570,32 +678,36 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
>
> int ovs_meters_init(struct datapath *dp)
> {
> - int i;
> + struct dp_meter_instance *ti;
> + struct dp_meter_table *tbl;
> +
> + tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
> + if (!tbl)
> + return -ENOMEM;
>
> - dp->meters = kmalloc_array(METER_HASH_BUCKETS,
> - sizeof(struct hlist_head), GFP_KERNEL);
> + tbl->count = 0;
>
> - if (!dp->meters)
> + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> + if (!ti) {
> + kfree(tbl);
> return -ENOMEM;
> + }
>
> - for (i = 0; i < METER_HASH_BUCKETS; i++)
> - INIT_HLIST_HEAD(&dp->meters[i]);
> + rcu_assign_pointer(tbl->ti, ti);
> + dp->meters = tbl;
>
> return 0;
> }
>
> void ovs_meters_exit(struct datapath *dp)
> {
> + struct dp_meter_table *tbl = dp->meters;
> + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
> int i;
>
> - for (i = 0; i < METER_HASH_BUCKETS; i++) {
> - struct hlist_head *head = &dp->meters[i];
> - struct dp_meter *meter;
> - struct hlist_node *n;
> -
> - hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
> - kfree(meter);
> - }
> + for (i = 0; i < ti->n_meters; i++)
> + ovs_meter_free(ti->dp_meters[i]);
>
> - kfree(dp->meters);
> + dp_meter_instance_free(ti);
> + kfree(tbl);
> }
> diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
> index f645913870bd..d91940383bbe 100644
> --- a/net/openvswitch/meter.h
> +++ b/net/openvswitch/meter.h
> @@ -18,6 +18,7 @@
> struct datapath;
>
> #define DP_MAX_BANDS 1
> +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
>
> struct dp_meter_band {
> u32 type;
> @@ -30,9 +31,6 @@ struct dp_meter_band {
> struct dp_meter {
> spinlock_t lock; /* Per meter lock */
> struct rcu_head rcu;
> - struct hlist_node dp_hash_node; /*Element in datapath->meters
> - * hash table.
> - */
> u32 id;
> u16 kbps:1, keep_stats:1;
> u16 n_bands;
> @@ -42,6 +40,17 @@ struct dp_meter {
> struct dp_meter_band bands[];
> };
>
> +struct dp_meter_instance {
> + struct rcu_head rcu;
> + u32 n_meters;
> + struct dp_meter __rcu *dp_meters[];
> +};
> +
> +struct dp_meter_table {
> + struct dp_meter_instance __rcu *ti;
> + u32 count;
> +};
> +
> extern struct genl_family dp_meter_genl_family;
> int ovs_meters_init(struct datapath *dp);
> void ovs_meters_exit(struct datapath *dp);
> --
> 2.23.0
>
Powered by blists - more mailing lists