[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <C6B2B758-FF29-4463-8F38-757803D77779@redhat.com>
Date: Thu, 30 Jul 2020 14:33:12 +0200
From: "Eelco Chaudron" <echaudro@...hat.com>
To: "Tonghao Zhang" <xiangxia.m.yue@...il.com>
Cc: "Linux Kernel Network Developers" <netdev@...r.kernel.org>,
"David Miller" <davem@...emloft.net>,
"ovs dev" <dev@...nvswitch.org>,
"Jakub Kicinski" <kuba@...nel.org>,
"Paolo Abeni" <pabeni@...hat.com>,
"Pravin Shelar" <pshelar@....org>,
"Florian Westphal" <fw@...len.de>
Subject: Re: [PATCH net-next v3 2/2] net: openvswitch: make masks cache size
configurable
Thanks Tonghao for the review, see comments inline below!
If I get no more reviews by the end of the week I’ll send out a v4.
//Eelco
On 30 Jul 2020, at 7:07, Tonghao Zhang wrote:
> On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echaudro@...hat.com>
> wrote:
>>
>> This patch makes the masks cache size configurable, or with
>> a size of 0, disable it.
>>
>> Reviewed-by: Paolo Abeni <pabeni@...hat.com>
>> Signed-off-by: Eelco Chaudron <echaudro@...hat.com>
>> ---
>> Changes in v3:
>> - Use is_power_of_2() function
>> - Use array_size() function
>> - Fix remaining sparse errors
>>
>> Changes in v2:
>> - Fix sparse warnings
>> - Fix netlink policy items reported by Florian Westphal
>>
>> include/uapi/linux/openvswitch.h | 1
>> net/openvswitch/datapath.c | 14 +++++
>> net/openvswitch/flow_table.c | 97
>> +++++++++++++++++++++++++++++++++-----
>> net/openvswitch/flow_table.h | 10 ++++
>> 4 files changed, 108 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/uapi/linux/openvswitch.h
>> b/include/uapi/linux/openvswitch.h
>> index 7cb76e5ca7cf..8300cc29dec8 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -86,6 +86,7 @@ enum ovs_datapath_attr {
>> OVS_DP_ATTR_MEGAFLOW_STATS, /* struct
>> ovs_dp_megaflow_stats */
>> OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */
>> OVS_DP_ATTR_PAD,
>> + OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> __OVS_DP_ATTR_MAX
>> };
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index a54df1fe3ec4..114b2ddb8037 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void)
>> msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
>> msgsize += nla_total_size_64bit(sizeof(struct
>> ovs_dp_megaflow_stats));
>> msgsize += nla_total_size(sizeof(u32)); /*
>> OVS_DP_ATTR_USER_FEATURES */
>> + msgsize += nla_total_size(sizeof(u32)); /*
>> OVS_DP_ATTR_MASKS_CACHE_SIZE */
>>
>> return msgsize;
>> }
>> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct
>> datapath *dp, struct sk_buff *skb,
>> if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES,
>> dp->user_features))
>> goto nla_put_failure;
>>
>> + if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> + ovs_flow_tbl_masks_cache_size(&dp->table)))
>> + goto nla_put_failure;
>> +
>> genlmsg_end(skb, ovs_header);
>> return 0;
>>
>> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp,
>> struct nlattr *a[])
>> #endif
>> }
>>
>> + if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
>> + u32 cache_size;
>> +
>> + cache_size =
>> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
>> + ovs_flow_tbl_masks_cache_resize(&dp->table,
>> cache_size);
> Do we should return error code, if we can't change the "mask_cache"
> size ? for example, -EINVAL, -ENOMEM
Initially, I did not do this due to the fact the new value is reported,
and on failure, the old value is shown.
However thinking about it again, it makes more sense to return an error.
Will sent a v4 with the following to return:
-
-void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
size)
+int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)
{
struct mask_cache *mc = rcu_dereference(table->mask_cache);
struct mask_cache *new;
if (size == mc->cache_size)
- return;
+ return 0;
+
+ if (!is_power_of_2(size) && size != 0)
+ return -EINVAL;
new = tbl_mask_cache_alloc(size);
if (!new)
- return;
+ return -ENOMEM;
rcu_assign_pointer(table->mask_cache, new);
call_rcu(&mc->rcu, mask_cache_rcu_cb);
+
+ return 0;
}
>> + }
>> +
>> dp->user_features = user_features;
>>
>> if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
>> @@ -1894,6 +1906,8 @@ static const struct nla_policy
>> datapath_policy[OVS_DP_ATTR_MAX + 1] = {
>> [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len =
>> IFNAMSIZ - 1 },
>> [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
>> [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
>> + [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32,
>> 0,
>> + PCPU_MIN_UNIT_SIZE / sizeof(struct
>> mask_cache_entry)),
>> };
>>
>> static const struct genl_ops dp_datapath_genl_ops[] = {
>> diff --git a/net/openvswitch/flow_table.c
>> b/net/openvswitch/flow_table.c
>> index a5912ea05352..5280aeeef628 100644
>> --- a/net/openvswitch/flow_table.c
>> +++ b/net/openvswitch/flow_table.c
>> @@ -38,8 +38,8 @@
>> #define MASK_ARRAY_SIZE_MIN 16
>> #define REHASH_INTERVAL (10 * 60 * HZ)
>>
>> +#define MC_DEFAULT_HASH_ENTRIES 256
>> #define MC_HASH_SHIFT 8
>> -#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
>> #define MC_HASH_SEGS ((sizeof(uint32_t) * 8) /
>> MC_HASH_SHIFT)
>>
>> static struct kmem_cache *flow_cache;
>> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table
>> *tbl, struct sw_flow_mask *mask)
>> }
>> }
>>
>> +static void __mask_cache_destroy(struct mask_cache *mc)
>> +{
>> + if (mc->mask_cache)
>> + free_percpu(mc->mask_cache);
> free_percpu the NULL is safe. we can remove the "if".
Makes sense, will remove the if() check.
>> + kfree(mc);
>> +}
>> +
>> +static void mask_cache_rcu_cb(struct rcu_head *rcu)
>> +{
>> + struct mask_cache *mc = container_of(rcu, struct mask_cache,
>> rcu);
>> +
>> + __mask_cache_destroy(mc);
>> +}
>> +
>> +static struct mask_cache *tbl_mask_cache_alloc(u32 size)
>> +{
>> + struct mask_cache_entry __percpu *cache = NULL;
>> + struct mask_cache *new;
>> +
>> + /* Only allow size to be 0, or a power of 2, and does not
>> exceed
>> + * percpu allocation size.
>> + */
>> + if ((!is_power_of_2(size) && size != 0) ||
>> + (size * sizeof(struct mask_cache_entry)) >
>> PCPU_MIN_UNIT_SIZE)
>> + return NULL;
>> + new = kzalloc(sizeof(*new), GFP_KERNEL);
>> + if (!new)
>> + return NULL;
>> +
>> + new->cache_size = size;
>> + if (new->cache_size > 0) {
>> + cache = __alloc_percpu(array_size(sizeof(struct
>> mask_cache_entry),
>> + new->cache_size),
>> + __alignof__(struct
>> mask_cache_entry));
>> + if (!cache) {
>> + kfree(new);
>> + return NULL;
>> + }
>> + }
>> +
>> + new->mask_cache = cache;
>> + return new;
>> +}
>> +
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>> size)
>> +{
>> + struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> + struct mask_cache *new;
>> +
>> + if (size == mc->cache_size)
>> + return;
>> +
>> + new = tbl_mask_cache_alloc(size);
>> + if (!new)
>> + return;
>> +
>> + rcu_assign_pointer(table->mask_cache, new);
>> + call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> +}
>> +
>> int ovs_flow_tbl_init(struct flow_table *table)
>> {
>> struct table_instance *ti, *ufid_ti;
>> + struct mask_cache *mc;
>> struct mask_array *ma;
>>
>> - table->mask_cache = __alloc_percpu(sizeof(struct
>> mask_cache_entry) *
>> - MC_HASH_ENTRIES,
>> - __alignof__(struct
>> mask_cache_entry));
>> - if (!table->mask_cache)
>> + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);
>> + if (!mc)
>> return -ENOMEM;
>>
>> ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
>> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>> rcu_assign_pointer(table->ti, ti);
>> rcu_assign_pointer(table->ufid_ti, ufid_ti);
>> rcu_assign_pointer(table->mask_array, ma);
>> + rcu_assign_pointer(table->mask_cache, mc);
>> table->last_rehash = jiffies;
>> table->count = 0;
>> table->ufid_count = 0;
>> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
>> free_mask_array:
>> __mask_array_destroy(ma);
>> free_mask_cache:
>> - free_percpu(table->mask_cache);
>> + __mask_cache_destroy(mc);
>> return -ENOMEM;
>> }
>>
>> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table
>> *table)
>> {
>> struct table_instance *ti = rcu_dereference_raw(table->ti);
>> struct table_instance *ufid_ti =
>> rcu_dereference_raw(table->ufid_ti);
>> + struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> + struct mask_array *ma =
>> rcu_dereference_ovsl(table->mask_array);
>>
>> - free_percpu(table->mask_cache);
>> - call_rcu(&table->mask_array->rcu, mask_array_rcu_cb);
>> + call_rcu(&mc->rcu, mask_cache_rcu_cb);
>> + call_rcu(&ma->rcu, mask_array_rcu_cb);
>> table_instance_destroy(table, ti, ufid_ti, false);
>> }
>>
>> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
>> flow_table *tbl,
>> u32 *n_mask_hit,
>> u32 *n_cache_hit)
>> {
>> + struct mask_cache *mc = rcu_dereference(tbl->mask_cache);
>> struct mask_array *ma = rcu_dereference(tbl->mask_array);
>> struct table_instance *ti = rcu_dereference(tbl->ti);
>> struct mask_cache_entry *entries, *ce;
>> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct
>> flow_table *tbl,
>>
>> *n_mask_hit = 0;
>> *n_cache_hit = 0;
>> - if (unlikely(!skb_hash)) {
>> + if (unlikely(!skb_hash || mc->cache_size == 0)) {
>> u32 mask_index = 0;
>> u32 cache = 0;
>>
>> @@ -749,11 +813,11 @@ struct sw_flow
>> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>> ce = NULL;
>> hash = skb_hash;
>> - entries = this_cpu_ptr(tbl->mask_cache);
>> + entries = this_cpu_ptr(mc->mask_cache);
>>
>> /* Find the cache entry 'ce' to operate on. */
>> for (seg = 0; seg < MC_HASH_SEGS; seg++) {
>> - int index = hash & (MC_HASH_ENTRIES - 1);
>> + int index = hash & (mc->cache_size - 1);
>> struct mask_cache_entry *e;
>>
>> e = &entries[index];
>> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct
>> flow_table *table)
>> return READ_ONCE(ma->count);
>> }
>>
>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)
>> +{
>> + struct mask_cache *mc = rcu_dereference(table->mask_cache);
>> +
>> + return READ_ONCE(mc->cache_size);
>> +}
>> +
>> static struct table_instance *table_instance_expand(struct
>> table_instance *ti,
>> bool ufid)
>> {
>> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table
>> *table)
>> for (i = 0; i < masks_entries; i++) {
>> int index = masks_and_count[i].index;
>>
>> - new->masks[new->count++] =
>> - rcu_dereference_ovsl(ma->masks[index]);
>> + if (ovsl_dereference(ma->masks[index]))
>> + new->masks[new->count++] = ma->masks[index];
>> }
>>
>> rcu_assign_pointer(table->mask_array, new);
>> diff --git a/net/openvswitch/flow_table.h
>> b/net/openvswitch/flow_table.h
>> index 325e939371d8..f2dba952db2f 100644
>> --- a/net/openvswitch/flow_table.h
>> +++ b/net/openvswitch/flow_table.h
>> @@ -27,6 +27,12 @@ struct mask_cache_entry {
>> u32 mask_index;
>> };
>>
>> +struct mask_cache {
>> + struct rcu_head rcu;
>> + u32 cache_size; /* Must be ^2 value. */
>> + struct mask_cache_entry __percpu *mask_cache;
>> +};
>> +
>> struct mask_count {
>> int index;
>> u64 counter;
>> @@ -53,7 +59,7 @@ struct table_instance {
>> struct flow_table {
>> struct table_instance __rcu *ti;
>> struct table_instance __rcu *ufid_ti;
>> - struct mask_cache_entry __percpu *mask_cache;
>> + struct mask_cache __rcu *mask_cache;
>> struct mask_array __rcu *mask_array;
>> unsigned long last_rehash;
>> unsigned int count;
>> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table,
>> struct sw_flow *flow,
>> const struct sw_flow_mask *mask);
>> void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow
>> *flow);
>> int ovs_flow_tbl_num_masks(const struct flow_table *table);
>> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table);
>> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32
>> size);
>> struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
>> u32 *bucket, u32 *idx);
>> struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
>>
>
>
> --
> Best regards, Tonghao
Powered by blists - more mailing lists