[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALx6S37VqLRGUXD5OCbXCL4Fheb7e+JjtDPa8vvXM7bWBWNw-w@mail.gmail.com>
Date: Thu, 17 May 2018 21:08:13 -0700
From: Tom Herbert <tom@...bertland.com>
To: Amritha Nambiar <amritha.nambiar@...el.com>
Cc: Linux Kernel Network Developers <netdev@...r.kernel.org>,
"David S. Miller" <davem@...emloft.net>,
Alexander Duyck <alexander.h.duyck@...el.com>,
Sridhar Samudrala <sridhar.samudrala@...el.com>,
Eric Dumazet <edumazet@...gle.com>,
Hannes Frederic Sowa <hannes@...essinduktion.org>
Subject: Re: [net-next PATCH v2 1/4] net: Refactor XPS for CPUs and Rx queues
On Tue, May 15, 2018 at 6:26 PM, Amritha Nambiar
<amritha.nambiar@...el.com> wrote:
> Refactor XPS code to support Tx queue selection based on
> CPU map or Rx queue map.
>
> Signed-off-by: Amritha Nambiar <amritha.nambiar@...el.com>
> ---
> include/linux/cpumask.h | 11 ++
> include/linux/netdevice.h | 72 +++++++++++++++-
> net/core/dev.c | 208 +++++++++++++++++++++++++++++----------------
> net/core/net-sysfs.c | 4 -
> 4 files changed, 215 insertions(+), 80 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index bf53d89..57f20a0 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
> #define cpu_active(cpu) ((cpu) == 0)
> #endif
>
> -/* verify cpu argument to cpumask_* operators */
> -static inline unsigned int cpumask_check(unsigned int cpu)
> +static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
> {
> #ifdef CONFIG_DEBUG_PER_CPU_MAPS
> - WARN_ON_ONCE(cpu >= nr_cpumask_bits);
> + WARN_ON_ONCE(cpu >= bits);
> #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
> +}
> +
> +/* verify cpu argument to cpumask_* operators */
> +static inline unsigned int cpumask_check(unsigned int cpu)
> +{
> + cpu_max_bits_warn(cpu, nr_cpumask_bits);
> return cpu;
> }
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 03ed492..c2eeb36 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -730,10 +730,21 @@ struct xps_map {
> */
> struct xps_dev_maps {
> struct rcu_head rcu;
> - struct xps_map __rcu *cpu_map[0];
> + struct xps_map __rcu *attr_map[0];
> };
> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
> +
> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
> (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
> +
> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
> + (_rxqs * (_tcs) * sizeof(struct xps_map *)))
> +
> +enum xps_map_type {
> + XPS_MAP_RXQS,
> + XPS_MAP_CPUS,
> + __XPS_MAP_MAX
> +};
> +
> #endif /* CONFIG_XPS */
>
> #define TC_MAX_QUEUE 16
> @@ -1891,7 +1902,7 @@ struct net_device {
> int watchdog_timeo;
>
> #ifdef CONFIG_XPS
> - struct xps_dev_maps __rcu *xps_maps;
> + struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX];
> #endif
> #ifdef CONFIG_NET_CLS_ACT
> struct mini_Qdisc __rcu *miniq_egress;
> @@ -3229,6 +3240,61 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
> #ifdef CONFIG_XPS
> int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> u16 index);
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> + u16 index, enum xps_map_type type);
> +
> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
> + unsigned int nr_bits)
> +{
> + cpu_max_bits_warn(j, nr_bits);
> + return test_bit(j, mask);
> +}
> +
> +static inline bool attr_test_online(unsigned long j,
> + const unsigned long *online_mask,
> + unsigned int nr_bits)
> +{
> + cpu_max_bits_warn(j, nr_bits);
> +
> + if (online_mask)
> + return test_bit(j, online_mask);
> +
> + if (j >= 0 && j < nr_bits)
> + return true;
> +
> + return false;
> +}
> +
> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
> + unsigned int nr_bits)
> +{
> + /* -1 is a legal arg here. */
> + if (n != -1)
> + cpu_max_bits_warn(n, nr_bits);
> +
> + if (srcp)
> + return find_next_bit(srcp, nr_bits, n + 1);
> +
> + return n + 1;
> +}
> +
> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
> + const unsigned long *src2p,
> + unsigned int nr_bits)
> +{
> + /* -1 is a legal arg here. */
> + if (n != -1)
> + cpu_max_bits_warn(n, nr_bits);
> +
> + if (src1p && src2p)
> + return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
> + else if (src1p)
> + return find_next_bit(src1p, nr_bits, n + 1);
> + else if (src2p)
> + return find_next_bit(src2p, nr_bits, n + 1);
> +
> + return n + 1;
> +}
> #else
> static inline int netif_set_xps_queue(struct net_device *dev,
> const struct cpumask *mask,
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9f43901..7e5dfdb 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
> int pos;
>
> if (dev_maps)
> - map = xmap_dereference(dev_maps->cpu_map[tci]);
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> if (!map)
> return false;
>
> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
> break;
> }
>
> - RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
> + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
> kfree_rcu(map, rcu);
> return false;
> }
> @@ -2125,7 +2125,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
> int i, j;
>
> for (i = count, j = offset; i--; j++) {
> - if (!remove_xps_queue(dev_maps, cpu, j))
> + if (!remove_xps_queue(dev_maps, tci, j))
> break;
> }
>
> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
> static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
> u16 count)
> {
> + const unsigned long *possible_mask = NULL;
> + enum xps_map_type type = XPS_MAP_RXQS;
> struct xps_dev_maps *dev_maps;
> - int cpu, i;
> bool active = false;
> + unsigned int nr_ids;
> + int i, j;
>
> mutex_lock(&xps_map_mutex);
> - dev_maps = xmap_dereference(dev->xps_maps);
>
> - if (!dev_maps)
> - goto out_no_maps;
> + while (type < __XPS_MAP_MAX) {
> + dev_maps = xmap_dereference(dev->xps_maps[type]);
> + if (!dev_maps)
> + goto out_no_maps;
> +
> + if (type == XPS_MAP_CPUS) {
> + if (num_possible_cpus() > 1)
> + possible_mask = cpumask_bits(cpu_possible_mask);
> + nr_ids = nr_cpu_ids;
> + } else if (type == XPS_MAP_RXQS) {
> + nr_ids = dev->num_rx_queues;
> + }
type is an enum, so this should be a switch statement rather than an if/else-if chain.
>
> - for_each_possible_cpu(cpu)
> - active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
> - offset, count);
> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> + j < nr_ids;)
> + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
> + count);
> + if (!active) {
> + RCU_INIT_POINTER(dev->xps_maps[type], NULL);
> + kfree_rcu(dev_maps, rcu);
> + }
>
> - if (!active) {
> - RCU_INIT_POINTER(dev->xps_maps, NULL);
> - kfree_rcu(dev_maps, rcu);
> + if (type == XPS_MAP_CPUS) {
> + for (i = offset + (count - 1); count--; i--)
> + netdev_queue_numa_node_write(
> + netdev_get_tx_queue(dev, i),
> + NUMA_NO_NODE);
> + }
> +out_no_maps:
> + type++;
> }
>
> - for (i = offset + (count - 1); count--; i--)
> - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
> - NUMA_NO_NODE);
> -
> -out_no_maps:
> mutex_unlock(&xps_map_mutex);
> }
>
> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
> netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
> }
>
> -static struct xps_map *expand_xps_map(struct xps_map *map,
> - int cpu, u16 index)
> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
> + u16 index, enum xps_map_type type)
> {
> - struct xps_map *new_map;
> int alloc_len = XPS_MIN_MAP_ALLOC;
> + struct xps_map *new_map = NULL;
> int i, pos;
>
> for (pos = 0; map && pos < map->len; pos++) {
> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
> return map;
> }
>
> - /* Need to add queue to this CPU's existing map */
> + /* Need to add tx-queue to this CPU's/rx-queue's existing map */
> if (map) {
> if (pos < map->alloc_len)
> return map;
> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
> alloc_len = map->alloc_len * 2;
> }
>
> - /* Need to allocate new map to store queue on this CPU's map */
> - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> - cpu_to_node(cpu));
> + /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
> + * map
> + */
> + if (type == XPS_MAP_RXQS)
> + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
> + else if (type == XPS_MAP_CPUS)
> + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
> + cpu_to_node(attr_index));
This should be a switch statement on type as well.
> if (!new_map)
> return NULL;
>
> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
> return new_map;
> }
>
> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> - u16 index)
> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
> + u16 index, enum xps_map_type type)
> {
> + const unsigned long *online_mask = NULL, *possible_mask = NULL;
> struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
> - int i, cpu, tci, numa_node_id = -2;
> + int i, j, tci, numa_node_id = -2;
> int maps_sz, num_tc = 1, tc = 0;
> struct xps_map *map, *new_map;
> bool active = false;
> + unsigned int nr_ids;
>
> if (dev->num_tc) {
> num_tc = dev->num_tc;
> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> return -EINVAL;
> }
>
> - maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
> + switch (type) {
> + case XPS_MAP_RXQS:
> + maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
> + dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]);
> + nr_ids = dev->num_rx_queues;
> + break;
> + case XPS_MAP_CPUS:
> + maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
> + if (num_possible_cpus() > 1) {
> + online_mask = cpumask_bits(cpu_online_mask);
> + possible_mask = cpumask_bits(cpu_possible_mask);
> + }
> + dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]);
> + nr_ids = nr_cpu_ids;
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> if (maps_sz < L1_CACHE_BYTES)
> maps_sz = L1_CACHE_BYTES;
>
> mutex_lock(&xps_map_mutex);
>
> - dev_maps = xmap_dereference(dev->xps_maps);
> -
> /* allocate memory for queue storage */
> - for_each_cpu_and(cpu, cpu_online_mask, mask) {
> + for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
> + j < nr_ids;) {
> if (!new_dev_maps)
> new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
> if (!new_dev_maps) {
> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> return -ENOMEM;
> }
>
> - tci = cpu * num_tc + tc;
> - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
> + tci = j * num_tc + tc;
> + map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
> NULL;
>
> - map = expand_xps_map(map, cpu, index);
> + map = expand_xps_map(map, j, index, type);
> if (!map)
> goto error;
>
> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
> }
>
> if (!new_dev_maps)
> goto out_no_new_maps;
>
> - for_each_possible_cpu(cpu) {
> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> + j < nr_ids;) {
> /* copy maps belonging to foreign traffic classes */
> - for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
> + for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
> /* fill in the new device map from the old device map */
> - map = xmap_dereference(dev_maps->cpu_map[tci]);
> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
> }
>
> /* We need to explicitly update tci as prevous loop
> * could break out early if dev_maps is NULL.
> */
> - tci = cpu * num_tc + tc;
> + tci = j * num_tc + tc;
>
> - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
> - /* add queue to CPU maps */
> + if (attr_test_mask(j, mask, nr_ids) &&
> + attr_test_online(j, online_mask, nr_ids)) {
> + /* add tx-queue to CPU/rx-queue maps */
> int pos = 0;
>
> - map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> + map = xmap_dereference(new_dev_maps->attr_map[tci]);
> while ((pos < map->len) && (map->queues[pos] != index))
> pos++;
>
> if (pos == map->len)
> map->queues[map->len++] = index;
> #ifdef CONFIG_NUMA
> - if (numa_node_id == -2)
> - numa_node_id = cpu_to_node(cpu);
> - else if (numa_node_id != cpu_to_node(cpu))
> - numa_node_id = -1;
> + if (type == XPS_MAP_CPUS) {
> + if (numa_node_id == -2)
> + numa_node_id = cpu_to_node(j);
> + else if (numa_node_id != cpu_to_node(j))
> + numa_node_id = -1;
> + }
> #endif
> } else if (dev_maps) {
> /* fill in the new device map from the old device map */
> - map = xmap_dereference(dev_maps->cpu_map[tci]);
> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
> }
>
> /* copy maps belonging to foreign traffic classes */
> for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
> /* fill in the new device map from the old device map */
> - map = xmap_dereference(dev_maps->cpu_map[tci]);
> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
> }
> }
>
> - rcu_assign_pointer(dev->xps_maps, new_dev_maps);
> + if (type == XPS_MAP_RXQS)
> + rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], new_dev_maps);
> + else if (type == XPS_MAP_CPUS)
> + rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], new_dev_maps);
>
> /* Cleanup old maps */
> if (!dev_maps)
> goto out_no_old_maps;
>
> - for_each_possible_cpu(cpu) {
> - for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> - new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> - map = xmap_dereference(dev_maps->cpu_map[tci]);
> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> + j < nr_ids;) {
> + for (i = num_tc, tci = j * num_tc; i--; tci++) {
> + new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
> + map = xmap_dereference(dev_maps->attr_map[tci]);
> if (map && map != new_map)
> kfree_rcu(map, rcu);
> }
> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> active = true;
>
> out_no_new_maps:
> - /* update Tx queue numa node */
> - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> - (numa_node_id >= 0) ? numa_node_id :
> - NUMA_NO_NODE);
> + if (type == XPS_MAP_CPUS) {
> + /* update Tx queue numa node */
> + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
> + (numa_node_id >= 0) ?
> + numa_node_id : NUMA_NO_NODE);
> + }
>
> if (!dev_maps)
> goto out_no_maps;
>
> - /* removes queue from unused CPUs */
> - for_each_possible_cpu(cpu) {
> - for (i = tc, tci = cpu * num_tc; i--; tci++)
> + /* removes tx-queue from unused CPUs/rx-queues */
> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> + j < nr_ids;) {
> + for (i = tc, tci = j * num_tc; i--; tci++)
> active |= remove_xps_queue(dev_maps, tci, index);
> - if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
> + if (!attr_test_mask(j, mask, nr_ids) ||
> + !attr_test_online(j, online_mask, nr_ids))
> active |= remove_xps_queue(dev_maps, tci, index);
> for (i = num_tc - tc, tci++; --i; tci++)
> active |= remove_xps_queue(dev_maps, tci, index);
> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>
> /* free map if not active */
> if (!active) {
> - RCU_INIT_POINTER(dev->xps_maps, NULL);
> + if (type == XPS_MAP_RXQS)
> + RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL);
> + else if (type == XPS_MAP_CPUS)
> + RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL);
> kfree_rcu(dev_maps, rcu);
> }
>
> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> return 0;
> error:
> /* remove any maps that we added */
> - for_each_possible_cpu(cpu) {
> - for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
> - new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
> + j < nr_ids;) {
> + for (i = num_tc, tci = j * num_tc; i--; tci++) {
> + new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
> map = dev_maps ?
> - xmap_dereference(dev_maps->cpu_map[tci]) :
> + xmap_dereference(dev_maps->attr_map[tci]) :
> NULL;
> if (new_map && new_map != map)
> kfree(new_map);
> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> kfree(new_dev_maps);
> return -ENOMEM;
> }
> +
> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
> + u16 index)
> +{
> + return __netif_set_xps_queue(dev, cpumask_bits(mask), index,
> + XPS_MAP_CPUS);
> +}
> EXPORT_SYMBOL(netif_set_xps_queue);
>
> #endif
> @@ -3402,7 +3466,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
> int queue_index = -1;
>
> rcu_read_lock();
> - dev_maps = rcu_dereference(dev->xps_maps);
> + dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
> if (dev_maps) {
> unsigned int tci = skb->sender_cpu - 1;
>
> @@ -3411,7 +3475,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
> tci += netdev_get_prio_tc_map(dev, skb->priority);
> }
>
> - map = rcu_dereference(dev_maps->cpu_map[tci]);
> + map = rcu_dereference(dev_maps->attr_map[tci]);
> if (map) {
> if (map->len == 1)
> queue_index = map->queues[0];
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index c476f07..d7abd33 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
> }
>
> rcu_read_lock();
> - dev_maps = rcu_dereference(dev->xps_maps);
> + dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]);
> if (dev_maps) {
> for_each_possible_cpu(cpu) {
> int i, tci = cpu * num_tc + tc;
> struct xps_map *map;
>
> - map = rcu_dereference(dev_maps->cpu_map[tci]);
> + map = rcu_dereference(dev_maps->attr_map[tci]);
> if (!map)
> continue;
>
>
Powered by blists - more mailing lists