[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+mtBx_J=fm3+k6_NzMH9KCdrYpZPwT2XBQe4r424eNDCKzazg@mail.gmail.com>
Date: Thu, 3 May 2012 20:22:22 -0700
From: Tom Herbert <therbert@...gle.com>
To: Deng-Cheng Zhu <dczhu@...s.com>
Cc: davem@...emloft.net, netdev@...r.kernel.org, eric.dumazet@...il.com
Subject: Re: [PATCH v2] RPS: Sparse connection optimizations - v2
> +struct cpu_flow {
> + struct net_device *dev;
> + u32 rxhash;
> + unsigned long ts;
> +};
This seems like overkill; we already have the rps_flow_table, and it is
used in accelerated RFS so the device can also take advantage of
steering. Maybe somehow program that table for your sparse flows?
Tom
> +#endif
> +
> /*
> * This structure holds an RPS map which can be of variable length. The
> * map is an array of CPUs.
> diff --git a/net/Kconfig b/net/Kconfig
> index e07272d..d5aa682 100644
> --- a/net/Kconfig
> +++ b/net/Kconfig
> @@ -222,6 +222,28 @@ config RPS
> depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
> default y
>
> +config RPS_SPARSE_FLOW_OPTIMIZATION
> + bool "RPS optimizations for sparse flows"
> + depends on RPS
> + default n
> + ---help---
> + This feature will try to map some network flows to consecutive
> + CPUs in the RPS map. It adds some per-packet overhead, but
> + should improve network throughput when the number of
> + connections is low while not much affecting other cases
> + (e.g. relatively consistent and high bandwidth in single
> + connection tests).
> +
> +config NR_RPS_MAP_LOOPS
> + int "Number of loops walking RPS map before hash indexing (1-5)"
> + range 1 5
> + depends on RPS_SPARSE_FLOW_OPTIMIZATION
> + default "4"
> + ---help---
> + It defines how many loops to go through the RPS map while
> + determining target CPU to process the incoming packet. After that,
> + the decision will fall back on hash indexing the RPS map.
> +
> config RFS_ACCEL
> boolean
> depends on RPS && GENERIC_HARDIRQS
> diff --git a/net/core/dev.c b/net/core/dev.c
> index c25d453..92e292b 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2698,6 +2698,61 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
> return rflow;
> }
>
> +#ifdef CONFIG_RPS_SPARSE_FLOW_OPTIMIZATION
> +static DEFINE_PER_CPU(struct cpu_flow [CONFIG_NR_RPS_MAP_LOOPS], cpu_flows);
> +static unsigned long hash_active;
> +
> +#define FLOW_INACTIVE(now, base) (time_after((now), (base) + HZ) || \
> + unlikely(time_before((now), (base))))
> +
> +static u16 find_cpu(const struct rps_map *map, const struct sk_buff *skb)
> +{
> + struct cpu_flow *flow;
> + u16 cpu;
> + int i, l, do_alloc = 0;
> + unsigned long now = jiffies;
> +
> +retry:
> + for (l = 0; l < CONFIG_NR_RPS_MAP_LOOPS; l++) {
> + for (i = map->len - 1; i >= 0; i--) {
> + cpu = map->cpus[i];
> + flow = &per_cpu(cpu_flows, cpu)[l];
> +
> + if (do_alloc) {
> + if (flow->dev == NULL ||
> + FLOW_INACTIVE(now, flow->ts)) {
> + flow->dev = skb->dev;
> + flow->rxhash = skb->rxhash;
> + flow->ts = now;
> + return cpu;
> + }
> + } else {
> + /*
> + * Unlike hash indexing, this avoids packet
> + * processing imbalance across CPUs.
> + */
> + if (flow->rxhash == skb->rxhash &&
> + flow->dev == skb->dev &&
> + !FLOW_INACTIVE(now, flow->ts)) {
> + flow->ts = now;
> + return cpu;
> + }
> + }
> + }
> + }
> +
> + if (FLOW_INACTIVE(now, hash_active) && do_alloc == 0) {
> + do_alloc = 1;
> + goto retry;
> + }
> +
> + /* For all other flows */
> + hash_active = now;
> +
> + return map->cpus[((u64) skb->rxhash * map->len) >> 32];
> +}
> +#endif
> +
> /*
> * get_rps_cpu is called from netif_receive_skb and returns the target
> * CPU from the RPS map of the receiving queue for a given skb.
> @@ -2780,7 +2835,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
> }
>
> if (map) {
> +#ifdef CONFIG_RPS_SPARSE_FLOW_OPTIMIZATION
> + tcpu = find_cpu(map, skb);
> +#else
> tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
> +#endif
>
> if (cpu_online(tcpu)) {
> cpu = tcpu;
> --
> 1.7.1
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists