lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <878t8lw74v.fsf@toke.dk>
Date:   Tue, 15 May 2018 12:22:24 +0200
From:   Toke Høiland-Jørgensen <toke@...e.dk>
To:     netdev@...r.kernel.org
Cc:     cake@...ts.bufferbloat.net
Subject: Re: [PATCH net-next v10 6/7] sch_cake: Add overhead compensation support to the rate shaper

Toke Høiland-Jørgensen <toke@...e.dk> writes:

> This commit adds configurable overhead compensation support to the rate
> shaper. With this feature, userspace can configure the actual bottleneck
> link overhead and encapsulation mode used, which will be used by the shaper
> to calculate the precise duration of each packet on the wire.
>
> This feature is needed because CAKE is often deployed one or two hops
> upstream of the actual bottleneck (which can be, e.g., inside a DSL or
> cable modem). In this case, the link layer characteristics and overhead
> reported by the kernel do not match the actual bottleneck. Being able to
> set the actual values in use makes it possible to configure the shaper rate
> much closer to the actual bottleneck rate (our experience shows it is
> possible to get within 0.1% of the actual physical bottleneck rate), thus
> keeping latency low without sacrificing bandwidth.
>
> The overhead compensation has three tunables: A fixed per-packet overhead
> size (which, if set, will be accounted from the IP packet header), a
> minimum packet size (MPU) and a framing mode supporting either ATM or PTM
> framing. We include a set of common keywords in TC to help users configure
> the right parameters. If no overhead value is set, the value reported by
> the kernel is used.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@...e.dk>
> ---
>  net/sched/sch_cake.c |  123 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 122 insertions(+), 1 deletion(-)
>
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index ccc6f26b306c..6314a089a204 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -275,6 +275,7 @@ enum {
>  
>  struct cobalt_skb_cb {
>  	cobalt_time_t enqueue_time;
> +	u32           adjusted_len;
>  };
>  
>  static cobalt_time_t cobalt_get_time(void)
> @@ -1130,6 +1131,87 @@ static cobalt_time_t cake_ewma(cobalt_time_t avg, cobalt_time_t sample,
>  	return avg;
>  }
>  
> +static u32 cake_overhead(struct cake_sched_data *q, struct sk_buff *skb)
> +{
> +	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> +	u32 off = skb_network_offset(skb);
> +	u32 len = qdisc_pkt_len(skb);
> +	u16 segs = 1;
> +
> +	if (unlikely(shinfo->gso_size)) {
> +		/* borrowed from qdisc_pkt_len_init() */
> +		unsigned int hdr_len;
> +
> +		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> +
> +		/* + transport layer */
> +		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
> +					       SKB_GSO_TCPV6))) {
> +			const struct tcphdr *th;
> +			struct tcphdr _tcphdr;
> +
> +			th = skb_header_pointer(skb, skb_transport_offset(skb),
> +						sizeof(_tcphdr), &_tcphdr);
> +			if (likely(th))
> +				hdr_len += __tcp_hdrlen(th);
> +		} else {
> +			struct udphdr _udphdr;
> +
> +			if (skb_header_pointer(skb, skb_transport_offset(skb),
> +					       sizeof(_udphdr), &_udphdr))
> +				hdr_len += sizeof(struct udphdr);
> +		}
> +
> +		if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
> +			segs = DIV_ROUND_UP(skb->len - hdr_len,
> +					    shinfo->gso_size);
> +		else
> +			segs = shinfo->gso_segs;
> +
> +		/* The last segment may be shorter; we ignore this, which means
> +		 * that we will over-estimate the size of the whole GSO segment
> +		 * by the difference in size. This is conservative, so we live
> +		 * with that to avoid the complexity of dealing with it.
> +		 */
> +		len = shinfo->gso_size + hdr_len;
> +	}
> +
> +	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
> +
> +	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
> +		len -= off;
> +
> +	if (q->max_netlen < len)
> +		q->max_netlen = len;
> +	if (q->min_netlen > len)
> +		q->min_netlen = len;
> +
> +	len += q->rate_overhead;
> +
> +	if (len < q->rate_mpu)
> +		len = q->rate_mpu;
> +
> +	if (q->atm_mode == CAKE_ATM_ATM) {
> +		len += 47;
> +		len /= 48;
> +		len *= 53;
> +	} else if (q->atm_mode == CAKE_ATM_PTM) {
> +		/* Add one byte per 64 bytes or part thereof.
> +		 * This is conservative and easier to calculate than the
> +		 * precise value.
> +		 */
> +		len += (len + 63) / 64;
> +	}
> +
> +	if (q->max_adjlen < len)
> +		q->max_adjlen = len;
> +	if (q->min_adjlen > len)
> +		q->min_adjlen = len;
> +
> +	get_cobalt_cb(skb)->adjusted_len = len * segs;
> +	return len;

Well, this is embarrassing; seems that I broke this somewhere along the
way. Will resend with a fix...

-Toke

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ