lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1360937929.6884.82.camel@edumazet-glaptop>
Date:	Fri, 15 Feb 2013 06:18:49 -0800
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Jiri Pirko <jiri@...nulli.us>
Cc:	netdev@...r.kernel.org, davem@...emloft.net, edumazet@...gle.com,
	jhs@...atatu.com, kuznet@....inr.ac.ru, j.vimal@...il.com
Subject: Re: [patch net-next RFC] tbf: handle gso skbs properly

On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote:
> According to Eric's suggestion, when gso skb can't be sent in one mtu
> time, resegment it.
> 
> Note that helper will be moved to sch_api.c probably so it can be used
> by other code.
> 
> Also, I'm not sure similar patch to this can be done for act_police. I
> have to look at that more closely.
> 
> Thanks for review.
> 
> Signed-off-by: Jiri Pirko <jiri@...nulli.us>
> ---
>  include/net/sch_generic.h |  1 +
>  net/core/dev.c            | 24 ------------------------
>  net/sched/sch_api.c       | 27 +++++++++++++++++++++++++++
>  net/sched/sch_tbf.c       | 38 +++++++++++++++++++++++++++++++++++++-
>  4 files changed, 65 insertions(+), 25 deletions(-)
> 
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 2761c90..de6db57 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -372,6 +372,7 @@ extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
>  				       struct Qdisc_ops *ops, u32 parentid);
>  extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
>  				      const struct qdisc_size_table *stab);
> +extern void qdisc_pkt_len_init(struct sk_buff *skb);
>  extern void tcf_destroy(struct tcf_proto *tp);
>  extern void tcf_destroy_chain(struct tcf_proto **fl);
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index f444736..8f86b1c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2680,30 +2680,6 @@ out:
>  	return rc;
>  }
>  
> -static void qdisc_pkt_len_init(struct sk_buff *skb)
> -{
> -	const struct skb_shared_info *shinfo = skb_shinfo(skb);
> -
> -	qdisc_skb_cb(skb)->pkt_len = skb->len;
> -
> -	/* To get more precise estimation of bytes sent on wire,
> -	 * we add to pkt_len the headers size of all segments
> -	 */
> -	if (shinfo->gso_size)  {
> -		unsigned int hdr_len;
> -
> -		/* mac layer + network layer */
> -		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> -
> -		/* + transport layer */
> -		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
> -			hdr_len += tcp_hdrlen(skb);
> -		else
> -			hdr_len += sizeof(struct udphdr);
> -		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
> -	}
> -}
> -

Please dont move this function, its called in fast path and should be
inlined by compiler

>  static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  				 struct net_device *dev,
>  				 struct netdev_queue *txq)
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index fe1ba54..7672259 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -29,6 +29,8 @@
>  #include <linux/hrtimer.h>
>  #include <linux/lockdep.h>
>  #include <linux/slab.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
>  
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
> @@ -464,6 +466,31 @@ out:
>  }
>  EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
>  
> +
>  void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
>  {
>  	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
> index c8388f3..bfc89be 100644
> --- a/net/sched/sch_tbf.c
> +++ b/net/sched/sch_tbf.c
> @@ -121,7 +121,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  	struct tbf_sched_data *q = qdisc_priv(sch);
>  	int ret;
>  
> -	if (qdisc_pkt_len(skb) > q->max_size)
> +	if (qdisc_pkt_len(skb) > q->max_size && !skb_is_gso(skb))
>  		return qdisc_reshape_fail(skb, sch);
>  
>  	ret = qdisc_enqueue(skb, q->qdisc);
> @@ -147,6 +147,33 @@ static unsigned int tbf_drop(struct Qdisc *sch)
>  	return len;
>  }
>  
> +static bool qdisc_gso_segment(struct Qdisc *qdisc, struct sk_buff *skb)
> +{
> +	struct sk_buff *segs;
> +	struct sk_buff *next_skb;
> +	struct sk_buff *prev_skb;
> +	int num_skbs = 0;
> +
> +	segs = skb_gso_segment(skb, 0);
> +	if (IS_ERR(segs) || !segs)
> +		return false;
> +	__skb_unlink(skb, &qdisc->q);
> +	kfree_skb(skb);
> +	skb = segs;
> +	prev_skb = (struct sk_buff *) &qdisc->q;
> +	do {
> +		next_skb = skb->next;
> +		qdisc_pkt_len_init(skb);
> +		qdisc_calculate_pkt_len(skb, qdisc);

You don't need this, packet is non gso, so you only have to 

qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_calculate_pkt_len();

> +		__skb_queue_after(&qdisc->q, prev_skb, skb);

Yes, this might need different strategy for non fifo qdisc, we can
see that later.

> +		prev_skb = skb;
> +		skb = next_skb;
> +		num_skbs++;
> +	} while (skb);
> +	qdisc_tree_decrease_qlen(qdisc, 1 - num_skbs);
> +	return true;
> +}
> +
>  static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
>  {
>  	struct tbf_sched_data *q = qdisc_priv(sch);
> @@ -167,6 +194,15 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
>  			ptoks = toks + q->ptokens;
>  			if (ptoks > q->mtu)
>  				ptoks = q->mtu;

Cache psched_l2t_ns(&q->peak, len) in a variable here to avoid double
call.

> +			if (skb_is_gso(skb) &&
> +			    (s64) psched_l2t_ns(&q->peak, len) > q->mtu &&
> +			    qdisc_gso_segment(q->qdisc, skb)) {
> +				q->qdisc->gso_skb = NULL;
> +				skb = q->qdisc->ops->peek(q->qdisc);
> +				if (unlikely(!skb))
> +					return NULL;
> +				len = qdisc_pkt_len(skb);
> +			}
>  			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
>  		}
>  		toks += q->tokens;

Thanks !


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ