[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1322630461.2596.61.camel@edumazet-laptop>
Date: Wed, 30 Nov 2011 06:21:01 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: Rick Jones <raj@...dy.cup.hp.com>
Cc: netdev@...r.kernel.org
Subject: Re: [RFC net-next] Include selection of congestion control
algorithm in that which is inherited across an accept() call
Le mardi 29 novembre 2011 à 16:31 -0800, Rick Jones a écrit :
> From: Rick Jones <rick.jones2@...com>
>
> Include congestion control algorithm in what is inherited across an
> accept() call.
>
> Signed-off-by: Rick Jones <rick.jones2@...com>
>
Hi Rick, thanks for working on this !
I am still not sure why adding new fields is needed, please see my
comment :
> ---
>
...
> include/linux/ipv6.h | 3 +++
> include/net/inet_sock.h | 3 +++
> net/ipv4/tcp_ipv4.c | 3 +++
> net/ipv4/tcp_minisocks.c | 2 +-
> net/ipv6/tcp_ipv6.c | 3 +++
> 5 files changed, 13 insertions(+), 1 deletions(-)
>
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 0c99776..1d0dde3 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -265,11 +265,14 @@ static inline int inet6_iif(const struct sk_buff *skb)
> return IP6CB(skb)->iif;
> }
>
> +struct tcp_congestion_ops;
> +
> struct inet6_request_sock {
> struct in6_addr loc_addr;
> struct in6_addr rmt_addr;
> struct sk_buff *pktopts;
> int iif;
> + const struct tcp_congestion_ops *cong_ops;
> };
>
> struct tcp6_request_sock {
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index f941964..9ec68a3 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -69,6 +69,8 @@ struct ip_options_data {
> char data[40];
> };
>
> +struct tcp_congestion_ops;
> +
> struct inet_request_sock {
> struct request_sock req;
> #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
> @@ -89,6 +91,7 @@ struct inet_request_sock {
> no_srccheck: 1;
> kmemcheck_bitfield_end(flags);
> struct ip_options_rcu *opt;
> + const struct tcp_congestion_ops *cong_ops;
> };
>
> static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index a9db4b1..79c02e2 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1254,6 +1254,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
> const u8 *hash_location;
> struct request_sock *req;
> struct inet_request_sock *ireq;
> + struct inet_connection_sock *icsk = inet_csk(sk);
> struct tcp_sock *tp = tcp_sk(sk);
> struct dst_entry *dst = NULL;
> __be32 saddr = ip_hdr(skb)->saddr;
> @@ -1341,6 +1342,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
> ireq->rmt_addr = saddr;
> ireq->no_srccheck = inet_sk(sk)->transparent;
> ireq->opt = tcp_v4_save_options(sk, skb);
> + ireq->cong_ops = (icsk->icsk_ca_ops) ? icsk->icsk_ca_ops :
> + &tcp_init_congestion_ops;
>
> if (security_inet_conn_request(sk, skb, req))
> goto drop_and_free;
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 945efff..be338d7 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -495,7 +495,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
> newtp->frto_counter = 0;
> newtp->frto_highmark = 0;
>
> - newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
> + newicsk->icsk_ca_ops = ireq->cong_ops;
At this point, sk points to listener socket and is locked, why not
simply copy inet_csk(sk)->icsk_ca_ops to newicsk->icsk_ca_ops ?
As a matter of fact, it was already copied at newsk creation (clone of
sk)
>
> tcp_set_ca_state(newsk, TCP_CA_Open);
> tcp_init_xmit_timers(newsk);
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 9d74eee..c689779 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1164,6 +1164,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
> const u8 *hash_location;
> struct request_sock *req;
> struct inet6_request_sock *treq;
> + struct inet_connection_sock *icsk = inet_csk(sk);
> struct ipv6_pinfo *np = inet6_sk(sk);
> struct tcp_sock *tp = tcp_sk(sk);
> __u32 isn = TCP_SKB_CB(skb)->when;
> @@ -1254,6 +1255,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
> TCP_ECN_create_request(req, tcp_hdr(skb));
>
> treq->iif = sk->sk_bound_dev_if;
> + treq->cong_ops = (icsk->icsk_ca_ops) ? icsk->icsk_ca_ops :
> + &tcp_init_congestion_ops;
>
> /* So that link locals have meaning */
> if (!sk->sk_bound_dev_if &&
So my suggestion would be to use this two lines patch instead :
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 945efff..6b066e2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -495,8 +495,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
- newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
-
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists