[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAK6E8=fk6omj6XHFSdiGWHaeeZ8Ss0ACdPKf179NWaYkh5K2eQ@mail.gmail.com>
Date: Tue, 28 Apr 2015 14:01:29 -0700
From: Yuchung Cheng <ycheng@...gle.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: "David S. Miller" <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Eric Dumazet <eric.dumazet@...il.com>,
Matt Mathis <mattmathis@...gle.com>,
Eric Salo <salo@...gle.com>, Martin Lau <kafai@...com>,
Chris Rapier <rapier@....edu>
Subject: Re: [PATCH net-next 1/2] tcp: add tcpi_bytes_acked to tcp_info
On Tue, Apr 28, 2015 at 10:32 AM, Eric Dumazet <edumazet@...gle.com> wrote:
>
> This patch tracks total number of bytes acked for a TCP socket.
> This is the sum of all changes done to tp->snd_una, and allows
> for precise tracking of delivered data.
>
> RFC4898 names this counter: tcpEStatsAppHCThruOctetsAcked
>
> This is a 64-bit field, and can be fetched either from the TCP_INFO
> getsockopt() if one has a handle on a TCP socket, or from the inet_diag
> netlink facility (iproute2/ss patch will follow).
>
> Note that tp->bytes_acked was placed near tp->snd_una for
> best data locality and minimal performance impact.
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Cc: Matt Mathis <mattmathis@...gle.com>
> Cc: Eric Salo <salo@...gle.com>
> Cc: Yuchung Cheng <ycheng@...gle.com>
> Cc: Martin Lau <kafai@...com>
> Cc: Chris Rapier <rapier@....edu>
Acked-by: Yuchung Cheng <ycheng@...gle.com>
> ---
> include/linux/tcp.h | 4 ++++
> include/net/tcp.h | 2 +-
> include/uapi/linux/tcp.h | 1 +
> net/ipv4/tcp.c | 6 +++++-
> net/ipv4/tcp_input.c | 13 +++++++++++--
> 5 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 0caa3a2d4106..0f73b43171da 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -150,6 +150,10 @@ struct tcp_sock {
> u32 rcv_wup; /* rcv_nxt on last window update sent */
> u32 snd_nxt; /* Next sequence we send */
>
> + u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
> + * sum(delta(snd_una)), or how many bytes
> + * were acked.
> + */
> u32 snd_una; /* First byte we want an ack for */
> u32 snd_sml; /* Last byte of the most recently transmitted small packet */
> u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 051dc5c2802d..dd7b4ea6a10c 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -576,7 +576,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
> }
>
> /* tcp.c */
> -void tcp_get_info(const struct sock *, struct tcp_info *);
> +void tcp_get_info(struct sock *, struct tcp_info *);
>
> /* Read 'sendfile()'-style from a TCP socket */
> typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index 3b9718328d8b..6666e98a0af9 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -189,6 +189,7 @@ struct tcp_info {
>
> __u64 tcpi_pacing_rate;
> __u64 tcpi_max_pacing_rate;
> + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
> };
>
> /* for TCP_MD5SIG socket option */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 8c5cd9efebbc..4bf0e8ca7b5b 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2592,7 +2592,7 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
> #endif
>
> /* Return information about state of tcp endpoint in API format. */
> -void tcp_get_info(const struct sock *sk, struct tcp_info *info)
> +void tcp_get_info(struct sock *sk, struct tcp_info *info)
> {
> const struct tcp_sock *tp = tcp_sk(sk);
> const struct inet_connection_sock *icsk = inet_csk(sk);
> @@ -2663,6 +2663,10 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
>
> rate = READ_ONCE(sk->sk_max_pacing_rate);
> info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
> +
> + spin_lock_bh(&sk->sk_lock.slock);
> + info->tcpi_bytes_acked = tp->bytes_acked;
> + spin_unlock_bh(&sk->sk_lock.slock);
> }
> EXPORT_SYMBOL_GPL(tcp_get_info);
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 3a4d9b34bed4..378d3f4d4dc3 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3280,6 +3280,15 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
> (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
> }
>
> +/* If we update tp->snd_una, also update tp->bytes_acked */
> +static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
> +{
> + u32 delta = ack - tp->snd_una;
> +
> + tp->bytes_acked += delta;
> + tp->snd_una = ack;
> +}
> +
> /* Update our send window.
> *
> * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
> @@ -3315,7 +3324,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
> }
> }
>
> - tp->snd_una = ack;
> + tcp_snd_una_update(tp, ack);
>
> return flag;
> }
> @@ -3497,7 +3506,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> * Note, we use the fact that SND.UNA>=SND.WL2.
> */
> tcp_update_wl(tp, ack_seq);
> - tp->snd_una = ack;
> + tcp_snd_una_update(tp, ack);
> flag |= FLAG_WIN_UPDATE;
>
> tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
> --
> 2.2.0.rc0.207.ga3a616c
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists