[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180510215943.94513-1-edumazet@google.com>
Date: Thu, 10 May 2018 14:59:43 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>
Cc: netdev <netdev@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: [PATCH v2 net-next] tcp: switch pacing timer to softirq based hrtimer
linux-4.16 got support for softirq based hrtimers.
TCP can switch its pacing hrtimer to this variant, since this
avoids going through a tasklet and some atomic operations.
pacing timer logic looks like other (jiffies based) tcp timers.
v2: use hrtimer_try_to_cancel() in tcp_clear_xmit_timers()
to correctly release reference on socket if needed.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/net/tcp.h | 4 ++-
net/ipv4/tcp_output.c | 69 ++++++++++++++++---------------------------
net/ipv4/tcp_timer.c | 2 +-
3 files changed, 29 insertions(+), 46 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cf803fe0fb86b6a0fb1876a9f775a9c6e6a28ac4..3b1d617b01109b133b4ecafa9ee46173851083f8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -557,7 +557,9 @@ void tcp_fin(struct sock *sk);
void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
- hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
+ if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
+ sock_put(sk);
+
inet_csk_clear_xmit_timers(sk);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d07c0dcc99aaa55c4da963599c8286c8baa1f783..0d8f950a9006598c70dbf51e281a3fe32dfaa234 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -772,7 +772,7 @@ struct tsq_tasklet {
};
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
-static void tcp_tsq_handler(struct sock *sk)
+static void tcp_tsq_write(struct sock *sk)
{
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
@@ -789,6 +789,16 @@ static void tcp_tsq_handler(struct sock *sk)
0, GFP_ATOMIC);
}
}
+
+static void tcp_tsq_handler(struct sock *sk)
+{
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk))
+ tcp_tsq_write(sk);
+ else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+ sock_hold(sk);
+ bh_unlock_sock(sk);
+}
/*
* One tasklet per cpu tries to send more skbs.
* We run in tasklet context but need to disable irqs when
@@ -816,16 +826,7 @@ static void tcp_tasklet_func(unsigned long data)
smp_mb__before_atomic();
clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
- if (!sk->sk_lock.owned &&
- test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk)) {
- clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
- tcp_tsq_handler(sk);
- }
- bh_unlock_sock(sk);
- }
-
+ tcp_tsq_handler(sk);
sk_free(sk);
}
}
@@ -853,9 +854,10 @@ void tcp_release_cb(struct sock *sk)
nflags = flags & ~TCP_DEFERRED_ALL;
} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
- if (flags & TCPF_TSQ_DEFERRED)
- tcp_tsq_handler(sk);
-
+ if (flags & TCPF_TSQ_DEFERRED) {
+ tcp_tsq_write(sk);
+ __sock_put(sk);
+ }
/* Here begins the tricky part :
* We are called from release_sock() with :
* 1) BH disabled
@@ -929,7 +931,7 @@ void tcp_wfree(struct sk_buff *skb)
if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
goto out;
- nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
if (nval != oval)
continue;
@@ -948,37 +950,17 @@ void tcp_wfree(struct sk_buff *skb)
sk_free(sk);
}
-/* Note: Called under hard irq.
- * We can not call TCP stack right away.
+/* Note: Called under soft irq.
+ * We can call TCP stack right away, unless socket is owned by user.
*/
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
{
struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
struct sock *sk = (struct sock *)tp;
- unsigned long nval, oval;
- for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
- struct tsq_tasklet *tsq;
- bool empty;
+ tcp_tsq_handler(sk);
+ sock_put(sk);
- if (oval & TSQF_QUEUED)
- break;
-
- nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
- nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
- if (nval != oval)
- continue;
-
- if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
- break;
- /* queue this socket to tasklet queue */
- tsq = this_cpu_ptr(&tsq_tasklet);
- empty = list_empty(&tsq->head);
- list_add(&tp->tsq_node, &tsq->head);
- if (empty)
- tasklet_schedule(&tsq->tasklet);
- break;
- }
return HRTIMER_NORESTART;
}
@@ -1011,7 +993,8 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
do_div(len_ns, rate);
hrtimer_start(&tcp_sk(sk)->pacing_timer,
ktime_add_ns(ktime_get(), len_ns),
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_SOFT);
+ sock_hold(sk);
}
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1078,7 +1061,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
/* if no packet is in qdisc/device queue, then allow XPS to select
* another queue. We can be called from tcp_tsq_handler()
- * which holds one reference to sk_wmem_alloc.
+ * which holds one reference to sk.
*
* TODO: Ideally, in-flight pure ACK packets should not matter here.
* One way to get this would be to set skb->truesize = 2 on them.
@@ -2185,7 +2168,7 @@ static int tcp_mtu_probe(struct sock *sk)
static bool tcp_pacing_check(const struct sock *sk)
{
return tcp_needs_internal_pacing(sk) &&
- hrtimer_active(&tcp_sk(sk)->pacing_timer);
+ hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
}
/* TCP Small Queues :
@@ -2365,8 +2348,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
skb, limit, mss_now, gfp)))
break;
- if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
- clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
if (tcp_small_queue_check(sk, skb, 0))
break;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f7d944855f8ebd0a312fe73a53a56ab8d451ee44..92bdf64fffae3a5be291ca419eb21276b4c8cbae 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -713,6 +713,6 @@ void tcp_init_xmit_timers(struct sock *sk)
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS_PINNED_SOFT);
tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
}
--
2.17.0.441.gb46fe60e1d-goog
Powered by blists - more mailing lists