[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231124002720.102537-7-dima@arista.com>
Date: Fri, 24 Nov 2023 00:27:19 +0000
From: Dmitry Safonov <dima@...sta.com>
To: David Ahern <dsahern@...nel.org>,
Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>,
Jakub Kicinski <kuba@...nel.org>,
"David S. Miller" <davem@...emloft.net>
Cc: linux-kernel@...r.kernel.org,
Dmitry Safonov <dima@...sta.com>,
Dmitry Safonov <0x7f454c46@...il.com>,
Francesco Ruggeri <fruggeri05@...il.com>,
Salam Noureddine <noureddine@...sta.com>,
Simon Horman <horms@...nel.org>,
netdev@...r.kernel.org
Subject: [PATCH v2 6/7] net/tcp: Add sne_lock to access SNEs
RFC 5925 (6.2):
> TCP-AO emulates a 64-bit sequence number space by inferring when to
> increment the high-order 32-bit portion (the SNE) based on
> transitions in the low-order portion (the TCP sequence number).
snd_sne and rcv_sne are the upper 4 bytes of the extended SEQ number.
Unfortunately, reading two separate 4-byte values (the SNE and the
matching SEQ) can't be performed atomically without synchronization.
Let's keep it simple (KISS) and add an rwlock - that shouldn't create much
contention, as the SNEs are updated only once every 4 GB of traffic and
the atomic region is quite small.
Fixes: 64382c71a557 ("net/tcp: Add TCP-AO SNE support")
Signed-off-by: Dmitry Safonov <dima@...sta.com>
---
include/net/tcp_ao.h | 2 +-
net/ipv4/tcp_ao.c | 34 +++++++++++++++++++++-------------
net/ipv4/tcp_input.c | 16 ++++++++++++++--
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 647781080613..beea3e6b39e2 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -123,6 +123,7 @@ struct tcp_ao_info {
*/
u32 snd_sne;
u32 rcv_sne;
+ rwlock_t sne_lock;
refcount_t refcnt; /* Protects twsk destruction */
struct rcu_head rcu;
};
@@ -212,7 +213,6 @@ enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req, int l3index,
const struct tcp_ao_hdr *aoh);
-u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq);
struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index,
const union tcp_ao_addr *addr,
int family, int sndid, int rcvid);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 2d000e275ce7..74db80aeeef3 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -230,6 +230,7 @@ static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags)
return NULL;
INIT_HLIST_HEAD(&ao->head);
refcount_set(&ao->refcnt, 1);
+ rwlock_init(&ao->sne_lock);
return ao;
}
@@ -472,10 +473,8 @@ static int tcp_ao_hash_pseudoheader(unsigned short int family,
return -EAFNOSUPPORT;
}
-u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq)
+static u32 tcp_ao_compute_sne(u32 sne, u32 next_seq, u32 seq)
{
- u32 sne = next_sne;
-
if (before(seq, next_seq)) {
if (seq > next_seq)
sne--;
@@ -483,7 +482,6 @@ u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq)
if (seq < next_seq)
sne++;
}
-
return sne;
}
@@ -763,14 +761,15 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
*keyid = (*key)->rcvid;
} else {
struct tcp_ao_key *rnext_key;
- u32 snd_basis;
+ const u32 *snd_basis;
+ unsigned long flags;
if (sk->sk_state == TCP_TIME_WAIT) {
ao_info = rcu_dereference(tcp_twsk(sk)->ao_info);
- snd_basis = tcp_twsk(sk)->tw_snd_nxt;
+ snd_basis = &tcp_twsk(sk)->tw_snd_nxt;
} else {
ao_info = rcu_dereference(tcp_sk(sk)->ao_info);
- snd_basis = tcp_sk(sk)->snd_una;
+ snd_basis = &tcp_sk(sk)->snd_una;
}
if (!ao_info)
return -ENOENT;
@@ -781,8 +780,10 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
*traffic_key = snd_other_key(*key);
rnext_key = READ_ONCE(ao_info->rnext_key);
*keyid = rnext_key->rcvid;
- *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne),
- snd_basis, seq);
+ read_lock_irqsave(&ao_info->sne_lock, flags);
+ *sne = tcp_ao_compute_sne(ao_info->snd_sne,
+ READ_ONCE(*snd_basis), seq);
+ read_unlock_irqrestore(&ao_info->sne_lock, flags);
}
return 0;
}
@@ -795,6 +796,7 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_ao_info *ao;
void *tkey_buf = NULL;
+ unsigned long flags;
u8 *traffic_key;
u32 sne;
@@ -816,8 +818,10 @@ int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp->af_specific->ao_calc_key_sk(key, traffic_key,
sk, ao->lisn, disn, true);
}
- sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una),
- ntohl(th->seq));
+ read_lock_irqsave(&ao->sne_lock, flags);
+ sne = tcp_ao_compute_sne(ao->snd_sne,
+ READ_ONCE(tp->snd_una), ntohl(th->seq));
+ read_unlock_irqrestore(&ao->sne_lock, flags);
tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, traffic_key,
hash_location - (u8 *)th, sne);
kfree(tkey_buf);
@@ -938,8 +942,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
/* Fast-path */
if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
- enum skb_drop_reason err;
struct tcp_ao_key *current_key;
+ enum skb_drop_reason err;
+ unsigned long flags;
/* Check if this socket's rnext_key matches the keyid in the
* packet. If not we lookup the key based on the keyid
@@ -956,8 +961,11 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
if (unlikely(th->syn && !th->ack))
goto verify_hash;
- sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt,
+ read_lock_irqsave(&info->sne_lock, flags);
+ sne = tcp_ao_compute_sne(info->rcv_sne,
+ READ_ONCE(tcp_sk(sk)->rcv_nxt),
ntohl(th->seq));
+ read_unlock_irqrestore(&info->sne_lock, flags);
/* Established socket, traffic key are cached */
traffic_key = rcv_other_key(key);
err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bcb55d98004c..fc3c27ce2b73 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3582,8 +3582,14 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && ack < tp->snd_una)
+ if (ao && ack < tp->snd_una) {
+ unsigned long flags;
+
+ write_lock_irqsave(&ao->sne_lock, flags);
ao->snd_sne++;
+ tp->snd_una = ack;
+ write_unlock_irqrestore(&ao->sne_lock, flags);
+ }
#endif
}
@@ -3608,8 +3614,14 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq)
ao = rcu_dereference_protected(tp->ao_info,
lockdep_sock_is_held((struct sock *)tp));
- if (ao && seq < tp->rcv_nxt)
+ if (ao && seq < tp->rcv_nxt) {
+ unsigned long flags;
+
+ write_lock_irqsave(&ao->sne_lock, flags);
ao->rcv_sne++;
+ WRITE_ONCE(tp->rcv_nxt, seq);
+ write_unlock_irqrestore(&ao->sne_lock, flags);
+ }
#endif
}
--
2.43.0
Powered by blists - more mailing lists