[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f1e13509-9544-0fa5-4cb4-520c068bde6a@kylinos.cn>
Date: Wed, 19 Jun 2024 14:54:15 +0800
From: luoxuanqiang <luoxuanqiang@...inos.cn>
To: Kuniyuki Iwashima <kuniyu@...zon.com>
Cc: alexandre.ferrieux@...nge.com, davem@...emloft.net, dccp@...r.kernel.org,
dsahern@...nel.org, edumazet@...gle.com, fw@...len.de, kuba@...nel.org,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org, pabeni@...hat.com
Subject: Re: [PATCH net v3] Fix race for duplicate reqsk on identical SYN
在 2024/6/18 01:59, Kuniyuki Iwashima 写道:
> From: luoxuanqiang <luoxuanqiang@...inos.cn>
> Date: Mon, 17 Jun 2024 15:56:40 +0800
>> When bonding is configured in BOND_MODE_BROADCAST mode, if two identical
>> SYN packets are received at the same time and processed on different CPUs,
>> it can potentially create the same sk (sock) but two different reqsk
>> (request_sock) in tcp_conn_request().
>>
>> These two different reqsk will respond with two SYNACK packets, and since
>> the generation of the seq (ISN) incorporates a timestamp, the final two
>> SYNACK packets will have different seq values.
>>
>> The consequence is that when the Client receives and replies with an ACK
>> to the earlier SYNACK packet, we will reset(RST) it.
>>
>> ========================================================================
>>
>> This behavior is consistently reproducible in my local setup,
>> which comprises:
>>
>> | NETA1 ------ NETB1 |
>> PC_A --- bond --- | | --- bond --- PC_B
>> | NETA2 ------ NETB2 |
>>
>> - PC_A is the Server and has two network cards, NETA1 and NETA2. I have
>> bonded these two cards using BOND_MODE_BROADCAST mode and configured
>> them to be handled by different CPU.
>>
>> - PC_B is the Client, also equipped with two network cards, NETB1 and
>> NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode.
>>
>> If the client attempts a TCP connection to the server, it might encounter
>> a failure. Capturing packets from the server side reveals:
>>
>> 10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
>> 10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
>> localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116,
>> localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <==
>> 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
>> 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
>> localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <==
>> localhost > 10.10.10.10.45182: Flags [R], seq 2967855117,
>>
>> Two SYNACKs with different seq numbers are sent by localhost,
>> resulting in an anomaly.
>>
>> ========================================================================
>>
>> The attempted solution is as follows:
>> In the tcp_conn_request(), while inserting reqsk into the ehash table,
>> it also checks if an entry already exists. If found, it avoids
>> reinsertion and releases it.
>>
>> Simultaneously, In the reqsk_queue_hash_req(), the start of the
>> req->rsk_timer is adjusted to be after successful insertion.
>>
>> Signed-off-by: luoxuanqiang <luoxuanqiang@...inos.cn>
>> ---
>> include/net/inet_connection_sock.h | 4 ++--
>> net/dccp/ipv4.c | 2 +-
>> net/dccp/ipv6.c | 2 +-
>> net/ipv4/inet_connection_sock.c | 19 +++++++++++++------
>> net/ipv4/tcp_input.c | 9 ++++++++-
>> 5 files changed, 25 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
>> index 7d6b1254c92d..8ebab6220dbc 100644
>> --- a/include/net/inet_connection_sock.h
>> +++ b/include/net/inet_connection_sock.h
>> @@ -263,8 +263,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
>> struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
>> struct request_sock *req,
>> struct sock *child);
>> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>> - unsigned long timeout);
>> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>> + unsigned long timeout, bool *found_dup_sk);
>> struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
>> struct request_sock *req,
>> bool own_req);
>> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
>> index ff41bd6f99c3..13aafdeb9205 100644
>> --- a/net/dccp/ipv4.c
>> +++ b/net/dccp/ipv4.c
>> @@ -657,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>> if (dccp_v4_send_response(sk, req))
>> goto drop_and_free;
>>
>> - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
>> + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT, NULL);
>> reqsk_put(req);
>> return 0;
>>
>> diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
>> index 85f4b8fdbe5e..493cdb12ce2b 100644
>> --- a/net/dccp/ipv6.c
>> +++ b/net/dccp/ipv6.c
>> @@ -400,7 +400,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>> if (dccp_v6_send_response(sk, req))
>> goto drop_and_free;
>>
>> - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
>> + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT, NULL);
>> reqsk_put(req);
>> return 0;
>>
>> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
>> index d81f74ce0f02..2fa9b33ae26a 100644
>> --- a/net/ipv4/inet_connection_sock.c
>> +++ b/net/ipv4/inet_connection_sock.c
>> @@ -1122,25 +1122,32 @@ static void reqsk_timer_handler(struct timer_list *t)
>> inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
>> }
>>
>> -static void reqsk_queue_hash_req(struct request_sock *req,
>> - unsigned long timeout)
>> +static bool reqsk_queue_hash_req(struct request_sock *req,
>> + unsigned long timeout, bool *found_dup_sk)
>> {
> Given any changes here in reqsk_queue_hash_req() conflicts with 4.19
> (oldest stable) and DCCP does not check found_dup_sk, you can define
> found_dup_sk here, then you need not touch DCCP at all.
Apologies for not fully understanding your advice. If we cannot modify
the content of reqsk_queue_hash_req() and should avoid touching the DCCP
part, it seems the issue requires reworking some interfaces. Specifically:
The call flow to add reqsk to ehash is as follows:
tcp_conn_request()
dccp_v4(6)_conn_request()
-> inet_csk_reqsk_queue_hash_add()
-> reqsk_queue_hash_req()
-> inet_ehash_insert()
tcp_conn_request() needs to call the same interface inet_csk_reqsk_queue_hash_add()
as dccp_v4(6)_conn_request(), but the critical section for installation check and
insertion into ehash is within inet_ehash_insert().
If reqsk_queue_hash_req() should not be modified, then we need to rewrite
the interfaces to distinguish them. I don't see how redefining found_dup_sk
alone can resolve this conflict point. I may be lacking a more holistic
perspective on this matter. I sincerely hope to receive further guidance
from you. Thanks! ORZ
>
>> + if (!inet_ehash_insert(req_to_sk(req), NULL, found_dup_sk))
>> + return false;
>> +
>> + /* The timer needs to be setup after a successful insertion. */
>> timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
>> mod_timer(&req->rsk_timer, jiffies + timeout);
>>
>> - inet_ehash_insert(req_to_sk(req), NULL, NULL);
>> /* before letting lookups find us, make sure all req fields
>> * are committed to memory and refcnt initialized.
>> */
>> smp_wmb();
>> refcount_set(&req->rsk_refcnt, 2 + 1);
>> + return true;
>> }
>>
>> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>> - unsigned long timeout)
>> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
>> + unsigned long timeout, bool *found_dup_sk)
>> {
>> - reqsk_queue_hash_req(req, timeout);
>> + if (!reqsk_queue_hash_req(req, timeout, found_dup_sk))
>> + return false;
>> +
>> inet_csk_reqsk_queue_added(sk);
>> + return true;
>> }
>> EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
>>
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index 9c04a9c8be9d..e006c374f781 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -7255,8 +7255,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>> } else {
>> tcp_rsk(req)->tfo_listener = false;
>> if (!want_cookie) {
>> + bool found_dup_sk = false;
>> +
>> req->timeout = tcp_timeout_init((struct sock *)req);
>> - inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
>> + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, req->timeout,
>> + &found_dup_sk))) {
>> + reqsk_free(req);
>> + return 0;
>> + }
>> +
>> }
>> af_ops->send_synack(sk, dst, &fl, req, &foc,
>> !want_cookie ? TCP_SYNACK_NORMAL :
>> --
>> 2.25.1
Powered by blists - more mailing lists