Message-ID: <49a05d612fc8968b17780ed82ecb1b96dcf78e5a.1701358163.git.gnault@redhat.com>
Date: Thu, 30 Nov 2023 16:40:46 +0100
From: Guillaume Nault <gnault@...hat.com>
To: David Miller <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>
Cc: netdev@...r.kernel.org, David Ahern <dsahern@...nel.org>,
	Kuniyuki Iwashima <kuniyu@...zon.com>, Michal Kubecek <mkubecek@...e.cz>
Subject: [PATCH net-next v3] tcp: Dump bound-only sockets in inet_diag.

Walk the hashinfo->bhash2 table so that inet_diag can dump TCP sockets
that are bound but haven't yet called connect() or listen().

The code is inspired by the ->lhash2 loop. However there's no manual
test of the source port, since this kind of filtering is already
handled by inet_diag_bc_sk(). Also, a maximum of 16 sockets are dumped
at a time, to avoid running with bh disabled for too long.

There's no TCP state for bound but otherwise inactive sockets. Such
sockets normally map to TCP_CLOSE. However, "ss -l", which is supposed
to only dump listening sockets, actually requests the kernel to dump
sockets in either the TCP_LISTEN or TCP_CLOSE states. To avoid dumping
bound-only sockets with "ss -l", we therefore need to define a new
pseudo-state (TCP_BOUND_INACTIVE) that user space will be able to set
explicitly.

With an IPv4, an IPv6 and an IPv6-only socket, bound respectively to
40000, 64000, 60000, an updated version of iproute2 could work as
follows:

  $ ss -t state bound-inactive
  Recv-Q   Send-Q     Local Address:Port       Peer Address:Port   Process
  0        0                0.0.0.0:40000           0.0.0.0:*
  0        0                   [::]:60000              [::]:*
  0        0                      *:64000                 *:*

Signed-off-by: Guillaume Nault <gnault@...hat.com>
---
v3:
  * Grab sockets with sock_hold(), instead of refcount_inc_not_zero()
    (Kuniyuki Iwashima).
  * Use a new TCP pseudo-state (TCP_BOUND_INACTIVE), to dump bound-only
    sockets, so that "ss -l" won't print them (Eric Dumazet).

v2:
  * Use ->bhash2 instead of ->bhash (Kuniyuki Iwashima).
  * Process no more than 16 sockets at a time (Kuniyuki Iwashima).

 include/net/tcp_states.h |  2 +
 include/uapi/linux/bpf.h |  1 +
 net/ipv4/inet_diag.c     | 86 +++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp.c           |  1 +
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index cc00118acca1..d60e8148ff4c 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -22,6 +22,7 @@ enum {
 	TCP_LISTEN,
 	TCP_CLOSING,	/* Now a valid state */
 	TCP_NEW_SYN_RECV,
+	TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */
 
 	TCP_MAX_STATES	/* Leave at the end! */
 };
@@ -43,6 +44,7 @@ enum {
 	TCPF_LISTEN	 = (1 << TCP_LISTEN),
 	TCPF_CLOSING	 = (1 << TCP_CLOSING),
 	TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
+	TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE),
 };
 
 #endif	/* _LINUX_TCP_STATES_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7a5498242eaa..8ee2404d077c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6892,6 +6892,7 @@ enum {
 	BPF_TCP_LISTEN,
 	BPF_TCP_CLOSING,	/* Now a valid state */
 	BPF_TCP_NEW_SYN_RECV,
+	BPF_TCP_BOUND_INACTIVE,
 
 	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7d0e7aaa71e0..05fa0edd78b1 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1077,10 +1077,94 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 		s_i = num = s_num = 0;
 	}
 
+/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
+ * with bh disabled.
+ */
+#define SKARR_SZ 16
+
+	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
+	if (cb->args[0] == 1) {
+		if (!(idiag_states & TCPF_BOUND_INACTIVE))
+			goto skip_bind_ht;
+
+		for (i = s_i; i < hashinfo->bhash_size; i++) {
+			struct inet_bind_hashbucket *ibb;
+			struct inet_bind2_bucket *tb2;
+			struct sock *sk_arr[SKARR_SZ];
+			int num_arr[SKARR_SZ];
+			int idx, accum, res;
+
+resume_bind_walk:
+			num = 0;
+			accum = 0;
+			ibb = &hashinfo->bhash2[i];
+
+			spin_lock_bh(&ibb->lock);
+			inet_bind_bucket_for_each(tb2, &ibb->chain) {
+				if (!net_eq(ib2_net(tb2), net))
+					continue;
+
+				sk_for_each_bound_bhash2(sk, &tb2->owners) {
+					struct inet_sock *inet = inet_sk(sk);
+
+					if (num < s_num)
+						goto next_bind;
+
+					if (sk->sk_state != TCP_CLOSE ||
+					    !inet->inet_num)
+						goto next_bind;
+
+					if (r->sdiag_family != AF_UNSPEC &&
+					    r->sdiag_family != sk->sk_family)
+						goto next_bind;
+
+					if (!inet_diag_bc_sk(bc, sk))
+						goto next_bind;
+
+					sock_hold(sk);
+					num_arr[accum] = num;
+					sk_arr[accum] = sk;
+					if (++accum == SKARR_SZ)
+						goto pause_bind_walk;
+next_bind:
+					num++;
+				}
+			}
+pause_bind_walk:
+			spin_unlock_bh(&ibb->lock);
+
+			res = 0;
+			for (idx = 0; idx < accum; idx++) {
+				if (res >= 0) {
+					res = inet_sk_diag_fill(sk_arr[idx],
+								NULL, skb, cb,
+								r, NLM_F_MULTI,
+								net_admin);
+					if (res < 0)
+						num = num_arr[idx];
+				}
+				sock_gen_put(sk_arr[idx]);
+			}
+			if (res < 0)
+				goto done;
+
+			cond_resched();
+
+			if (accum == SKARR_SZ) {
+				s_num = num + 1;
+				goto resume_bind_walk;
+			}
+
+			s_num = 0;
+		}
+skip_bind_ht:
+		cb->args[0] = 2;
+		s_i = num = s_num = 0;
+	}
+
 	if (!(idiag_states & ~TCPF_LISTEN))
 		goto out;
 
-#define SKARR_SZ 16
 	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
 		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
 		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53bcc17c91e4..a100df07d34a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2605,6 +2605,7 @@ void tcp_set_state(struct sock *sk, int state)
 	BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
 	BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
 	BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+	BUILD_BUG_ON((int)BPF_TCP_BOUND_INACTIVE != (int)TCP_BOUND_INACTIVE);
 	BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
 
 	/* bpf uapi header bpf.h defines an anonymous enum with values
-- 
2.39.2
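
[Editor's note] For illustration, a minimal user-space sketch of the kind of request an updated dumper could send to exercise this patch: it asks the sock_diag netlink interface for TCP sockets in the new pseudo-state and prints their local ports. It is not part of the patch; it assumes a kernel carrying this series, queries AF_INET only, keeps error handling minimal, and hard-codes TCPF_BOUND_INACTIVE as (1 << 13), the value implied by the enum addition in tcp_states.h above.

/* Illustrative sketch only: dump bound-but-inactive TCP sockets via
 * NETLINK_SOCK_DIAG. Assumes a kernel with this patch applied.
 */
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

/* Assumed value, derived from the enum added to include/net/tcp_states.h
 * above (TCP_BOUND_INACTIVE == 13).
 */
#define TCPF_BOUND_INACTIVE (1 << 13)

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len = sizeof(msg),
			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family = AF_INET,	/* repeat with AF_INET6 for v6 */
			.sdiag_protocol = IPPROTO_TCP,
			.idiag_states = TCPF_BOUND_INACTIVE,
		},
	};
	char buf[8192];
	int fd, len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (fd < 0)
		return 1;
	if (sendto(fd, &msg, sizeof(msg), 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		close(fd);
		return 1;
	}

	/* Walk the multipart dump; each part carries a struct inet_diag_msg. */
	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *h = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(h, len); h = NLMSG_NEXT(h, len)) {
			struct inet_diag_msg *r = NLMSG_DATA(h);

			if (h->nlmsg_type == NLMSG_DONE ||
			    h->nlmsg_type == NLMSG_ERROR)
				goto out;
			printf("bound-only socket, local port %u\n",
			       ntohs(r->id.idiag_sport));
		}
	}
out:
	close(fd);
	return 0;
}

An updated iproute2, as in the "ss -t state bound-inactive" example in the commit message, would presumably issue an equivalent request for both address families and format the replies.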