Date: Tue, 5 Dec 2023 17:20:53 -0800
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Kuniyuki Iwashima <kuniyu@...zon.com>
Cc: Kuniyuki Iwashima <kuni1840@...il.com>, bpf@...r.kernel.org,
 netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>,
 Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
 Andrii Nakryiko <andrii@...nel.org>
Subject: Re: [PATCH v4 bpf-next 2/3] bpf: tcp: Support arbitrary SYN Cookie.

On 12/4/23 5:34 PM, Kuniyuki Iwashima wrote:
> diff --git a/include/net/request_sock.h b/include/net/request_sock.h
> index 144c39db9898..2efffe2c05d0 100644
> --- a/include/net/request_sock.h
> +++ b/include/net/request_sock.h
> @@ -83,6 +83,41 @@ static inline struct sock *req_to_sk(struct request_sock *req)
>   	return (struct sock *)req;
>   }
>   
> +/**
> + * skb_steal_sock - steal a socket from an sk_buff
> + * @skb: sk_buff to steal the socket from
> + * @refcounted: is set to true if the socket is reference-counted
> + * @prefetched: is set to true if the socket was assigned from bpf
> + */
> +static inline struct sock *
> +skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
> +{
> +	struct sock *sk = skb->sk;
> +
> +	if (!skb->sk) {
> +		*prefetched = false;
> +		*refcounted = false;
> +		return NULL;
> +	}
> +
> +	*prefetched = skb_sk_is_prefetched(skb);
> +	if (*prefetched) {
> +#if IS_ENABLED(CONFIG_SYN_COOKIES)
> +		if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
> +			*refcounted = false;
> +			return inet_reqsk(sk)->rsk_listener;

If it does not break the later logic, I would set inet_reqsk(sk)->rsk_listener 
to NULL here to avoid an inconsistency when the later 
inet[6]_lookup_reuseport() selects another listener. In this sense, 
skb_steal_sock() truly steals the inet_reqsk(sk)->rsk_listener.
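
i.e. something like this (completely untested, just to sketch the idea):

		if (sk->sk_state == TCP_NEW_SYN_RECV &&
		    inet_reqsk(sk)->syncookie) {
			struct request_sock *req = inet_reqsk(sk);

			*refcounted = false;
			sk = req->rsk_listener;
			/* Clear the pointer so there is no inconsistency
			 * if a later inet[6]_lookup_reuseport() selects
			 * another listener.
			 */
			req->rsk_listener = NULL;
			return sk;
		}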


> +		}
> +#endif
> +		*refcounted = sk_is_refcounted(sk);
> +	} else {
> +		*refcounted = true;
> +	}
> +
> +	skb->destructor = NULL;
> +	skb->sk = NULL;
> +	return sk;
> +}
> +

[ ... ]

> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0adaa4afa35f..a43f7627c5fd 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -11816,6 +11816,94 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
>   
>   	return 0;
>   }
> +
> +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk,
> +					struct tcp_cookie_attributes *attr,
> +					int attr__sz)
> +{
> +#if IS_ENABLED(CONFIG_SYN_COOKIES)
> +	const struct request_sock_ops *ops;
> +	struct inet_request_sock *ireq;
> +	struct tcp_request_sock *treq;
> +	struct request_sock *req;
> +	__u16 min_mss;
> +
> +	if (attr__sz != sizeof(*attr) || attr->tcp_opt.unused)
> +		return -EINVAL;
> +
> +	if (!sk)
> +		return -EINVAL;
> +
> +	if (!skb_at_tc_ingress(skb))
> +		return -EINVAL;
> +
> +	if (dev_net(skb->dev) != sock_net(sk))
> +		return -ENETUNREACH;
> +
> +	switch (skb->protocol) {
> +	case htons(ETH_P_IP):
> +		ops = &tcp_request_sock_ops;
> +		min_mss = 536;
> +		break;
> +#if IS_BUILTIN(CONFIG_IPV6)
> +	case htons(ETH_P_IPV6):
> +		ops = &tcp6_request_sock_ops;
> +		min_mss = IPV6_MIN_MTU - 60;
> +		break;
> +#endif
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN)
> +		return -EINVAL;
> +
> +	if (attr->tcp_opt.mss_clamp < min_mss) {
> +		__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);

hmm... I am not sure the kfunc should decide what counts as SYNCOOKIESFAILED. 
Besides, the bpf prog should have already rejected the skb as part of its 
cookie validation logic, so reaching here is more like a bpf prog's error.

I would leave the SYNCOOKIESFAILED usage for the kernel tcp layer only. The 
existing bpf_tcp_check_syncookie() helper does not increment SYNCOOKIESFAILED 
either.
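
i.e. just return the error without touching the mib counter (untested):

	if (attr->tcp_opt.mss_clamp < min_mss)
		return -EINVAL;

	if (attr->tcp_opt.wscale_ok &&
	    (attr->tcp_opt.snd_wscale > TCP_MAX_WSCALE ||
	     attr->tcp_opt.rcv_wscale > TCP_MAX_WSCALE))
		return -EINVAL;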

> +		return -EINVAL;
> +	}
> +
> +	if (attr->tcp_opt.wscale_ok &&
> +	    (attr->tcp_opt.snd_wscale > TCP_MAX_WSCALE ||
> +	     attr->tcp_opt.rcv_wscale > TCP_MAX_WSCALE)) {
> +		__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);

Same here.

> +		return -EINVAL;
> +	}
> +
> +	if (sk_is_mptcp(sk))
> +		req = mptcp_subflow_reqsk_alloc(ops, sk, false);
> +	else
> +		req = inet_reqsk_alloc(ops, sk, false);
> +
> +	if (!req)
> +		return -ENOMEM;
> +
> +	ireq = inet_rsk(req);
> +	treq = tcp_rsk(req);
> +
> +	req->syncookie = 1;
> +	req->rsk_listener = sk;
> +	req->mss = attr->tcp_opt.mss_clamp;
> +
> +	ireq->snd_wscale = attr->tcp_opt.snd_wscale;
> +	ireq->rcv_wscale = attr->tcp_opt.rcv_wscale;
> +	ireq->wscale_ok = attr->tcp_opt.wscale_ok;
> +	ireq->tstamp_ok	= attr->tcp_opt.tstamp_ok;
> +	ireq->sack_ok = attr->tcp_opt.sack_ok;
> +	ireq->ecn_ok = attr->ecn_ok;
> +
> +	treq->req_usec_ts = attr->usec_ts_ok;
> +
> +	skb_orphan(skb);
> +	skb->sk = req_to_sk(req);
> +	skb->destructor = sock_pfree;
> +
> +	return 0;
> +#else
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
>   __bpf_kfunc_end_defs();
>   
>   int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
> @@ -11844,6 +11932,10 @@ BTF_SET8_START(bpf_kfunc_check_set_sock_addr)
>   BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path)
>   BTF_SET8_END(bpf_kfunc_check_set_sock_addr)
>   
> +BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk)
> +BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk)
> +BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk)
> +
>   static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
>   	.owner = THIS_MODULE,
>   	.set = &bpf_kfunc_check_set_skb,
> @@ -11859,6 +11951,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = {
>   	.set = &bpf_kfunc_check_set_sock_addr,
>   };
>   
> +static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = {
> +	.owner = THIS_MODULE,
> +	.set = &bpf_kfunc_check_set_tcp_reqsk,
> +};
> +
>   static int __init bpf_kfunc_init(void)
>   {
>   	int ret;
> @@ -11874,8 +11971,9 @@ static int __init bpf_kfunc_init(void)
>   	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
>   	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
>   	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
> -	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> -						&bpf_kfunc_set_sock_addr);
> +	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> +					       &bpf_kfunc_set_sock_addr);
> +	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
>   }
>   late_initcall(bpf_kfunc_init);
>   
> diff --git a/net/core/sock.c b/net/core/sock.c
> index fef349dd72fa..998950e97dfe 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2579,8 +2579,18 @@ EXPORT_SYMBOL(sock_efree);
>   #ifdef CONFIG_INET
>   void sock_pfree(struct sk_buff *skb)
>   {
> -	if (sk_is_refcounted(skb->sk))
> -		sock_gen_put(skb->sk);
> +	struct sock *sk = skb->sk;
> +
> +	if (!sk_is_refcounted(sk))
> +		return;
> +
> +	if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
> +		inet_reqsk(sk)->rsk_listener = NULL;
> +		reqsk_free(inet_reqsk(sk));
> +		return;
> +	}
> +
> +	sock_gen_put(sk);
>   }
>   EXPORT_SYMBOL(sock_pfree);
>   #endif /* CONFIG_INET */

