[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20190829123409.GA815@bistromath.localdomain>
Date: Thu, 29 Aug 2019 14:34:09 +0200
From: Sabrina Dubroca <sd@...asysnail.net>
To: Steffen Klassert <steffen.klassert@...unet.com>
Cc: netdev@...r.kernel.org, Herbert Xu <herbert@...dor.apana.org.au>
Subject: Re: [PATCH ipsec-next 7/7] xfrm: add espintcp (RFC 8229)
2019-08-29, 09:04:31 +0200, Steffen Klassert wrote:
> On Wed, Aug 21, 2019 at 11:46:25PM +0200, Sabrina Dubroca wrote:
> > +static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
> > +{
> > + struct xfrm_encap_tmpl *encap = x->encap;
> > + struct esp_tcp_sk *esk;
> > + __be16 sport, dport;
> > + struct sock *nsk;
> > + struct sock *sk;
> > +
> > + sk = rcu_dereference(x->encap_sk);
> > + if (sk && sk->sk_state == TCP_ESTABLISHED)
> > + return sk;
> > +
> > + spin_lock_bh(&x->lock);
> > + sport = encap->encap_sport;
> > + dport = encap->encap_dport;
> > + nsk = rcu_dereference_protected(x->encap_sk,
> > + lockdep_is_held(&x->lock));
> > + if (sk && sk == nsk) {
> > + esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
> > + if (!esk) {
> > + spin_unlock_bh(&x->lock);
> > + return ERR_PTR(-ENOMEM);
> > + }
> > + RCU_INIT_POINTER(x->encap_sk, NULL);
> > + esk->sk = sk;
> > + call_rcu(&esk->rcu, esp_free_tcp_sk);
>
> I don't understand this, can you please explain what you are doing here?
If we get to this block, the current encap_sk is not in ESTABLISHED
state and cannot be used to send data. We want to get rid of it (reset
x->encap_sk) and find a new usable socket. The weird kmalloc dance is
just here so that we can do the sock_put after an RCU grace period,
which I think is needed so that we don't destruct a socket that's
still used by packets in flight.
That's code I copied from Herbert's first submission, I may be missing
some details here.
> > + }
> > + spin_unlock_bh(&x->lock);
> > +
> > + sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
> > + dport, x->props.saddr.a4, sport, 0);
> > + if (!sk)
> > + return ERR_PTR(-ENOENT);
> > +
> > + if (!tcp_is_ulp_esp(sk)) {
> > + sock_put(sk);
> > + return ERR_PTR(-EINVAL);
> > + }
> > +
> > + spin_lock_bh(&x->lock);
> > + nsk = rcu_dereference_protected(x->encap_sk,
> > + lockdep_is_held(&x->lock));
> > + if (encap->encap_sport != sport ||
> > + encap->encap_dport != dport) {
> > + sock_put(sk);
> > + sk = nsk ?: ERR_PTR(-EREMCHG);
> > + } else if (sk == nsk) {
> > + sock_put(sk);
> > + } else {
> > + rcu_assign_pointer(x->encap_sk, sk);
> > + }
> > + spin_unlock_bh(&x->lock);
> > +
> > + return sk;
> > +}
> > +
> > +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
> > +{
> > + struct sock *sk;
> > + int err;
> > +
> > + rcu_read_lock();
> > +
> > + sk = esp_find_tcp_sk(x);
> > + err = PTR_ERR(sk);
> > + if (IS_ERR(sk))
>
> Maybe better 'if (err)'?
That will work if I replace PTR_ERR with PTR_ERR_OR_ZERO.
> > + goto out;
> > +
> > + bh_lock_sock(sk);
> > + if (sock_owned_by_user(sk)) {
> > + err = espintcp_queue_out(sk, skb);
> > + if (err < 0)
> > + goto unlock_sock;
>
> This goto is not needed.
Will fix.
> > + } else {
> > + err = espintcp_push_skb(sk, skb);
> > + }
> > +
> > +unlock_sock:
> > + bh_unlock_sock(sk);
> > +out:
> > + rcu_read_unlock();
> > + return err;
> > +}
> > +
> > +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
> > + struct sk_buff *skb)
> > +{
> > + struct dst_entry *dst = skb_dst(skb);
> > + struct xfrm_state *x = dst->xfrm;
> > +
> > + return esp_output_tcp_finish(x, skb);
> > +}
> > +
> > +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
> > +{
> > + int err;
> > +
> > + local_bh_disable();
> > + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
> > + local_bh_enable();
> > +
> > + /* EINPROGRESS just happens to do the right thing. It
> > + * actually means that the skb has been consumed and
> > + * isn't coming back.
> > + */
> > + return err ?: -EINPROGRESS;
> > +}
> > +#endif
> > +
> > static void esp_output_done(struct crypto_async_request *base, int err)
> > {
> > struct sk_buff *skb = base->data;
> > @@ -147,7 +272,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
> > secpath_reset(skb);
> > xfrm_dev_resume(skb);
> > } else {
> > - xfrm_output_resume(skb, err);
> > +#ifdef CONFIG_XFRM_ESPINTCP
>
> Do we really need all these ifdef? I guess most of them could
> be avoided with some code refactorization.
If we compile espintcp out, esp_output_tail_tcp will be dead code so
it might as well not be compiled in either.
The more trivial operations (like in esp_input_done2) could be
compiled in anyway. That might be considered a bit inconsistent,
because I need to keep the ifdef in esp_init_state (so that we land in
the default case and error out when espintcp is disabled).
Then I can provide variants of some functions (esp_output_tcp_encap,
esp_output_tail_tcp) that always fail for !XFRM_ESPINTCP.
> > + if (!err &&
> > + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
> > + esp_output_tail_tcp(x, skb);
> > + else
> > +#endif
> > + xfrm_output_resume(skb, err);
> > }
> > }
>
> ...
>
> > @@ -296,7 +460,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
> > struct sk_buff *trailer;
> > int tailen = esp->tailen;
> >
> > - /* this is non-NULL only with UDP Encapsulation */
> > + /* this is non-NULL only with TCP/UDP Encapsulation */
> > if (x->encap) {
> > int err = esp_output_encap(x, skb, esp);
> >
> > @@ -491,6 +655,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
> > if (sg != dsg)
> > esp_ssg_unref(x, tmp);
> >
> > +#ifdef CONFIG_XFRM_ESPINTCP
> > + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
> > + err = esp_output_tail_tcp(x, skb);
> > +#endif
> > +
> > error_free:
> > kfree(tmp);
> > error:
> > @@ -617,10 +786,16 @@ int esp_input_done2(struct sk_buff *skb, int err)
> >
> > if (x->encap) {
> > struct xfrm_encap_tmpl *encap = x->encap;
> > + struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);
>
> This gives a unused variable warning if CONFIG_XFRM_ESPINTCP
> is not set.
Oops, will fix. Or that will take care of itself if I just remove the ifdef below.
> > struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
> > __be16 source;
> >
> > switch (x->encap->encap_type) {
> > +#ifdef CONFIG_XFRM_ESPINTCP
(this one)
> > + case TCP_ENCAP_ESPINTCP:
> > + source = th->source;
> > + break;
> > +#endif
> > case UDP_ENCAP_ESPINUDP:
> > case UDP_ENCAP_ESPINUDP_NON_IKE:
> > source = uh->source;
> > @@ -1017,6 +1192,14 @@ static int esp_init_state(struct xfrm_state *x)
> > case UDP_ENCAP_ESPINUDP_NON_IKE:
> > x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
> > break;
> > +#ifdef CONFIG_XFRM_ESPINTCP
> > + case TCP_ENCAP_ESPINTCP:
> > + /* only the length field, TCP encap is done by
> > + * the socket
> > + */
> > + x->props.header_len += 2;
> > + break;
> > +#endif
> > }
> > }
> >
> > diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
> > index c967fc3c38c8..ccc012b3ea10 100644
> > --- a/net/xfrm/Kconfig
> > +++ b/net/xfrm/Kconfig
> > @@ -71,6 +71,15 @@ config XFRM_IPCOMP
> > select CRYPTO
> > select CRYPTO_DEFLATE
> >
> > +config XFRM_ESPINTCP
> > + bool "ESP in TCP encapsulation (RFC 8229)"
> > + depends on XFRM && INET_ESP
> > + select STREAM_PARSER
>
> I'm getting these compile errors:
>
> espintcp.o: In function `espintcp_close':
> /home/klassert/git/linux-stk/net/xfrm/espintcp.c:469: undefined reference to `sk_msg_free'
> net/xfrm/espintcp.o: In function `espintcp_sendmsg':
> /home/klassert/git/linux-stk/net/xfrm/espintcp.c:302: undefined reference to `sk_msg_alloc'
> /home/klassert/git/linux-stk/net/xfrm/espintcp.c:316: undefined reference to `sk_msg_memcopy_from_iter'
> /home/klassert/git/linux-stk/net/xfrm/espintcp.c:341: undefined reference to `sk_msg_free'
> /home/klassert/git/linux-stk/net/xfrm/espintcp.c:321: undefined reference to `sk_msg_memcopy_from_iter'
> /home/klassert/git/linux-stk/Makefile:1067: recipe for target 'vmlinux' failed
> make[1]: *** [vmlinux] Error 1
>
> I guess you need to select NET_SOCK_MSG.
Right, I'll go fix the Kconfig entry.
> Btw. I've updated the ipsec-next tree, so patch 1 is not needed anymore.
Cool, I'll drop it.
> Everything else looks good!
Thanks for the review.
--
Sabrina
Powered by blists - more mailing lists