[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CADvbK_cG-yAAqUjGMVcmewP1Cc-7HqRLWsn2j_yWu_hmxqP5Eg@mail.gmail.com>
Date: Wed, 5 Nov 2025 20:01:54 -0500
From: Xin Long <lucien.xin@...il.com>
To: Paolo Abeni <pabeni@...hat.com>
Cc: network dev <netdev@...r.kernel.org>, quic@...ts.linux.dev, davem@...emloft.net,
kuba@...nel.org, Eric Dumazet <edumazet@...gle.com>, Simon Horman <horms@...nel.org>,
Stefan Metzmacher <metze@...ba.org>, Moritz Buhl <mbuhl@...nbsd.org>, Tyler Fanelli <tfanelli@...hat.com>,
Pengtao He <hepengtao@...omi.com>, Thomas Dreibholz <dreibh@...ula.no>, linux-cifs@...r.kernel.org,
Steve French <smfrench@...il.com>, Namjae Jeon <linkinjeon@...nel.org>,
Paulo Alcantara <pc@...guebit.com>, Tom Talpey <tom@...pey.com>, kernel-tls-handshake@...ts.linux.dev,
Chuck Lever <chuck.lever@...cle.com>, Jeff Layton <jlayton@...nel.org>,
Steve Dickson <steved@...hat.com>, Hannes Reinecke <hare@...e.de>, Alexander Aring <aahringo@...hat.com>,
David Howells <dhowells@...hat.com>, Matthieu Baerts <matttbe@...nel.org>,
John Ericson <mail@...nericson.me>, Cong Wang <xiyou.wangcong@...il.com>,
"D . Wythe" <alibuda@...ux.alibaba.com>, Jason Baron <jbaron@...mai.com>,
illiliti <illiliti@...tonmail.com>, Sabrina Dubroca <sd@...asysnail.net>,
Marcelo Ricardo Leitner <marcelo.leitner@...il.com>, Daniel Stenberg <daniel@...x.se>,
Andy Gospodarek <andrew.gospodarek@...adcom.com>
Subject: Re: [PATCH net-next v4 04/15] quic: provide family ops for address
and protocol
On Tue, Nov 4, 2025 at 5:27 AM Paolo Abeni <pabeni@...hat.com> wrote:
>
> On 10/29/25 3:35 PM, Xin Long wrote:
> > +static int quic_v4_flow_route(struct sock *sk, union quic_addr *da, union quic_addr *sa,
> > + struct flowi *fl)
> > +{
> > + struct flowi4 *fl4;
> > + struct rtable *rt;
> > +
> > + if (__sk_dst_check(sk, 0))
> > + return 1;
> > +
> > + memset(fl, 0x00, sizeof(*fl));
> > + fl4 = &fl->u.ip4;
> > + fl4->saddr = sa->v4.sin_addr.s_addr;
> > + fl4->fl4_sport = sa->v4.sin_port;
> > + fl4->daddr = da->v4.sin_addr.s_addr;
> > + fl4->fl4_dport = da->v4.sin_port;
> > + fl4->flowi4_proto = IPPROTO_UDP;
> > + fl4->flowi4_oif = sk->sk_bound_dev_if;
> > +
> > + fl4->flowi4_scope = ip_sock_rt_scope(sk);
> > + fl4->flowi4_dscp = inet_sk_dscp(inet_sk(sk));
> > +
> > + rt = ip_route_output_key(sock_net(sk), fl4);
> > + if (IS_ERR(rt))
> > + return PTR_ERR(rt);
> > +
> > + if (!sa->v4.sin_family) {
>
> The above check is strange. Any special reason to not use
> quic_v4_is_any_addr()?
>
quic_v4_is_any_addr() looks better, will try to replace it.
> > + sa->v4.sin_family = AF_INET;
> > + sa->v4.sin_addr.s_addr = fl4->saddr;
> > + }
> > + sk_setup_caps(sk, &rt->dst);
> > + return 0;
> > +}
> > +
> > +static int quic_v6_flow_route(struct sock *sk, union quic_addr *da, union quic_addr *sa,
> > + struct flowi *fl)
> > +{
> > + struct ipv6_pinfo *np = inet6_sk(sk);
> > + struct ip6_flowlabel *flowlabel;
> > + struct dst_entry *dst;
> > + struct flowi6 *fl6;
> > +
> > + if (__sk_dst_check(sk, np->dst_cookie))
> > + return 1;
> > +
> > + memset(fl, 0x00, sizeof(*fl));
> > + fl6 = &fl->u.ip6;
> > + fl6->saddr = sa->v6.sin6_addr;
> > + fl6->fl6_sport = sa->v6.sin6_port;
> > + fl6->daddr = da->v6.sin6_addr;
> > + fl6->fl6_dport = da->v6.sin6_port;
> > + fl6->flowi6_proto = IPPROTO_UDP;
> > + fl6->flowi6_oif = sk->sk_bound_dev_if;
> > +
> > + if (inet6_test_bit(SNDFLOW, sk)) {
> > + fl6->flowlabel = (da->v6.sin6_flowinfo & IPV6_FLOWINFO_MASK);
> > + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
> > + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
> > + if (IS_ERR(flowlabel))
> > + return -EINVAL;
> > + fl6_sock_release(flowlabel);
> > + }
> > + }
> > +
> > + dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, NULL);
> > + if (IS_ERR(dst))
> > + return PTR_ERR(dst);
> > +
> > + if (!sa->v6.sin6_family) {
>
> (similar question here)
>
right.
> [...]
> > +static int quic_v4_get_mtu_info(struct sk_buff *skb, u32 *info)
> > +{
> > + struct icmphdr *hdr;
> > +
> > + hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr));
>
> Noting the above relies on headers being already pulled in the linear
> part. A later patch will do skb_linearize(), but that looks like overkill
> and should hit performance badly. Instead you should use pskb_may_pull() &&
> friends.
This path (ICMP error path) doesn't need to parse frames and bundled
packets, so yes we can use pskb_may_pull().
However, in the normal QUIC packet receive path:
- for short header packet path, the packet format is:
Before decryption:
UDP hdr | QUIC hdr | conn_id | encrypted text
After decryption:
UDP hdr | QUIC hdr | conn_id | frame1 hdr | frame1 data | frame2 hdr
| frame2 data ...
When parsing the frames, it's hard to do it without linearizing the
skb, also fields in these frame headers are always variable-length
integers, making the parsing more difficult if it's not a linearized
buffer.
- for long header (handshake) packet path, more complex, packets can
be bundled like:
UDP hdr | QUIC hdr1 | encrypted text | QUIC hdr2 | encrypted text |
...
>
> > + if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED) {
> > + *info = ntohs(hdr->un.frag.mtu);
> > + return 0;
> > + }
> > +
> > + /* Defer other types' processing to UDP error handler. */
> > + return 1;
> > +}
> > +
> > +static int quic_v6_get_mtu_info(struct sk_buff *skb, u32 *info)
> > +{
> > + struct icmp6hdr *hdr;
> > +
> > + hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr));
> > + if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG) {
> > + *info = ntohl(hdr->icmp6_mtu);
> > + return 0;
> > + }
> > +
> > + /* Defer other types' processing to UDP error handler. */
> > + return 1;
> > +}
> > +
> > +static u8 quic_v4_get_msg_ecn(struct sk_buff *skb)
> > +{
> > + return (ip_hdr(skb)->tos & INET_ECN_MASK);
> > +}
> > +
> > +static u8 quic_v6_get_msg_ecn(struct sk_buff *skb)
> > +{
> > + return (ipv6_get_dsfield(ipv6_hdr(skb)) & INET_ECN_MASK);
> > +}
> > +
> > +static int quic_v4_get_user_addr(struct sock *sk, union quic_addr *a, struct sockaddr *addr,
> > + int addr_len)
> > +{
> > + u32 len = sizeof(struct sockaddr_in);
> > +
> > + if (addr_len < len || addr->sa_family != AF_INET)
> > + return 1;
> > + if (ipv4_is_multicast(quic_addr(addr)->v4.sin_addr.s_addr))
> > + return 1;
> > + memcpy(a, addr, len);
> > + return 0;
> > +}
>
> It looks like the above function is not used in this series?!? (well
> it's called by quic_get_user_addr() which in turn is unused).
>
> Perhaps drop from here and add later as needed?
Sure, I will drop:
quic_seq_dump_addr()
quic_get_msg_ecn()
quic_get_user_addr()
quic_get_pref_addr()
quic_set_pref_addr()
quic_set_sk_addr()
quic_set_sk_ecn()
>
> Also the name sounds possibly misleading: I read it as if it should copy
> data to user-space and the return value could possibly be an errnum.
>
Maybe quic_get_addr_from_user()? and I will return -EINVAL instead
of 1 in the err path.
> > +static void quic_v4_get_pref_addr(struct sock *sk, union quic_addr *addr, u8 **pp, u32 *plen)
> > +{
> > + u8 *p = *pp;
> > +
> > + memcpy(&addr->v4.sin_addr, p, QUIC_ADDR4_LEN);
> > + p += QUIC_ADDR4_LEN;
> > + memcpy(&addr->v4.sin_port, p, QUIC_PORT_LEN);
> > + p += QUIC_PORT_LEN;
> > + addr->v4.sin_family = AF_INET;
> > + /* Skip over IPv6 address and port, not used for AF_INET sockets. */
> > + p += QUIC_ADDR6_LEN;
> > + p += QUIC_PORT_LEN;
> > +
> > + if (!addr->v4.sin_port || quic_v4_is_any_addr(addr) ||
> > + ipv4_is_multicast(addr->v4.sin_addr.s_addr))
> > + memset(addr, 0, sizeof(*addr));
> > + *plen -= (p - *pp);
> > + *pp = p;
> > +}
>
> Similarly unused?
>
> > +static bool quic_v4_cmp_sk_addr(struct sock *sk, union quic_addr *a, union quic_addr *addr)
> > +{
> > + if (a->v4.sin_port != addr->v4.sin_port)
> > + return false;
> > + if (a->v4.sin_family != addr->v4.sin_family)
> > + return false;
> > + if (a->v4.sin_addr.s_addr == htonl(INADDR_ANY) ||
> > + addr->v4.sin_addr.s_addr == htonl(INADDR_ANY))
> > + return true;
> > + return a->v4.sin_addr.s_addr == addr->v4.sin_addr.s_addr;
> > +}
> > +
> > +static bool quic_v6_cmp_sk_addr(struct sock *sk, union quic_addr *a, union quic_addr *addr)
> > +{
> > + if (a->v4.sin_port != addr->v4.sin_port)
> > + return false;
> > +
> > + if (a->sa.sa_family == AF_INET && addr->sa.sa_family == AF_INET) {
> > + if (a->v4.sin_addr.s_addr == htonl(INADDR_ANY) ||
> > + addr->v4.sin_addr.s_addr == htonl(INADDR_ANY))
> > + return true;
> > + return a->v4.sin_addr.s_addr == addr->v4.sin_addr.s_addr;
> > + }
> > +
> > + if (a->sa.sa_family != addr->sa.sa_family) {
> > + if (ipv6_only_sock(sk))
> > + return false;
> > + if (a->sa.sa_family == AF_INET6 && ipv6_addr_any(&a->v6.sin6_addr))
> > + return true;
> > + if (a->sa.sa_family == AF_INET && addr->sa.sa_family == AF_INET6 &&
>
> The code below assumes that sa_family is either AF_INET or AF_INET6. If
> such assumption holds, you should rely on it here, too, and drop the
> 'addr->sa.sa_family == AF_INET6' condition.
I agree.
>
> > + ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
> > + addr->v6.sin6_addr.s6_addr32[3] == a->v4.sin_addr.s_addr)
> > + return true;
> > + if (addr->sa.sa_family == AF_INET && a->sa.sa_family == AF_INET6 &&
> > + ipv6_addr_v4mapped(&a->v6.sin6_addr) &&
> > + a->v6.sin6_addr.s6_addr32[3] == addr->v4.sin_addr.s_addr)
> > + return true;
>
> Noting this branch does not handle the 'ipv6_addr_any(&addr->v6.sin6_addr)' case.
>
Will add a helper:
static bool quic_v4_match_v6_addr(union quic_addr *a4, union quic_addr *a6)
{
if (ipv6_addr_any(&a6->v6.sin6_addr))
return true;
if (ipv6_addr_v4mapped(&a6->v6.sin6_addr) &&
a6->v6.sin6_addr.s6_addr32[3] == a4->v4.sin_addr.s_addr)
return true;
return false;
}
and change this branch to:
if (a->sa.sa_family == AF_INET)
return quic_v4_match_v6_addr(a, addr);
return quic_v4_match_v6_addr(addr, a);
Thanks.
Powered by blists - more mailing lists