[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cded8ce84b3af0a8120e60aea06a0296eed2037c.1763994509.git.lucien.xin@gmail.com>
Date: Mon, 24 Nov 2025 09:28:28 -0500
From: Xin Long <lucien.xin@...il.com>
To: network dev <netdev@...r.kernel.org>,
quic@...ts.linux.dev
Cc: davem@...emloft.net,
kuba@...nel.org,
Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>,
Stefan Metzmacher <metze@...ba.org>,
Moritz Buhl <mbuhl@...nbsd.org>,
Tyler Fanelli <tfanelli@...hat.com>,
Pengtao He <hepengtao@...omi.com>,
Thomas Dreibholz <dreibh@...ula.no>,
linux-cifs@...r.kernel.org,
Steve French <smfrench@...il.com>,
Namjae Jeon <linkinjeon@...nel.org>,
Paulo Alcantara <pc@...guebit.com>,
Tom Talpey <tom@...pey.com>,
kernel-tls-handshake@...ts.linux.dev,
Chuck Lever <chuck.lever@...cle.com>,
Jeff Layton <jlayton@...nel.org>,
Steve Dickson <steved@...hat.com>,
Hannes Reinecke <hare@...e.de>,
Alexander Aring <aahringo@...hat.com>,
David Howells <dhowells@...hat.com>,
Matthieu Baerts <matttbe@...nel.org>,
John Ericson <mail@...nericson.me>,
Cong Wang <xiyou.wangcong@...il.com>,
"D . Wythe" <alibuda@...ux.alibaba.com>,
Jason Baron <jbaron@...mai.com>,
illiliti <illiliti@...tonmail.com>,
Sabrina Dubroca <sd@...asysnail.net>,
Marcelo Ricardo Leitner <marcelo.leitner@...il.com>,
Daniel Stenberg <daniel@...x.se>,
Andy Gospodarek <andrew.gospodarek@...adcom.com>
Subject: [PATCH net-next v5 15/16] quic: add packet builder base
This patch introduces 'quic_packet' to handle packing of QUIC packets on
the transmit (TX) path.
It provides functionality for frame packing and packet construction. The
packet configuration includes setting the path, calculating overhead,
and verifying routing. Frames are appended to the packet before it is
created with the queued frames.
Once assembled, the packet is encrypted, bundled, and sent out. There
is also support to flush the packet when no additional frames remain.
Functions to create application (short) and handshake (long) packets
are currently placeholders for future implementation.
- quic_packet_config(): Set the path, compute overhead, and verify routing.
- quic_packet_tail(): Append a frame to the packet for transmission.
- quic_packet_create_and_xmit(): Create and send the packet with the queued
frames.
- quic_packet_xmit(): Encrypt, bundle, and send out the packet.
- quic_packet_flush(): Send the packet if there's nothing left to bundle.
Signed-off-by: Xin Long <lucien.xin@...il.com>
---
v3:
- Adjust global connection and listen socket hashtable operations
based on the new hashtable type.
- Introduce quic_packet_backlog_schedule() to enqueue Initial packets
to quic_net.backlog_list and defer their decryption for ALPN demux
to quic_packet_backlog_work() on quic_net.work, since
quic_crypto_initial_keys_install()/crypto_aead_setkey() must run
in process context.
v4:
- Update quic_(listen_)sock_lookup() to support lockless socket
lookup using hlist_nulls_node APIs.
- Use quic_wq for QUIC packet backlog processing work.
v5:
- Rename quic_packet_create() to quic_packet_create_and_xmit()
(suggested by Paolo).
- Move the packet parser base code to a separate patch, keeping only
the packet builder base in this patch (suggested by Paolo).
- Change sent_time timestamp from u32 to u64 to improve accuracy.
---
net/quic/Makefile | 2 +-
net/quic/packet.c | 313 ++++++++++++++++++++++++++++++++++++++++++++++
net/quic/packet.h | 121 ++++++++++++++++++
net/quic/socket.c | 1 +
net/quic/socket.h | 7 ++
5 files changed, 443 insertions(+), 1 deletion(-)
create mode 100644 net/quic/packet.c
create mode 100644 net/quic/packet.h
diff --git a/net/quic/Makefile b/net/quic/Makefile
index 645ee470c95e..4a43052eb441 100644
--- a/net/quic/Makefile
+++ b/net/quic/Makefile
@@ -6,4 +6,4 @@
obj-$(CONFIG_IP_QUIC) += quic.o
quic-y := common.o family.o protocol.o socket.o stream.o connid.o path.o \
- cong.o pnspace.o crypto.o timer.o frame.o
+ cong.o pnspace.o crypto.o timer.o frame.o packet.o
diff --git a/net/quic/packet.c b/net/quic/packet.c
new file mode 100644
index 000000000000..348e760aa197
--- /dev/null
+++ b/net/quic/packet.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* QUIC kernel implementation
+ * (C) Copyright Red Hat Corp. 2023
+ *
+ * This file is part of the QUIC kernel implementation
+ *
+ * Initialization/cleanup for QUIC protocol support.
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@...il.com>
+ */
+
+#include "socket.h"
+
+#define QUIC_HLEN 1
+
+/* Make these fixed for easy coding. */
+#define QUIC_PACKET_NUMBER_LEN QUIC_PN_MAX_LEN
+#define QUIC_PACKET_LENGTH_LEN 4
+
+static struct sk_buff *quic_packet_handshake_create(struct sock *sk)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_frame *frame, *next;
+
+ /* Free all frames for now, and future patches will implement the actual creation logic. */
+ list_for_each_entry_safe(frame, next, &packet->frame_list, list) {
+ list_del(&frame->list);
+ quic_frame_put(frame);
+ }
+ return NULL;
+}
+
+static int quic_packet_number_check(struct sock *sk)
+{
+ return 0;
+}
+
+static struct sk_buff *quic_packet_app_create(struct sock *sk)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_frame *frame, *next;
+
+ /* Free all frames for now, and future patches will implement the actual creation logic. */
+ list_for_each_entry_safe(frame, next, &packet->frame_list, list) {
+ list_del(&frame->list);
+ quic_frame_put(frame);
+ }
+ return NULL;
+}
+
+/* Update the MSS and inform congestion control. */
+void quic_packet_mss_update(struct sock *sk, u32 mss)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_cong *cong = quic_cong(sk);
+
+ packet->mss[0] = (u16)mss;
+ quic_cong_set_mss(cong, packet->mss[0] - packet->taglen[0]);
+}
+
+/* Perform routing for the QUIC packet on the specified path, update header length and MSS
+ * accordingly, reset path and start PMTU timer.
+ */
+int quic_packet_route(struct sock *sk)
+{
+ struct quic_path_group *paths = quic_paths(sk);
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_config *c = quic_config(sk);
+ union quic_addr *sa, *da;
+ u32 pmtu;
+ int err;
+
+ da = quic_path_daddr(paths, packet->path);
+ sa = quic_path_saddr(paths, packet->path);
+ err = quic_flow_route(sk, da, sa, &paths->fl);
+ if (err)
+ return err;
+
+ packet->hlen = quic_encap_len(da);
+ pmtu = min_t(u32, dst_mtu(__sk_dst_get(sk)), QUIC_PATH_MAX_PMTU);
+ quic_packet_mss_update(sk, pmtu - packet->hlen);
+
+ quic_path_pl_reset(paths);
+ quic_timer_reset(sk, QUIC_TIMER_PMTU, c->plpmtud_probe_interval);
+ return 0;
+}
+
+/* Configure the QUIC packet header and routing based on encryption level and path. */
+int quic_packet_config(struct sock *sk, u8 level, u8 path)
+{
+ struct quic_conn_id_set *dest = quic_dest(sk), *source = quic_source(sk);
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_config *c = quic_config(sk);
+ u32 hlen = QUIC_HLEN;
+
+ /* If packet already has data, no need to reconfigure. */
+ if (!quic_packet_empty(packet))
+ return 0;
+
+ packet->ack_eliciting = 0;
+ packet->frame_len = 0;
+ packet->ipfragok = 0;
+ packet->padding = 0;
+ packet->frames = 0;
+ hlen += QUIC_PACKET_NUMBER_LEN; /* Packet number length. */
+ hlen += quic_conn_id_choose(dest, path)->len; /* DCID length. */
+ if (level) {
+ hlen += 1; /* Length byte for DCID. */
+ hlen += 1 + quic_conn_id_active(source)->len; /* Length byte + SCID length. */
+ if (level == QUIC_CRYPTO_INITIAL) /* Include token for Initial packets. */
+ hlen += quic_var_len(quic_token(sk)->len) + quic_token(sk)->len;
+ hlen += QUIC_VERSION_LEN; /* Version length. */
+ hlen += QUIC_PACKET_LENGTH_LEN; /* Packet length field length. */
+ /* Allow fragmentation if PLPMTUD is enabled, as it no longer relies on ICMP
+ * Toobig messages to discover the path MTU.
+ */
+ packet->ipfragok = !!c->plpmtud_probe_interval;
+ }
+ packet->level = level;
+ packet->len = (u16)hlen;
+ packet->overhead = (u8)hlen;
+
+ if (packet->path != path) { /* If the path changed, update and reset routing cache. */
+ packet->path = path;
+ __sk_dst_reset(sk);
+ }
+
+ /* Perform routing and MSS update for the configured packet. */
+ if (quic_packet_route(sk) < 0)
+ return -1;
+ return 0;
+}
+
+static void quic_packet_encrypt_done(struct sk_buff *skb, int err)
+{
+ /* Free it for now, future patches will implement the actual deferred transmission logic. */
+ kfree_skb(skb);
+}
+
+/* Coalescing Packets. */
+static int quic_packet_bundle(struct sock *sk, struct sk_buff *skb)
+{
+ struct quic_skb_cb *head_cb, *cb = QUIC_SKB_CB(skb);
+ struct quic_packet *packet = quic_packet(sk);
+ struct sk_buff *p;
+
+ if (!packet->head) { /* First packet to bundle: initialize the head. */
+ packet->head = skb;
+ cb->last = skb;
+ goto out;
+ }
+
+ /* If bundling would exceed MSS, flush the current bundle. */
+ if (packet->head->len + skb->len >= packet->mss[0]) {
+ quic_packet_flush(sk);
+ packet->head = skb;
+ cb->last = skb;
+ goto out;
+ }
+ /* Bundle it and update metadata for the aggregate skb. */
+ p = packet->head;
+ head_cb = QUIC_SKB_CB(p);
+ if (head_cb->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ head_cb->last->next = skb;
+ p->data_len += skb->len;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+ head_cb->last = skb;
+ head_cb->ecn |= cb->ecn; /* Merge ECN flags. */
+
+out:
+ /* rfc9000#section-12.2:
+ * Packets with a short header (Section 17.3) do not contain a Length field and so
+ * cannot be followed by other packets in the same UDP datagram.
+ *
+ * so Return 1 to flush if it is a Short header packet.
+ */
+ return !cb->level;
+}
+
+/* Transmit a QUIC packet, possibly encrypting and bundling it. */
+int quic_packet_xmit(struct sock *sk, struct sk_buff *skb)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
+ struct net *net = sock_net(sk);
+ int err;
+
+ /* Skip encryption if taglen == 0 (e.g., disable_1rtt_encryption). */
+ if (!packet->taglen[quic_hdr(skb)->form])
+ goto xmit;
+
+ cb->crypto_done = quic_packet_encrypt_done;
+ /* Associate skb with sk to ensure sk is valid during async encryption completion. */
+ WARN_ON(!skb_set_owner_sk_safe(skb, sk));
+ err = quic_crypto_encrypt(quic_crypto(sk, packet->level), skb);
+ if (err) {
+ if (err != -EINPROGRESS) {
+ QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCDROP);
+ kfree_skb(skb);
+ return err;
+ }
+ QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCBACKLOGS);
+ return err;
+ }
+ if (!cb->resume) /* Encryption completes synchronously. */
+ QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCFASTPATHS);
+
+xmit:
+ if (quic_packet_bundle(sk, skb))
+ quic_packet_flush(sk);
+ return 0;
+}
+
+/* Create and transmit a new QUIC packet. */
+int quic_packet_create_and_xmit(struct sock *sk)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ struct sk_buff *skb;
+ int err;
+
+ err = quic_packet_number_check(sk);
+ if (err)
+ goto err;
+
+ if (packet->level)
+ skb = quic_packet_handshake_create(sk);
+ else
+ skb = quic_packet_app_create(sk);
+ if (!skb) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ err = quic_packet_xmit(sk, skb);
+ if (err && err != -EINPROGRESS)
+ goto err;
+
+ /* Return 1 if at least one ACK-eliciting (non-PING) frame was sent. */
+ return !!packet->frames;
+err:
+ pr_debug("%s: err: %d\n", __func__, err);
+ return 0;
+}
+
+/* Flush any coalesced/bundled QUIC packets. */
+void quic_packet_flush(struct sock *sk)
+{
+ struct quic_path_group *paths = quic_paths(sk);
+ struct quic_packet *packet = quic_packet(sk);
+
+ if (packet->head) {
+ quic_lower_xmit(sk, packet->head,
+ quic_path_daddr(paths, packet->path), &paths->fl);
+ packet->head = NULL;
+ }
+}
+
+/* Append a frame to the tail of the current QUIC packet. */
+int quic_packet_tail(struct sock *sk, struct quic_frame *frame)
+{
+ struct quic_packet *packet = quic_packet(sk);
+ u8 taglen;
+
+ /* Reject frame if it doesn't match the packet's encryption level or path, or if
+ * padding is already in place (no further frames should be added).
+ */
+ if (frame->level != (packet->level % QUIC_CRYPTO_EARLY) ||
+ frame->path != packet->path || packet->padding)
+ return 0;
+
+ /* Check if frame would exceed the current datagram MSS (excluding AEAD tag). */
+ taglen = quic_packet_taglen(packet);
+ if (packet->len + frame->len > packet->mss[frame->dgram] - taglen) {
+ /* If some data has already been added to the packet, bail out. */
+ if (packet->len != packet->overhead)
+ return 0;
+ /* Otherwise, allow IP fragmentation for this packet unless it’s a PING probe. */
+ if (!quic_frame_ping(frame->type))
+ packet->ipfragok = 1;
+ }
+ if (frame->padding)
+ packet->padding = frame->padding;
+
+ /* Track frames that require retransmission if lost (i.e., ACK-eliciting and non-PING). */
+ if (frame->ack_eliciting) {
+ packet->ack_eliciting = 1;
+ if (!quic_frame_ping(frame->type)) {
+ packet->frames++;
+ packet->frame_len += frame->len;
+ }
+ }
+
+ list_move_tail(&frame->list, &packet->frame_list);
+ packet->len += frame->len;
+ return frame->len;
+}
+
+void quic_packet_init(struct sock *sk)
+{
+ struct quic_packet *packet = quic_packet(sk);
+
+ INIT_LIST_HEAD(&packet->frame_list);
+ packet->taglen[0] = QUIC_TAG_LEN;
+ packet->taglen[1] = QUIC_TAG_LEN;
+ packet->mss[0] = QUIC_TAG_LEN;
+ packet->mss[1] = QUIC_TAG_LEN;
+
+ packet->version = QUIC_VERSION_V1;
+}
diff --git a/net/quic/packet.h b/net/quic/packet.h
new file mode 100644
index 000000000000..85efeba6199b
--- /dev/null
+++ b/net/quic/packet.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* QUIC kernel implementation
+ * (C) Copyright Red Hat Corp. 2023
+ *
+ * This file is part of the QUIC kernel implementation
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@...il.com>
+ */
+
+struct quic_packet {
+ struct quic_conn_id dcid; /* Dest Connection ID from received packet */
+ struct quic_conn_id scid; /* Source Connection ID from received packet */
+ union quic_addr daddr; /* Dest address from received packet */
+ union quic_addr saddr; /* Source address from received packet */
+
+ struct list_head frame_list; /* List of frames to pack into packet for send */
+ struct sk_buff *head; /* Head skb for packet bundling on send */
+ u16 frame_len; /* Length of all ack-eliciting frames excluding PING */
+ u8 taglen[2]; /* Tag length for short and long packets */
+ u32 version; /* QUIC version used/selected during handshake */
+ u8 errframe; /* Frame type causing packet processing failure */
+ u8 overhead; /* QUIC header length excluding frames */
+ u16 errcode; /* Error code on packet processing failure */
+ u16 frames; /* Number of ack-eliciting frames excluding PING */
+ u16 mss[2]; /* MSS for datagram and non-datagram packets */
+ u16 hlen; /* UDP + IP header length for sending */
+ u16 len; /* QUIC packet length excluding taglen for sending */
+
+ u8 ack_eliciting:1; /* Packet contains ack-eliciting frames to send */
+ u8 ack_requested:1; /* Packet contains ack-eliciting frames received */
+ u8 ack_immediate:1; /* Send ACK immediately (skip ack_delay timer) */
+ u8 non_probing:1; /* Packet has ack-eliciting frames excluding NEW_CONNECTION_ID */
+ u8 has_sack:1; /* Packet has ACK frames received */
+ u8 ipfragok:1; /* Allow IP fragmentation */
+ u8 padding:1; /* Packet has padding frames */
+ u8 path:1; /* Path identifier used to send this packet */
+ u8 level; /* Encryption level used */
+};
+
+struct quic_packet_sent {
+ struct list_head list; /* Link in sent packet list for ACK tracking */
+ u64 sent_time; /* Timestamp when packet was sent */
+ s64 number; /* Packet number */
+ u8 level; /* Packet number space */
+ u8 ecn:2; /* ECN bits */
+
+ u16 frame_len; /* Combined length of all frames held */
+ u16 frames; /* Number of frames held */
+
+ struct quic_frame *frame_array[]; /* Array of pointers to held frames */
+};
+
+#define QUIC_PACKET_INITIAL_V1 0
+#define QUIC_PACKET_0RTT_V1 1
+#define QUIC_PACKET_HANDSHAKE_V1 2
+#define QUIC_PACKET_RETRY_V1 3
+
+#define QUIC_PACKET_INITIAL_V2 1
+#define QUIC_PACKET_0RTT_V2 2
+#define QUIC_PACKET_HANDSHAKE_V2 3
+#define QUIC_PACKET_RETRY_V2 0
+
+#define QUIC_PACKET_INITIAL QUIC_PACKET_INITIAL_V1
+#define QUIC_PACKET_0RTT QUIC_PACKET_0RTT_V1
+#define QUIC_PACKET_HANDSHAKE QUIC_PACKET_HANDSHAKE_V1
+#define QUIC_PACKET_RETRY QUIC_PACKET_RETRY_V1
+
+#define QUIC_VERSION_LEN 4
+
+static inline u8 quic_packet_taglen(struct quic_packet *packet)
+{
+ return packet->taglen[!!packet->level];
+}
+
+static inline void quic_packet_set_taglen(struct quic_packet *packet, u8 taglen)
+{
+ packet->taglen[0] = taglen;
+}
+
+static inline u32 quic_packet_mss(struct quic_packet *packet)
+{
+ return packet->mss[0] - packet->taglen[!!packet->level];
+}
+
+static inline u32 quic_packet_max_payload(struct quic_packet *packet)
+{
+ return packet->mss[0] - packet->overhead - packet->taglen[!!packet->level];
+}
+
+static inline u32 quic_packet_max_payload_dgram(struct quic_packet *packet)
+{
+ return packet->mss[1] - packet->overhead - packet->taglen[!!packet->level];
+}
+
+static inline int quic_packet_empty(struct quic_packet *packet)
+{
+ return list_empty(&packet->frame_list);
+}
+
+static inline void quic_packet_reset(struct quic_packet *packet)
+{
+ packet->level = 0;
+ packet->errcode = 0;
+ packet->errframe = 0;
+ packet->has_sack = 0;
+ packet->non_probing = 0;
+ packet->ack_requested = 0;
+ packet->ack_immediate = 0;
+}
+
+int quic_packet_tail(struct sock *sk, struct quic_frame *frame);
+int quic_packet_config(struct sock *sk, u8 level, u8 path);
+
+int quic_packet_xmit(struct sock *sk, struct sk_buff *skb);
+int quic_packet_create_and_xmit(struct sock *sk);
+int quic_packet_route(struct sock *sk);
+
+void quic_packet_mss_update(struct sock *sk, u32 mss);
+void quic_packet_flush(struct sock *sk);
+void quic_packet_init(struct sock *sk);
diff --git a/net/quic/socket.c b/net/quic/socket.c
index 0a6e59f85d32..f3a2b11fb251 100644
--- a/net/quic/socket.c
+++ b/net/quic/socket.c
@@ -48,6 +48,7 @@ static int quic_init_sock(struct sock *sk)
quic_cong_init(quic_cong(sk));
quic_timer_init(sk);
+ quic_packet_init(sk);
if (quic_stream_init(quic_streams(sk)))
return -ENOMEM;
diff --git a/net/quic/socket.h b/net/quic/socket.h
index eeb510e0a09b..a463b80a76fc 100644
--- a/net/quic/socket.h
+++ b/net/quic/socket.h
@@ -20,6 +20,7 @@
#include "path.h"
#include "cong.h"
+#include "packet.h"
#include "frame.h"
#include "protocol.h"
@@ -77,6 +78,7 @@ struct quic_sock {
struct quic_pnspace space[QUIC_PNSPACE_MAX];
struct quic_crypto crypto[QUIC_CRYPTO_MAX];
+ struct quic_packet packet;
struct quic_timer timers[QUIC_TIMER_MAX];
};
@@ -155,6 +157,11 @@ static inline struct quic_crypto *quic_crypto(const struct sock *sk, u8 level)
return &quic_sk(sk)->crypto[level];
}
+static inline struct quic_packet *quic_packet(const struct sock *sk)
+{
+ return &quic_sk(sk)->packet;
+}
+
static inline void *quic_timer(const struct sock *sk, u8 type)
{
return (void *)&quic_sk(sk)->timers[type];
--
2.47.1
Powered by blists - more mailing lists