[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240607054041.2032352-13-chopps@chopps.org>
Date: Fri, 7 Jun 2024 01:40:36 -0400
From: Christian Hopps <chopps@...pps.org>
To: devel@...ux-ipsec.org
Cc: Steffen Klassert <steffen.klassert@...unet.com>,
netdev@...r.kernel.org,
Christian Hopps <chopps@...pps.org>,
Christian Hopps <chopps@...n.net>
Subject: [PATCH ipsec-next v3 12/17] xfrm: iptfs: add basic receive packet (tunnel egress) handling
From: Christian Hopps <chopps@...n.net>
Add handling of packets received from the tunnel. This implements
tunnel egress functionality.
Signed-off-by: Christian Hopps <chopps@...n.net>
---
net/xfrm/xfrm_iptfs.c | 283 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 283 insertions(+)
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 73b8d51af3c8..ad7aa4f16e8e 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -19,6 +19,10 @@
#include "xfrm_inout.h"
+/* IPTFS encap (header) values. */
+#define IPTFS_SUBTYPE_BASIC 0
+#define IPTFS_SUBTYPE_CC 1
+
/* 1) skb->head should be cache aligned.
* 2) when resv is for L2 headers (i.e., ethernet) we want the cacheline to
* start -16 from data.
@@ -56,6 +60,17 @@ struct xfrm_iptfs_data {
static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu);
static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me);
+/* ================= */
+/* Utility Functions */
+/* ================= */
+
+static u64 __esp_seq(struct sk_buff *skb)
+{
+ u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low);
+
+ return seq | (u64)ntohl(XFRM_SKB_CB(skb)->seq.input.hi) << 32;
+}
+
/* ================= */
/* SK_BUFF Functions */
/* ================= */
@@ -165,6 +180,273 @@ static int skb_copy_bits_seq(struct skb_seq_state *st, int offset, void *to,
}
}
+/* ================================== */
+/* IPTFS Receiving (egress) Functions */
+/* ================================== */
+
+/**
+ * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
+ * @skblen: the total data size for `skb`.
+ * @st: The source for the rest of the data to copy into `skb`.
+ * @off: The offset into @st to copy data from.
+ * @len: The length of data to copy from @st into `skb`. This must be <=
+ * @skblen.
+ *
+ * Create a new sk_buff `skb` with @skblen of packet data space. If non-zero,
+ * copy @rlen bytes of @runt into `skb`. Then using seq functions copy @len
+ * bytes from @st into `skb` starting from @off.
+ *
+ * It is an error for @len to be greater than the amount of data left in @st.
+ *
+ * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
+ */
+static struct sk_buff *
+iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len)
+{
+ struct sk_buff *skb = iptfs_alloc_skb(st->root_skb, skblen, false);
+
+ if (!skb)
+ return NULL;
+ if (skb_copy_bits_seq(st, off, skb_put(skb, len), len)) {
+ XFRM_INC_STATS(dev_net(st->root_skb->dev),
+ LINUX_MIB_XFRMINERROR);
+ kfree_skb(skb);
+ return NULL;
+ }
+ return skb;
+}
+
+/**
+ * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv.
+ * @x: xfrm state
+ * @skb: the inner packet
+ *
+ * Finish the standard xfrm processing on the inner packet prior to sending back
+ * through gro_cells_receive. We do this separately b/c we are building a list
+ * of packets in the hopes that one day a list will be taken by
+ * xfrm_input.
+ */
+static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb)
+{
+ skb_reset_network_header(skb);
+
+ /* The packet is going back through gro_cells_receive no need to
+ * set this.
+ */
+ skb_reset_transport_header(skb);
+
+ /* Packet already has checksum value set. */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Our skb will contain the header data copied when this outer packet
+ * which contained the start of this inner packet. This is true
+ * when we allocate a new skb as well as when we reuse the existing skb.
+ */
+ if (ip_hdr(skb)->version == 0x4) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP_ECN_set_ce(iph);
+
+ skb->protocol = htons(ETH_P_IP);
+ } else {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP6_ECN_set_ce(skb, iph);
+
+ skb->protocol = htons(ETH_P_IPV6);
+ }
+}
+
+/**
+ * iptfs_input() - handle receipt of iptfs payload
+ * @x: xfrm state
+ * @skb: the packet
+ *
+ * Process the IPTFS payload in `skb` and consume it afterwards.
+ */
+static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ u8 hbytes[sizeof(struct ipv6hdr)];
+ struct ip_iptfs_cc_hdr iptcch;
+ struct skb_seq_state skbseq;
+ struct list_head sublist; /* rename this it's just a list */
+ struct sk_buff *first_skb, *next;
+ const unsigned char *old_mac;
+ struct xfrm_iptfs_data *xtfs;
+ struct ip_iptfs_hdr *ipth;
+ struct iphdr *iph;
+ struct net *net;
+ u32 remaining, iplen, iphlen, data, tail;
+ u32 blkoff;
+ u64 seq;
+
+ xtfs = x->mode_data;
+ net = dev_net(skb->dev);
+ first_skb = NULL;
+
+ seq = __esp_seq(skb);
+
+ /* Large enough to hold both types of header */
+ ipth = (struct ip_iptfs_hdr *)&iptcch;
+
+ /* Save the old mac header if set */
+ old_mac = skb_mac_header_was_set(skb) ? skb_mac_header(skb) : NULL;
+
+ skb_prepare_seq_read(skb, 0, skb->len, &skbseq);
+
+ /* Get the IPTFS header and validate it */
+
+ if (skb_copy_bits_seq(&skbseq, 0, ipth, sizeof(*ipth))) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+ data = sizeof(*ipth);
+
+ /* Set data past the basic header */
+ if (ipth->subtype == IPTFS_SUBTYPE_CC) {
+ /* Copy the rest of the CC header */
+ remaining = sizeof(iptcch) - sizeof(*ipth);
+ if (skb_copy_bits_seq(&skbseq, data, ipth + 1, remaining)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+ data += remaining;
+ } else if (ipth->subtype != IPTFS_SUBTYPE_BASIC) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
+
+ if (ipth->flags != 0) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
+
+ INIT_LIST_HEAD(&sublist);
+
+ /* Fragment handling in following commits */
+ blkoff = ntohs(ipth->block_offset);
+ data += blkoff;
+
+ /* New packets */
+ tail = skb->len;
+ while (data < tail) {
+ __be16 protocol = 0;
+
+ /* Gather information on the next data block.
+ * `data` points to the start of the data block.
+ */
+ remaining = tail - data;
+
+ /* try and copy enough bytes to read length from ipv4/ipv6 */
+ iphlen = min_t(u32, remaining, 6);
+ if (skb_copy_bits_seq(&skbseq, data, hbytes, iphlen)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+
+ iph = (struct iphdr *)hbytes;
+ if (iph->version == 0x4) {
+ /* must have at least tot_len field present */
+ if (remaining < 4)
+ break;
+
+ iplen = be16_to_cpu(iph->tot_len);
+ iphlen = iph->ihl << 2;
+ protocol = cpu_to_be16(ETH_P_IP);
+ XFRM_MODE_SKB_CB(skbseq.root_skb)->tos = iph->tos;
+ } else if (iph->version == 0x6) {
+ /* must have at least payload_len field present */
+ if (remaining < 6)
+ break;
+
+ iplen = be16_to_cpu(
+ ((struct ipv6hdr *)hbytes)->payload_len);
+ iplen += sizeof(struct ipv6hdr);
+ iphlen = sizeof(struct ipv6hdr);
+ protocol = cpu_to_be16(ETH_P_IPV6);
+ XFRM_MODE_SKB_CB(skbseq.root_skb)->tos =
+ ipv6_get_dsfield((struct ipv6hdr *)iph);
+ } else if (iph->version == 0x0) {
+ /* pad */
+ break;
+ } else {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+
+ if (unlikely(skbseq.stepped_offset)) {
+ /* We need to reset our seq read, it can't backup at
+ * this point.
+ */
+ struct sk_buff *save = skbseq.root_skb;
+
+ skb_abort_seq_read(&skbseq);
+ skb_prepare_seq_read(save, data, tail, &skbseq);
+ }
+
+ if (!first_skb)
+ first_skb = skb;
+
+ /* Fragment handling in following commits */
+ if (iplen > remaining)
+ break;
+
+ skb = iptfs_pskb_extract_seq(iplen, &skbseq, data, iplen);
+ if (!skb) {
+ /* skip to next packet or done */
+ data += iplen;
+ continue;
+ }
+
+ skb->protocol = protocol;
+ if (old_mac) {
+ /* rebuild the mac header */
+ skb_set_mac_header(skb, -first_skb->mac_len);
+ memcpy(skb_mac_header(skb), old_mac,
+ first_skb->mac_len);
+ eth_hdr(skb)->h_proto = skb->protocol;
+ }
+
+ data += iplen;
+ iptfs_complete_inner_skb(x, skb);
+ list_add_tail(&skb->list, &sublist);
+ }
+
+ /* Send the packets! */
+ list_for_each_entry_safe(skb, next, &sublist, list) {
+ skb_list_del_init(skb);
+ if (xfrm_input(skb, 0, 0, -3))
+ kfree_skb(skb);
+ }
+
+done:
+ skb = skbseq.root_skb;
+ skb_abort_seq_read(&skbseq);
+
+ if (first_skb) {
+ consume_skb(first_skb);
+ } else {
+ /* skb is the original passed in skb, but we didn't get far
+ * enough to process it as the first_skb.
+ */
+ kfree_skb(skb);
+ }
+
+ /* We always have dealt with the input SKB, either we are re-using it,
+ * or we have freed it. Return EINPROGRESS so that xfrm_input stops
+ * processing it.
+ */
+ return -EINPROGRESS;
+}
+
/* ================================= */
/* IPTFS Sending (ingress) Functions */
/* ================================= */
@@ -1132,6 +1414,7 @@ static const struct xfrm_mode_cbs iptfs_mode_cbs = {
.sa_len = iptfs_sa_len,
.clone = iptfs_clone,
.get_inner_mtu = iptfs_get_inner_mtu,
+ .input = iptfs_input,
.output = iptfs_output_collect,
.prepare_output = iptfs_prepare_output,
};
--
2.45.2
Powered by blists - more mailing lists