[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250306230203.1550314-13-nikolay@enfabrica.net>
Date: Fri, 7 Mar 2025 01:02:02 +0200
From: Nikolay Aleksandrov <nikolay@...abrica.net>
To: netdev@...r.kernel.org
Cc: shrijeet@...abrica.net,
alex.badea@...sight.com,
eric.davis@...adcom.com,
rip.sohan@....com,
dsahern@...nel.org,
bmt@...ich.ibm.com,
roland@...abrica.net,
nikolay@...abrica.net,
winston.liu@...sight.com,
dan.mihailescu@...sight.com,
kheib@...hat.com,
parth.v.parikh@...sight.com,
davem@...hat.com,
ian.ziemba@....com,
andrew.tauferner@...nelisnetworks.com,
welch@....com,
rakhahari.bhunia@...sight.com,
kingshuk.mandal@...sight.com,
linux-rdma@...r.kernel.org,
kuba@...nel.org,
pabeni@...hat.com
Subject: [RFC PATCH 12/13] drivers: ultraeth: add initiator and target idle timeout support
Add control packet header structure and a helper that builds a control
packet and transmits it. Currently it supports only CLOSE types;
use it to implement initiator and target timeout support by using the close
state machine. Upon initiator timeout we move to either ACK_WAIT (if
pending acks) or CLOSE_ACK_WAIT state, in the latter case we also send a
control message with CLOSE type. Upon target timeout we issue a REQ_CLOSE
control message and if it isn't answered in the timeout period we send a
NACK CLOSING_IN_ERR and close the PDC.
Signed-off-by: Nikolay Aleksandrov <nikolay@...abrica.net>
Signed-off-by: Alex Badea <alex.badea@...sight.com>
---
drivers/ultraeth/uet_pdc.c | 241 ++++++++++++++++++++++++++++++---
drivers/ultraeth/uet_pds.c | 40 +++++-
include/net/ultraeth/uet_pdc.h | 12 +-
include/net/ultraeth/uet_pds.h | 5 +
include/uapi/linux/ultraeth.h | 19 +++
5 files changed, 293 insertions(+), 24 deletions(-)
diff --git a/drivers/ultraeth/uet_pdc.c b/drivers/ultraeth/uet_pdc.c
index 4f19bc68b570..5967095867dc 100644
--- a/drivers/ultraeth/uet_pdc.c
+++ b/drivers/ultraeth/uet_pdc.c
@@ -21,6 +21,14 @@ struct metadata_dst *uet_pdc_dst(const struct uet_pdc_key *key, __be16 dport,
return mdst;
}
+void uet_pdc_rx_refresh(struct uet_pdc *pdc)
+{
+ unsigned long rx_jiffies = jiffies;
+
+ if (rx_jiffies != READ_ONCE(pdc->rx_last_jiffies))
+ WRITE_ONCE(pdc->rx_last_jiffies, rx_jiffies);
+}
+
static void uet_pdc_xmit(struct uet_pdc *pdc, struct sk_buff *skb)
{
skb->dev = pds_netdev(pdc->pds);
@@ -97,10 +105,19 @@ static void uet_pdc_rtx_timer_expired(struct timer_list *t)
continue;
}
if (UET_SKB_CB(skb)->rtx_attempts == UET_PDC_RTX_DEFAULT_MAX) {
+ struct uet_prologue_hdr *prologue;
+
/* XXX: close connection, count drops etc */
- netdev_dbg(pds_netdev(pdc->pds), "%s: psn: %u too many rtx attempts: %u\n",
+ prologue = (struct uet_prologue_hdr *)skb->data;
+ netdev_dbg(pds_netdev(pdc->pds), "%s: psn: %u type: %u too many rtx attempts: %u\n",
__func__, UET_SKB_CB(skb)->psn,
+ uet_prologue_type(prologue),
UET_SKB_CB(skb)->rtx_attempts);
+ if (uet_prologue_type(prologue) == UET_PDS_TYPE_CTRL_MSG &&
+ uet_prologue_ctl_type(prologue) == UET_CTL_TYPE_CLOSE) {
+ uet_pdc_destroy(pdc);
+ goto out_unlock;
+ }
/* if dropping the oldest packet move window */
if (UET_SKB_CB(skb)->psn == pdc->tx_base_psn)
uet_pdc_mpr_advance_tx(pdc, 1);
@@ -114,6 +131,7 @@ static void uet_pdc_rtx_timer_expired(struct timer_list *t)
mod_timer(&pdc->rtx_timer, jiffies +
nsecs_to_jiffies(smallest_diff));
+out_unlock:
spin_unlock(&pdc->lock);
}
@@ -228,6 +246,154 @@ static int uet_pdc_rtx_queue(struct uet_pdc *pdc, struct sk_buff *skb, u32 psn)
return 0;
}
+static s64 uet_pdc_get_psn(struct uet_pdc *pdc)
+{
+ unsigned long fzb = find_first_zero_bit(pdc->tx_bitmap, UET_PDC_MPR);
+
+ if (unlikely(fzb == UET_PDC_MPR))
+ return -1;
+
+ set_bit(fzb, pdc->tx_bitmap);
+
+ return pdc->tx_base_psn + fzb;
+}
+
+static void uet_pdc_put_psn(struct uet_pdc *pdc, u32 psn)
+{
+ unsigned long psn_bit = psn - pdc->tx_base_psn;
+
+ clear_bit(psn_bit, pdc->tx_bitmap);
+}
+
+static int uet_pdc_tx_ctl(struct uet_pdc *pdc, u8 ctl_type, u8 flags,
+ __be32 psn, __be32 payload)
+{
+ struct uet_pds_ctl_hdr *ctl;
+ struct sk_buff *skb;
+ int ret;
+
+ /* both CLOSE types need to be retransmitted and need a new PSN */
+ switch (ctl_type) {
+ case UET_CTL_TYPE_CLOSE:
+ case UET_CTL_TYPE_REQ_CLOSE:
+ /* payload & psn must be 0 */
+ if (payload || psn)
+ return -EINVAL;
+ /* AR must be set */
+ flags |= UET_PDS_CTL_FLAG_AR;
+ break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ skb = alloc_skb(sizeof(struct uet_pds_ctl_hdr), GFP_ATOMIC);
+ if (!skb)
+ return -ENOBUFS;
+ ctl = skb_put(skb, sizeof(*ctl));
+ uet_pdc_build_prologue(&ctl->prologue, UET_PDS_TYPE_CTRL_MSG,
+ ctl_type, flags);
+ if (!psn) {
+ s64 psn_new = uet_pdc_get_psn(pdc);
+
+ if (psn_new == -1) {
+ kfree_skb(skb);
+ return -ENOSPC;
+ }
+ psn = cpu_to_be32(psn_new);
+ }
+ ctl->psn = psn;
+ ctl->spdcid = cpu_to_be16(pdc->spdcid);
+ ctl->dpdcid_pdc_info_offset = cpu_to_be16(pdc->dpdcid);
+ ctl->payload = payload;
+
+ ret = uet_pdc_rtx_queue(pdc, skb, be32_to_cpu(psn));
+ if (ret) {
+ uet_pdc_put_psn(pdc, be32_to_cpu(psn));
+ kfree_skb(skb);
+ return ret;
+ }
+ uet_pdc_xmit(pdc, skb);
+
+ return 0;
+}
+
+static void uet_pdc_close(struct uet_pdc *pdc)
+{
+ u8 state;
+ int ret;
+
+ /* we have already transmitted the close control packet */
+ if (pdc->state > UET_PDC_EP_STATE_ACK_WAIT)
+ return;
+
+ if (!RB_EMPTY_ROOT(&pdc->rtx_queue)) {
+ if (pdc->state == UET_PDC_EP_STATE_ACK_WAIT)
+ return;
+ state = UET_PDC_EP_STATE_ACK_WAIT;
+ } else {
+ u8 ctl_type, ctl_flags = 0;
+
+ if (pdc->is_initiator) {
+ ctl_type = UET_CTL_TYPE_CLOSE;
+ state = UET_PDC_EP_STATE_CLOSE_ACK_WAIT;
+ ctl_flags = UET_PDS_CTL_FLAG_AR;
+ } else {
+ ctl_type = UET_CTL_TYPE_REQ_CLOSE;
+ state = UET_PDC_EP_STATE_CLOSE_WAIT;
+ }
+ ret = uet_pdc_tx_ctl(pdc, ctl_type, ctl_flags, 0, 0);
+ if (ret)
+ return;
+ }
+
+ pdc->state = state;
+}
+
+static void uet_pdc_timeout_timer_expired(struct timer_list *t)
+{
+ struct uet_pdc *pdc = from_timer(pdc, t, timeout_timer);
+ unsigned long now = jiffies, last_rx;
+ bool rearm_timer = true;
+
+ last_rx = READ_ONCE(pdc->rx_last_jiffies);
+ if (time_after_eq(last_rx, now) ||
+ time_after_eq(last_rx + UET_PDC_IDLE_TIMEOUT_JIFFIES, now))
+ goto rearm_timeout;
+ spin_lock(&pdc->lock);
+ switch (pdc->state) {
+ case UET_PDC_EP_STATE_ACK_WAIT:
+ uet_pdc_close(pdc);
+ fallthrough;
+ case UET_PDC_EP_STATE_CLOSE_WAIT:
+ case UET_PDC_EP_STATE_CLOSE_ACK_WAIT:
+ /* we waited too long for the last acks */
+ if (time_before_eq(last_rx + (UET_PDC_IDLE_TIMEOUT_JIFFIES * 2),
+ now)) {
+ if (!pdc->is_initiator)
+ uet_pds_send_nack(pdc->pds, &pdc->key,
+ pdc->metadata->u.tun_info.key.tp_dst,
+ 0,
+ cpu_to_be16(pdc->spdcid),
+ cpu_to_be16(pdc->dpdcid),
+ UET_PDS_NACK_CLOSING_IN_ERR,
+ cpu_to_be32(pdc->rx_base_psn + 1),
+ 0);
+ uet_pdc_destroy(pdc);
+ rearm_timer = false;
+ }
+ break;
+ default:
+ uet_pdc_close(pdc);
+ break;
+ }
+ spin_unlock(&pdc->lock);
+rearm_timeout:
+ if (rearm_timer)
+ mod_timer(&pdc->timeout_timer,
+ now + UET_PDC_IDLE_TIMEOUT_JIFFIES);
+}
+
/* use the approach as nf nat, try a few rounds starting at random offset */
static bool uet_pdc_id_get(struct uet_pdc *pdc)
{
@@ -301,6 +467,7 @@ struct uet_pdc *uet_pdc_create(struct uet_pds *pds, u32 rx_base_psn, u8 state,
if (!pdc->ack_bitmap)
goto err_ack_bitmap;
timer_setup(&pdc->rtx_timer, uet_pdc_rtx_timer_expired, 0);
+ timer_setup(&pdc->timeout_timer, uet_pdc_timeout_timer_expired, 0);
pdc->metadata = uet_pdc_dst(key, dport, tos);
if (!pdc->metadata)
goto err_tun_dst;
@@ -331,6 +498,9 @@ struct uet_pdc *uet_pdc_create(struct uet_pds *pds, u32 rx_base_psn, u8 state,
}
out:
+ mod_timer(&pdc->timeout_timer,
+ jiffies + UET_PDC_IDLE_TIMEOUT_JIFFIES);
+
return pdc_ins;
err_ep_insert:
@@ -351,6 +521,7 @@ struct uet_pdc *uet_pdc_create(struct uet_pds *pds, u32 rx_base_psn, u8 state,
void uet_pdc_free(struct uet_pdc *pdc)
{
+ timer_delete_sync(&pdc->timeout_timer);
timer_delete_sync(&pdc->rtx_timer);
uet_pdc_rtx_purge(pdc);
dst_release(&pdc->metadata->dst);
@@ -367,25 +538,6 @@ void uet_pdc_destroy(struct uet_pdc *pdc)
uet_pds_pdc_gc_queue(pdc);
}
-static s64 uet_pdc_get_psn(struct uet_pdc *pdc)
-{
- unsigned long fzb = find_first_zero_bit(pdc->tx_bitmap, UET_PDC_MPR);
-
- if (unlikely(fzb == UET_PDC_MPR))
- return -1;
-
- set_bit(fzb, pdc->tx_bitmap);
-
- return pdc->tx_base_psn + fzb;
-}
-
-static void uet_pdc_put_psn(struct uet_pdc *pdc, u32 psn)
-{
- unsigned long psn_bit = psn - pdc->tx_base_psn;
-
- clear_bit(psn_bit, pdc->tx_bitmap);
-}
-
static int uet_pdc_build_req(struct uet_pdc *pdc,
struct sk_buff *skb, u8 type, u8 flags)
{
@@ -685,8 +837,17 @@ int uet_pdc_rx_ack(struct uet_pdc *pdc, struct sk_buff *skb,
remote_fep_addr);
break;
case UET_PDC_EP_STATE_ACK_WAIT:
+ ret = uet_job_fep_queue_skb(pds_context(pdc->pds),
+ uet_ses_rsp_job_id(ses_rsp), skb,
+ remote_fep_addr);
+ if (!RB_EMPTY_ROOT(&pdc->rtx_queue) || ret < 0)
+ break;
+ uet_pdc_close(pdc);
+ ret = 1;
break;
case UET_PDC_EP_STATE_CLOSE_ACK_WAIT:
+ uet_pdc_destroy(pdc);
+ ret = 0;
break;
}
@@ -919,3 +1080,43 @@ void uet_pdc_rx_nack(struct uet_pdc *pdc, struct sk_buff *skb)
out:
spin_unlock(&pdc->lock);
}
+
+int uet_pdc_rx_ctl(struct uet_pdc *pdc, struct sk_buff *skb,
+ __be32 remote_fep_addr)
+{
+ struct uet_pds_ctl_hdr *ctl = pds_ctl_hdr(skb);
+ u32 ctl_psn = be32_to_cpu(ctl->psn);
+ int ret = -EINVAL;
+
+ spin_lock(&pdc->lock);
+ netdev_dbg(pds_netdev(pdc->pds), "%s: CTRL pdc: [ spdcid: %u dpdcid: %u rx_base_psn %u ] "
+ "ctrl header: [ ctl_type: %u psn: %u ]\n",
+ __func__, pdc->spdcid, pdc->dpdcid, pdc->rx_base_psn,
+ uet_prologue_ctl_type(&ctl->prologue), ctl_psn);
+ if (psn_mpr_pos(pdc->rx_base_psn, ctl_psn) != UET_PDC_MPR_CUR)
+ goto out;
+ switch (uet_prologue_ctl_type(&ctl->prologue)) {
+ case UET_CTL_TYPE_CLOSE:
+ /* only the initiator can send CLOSE */
+ if (pdc->is_initiator)
+ break;
+ ret = 0;
+ uet_pdc_send_ses_ack(pdc, UET_SES_RSP_RC_NULL, 0,
+ be32_to_cpu(ctl->psn),
+ 0, true);
+ uet_pdc_destroy(pdc);
+ break;
+ case UET_CTL_TYPE_REQ_CLOSE:
+ /* only the target can send REQ_CLOSE */
+ if (!pdc->is_initiator)
+ break;
+ uet_pdc_close(pdc);
+ break;
+ default:
+ break;
+ }
+out:
+ spin_unlock(&pdc->lock);
+
+ return ret;
+}
diff --git a/drivers/ultraeth/uet_pds.c b/drivers/ultraeth/uet_pds.c
index c144b6df8327..9ab0a088b308 100644
--- a/drivers/ultraeth/uet_pds.c
+++ b/drivers/ultraeth/uet_pds.c
@@ -195,13 +195,18 @@ static int uet_pds_rx_ack(struct uet_pds *pds, struct sk_buff *skb,
struct uet_pds_req_hdr *pds_req = pds_req_hdr(skb);
u16 pdcid = be16_to_cpu(pds_req->dpdcid);
struct uet_pdc *pdc;
+ int ret;
pdc = rhashtable_lookup_fast(&pds->pdcid_hash, &pdcid,
uet_pds_pdcid_rht_params);
if (!pdc)
return -ENOENT;
- return uet_pdc_rx_ack(pdc, skb, remote_fep_addr);
+ ret = uet_pdc_rx_ack(pdc, skb, remote_fep_addr);
+ if (ret >= 0)
+ uet_pdc_rx_refresh(pdc);
+
+ return ret;
}
static void uet_pds_rx_nack(struct uet_pds *pds, struct sk_buff *skb)
@@ -218,6 +223,26 @@ static void uet_pds_rx_nack(struct uet_pds *pds, struct sk_buff *skb)
uet_pdc_rx_nack(pdc, skb);
}
+static int uet_pds_rx_ctl(struct uet_pds *pds, struct sk_buff *skb,
+ __be32 remote_fep_addr)
+{
+ struct uet_pds_ctl_hdr *ctl = pds_ctl_hdr(skb);
+ u16 pdcid = be16_to_cpu(ctl->dpdcid_pdc_info_offset);
+ struct uet_pdc *pdc;
+ int ret;
+
+ pdc = rhashtable_lookup_fast(&pds->pdcid_hash, &pdcid,
+ uet_pds_pdcid_rht_params);
+ if (!pdc)
+ return -ENOENT;
+
+ ret = uet_pdc_rx_ctl(pdc, skb, remote_fep_addr);
+ if (ret >= 0)
+ uet_pdc_rx_refresh(pdc);
+
+ return ret;
+}
+
static struct uet_pdc *uet_pds_new_pdc_rx(struct uet_pds *pds,
struct sk_buff *skb,
__be16 dport, u32 ack_gen_trigger,
@@ -245,6 +270,7 @@ static int uet_pds_rx_req(struct uet_pds *pds, struct sk_buff *skb,
struct uet_pdc_key key = {};
struct uet_fep *fep;
struct uet_pdc *pdc;
+ int ret;
key.src_ip = local_fep_addr;
key.dst_ip = remote_fep_addr;
@@ -303,7 +329,11 @@ static int uet_pds_rx_req(struct uet_pds *pds, struct sk_buff *skb,
return PTR_ERR(pdc);
}
- return uet_pdc_rx_req(pdc, skb, remote_fep_addr, tos);
+ ret = uet_pdc_rx_req(pdc, skb, remote_fep_addr, tos);
+ if (ret >= 0)
+ uet_pdc_rx_refresh(pdc);
+
+ return ret;
}
static bool uet_pds_rx_valid_req_next_hdr(const struct uet_prologue_hdr *prologue)
@@ -368,6 +398,12 @@ int uet_pds_rx(struct uet_pds *pds, struct sk_buff *skb, __be32 local_fep_addr,
ret = uet_pds_rx_req(pds, skb, local_fep_addr, remote_fep_addr,
dport, tos);
break;
+ case UET_PDS_TYPE_CTRL_MSG:
+ offset += sizeof(struct uet_pds_ctl_hdr);
+ if (!pskb_may_pull(skb, offset))
+ break;
+ ret = uet_pds_rx_ctl(pds, skb, remote_fep_addr);
+ break;
case UET_PDS_TYPE_NACK:
if (uet_prologue_next_hdr(prologue) != UET_PDS_NEXT_HDR_NONE)
break;
diff --git a/include/net/ultraeth/uet_pdc.h b/include/net/ultraeth/uet_pdc.h
index 60aecc15d0f1..02d2d5716c48 100644
--- a/include/net/ultraeth/uet_pdc.h
+++ b/include/net/ultraeth/uet_pdc.h
@@ -22,6 +22,8 @@
#define UET_PDC_MPR 128
#define UET_PDC_SACK_BITS 64
#define UET_PDC_SACK_MASK (U64_MAX << 3)
+#define UET_PDC_IDLE_TIMEOUT_SEC 60
+#define UET_PDC_IDLE_TIMEOUT_JIFFIES (UET_PDC_IDLE_TIMEOUT_SEC * HZ)
#define UET_SKB_CB(skb) ((struct uet_skb_cb *)&((skb)->cb[0]))
@@ -38,7 +40,8 @@ enum {
UET_PDC_EP_STATE_ESTABLISHED,
UET_PDC_EP_STATE_QUIESCE,
UET_PDC_EP_STATE_ACK_WAIT,
- UET_PDC_EP_STATE_CLOSE_ACK_WAIT
+ UET_PDC_EP_STATE_CLOSE_ACK_WAIT,
+ UET_PDC_EP_STATE_CLOSE_WAIT
};
struct uet_pdc_key {
@@ -88,7 +91,7 @@ struct uet_pdc {
int rtx_max;
struct timer_list rtx_timer;
unsigned long rtx_timeout;
-
+ unsigned long rx_last_jiffies;
unsigned long *rx_bitmap;
unsigned long *tx_bitmap;
unsigned long *ack_bitmap;
@@ -102,6 +105,8 @@ struct uet_pdc {
u32 ack_gen_min_pkt_add;
u32 ack_gen_count;
+ struct timer_list timeout_timer;
+
struct rb_root rtx_queue;
struct hlist_node gc_node;
@@ -121,8 +126,11 @@ int uet_pdc_rx_ack(struct uet_pdc *pdc, struct sk_buff *skb,
__be32 remote_fep_addr);
int uet_pdc_tx_req(struct uet_pdc *pdc, struct sk_buff *skb, u8 type);
void uet_pdc_rx_nack(struct uet_pdc *pdc, struct sk_buff *skb);
+int uet_pdc_rx_ctl(struct uet_pdc *pdc, struct sk_buff *skb,
+ __be32 remote_fep_addr);
struct metadata_dst *uet_pdc_dst(const struct uet_pdc_key *key, __be16 dport,
u8 tos);
+void uet_pdc_rx_refresh(struct uet_pdc *pdc);
static inline void uet_pdc_build_prologue(struct uet_prologue_hdr *prologue,
u8 type, u8 next, u8 flags)
diff --git a/include/net/ultraeth/uet_pds.h b/include/net/ultraeth/uet_pds.h
index 4e9794a4d3de..fc2414cc2de8 100644
--- a/include/net/ultraeth/uet_pds.h
+++ b/include/net/ultraeth/uet_pds.h
@@ -73,6 +73,11 @@ static inline struct uet_pds_ack_ext_hdr *pds_ack_ext_hdr(const struct sk_buff *
return (struct uet_pds_ack_ext_hdr *)(pds_ack_hdr(skb) + 1);
}
+static inline struct uet_pds_ctl_hdr *pds_ctl_hdr(const struct sk_buff *skb)
+{
+ return (struct uet_pds_ctl_hdr *)skb_network_header(skb);
+}
+
static inline struct uet_ses_rsp_hdr *pds_ack_ses_rsp_hdr(const struct sk_buff *skb)
{
/* TODO: ack_ext_hdr, CC_STATE, etc. */
diff --git a/include/uapi/linux/ultraeth.h b/include/uapi/linux/ultraeth.h
index 53d2124bc285..c1d5457073e1 100644
--- a/include/uapi/linux/ultraeth.h
+++ b/include/uapi/linux/ultraeth.h
@@ -247,6 +247,25 @@ struct uet_pds_nack_hdr {
__be32 payload;
} __attribute__ ((__packed__));
+/* control packet flags */
+enum {
+ UET_PDS_CTL_FLAG_RSV21 = (1 << 0),
+ UET_PDS_CTL_FLAG_RSV22 = (1 << 1),
+ UET_PDS_CTL_FLAG_SYN = (1 << 2),
+ UET_PDS_CTL_FLAG_AR = (1 << 3),
+ UET_PDS_CTL_FLAG_RETX = (1 << 4),
+ UET_PDS_CTL_FLAG_RSV11 = (1 << 5),
+ UET_PDS_CTL_FLAG_RSV12 = (1 << 6),
+};
+
+struct uet_pds_ctl_hdr {
+ struct uet_prologue_hdr prologue;
+ __be32 psn;
+ __be16 spdcid;
+ __be16 dpdcid_pdc_info_offset;
+ __be32 payload;
+} __attribute__ ((__packed__));
+
/* ses request op codes */
enum {
UET_SES_REQ_OP_NOOP = 0x00,
--
2.48.1
Powered by blists - more mailing lists