>From 50d8dfcf781fd3e9ad1d6123ff204b9f77a185ea Mon Sep 17 00:00:00 2001 From: Christopher Zimmermann Date: Mon, 17 Aug 2009 21:27:50 +0200 Subject: [PATCH 2/3] net: software TX time stamping This patch implements the software fallback to TX time stamping. The necessary access to the buffer and socket is secured by taking references before calling ndo_start_xmit(). That avoids race conditions (buffer remains available even if transmission completes before ndo_start_xmit() returns) and works even if the driver calls skb_orphan(). The caller of skb_tstamp_tx() is now responsible for providing the socket, which requires minor changes in users of the previous call: add skb->sk as parameter to get the old behavior. --- drivers/net/igb/igb_main.c | 2 +- include/linux/skbuff.h | 11 +++++--- net/core/dev.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ net/core/skbuff.c | 4 +- 4 files changed, 75 insertions(+), 7 deletions(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index adb09d3..3ce4d78 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -4370,7 +4370,7 @@ static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb) shhwtstamps.hwtstamp = ns_to_ktime(ns); shhwtstamps.syststamp = timecompare_transform(&adapter->compare, ns); - skb_tstamp_tx(skb, &shhwtstamps); + skb_tstamp_tx(skb, skb->sk, &shhwtstamps); } } } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2c69a2..e29e43d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1873,17 +1873,20 @@ static inline ktime_t net_invalid_timestamp(void) /** * skb_tstamp_tx - queue clone of skb with send time stamps - * @orig_skb: the original outgoing packet + * @skb: the original outgoing packet + * @sk: sending socket (either from skb->sk or previous sock_hold()), + * may be NULL * @hwtstamps: hardware time stamps, may be NULL if not available * - * If the skb has a socket associated, then this function clones the + * If the 
socket is available, then this function clones the * skb (thus sharing the actual data and optional structures), stores * the optional hardware time stamping information (if non NULL) or * generates a software time stamp (otherwise), then queues the clone * to the error queue of the socket. Errors are silently ignored. */ -extern void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps); +extern void skb_tstamp_tx(struct sk_buff *skb, + struct sock *sk, + struct skb_shared_hwtstamps *hwtstamps); extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 6a94475..e20e3ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1679,10 +1679,71 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } +/** + * struct tx_tstamp_context - context for software TX time stamping + * @sk: socket reference, NULL if nothing to do + * @skb: packet reference + */ +struct tx_tstamp_context { + struct sock *sk; + struct sk_buff *skb; +}; + +/** + * tx_tstamp_start - check for TX software time stamping and prepare for it + * @skb: buffer which is being sent + * @context: needs to be initialized for tx_tstamp_end() + */ +static void tx_tstamp_start(struct sk_buff *skb, + struct tx_tstamp_context *context) +{ + union skb_shared_tx *shtx = skb_tx(skb); + /* + * Prepare for TX time stamping in software if requested. + * This could be optimized so that device drivers + * do that themselves, which avoids the skb/sk ref/unref + * overhead. + */ + if (unlikely(shtx->software && + skb->sk)) { + context->sk = skb->sk; + sock_hold(skb->sk); + context->skb = skb_get(skb); + } else { + /* TX software time stamping not requested/not possible. 
*/ + context->sk = NULL; + } +} + +/** + * tx_tstamp_end - finish the work started by tx_tstamp_start() + * @context: may contain socket and buffer references + * @rc: result of ndo_start_xmit() - only do time stamping if packet was sent + */ +static void tx_tstamp_end(struct tx_tstamp_context *context, int rc) +{ + if (unlikely(context->sk)) { + union skb_shared_tx *shtx = skb_tx(context->skb); + /* + * Checking shtx->software again is a bit redundant: it must + * have been set in tx_tstamp_start(), but perhaps it + * was cleared in the meantime to disable the TX software + * fallback. + */ + if (likely(!rc) && + unlikely(shtx->software && + !shtx->in_progress)) + skb_tstamp_tx(context->skb, context->sk, NULL); + sock_put(context->sk); + kfree_skb(context->skb); + } +} + int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + struct tx_tstamp_context context; int rc; if (likely(!skb->next)) { @@ -1703,6 +1764,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + tx_tstamp_start(skb, &context); rc = ops->ndo_start_xmit(skb, dev); if (rc == 0) txq_trans_update(txq); @@ -1720,6 +1782,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * the skb destructor before the call and restoring it * afterwards, then doing the skb_orphan() ourselves? 
*/ + tx_tstamp_end(&context, rc); return rc; } @@ -1729,7 +1792,9 @@ gso: skb->next = nskb->next; nskb->next = NULL; + tx_tstamp_start(skb, &context); rc = ops->ndo_start_xmit(nskb, dev); + tx_tstamp_end(&context, rc); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e0597d..78ae3e9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2971,9 +2971,9 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) EXPORT_SYMBOL_GPL(skb_cow_data); void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps) + struct sock *sk, + struct skb_shared_hwtstamps *hwtstamps) { - struct sock *sk = orig_skb->sk; struct sock_exterr_skb *serr; struct sk_buff *skb; int err; -- 1.6.3.3