lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+BoTQ=u_xPuqTVOVaFTQNRrJ+UTXe89SY+=+7Y1LpxxrkRDfg@mail.gmail.com>
Date:	Tue, 10 Feb 2015 11:33:48 +0100
From:	Michal Kazior <michal.kazior@...to.com>
To:	Eric Dumazet <eric.dumazet@...il.com>
Cc:	Neal Cardwell <ncardwell@...gle.com>,
	linux-wireless <linux-wireless@...r.kernel.org>,
	Network Development <netdev@...r.kernel.org>,
	Eyal Perry <eyalpe@....mellanox.co.il>
Subject: Re: Throughput regression with `tcp: refine TSO autosizing`

On 9 February 2015 at 16:11, Eric Dumazet <eric.dumazet@...il.com> wrote:
> On Mon, 2015-02-09 at 14:47 +0100, Michal Kazior wrote:
[...]
> This is not what I suggested.
>
> If you test this on any other network device, you'll have
> sk->sk_tx_completion_delay_us == 0
>
> amount = 0 * (sk->sk_pacing_rate >> 10); --> 0
> limit = max(2 * skb->truesize, amount >> 10); --> 2 * skb->truesize

You're right. Sorry for mixing things up.


> So non TSO/GSO NIC will not be able to queue more than 2 MSS (one MSS
> per skb)
>
> Then if you store only the last tx completion, you have the possibility
> of having a last packet of a train (say a retransmit) to make it very
> low.
>
> Ideally the formula would be in TCP something very fast to compute :
>
> amount = (sk->sk_pacing_rate >> 10) + sk->tx_completion_delay_cushion;
> limit = max(2 * skb->truesize, amount);
> limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
>
> So a 'problematic' driver would have to do the math (64 bit maths) like
> this :
>
>
> sk->tx_completion_delay_cushion = ewma_tx_delay * sk->sk_pacing_rate;

Hmm. So I've done like you suggested (hopefully I didn't mix anything
up this time around).

I now get pre-regression performance: ~250 Mbps on 1 flow and 600 Mbps on 5
flows (vs. 250 Mbps regardless of the number of flows).


Michał


diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 367e896..a29111c 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/firmware.h>
 #include <linux/of.h>
+#include <linux/average.h>

 #include "core.h"
 #include "mac.h"
@@ -1423,6 +1424,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
        init_dummy_netdev(&ar->napi_dev);
        ieee80211_napi_add(ar->hw, &ar->napi, &ar->napi_dev,
                           ath10k_core_napi_dummy_poll, 64);
+       ewma_init(&ar->tx_delay_us, 16384, 8);

        ret = ath10k_debug_create(ar);
        if (ret)
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 3be3a59..34f6d78 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -24,6 +24,7 @@
 #include <linux/pci.h>
 #include <linux/uuid.h>
 #include <linux/time.h>
+#include <linux/average.h>

 #include "htt.h"
 #include "htc.h"
@@ -82,6 +83,7 @@ struct ath10k_skb_cb {
        dma_addr_t paddr;
        u8 eid;
        u8 vdev_id;
+       ktime_t stamp;

        struct {
                u8 tid;
@@ -625,6 +627,7 @@ struct ath10k {

        struct net_device napi_dev;
        struct napi_struct napi;
+       struct ewma tx_delay_us;

 #ifdef CONFIG_ATH10K_DEBUGFS
        struct ath10k_debug debug;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 15e47f4..5efb2a7 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2620,6 +2620,7 @@ static void ath10k_tx(struct ieee80211_hw *hw,
        if (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)
                ath10k_dbg(ar, ATH10K_DBG_MAC, "IEEE80211_TX_CTL_NO_CCK_RATE\n");

+       ATH10K_SKB_CB(skb)->stamp = ktime_get();
        ATH10K_SKB_CB(skb)->htt.is_offchan = false;
        ATH10K_SKB_CB(skb)->htt.tid = ath10k_tx_h_get_tid(hdr);
        ATH10K_SKB_CB(skb)->vdev_id = ath10k_tx_h_get_vdev_id(ar, vif);
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 3f00cec..0f5f0f2 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -15,6 +15,8 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */

+#include <net/sock.h>
+#include <linux/average.h>
 #include "core.h"
 #include "txrx.h"
 #include "htt.h"
@@ -82,6 +84,16 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,

        ath10k_report_offchan_tx(htt->ar, msdu);

+       if (msdu->sk) {
+               ewma_add(&ar->tx_delay_us,
+                        ktime_to_ns(ktime_sub(ktime_get(), skb_cb->stamp)) /
+                        NSEC_PER_USEC);
+
+               ACCESS_ONCE(msdu->sk->sk_tx_completion_delay_cushion) =
+                               (ewma_read(&ar->tx_delay_us) *
+                                msdu->sk->sk_pacing_rate) >> 20;
+       }
+
        info = IEEE80211_SKB_CB(msdu);
        memset(&info->status, 0, sizeof(info->status));
        trace_ath10k_txrx_tx_unref(ar, tx_done->msdu_id);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2210fec..6772543 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -391,6 +391,7 @@ struct sock {
        gfp_t                   sk_allocation;
        u32                     sk_pacing_rate; /* bytes per second */
        u32                     sk_max_pacing_rate;
+       u32                     sk_tx_completion_delay_cushion;
        netdev_features_t       sk_route_caps;
        netdev_features_t       sk_route_nocaps;
        int                     sk_gso_type;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65caf8b..526a568 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1996,6 +1996,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
        max_segs = tcp_tso_autosize(sk, mss_now);
        while ((skb = tcp_send_head(sk))) {
                unsigned int limit;
+               unsigned int amount;

                tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                BUG_ON(!tso_segs);
@@ -2053,7 +2054,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                 * of queued bytes to ensure line rate.
                 * One example is wifi aggregation (802.11 AMPDU)
                 */
-               limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+               amount = (sk->sk_pacing_rate >> 10) +
+                        sk->sk_tx_completion_delay_cushion;
+               limit = max(2 * skb->truesize, amount);
                limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);

                if (atomic_read(&sk->sk_wmem_alloc) > limit) {
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ