lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 27 Mar 2018 02:07:05 +0300
From:   Tal Gilboa <talgi@...lanox.com>
To:     Florian Fainelli <f.fainelli@...il.com>, netdev@...r.kernel.org
Cc:     davem@...emloft.net, jaedon.shin@...il.com, pgynther@...gle.com,
        opendmb@...il.com, michal.chan@...adcom.com, gospo@...adcom.com,
        saeedm@...lanox.com
Subject: Re: [PATCH net-next 1/2] net: systemport: Implement adaptive
 interrupt coalescing

On 3/27/2018 12:36 AM, Florian Fainelli wrote:
> On 03/26/2018 02:22 PM, Tal Gilboa wrote:
>> On 3/23/2018 4:19 AM, Florian Fainelli wrote:
>>> Implement support for adaptive RX and TX interrupt coalescing using
>>> net_dim. We have each of our TX ring and our single RX ring implement a
>>> bcm_sysport_net_dim structure which holds an interrupt counter, number
>>> of packets, bytes, and a container for a net_dim instance.
>>>
>>> Signed-off-by: Florian Fainelli <f.fainelli@...il.com>
>>> ---
>>>    drivers/net/ethernet/broadcom/bcmsysport.c | 141
>>> ++++++++++++++++++++++++++---
>>>    drivers/net/ethernet/broadcom/bcmsysport.h |  14 +++
>>>    2 files changed, 140 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c
>>> b/drivers/net/ethernet/broadcom/bcmsysport.c
>>> index f15a8fc6dfc9..5a5a726bafa4 100644
>>> --- a/drivers/net/ethernet/broadcom/bcmsysport.c
>>> +++ b/drivers/net/ethernet/broadcom/bcmsysport.c
>>> @@ -15,6 +15,7 @@
>>>    #include <linux/module.h>
>>>    #include <linux/kernel.h>
>>>    #include <linux/netdevice.h>
>>> +#include <linux/net_dim.h>
>>
>> I don't think you need this include. You already include net_dim in
>> bcmsysport.h and include the bcmsysport.h here.
> 
> Indeed.
> 
>>
>>>    #include <linux/etherdevice.h>
>>>    #include <linux/platform_device.h>
>>>    #include <linux/of.h>
>>> @@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device
>>> *dev,
>>>        return 0;
>>>    }
>>>    +static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
>>> +{
>>> +    u32 reg;
>>> +
>>> +    reg = rdma_readl(priv, RDMA_MBDONE_INTR);
>>> +    reg &= ~(RDMA_INTR_THRESH_MASK |
>>> +         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
>>> +    reg |= priv->dim.coal_pkts;
>>> +    reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
>>> +                RDMA_TIMEOUT_SHIFT;
>>> +    rdma_writel(priv, reg, RDMA_MBDONE_INTR);
>>> +}
>>> +
>>> +static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring
>>> *ring)
>>> +{
>>> +    struct bcm_sysport_priv *priv = ring->priv;
>>> +    u32 reg;
>>> +
>>> +    reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
>>> +    reg &= ~(RING_INTR_THRESH_MASK |
>>> +         RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
>>> +    reg |= ring->dim.coal_pkts;
>>> +    reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
>>> +                RING_TIMEOUT_SHIFT;
>>> +    tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
>>> +}
>>> +
>>
>> I wouldn't couple these functions with dim. This implies dim is always
>> used. IMO, would be more clear to use a generic method which takes usecs
>> and packets as an argument.
> 
> I did not want to create an additional structure for storing coalescing
> parameters, but if you prefer I make this function take two parameters,
> that sounds entirely reasonable.
> 
>>
>>>    static int bcm_sysport_get_coalesce(struct net_device *dev,
>>>                        struct ethtool_coalesce *ec)
>>>    {
>>>        struct bcm_sysport_priv *priv = netdev_priv(dev);
>>> +    struct bcm_sysport_tx_ring *ring;
>>> +    unsigned int i;
>>>        u32 reg;
>>>          reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
>>>          ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 /
>>> 1000;
>>>        ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
>>> +    for (i = 0; i < dev->num_tx_queues; i++) {
>>> +        ring = &priv->tx_rings[i];
>>> +        ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
>>> +    }
>>>          reg = rdma_readl(priv, RDMA_MBDONE_INTR);
>>>          ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 /
>>> 1000;
>>>        ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
>>> +    ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
>>>          return 0;
>>>    }
>>> @@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct
>>> net_device *dev,
>>>                        struct ethtool_coalesce *ec)
>>>    {
>>>        struct bcm_sysport_priv *priv = netdev_priv(dev);
>>> +    struct bcm_sysport_tx_ring *ring;
>>>        unsigned int i;
>>> -    u32 reg;
>>>          /* Base system clock is 125Mhz, DMA timeout is this reference
>>> clock
>>>         * divided by 1024, which yield roughly 8.192 us, our maximum
>>> value has
>>> @@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct
>>> net_device *dev,
>>>            return -EINVAL;
>>>          for (i = 0; i < dev->num_tx_queues; i++) {
>>> -        reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
>>> -        reg &= ~(RING_INTR_THRESH_MASK |
>>> -             RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
>>> -        reg |= ec->tx_max_coalesced_frames;
>>> -        reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
>>> -             RING_TIMEOUT_SHIFT;
>>> -        tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
>>> +        ring = &priv->tx_rings[i];
>>> +        ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
>>> +        ring->dim.coal_usecs = ec->tx_coalesce_usecs;
>>> +        if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
>>> +            ring->dim.coal_pkts = 1;
>>> +            ring->dim.coal_usecs = 0;
>>> +        }
>>> +        ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
>>> +        bcm_sysport_set_tx_coalesce(ring);
>>>        }
>>
>> If I understand correctly, if I disable dim, moderation is set to
>> {usecs,packets}={0,1} regardless of the input from ethtool right?
> 
> Correct, these are the default coalescing parameters that the driver
> sets. As mentioned before, since I am not storing any coalescing
> parameters other than these two, there is no copy of what an user might
> have previously provided, falling back to the defaults seemed reasonable.

Consider this example: ethtool -C <int> adaptive-tx on; ethtool -C 
<intf> adaptive-tx off tx-usecs 8 tx-frames 32;
In this case the actual moderation would be {0,1} instead of the 
requested {8,32}. Setting default values is ok unless requested 
otherwise. I would also use macros for default values.

> 
>> Doesn't this break the wanted behavior? As mentioned above, I would
>> decouple dim from the set_tx/rx_coalesce() function. Also, when dim is
>> enabled, why change dim.coal_pkts/usecs? They would just be overwritten
>> in the next iteration of net_dim.
> 
> Indeed, that is not necessary.
> 
>>
>>>    -    reg = rdma_readl(priv, RDMA_MBDONE_INTR);
>>> -    reg &= ~(RDMA_INTR_THRESH_MASK |
>>> -         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
>>> -    reg |= ec->rx_max_coalesced_frames;
>>> -    reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
>>> -                RDMA_TIMEOUT_SHIFT;
>>> -    rdma_writel(priv, reg, RDMA_MBDONE_INTR);
>>> +    priv->dim.coal_usecs = ec->rx_coalesce_usecs;
>>> +    priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
>>> +
>>> +    if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
>>> +        priv->dim.coal_pkts = 1;
>>> +        priv->dim.coal_usecs = 0;
>>> +    }
>>> +    priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
>>> +    bcm_sysport_set_rx_coalesce(priv);
>>
>> Same comment as above.
>>
>>>          return 0;
>>>    }
>>> @@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct
>>> bcm_sysport_priv *priv,
>>>        struct bcm_sysport_stats64 *stats64 = &priv->stats64;
>>>        struct net_device *ndev = priv->netdev;
>>>        unsigned int processed = 0, to_process;
>>> +    unsigned int processed_bytes = 0;
>>>        struct bcm_sysport_cb *cb;
>>>        struct sk_buff *skb;
>>>        unsigned int p_index;
>>> @@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct
>>> bcm_sysport_priv *priv,
>>>             */
>>>            skb_pull(skb, sizeof(*rsb) + 2);
>>>            len -= (sizeof(*rsb) + 2);
>>> +        processed_bytes += len;
>>>              /* UniMAC may forward CRC */
>>>            if (priv->crc_fwd) {
>>> @@ -824,6 +865,9 @@ static unsigned int bcm_sysport_desc_rx(struct
>>> bcm_sysport_priv *priv,
>>>                priv->rx_read_ptr = 0;
>>>        }
>>>    +    priv->dim.packets = processed;
>>> +    priv->dim.bytes = processed_bytes;
>>> +
>>>        return processed;
>>>    }
>>>    @@ -900,6 +944,8 @@ static unsigned int
>>> __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
>>>        ring->packets += pkts_compl;
>>>        ring->bytes += bytes_compl;
>>>        u64_stats_update_end(&priv->syncp);
>>> +    ring->dim.packets = pkts_compl;
>>> +    ring->dim.bytes = bytes_compl;
>>>          ring->c_index = c_index;
>>>    @@ -945,6 +991,7 @@ static int bcm_sysport_tx_poll(struct
>>> napi_struct *napi, int budget)
>>>    {
>>>        struct bcm_sysport_tx_ring *ring =
>>>            container_of(napi, struct bcm_sysport_tx_ring, napi);
>>> +    struct net_dim_sample dim_sample;
>>>        unsigned int work_done = 0;
>>>          work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
>>> @@ -961,6 +1008,12 @@ static int bcm_sysport_tx_poll(struct
>>> napi_struct *napi, int budget)
>>>            return 0;
>>>        }
>>>    +    if (ring->dim.use_dim) {
>>> +        net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
>>> +                   ring->dim.bytes, &dim_sample);
>>> +        net_dim(&ring->dim.dim, dim_sample);
>>> +    }
>>> +
>>>        return budget;
>>>    }
>>>    @@ -976,6 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct
>>> *napi, int budget)
>>>    {
>>>        struct bcm_sysport_priv *priv =
>>>            container_of(napi, struct bcm_sysport_priv, napi);
>>> +    struct net_dim_sample dim_sample;
>>>        unsigned int work_done = 0;
>>>          work_done = bcm_sysport_desc_rx(priv, budget);
>>> @@ -998,6 +1052,12 @@ static int bcm_sysport_poll(struct napi_struct
>>> *napi, int budget)
>>>            intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
>>>        }
>>>    +    if (priv->dim.use_dim) {
>>> +        net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
>>> +                   priv->dim.bytes, &dim_sample);
>>> +        net_dim(&priv->dim.dim, dim_sample);
>>> +    }
>>> +
>>>        return work_done;
>>>    }
>>>    @@ -1016,6 +1076,40 @@ static void
>>> bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
>>>        netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
>>>    }
>>>    +static void bcm_sysport_dim_work(struct work_struct *work)
>>> +{
>>> +    struct net_dim *dim = container_of(work, struct net_dim, work);
>>> +    struct bcm_sysport_net_dim *ndim =
>>> +            container_of(dim, struct bcm_sysport_net_dim, dim);
>>> +    struct bcm_sysport_priv *priv =
>>> +            container_of(ndim, struct bcm_sysport_priv, dim);
>>> +    struct net_dim_cq_moder cur_profile =
>>> +                net_dim_get_profile(dim->mode, dim->profile_ix);
>>> +
>>> +    priv->dim.coal_usecs = cur_profile.usec;
>>> +    priv->dim.coal_pkts = cur_profile.pkts;
>>> +
>>> +    bcm_sysport_set_rx_coalesce(priv);
>>> +    dim->state = NET_DIM_START_MEASURE;
>>> +}
>>> +
>>> +static void bcm_sysport_dim_tx_work(struct work_struct *work)
>>> +{
>>> +    struct net_dim *dim = container_of(work, struct net_dim, work);
>>> +    struct bcm_sysport_net_dim *ndim =
>>> +            container_of(dim, struct bcm_sysport_net_dim, dim);
>>> +    struct bcm_sysport_tx_ring *ring =
>>> +            container_of(ndim, struct bcm_sysport_tx_ring, dim);
>>> +    struct net_dim_cq_moder cur_profile =
>>> +                net_dim_get_profile(dim->mode, dim->profile_ix);
>>> +
>>> +    ring->dim.coal_usecs = cur_profile.usec;
>>> +    ring->dim.coal_pkts = cur_profile.pkts;
>>> +
>>> +    bcm_sysport_set_tx_coalesce(ring);
>>> +    dim->state = NET_DIM_START_MEASURE;
>>> +}
>>> +
>>>    /* RX and misc interrupt routine */
>>>    static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
>>>    {
>>> @@ -1034,6 +1128,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq,
>>> void *dev_id)
>>>        }
>>>          if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
>>> +        priv->dim.event_ctr++;
>>>            if (likely(napi_schedule_prep(&priv->napi))) {
>>>                /* disable RX interrupts */
>>>                intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
>>> @@ -1061,6 +1156,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq,
>>> void *dev_id)
>>>                continue;
>>>              txr = &priv->tx_rings[ring];
>>> +        txr->dim.event_ctr++;
>>>              if (likely(napi_schedule_prep(&txr->napi))) {
>>>                intrl2_0_mask_set(priv, ring_bit);
>>> @@ -1093,6 +1189,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq,
>>> void *dev_id)
>>>                continue;
>>>              txr = &priv->tx_rings[ring];
>>> +        txr->dim.event_ctr++;
>>>              if (likely(napi_schedule_prep(&txr->napi))) {
>>>                intrl2_1_mask_set(priv, BIT(ring));
>>> @@ -1358,6 +1455,16 @@ static void bcm_sysport_adj_link(struct
>>> net_device *dev)
>>>            phy_print_status(phydev);
>>>    }
>>>    +static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
>>> +                 void (*cb)(struct work_struct *work))
>>> +{
>>> +    INIT_WORK(&dim->dim.work, cb);
>>> +    dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
>>> +    dim->event_ctr = 0;
>>> +    dim->packets = 0;
>>> +    dim->bytes = 0;
>>> +}
>>
>> What about default values for coal_usecs/pkts? dim supports it through
>> net_dim_get_def_profile(mode) function.
> 
> OK, thanks I did not know that.
> 

I'll add it to the documentation.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ