[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2b45b242-ead7-58a8-531c-dd4a1b583833@gmail.com>
Date: Thu, 2 Nov 2017 13:14:53 -0700
From: Florian Fainelli <f.fainelli@...il.com>
To: Miquel Raynal <miquel.raynal@...e-electrons.com>,
"David S . Miller" <davem@...emloft.net>
Cc: Thomas Petazzoni <thomas.petazzoni@...e-electrons.com>,
Antoine Tenart <antoine.tenart@...e-electrons.com>,
Gregory Clement <gregory.clement@...e-electrons.com>,
Nadav Haklai <nadavh@...vell.com>, netdev@...r.kernel.org,
Stefan Chulski <stefanc@...vell.com>
Subject: Re: [PATCH] net: mvpp2: add ethtool GOP statistics
On 11/02/2017 11:52 AM, Miquel Raynal wrote:
> Add ethtool statistics support by reading the GOP statistics from the
> hardware counters. Also implement a workqueue to gather the statistics
> every second or some 32-bit counters could overflow.
>
> Suggested-by: Stefan Chulski <stefanc@...vell.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@...e-electrons.com>
> ---
> drivers/net/ethernet/marvell/mvpp2.c | 226 ++++++++++++++++++++++++++++++++++-
> 1 file changed, 220 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
> index 97efe4733661..fb92a0927116 100644
> --- a/drivers/net/ethernet/marvell/mvpp2.c
> +++ b/drivers/net/ethernet/marvell/mvpp2.c
> @@ -769,6 +769,44 @@ enum mvpp2_bm_type {
> MVPP2_BM_SWF_SHORT
> };
>
> +/* GMAC MIB Counters register definitions */
> +#define MVPP21_MIB_COUNTERS_OFFSET 0x1000
> +#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400
> +#define MVPP22_MIB_COUNTERS_OFFSET 0x0
> +#define MVPP22_MIB_COUNTERS_PORT_SZ 0x100
> +
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_LOW 0x0
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_HIGH 0x4
> +#define MVPP2_MIB_BAD_OCTETS_RCVD 0x8
> +#define MVPP2_MIB_CRC_ERRORS_SENT 0xc
> +#define MVPP2_MIB_UNICAST_FRAMES_RCVD 0x10
> +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD 0x18
> +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD 0x1c
> +#define MVPP2_MIB_FRAMES_64_OCTETS 0x20
> +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS 0x24
> +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS 0x28
> +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS 0x2c
> +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS 0x30
> +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS 0x34
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_LOW 0x38
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_HIGH 0x3c
> +#define MVPP2_MIB_UNICAST_FRAMES_SENT 0x40
> +#define MVPP2_MIB_MULTICAST_FRAMES_SENT 0x48
> +#define MVPP2_MIB_BROADCAST_FRAMES_SENT 0x4c
> +#define MVPP2_MIB_FC_SENT 0x54
> +#define MVPP2_MIB_FC_RCVD 0x58
> +#define MVPP2_MIB_RX_FIFO_OVERRUN 0x5c
> +#define MVPP2_MIB_UNDERSIZE_RCVD 0x60
> +#define MVPP2_MIB_FRAGMENTS_RCVD 0x64
> +#define MVPP2_MIB_OVERSIZE_RCVD 0x68
> +#define MVPP2_MIB_JABBER_RCVD 0x6c
> +#define MVPP2_MIB_MAC_RCV_ERROR 0x70
> +#define MVPP2_MIB_BAD_CRC_EVENT 0x74
> +#define MVPP2_MIB_COLLISION 0x78
> +#define MVPP2_MIB_LATE_COLLISION 0x7c
> +
> +#define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ)
> +
> /* Definitions */
>
> /* Shared Packet Processor resources */
> @@ -796,6 +834,7 @@ struct mvpp2 {
> struct clk *axi_clk;
>
> /* List of pointers to port structures */
> + int port_count;
> struct mvpp2_port **port_list;
>
> /* Aggregated TXQs */
> @@ -817,6 +856,10 @@ struct mvpp2 {
>
> /* Maximum number of RXQs per port */
> unsigned int max_port_rxqs;
> +
> + /* Workqueue to gather hardware statistics */
> + struct delayed_work stats_work;
> + struct workqueue_struct *stats_queue;
> };
>
> struct mvpp2_pcpu_stats {
> @@ -879,6 +922,7 @@ struct mvpp2_port {
> u16 tx_ring_size;
> u16 rx_ring_size;
> struct mvpp2_pcpu_stats __percpu *stats;
> + u64 *ethtool_stats;
>
> phy_interface_t phy_interface;
> struct device_node *phy_node;
> @@ -4743,9 +4787,137 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port)
> writel(val, port->base + MVPP2_GMAC_CTRL_1_REG);
> }
>
> +static u64 mvpp2_read_count(struct mvpp2_port *port, unsigned int offset)
> +{
> + bool reg_is_64b =
> + (offset == MVPP2_MIB_GOOD_OCTETS_RCVD_LOW) ||
> + (offset == MVPP2_MIB_GOOD_OCTETS_SENT_LOW);
This does not scale very well. Put that in your statistics structure and
define a member "reg_is_64b" there, such that you can pass a pointer to
one of these members here and check, on a per-counter basis, whether this
is needed or not.
> + void __iomem *base;
> + u64 val;
> +
> + if (port->priv->hw_version == MVPP21)
> + base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET +
> + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
> + else
> + base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET +
> + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;
> +
> + val = readl(base + offset);
> + if (reg_is_64b)
> + val += (u64)readl(base + offset + 4) << 32;
So the value gets latched when the higher part gets read last?
> +
> + return val;
> +}
> +
> +struct mvpp2_ethtool_statistics {
> + unsigned int offset;
> + const char string[ETH_GSTRING_LEN];
Add your reg_is_64b member here too.
> +};
> +
> +/* Due to the fact that software statistics and hardware statistics are, by
> + * design, incremented at different moments in the chain of packet processing,
> + * it is very likely that incoming packets could have been dropped after being
> + * counted by hardware but before reaching software statistics (most probably
> + * multicast packets), and in the oppposite way, during transmission, FCS bytes
> + * are added in between as well as TSO skb will be split and header bytes added.
> + */
OK, not sure what to make of that comment.
> +static struct mvpp2_ethtool_statistics mvpp2_ethtool_stats[] = {
> + { MVPP2_MIB_GOOD_OCTETS_RCVD_LOW, "good_octets_received" },
> + { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
> + { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
> + { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" },
> + { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" },
> + { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" },
> + { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" },
> + { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" },
> + { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" },
> + { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" },
> + { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" },
> + { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" },
> + { MVPP2_MIB_GOOD_OCTETS_SENT_LOW, "good_octets_sent" },
> + { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" },
> + { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" },
> + { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" },
> + { MVPP2_MIB_FC_SENT, "fc_sent" },
> + { MVPP2_MIB_FC_RCVD, "fc_received" },
> + { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" },
> + { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" },
> + { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" },
> + { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" },
> + { MVPP2_MIB_JABBER_RCVD, "jabber_received" },
> + { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" },
> + { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" },
> + { MVPP2_MIB_COLLISION, "collision" },
> + { MVPP2_MIB_LATE_COLLISION, "late_collision" },
> +};
> +
> +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
> + u8 *data)
> +{
> + if (sset == ETH_SS_STATS) {
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> + memcpy(data + i * ETH_GSTRING_LEN,
> + &mvpp2_ethtool_stats[i].string, ETH_GSTRING_LEN);
> + }
> +}
> +
> +static void mvpp2_gather_hw_statistics(struct work_struct *work)
> +{
> + struct delayed_work *del_work = to_delayed_work(work);
> + struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work);
> + struct mvpp2_port *port;
> + u64 *pstats;
> + int i, j;
> +
> + for (i = 0; i < priv->port_count; i++) {
> + if (!priv->port_list[i])
> + continue;
> +
> + port = priv->port_list[i];
> + pstats = port->ethtool_stats;
port->ethtool_stats was allocated this way:
port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats)) instead of
ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64).
This is probably working right now because sizeof(mvpp2_ethtool_stats) is
much bigger than ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64).
> + for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_stats); j++)
> + *pstats++ += mvpp2_read_count(
> + port, mvpp2_ethtool_stats[j].offset);
You might want to look into the helper functions from
include/linux/u64_stats_sync.h to safely add a 32-bit quantity to a
64-bit quantity on 32-bit hosts.
> + }
> +
> + /* No need to read again the counters right after this function if it
> + * was called asynchronously by the user (ie. use of ethtool).
> + */
> + cancel_delayed_work(&priv->stats_work);
> + queue_delayed_work(priv->stats_queue, &priv->stats_work,
> + MVPP2_MIB_COUNTERS_STATS_DELAY);
> +}
> +
> +static void mvpp2_ethtool_get_stats(struct net_device *dev,
> + struct ethtool_stats *stats, u64 *data)
> +{
> + struct mvpp2_port *port = netdev_priv(dev);
> +
> + /* Update statistics for all ports, copy only those actually needed */
> + mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
> +
> + memcpy(data, port->ethtool_stats,
> + sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_stats));
> +}
> +
> +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
> +{
> + if (sset == ETH_SS_STATS)
> + return ARRAY_SIZE(mvpp2_ethtool_stats);
> +
> + return -EOPNOTSUPP;
> +}
> +
> static void mvpp2_port_reset(struct mvpp2_port *port)
> {
> u32 val;
> + int i;
unsigned int i
> +
> + /* Read the GOP statistics to reset the hardware counters */
> + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> + mvpp2_read_count(port, mvpp2_ethtool_stats[i].offset);
>
> val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
> ~MVPP2_GMAC_PORT_RESET_MASK;
> @@ -7199,6 +7371,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
> .get_drvinfo = mvpp2_ethtool_get_drvinfo,
> .get_ringparam = mvpp2_ethtool_get_ringparam,
> .set_ringparam = mvpp2_ethtool_set_ringparam,
> + .get_strings = mvpp2_ethtool_get_strings,
> + .get_ethtool_stats = mvpp2_ethtool_get_stats,
> + .get_sset_count = mvpp2_ethtool_get_sset_count,
> .get_link_ksettings = phy_ethtool_get_link_ksettings,
> .set_link_ksettings = phy_ethtool_set_link_ksettings,
> };
> @@ -7613,13 +7788,19 @@ static int mvpp2_port_probe(struct platform_device *pdev,
> port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
> }
>
> - /* Alloc per-cpu stats */
> + /* Alloc per-cpu and ethtool stats */
> port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
> if (!port->stats) {
> err = -ENOMEM;
> goto err_free_irq;
> }
>
> + port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats), GFP_KERNEL);
> + if (!port->ethtool_stats) {
> + err = -ENOMEM;
> + goto err_free_stats;
> + }
Should not the above be kcalloc(ARRAY_SIZE(mvpp2_ethtool_stats),
sizeof(u64), GFP_KERNEL)? That is, an array of
ARRAY_SIZE(mvpp2_ethtool_stats) elements, each sizeof(u64) bytes wide?
> +
> mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
>
> port->tx_ring_size = MVPP2_MAX_TXD;
> @@ -7629,7 +7810,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
> err = mvpp2_port_init(port);
> if (err < 0) {
> dev_err(&pdev->dev, "failed to init port %d\n", id);
> - goto err_free_stats;
> + goto err_free_ethstats;
> }
>
> mvpp2_port_periodic_xon_disable(port);
> @@ -7685,6 +7866,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
> err_free_txq_pcpu:
> for (i = 0; i < port->ntxqs; i++)
> free_percpu(port->txqs[i]->pcpu);
> +err_free_ethstats:
> + kfree(port->ethtool_stats);
> err_free_stats:
> free_percpu(port->stats);
> err_free_irq:
> @@ -7707,6 +7890,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
> of_node_put(port->phy_node);
> free_percpu(port->pcpu);
> free_percpu(port->stats);
> + kfree(port->ethtool_stats);
> for (i = 0; i < port->ntxqs; i++)
> free_percpu(port->txqs[i]->pcpu);
> mvpp2_queue_vectors_deinit(port);
> @@ -7893,7 +8077,7 @@ static int mvpp2_probe(struct platform_device *pdev)
> struct mvpp2 *priv;
> struct resource *res;
> void __iomem *base;
> - int port_count, i;
> + int i;
> int err;
>
> priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> @@ -8008,14 +8192,14 @@ static int mvpp2_probe(struct platform_device *pdev)
> goto err_mg_clk;
> }
>
> - port_count = of_get_available_child_count(dn);
> - if (port_count == 0) {
> + priv->port_count = of_get_available_child_count(dn);
> + if (priv->port_count == 0) {
> dev_err(&pdev->dev, "no ports enabled\n");
> err = -ENODEV;
> goto err_mg_clk;
> }
>
> - priv->port_list = devm_kcalloc(&pdev->dev, port_count,
> + priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
> sizeof(*priv->port_list),
> GFP_KERNEL);
> if (!priv->port_list) {
> @@ -8023,6 +8207,20 @@ static int mvpp2_probe(struct platform_device *pdev)
> goto err_mg_clk;
> }
>
> + /* Statistics must be gathered regularly because some of them (like
> + * packets counters) are 32-bit registers and could overflow quite
> + * quickly. For instance, a 10Gb link used at full bandwidth with the
> + * smallest packets (64B) will overflow a 32-bit counter in less than
> + * 30 seconds. Then, use a workqueue to fill 64-bit counters.
> + */
> + priv->stats_queue = create_singlethread_workqueue("mvpp2_hw_stats");
> + if (!priv->stats_queue) {
> + err = -ENOMEM;
> + goto err_mg_clk;
> + }
If I have multiple of these network devices in my system, it would be
nice to have a unique identifier after mvpp2_hw_stats to help
differentiate them (and possibly change their scheduling priorities).
How about using "mvpp2_hw_stats/%d"?
> +
> + INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics);
> +
> /* Initialize ports */
> i = 0;
> for_each_available_child_of_node(dn, port_node) {
> @@ -8033,6 +8231,10 @@ static int mvpp2_probe(struct platform_device *pdev)
> }
>
> platform_set_drvdata(pdev, priv);
> +
> + queue_delayed_work(priv->stats_queue, &priv->stats_work,
> + MVPP2_MIB_COUNTERS_STATS_DELAY);
If the network interface is not used (ndo_open is not called), this
workqueue runs for nothing: the statistics should not even increase,
so this just creates unnecessary system activity.
> +
> return 0;
>
> err_mg_clk:
> @@ -8053,6 +8255,18 @@ static int mvpp2_remove(struct platform_device *pdev)
> struct device_node *port_node;
> int i = 0;
>
> + /* This work recall himself within a delay. If the cancellation returned
> + * a non-zero value, it means a work is still running. In that case, use
> + * use the flush (returns when the running work will be done) and cancel
> + * the new work that was just submitted to the queue but not started yet
> + * due to the delay.
> + */
> + if (!cancel_delayed_work(&priv->stats_work)) {
> + flush_workqueue(priv->stats_queue);
> + cancel_delayed_work(&priv->stats_work);
> + }
Similarly, this needs to be moved to the ndo_stop() function.
> + destroy_workqueue(priv->stats_queue);
> +
> for_each_available_child_of_node(dn, port_node) {
> if (priv->port_list[i])
> mvpp2_port_remove(priv->port_list[i]);
>
--
Florian
Powered by blists - more mailing lists