[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120714055034.GB4808@jonmason-lab>
Date: Fri, 13 Jul 2012 22:50:35 -0700
From: Jon Mason <jon.mason@...el.com>
To: Jiri Pirko <jiri@...nulli.us>
Cc: linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
linux-pci@...r.kernel.org, Dave Jiang <dave.jiang@...el.com>
Subject: Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
On Sat, Jul 14, 2012 at 01:14:03AM +0200, Jiri Pirko wrote:
> Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.mason@...el.com wrote:
> >A virtual ethernet device that uses the NTB transport API to send/receive data.
> >
> >Signed-off-by: Jon Mason <jon.mason@...el.com>
> >---
> > drivers/net/Kconfig | 4 +
> > drivers/net/Makefile | 1 +
> > drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 416 insertions(+), 0 deletions(-)
> > create mode 100644 drivers/net/ntb_netdev.c
> >
> >diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> >index 0c2bd80..9bf8a71 100644
> >--- a/drivers/net/Kconfig
> >+++ b/drivers/net/Kconfig
> >@@ -178,6 +178,10 @@ config NETPOLL_TRAP
> > config NET_POLL_CONTROLLER
> > def_bool NETPOLL
> >
> >+config NTB_NETDEV
> >+ tristate "Virtual Ethernet over NTB"
> >+ depends on NTB
> >+
> > config RIONET
> > tristate "RapidIO Ethernet over messaging driver support"
> > depends on RAPIDIO
> >diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> >index 3d375ca..9890148 100644
> >--- a/drivers/net/Makefile
> >+++ b/drivers/net/Makefile
> >@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH) += usb/
> > obj-$(CONFIG_USB_CDC_PHONET) += usb/
> >
> > obj-$(CONFIG_HYPERV_NET) += hyperv/
> >+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
> >diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
> >new file mode 100644
> >index 0000000..bcbd9d4
> >--- /dev/null
> >+++ b/drivers/net/ntb_netdev.c
> >@@ -0,0 +1,411 @@
> >+/*
> >+ * This file is provided under a dual BSD/GPLv2 license. When using or
> >+ * redistributing this file, you may do so under either license.
> >+ *
> >+ * GPL LICENSE SUMMARY
> >+ *
> >+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ * This program is free software; you can redistribute it and/or modify
> >+ * it under the terms of version 2 of the GNU General Public License as
> >+ * published by the Free Software Foundation.
> >+ *
> >+ * This program is distributed in the hope that it will be useful, but
> >+ * WITHOUT ANY WARRANTY; without even the implied warranty of
> >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> >+ * General Public License for more details.
> >+ *
> >+ * You should have received a copy of the GNU General Public License
> >+ * along with this program; if not, write to the Free Software
> >+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> >+ * The full GNU General Public License is included in this distribution
> >+ * in the file called LICENSE.GPL.
> >+ *
> >+ * BSD LICENSE
> >+ *
> >+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ * Redistribution and use in source and binary forms, with or without
> >+ * modification, are permitted provided that the following conditions
> >+ * are met:
> >+ *
> >+ * * Redistributions of source code must retain the above copyright
> >+ * notice, this list of conditions and the following disclaimer.
> >+ * * Redistributions in binary form must reproduce the above copy
> >+ * notice, this list of conditions and the following disclaimer in
> >+ * the documentation and/or other materials provided with the
> >+ * distribution.
> >+ * * Neither the name of Intel Corporation nor the names of its
> >+ * contributors may be used to endorse or promote products derived
> >+ * from this software without specific prior written permission.
> >+ *
> >+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >+ *
> >+ * Intel PCIe NTB Network Linux driver
> >+ *
> >+ * Contact Information:
> >+ * Jon Mason <jon.mason@...el.com>
> >+ */
> >+#include <linux/etherdevice.h>
> >+#include <linux/ethtool.h>
> >+#include <linux/module.h>
> >+#include <linux/ntb.h>
> >+
> >+#define NTB_NETDEV_VER "0.4"
>
> Is it really necessary to provide this in-file versioning? Doesn't
> kernel version itself do the trick?
Not necessarily. This may be distributed as a package outside of the kernel and the version is useful for debug.
>
> >+
> >+MODULE_DESCRIPTION(KBUILD_MODNAME);
> >+MODULE_VERSION(NTB_NETDEV_VER);
> >+MODULE_LICENSE("Dual BSD/GPL");
> >+MODULE_AUTHOR("Intel Corporation");
> >+
> >+struct ntb_netdev {
> >+ struct net_device *ndev;
> >+ struct ntb_transport_qp *qp;
> >+};
> >+
> >+#define NTB_TX_TIMEOUT_MS 1000
> >+#define NTB_RXQ_SIZE 100
> >+
> >+static struct net_device *netdev;
> >+
> >+static void ntb_netdev_event_handler(int status)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(netdev);
> >+
> >+ pr_debug("%s: Event %x, Link %x\n", KBUILD_MODNAME, status,
> >+ ntb_transport_link_query(dev->qp));
> >+
> >+ /* Currently, only link status event is supported */
> >+ if (status)
> >+ netif_carrier_on(netdev);
> >+ else
> >+ netif_carrier_off(netdev);
> >+}
> >+
> >+static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp)
> >+{
> >+ struct net_device *ndev = netdev;
> >+ struct sk_buff *skb;
> >+ int len, rc;
> >+
> >+ while ((skb = ntb_transport_rx_dequeue(qp, &len))) {
> >+ pr_debug("%s: %d byte payload received\n", __func__, len);
> >+
> >+ skb_put(skb, len);
> >+ skb->protocol = eth_type_trans(skb, ndev);
> >+ skb->ip_summed = CHECKSUM_NONE;
> >+
> >+ if (netif_rx(skb) == NET_RX_DROP) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_dropped++;
> >+ } else {
> >+ ndev->stats.rx_packets++;
> >+ ndev->stats.rx_bytes += len;
> >+ }
> >+
> >+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_frame_errors++;
> >+ pr_err("%s: No skb\n", __func__);
> >+ break;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(qp, skb, skb->data,
> >+ ndev->mtu + ETH_HLEN);
> >+ if (rc) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_fifo_errors++;
> >+ pr_err("%s: error re-enqueuing\n", __func__);
> >+ break;
> >+ }
> >+ }
> >+}
> >+
> >+static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp)
> >+{
> >+ struct net_device *ndev = netdev;
> >+ struct sk_buff *skb;
> >+ int len;
> >+
> >+ while ((skb = ntb_transport_tx_dequeue(qp, &len))) {
> >+ ndev->stats.tx_packets++;
> >+ ndev->stats.tx_bytes += skb->len;
> >+ dev_kfree_skb(skb);
> >+ }
> >+
> >+ if (netif_queue_stopped(ndev))
> >+ netif_wake_queue(ndev);
> >+}
> >+
> >+static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
> >+ struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ int rc;
> >+
> >+ pr_debug("%s: ntb_transport_tx_enqueue\n", KBUILD_MODNAME);
> >+
> >+ rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
> >+ if (rc)
> >+ goto err;
> >+
> >+ return NETDEV_TX_OK;
> >+
> >+err:
> >+ ndev->stats.tx_dropped++;
> >+ ndev->stats.tx_errors++;
> >+ netif_stop_queue(ndev);
> >+ return NETDEV_TX_BUSY;
> >+}
> >+
> >+static int ntb_netdev_open(struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int rc, i, len;
> >+
> >+ /* Add some empty rx bufs */
> >+ for (i = 0; i < NTB_RXQ_SIZE; i++) {
> >+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ rc = -ENOMEM;
> >+ goto err;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
> >+ ndev->mtu + ETH_HLEN);
> >+ if (rc == -EINVAL)
> >+ goto err;
> >+ }
> >+
> >+ netif_carrier_off(ndev);
> >+ ntb_transport_link_up(dev->qp);
> >+
> >+ return 0;
> >+
> >+err:
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+ return rc;
> >+}
> >+
> >+static int ntb_netdev_close(struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int len;
> >+
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+
> >+ return 0;
> >+}
> >+
> >+static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int len, rc;
> >+
> >+ if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
> >+ return -EINVAL;
> >+
> >+ if (!netif_running(ndev)) {
> >+ ndev->mtu = new_mtu;
> >+ return 0;
> >+ }
> >+
> >+ /* Bring down the link and dispose of posted rx entries */
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ if (ndev->mtu < new_mtu) {
> >+ int i;
> >+
> >+ for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
> >+ kfree(skb);
> >+
> >+ for (; i; i--) {
> >+ skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ rc = -ENOMEM;
> >+ goto err;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
> >+ new_mtu + ETH_HLEN);
> >+ if (rc) {
> >+ kfree(skb);
> >+ goto err;
> >+ }
> >+ }
> >+ }
> >+
> >+ ndev->mtu = new_mtu;
> >+
> >+ ntb_transport_link_up(dev->qp);
> >+
> >+ return 0;
> >+
> >+err:
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+
> >+ pr_err("Error changing MTU, device inoperable\n");
>
> Would be maybe better to use netdev_err here (and on similar other
> places)
Good point
>
> Also, it might be good to provide rollback in case any of
> netdev_alloc_skb() fails.
>
> >+ return rc;
> >+}
> >+
> >+static void ntb_netdev_tx_timeout(struct net_device *ndev)
> >+{
> >+ if (netif_running(ndev))
> >+ netif_wake_queue(ndev);
> >+}
> >+
> >+static const struct net_device_ops ntb_netdev_ops = {
> >+ .ndo_open = ntb_netdev_open,
> >+ .ndo_stop = ntb_netdev_close,
> >+ .ndo_start_xmit = ntb_netdev_start_xmit,
> >+ .ndo_change_mtu = ntb_netdev_change_mtu,
> >+ .ndo_tx_timeout = ntb_netdev_tx_timeout,
> >+ .ndo_set_mac_address = eth_mac_addr,
>
> Does your device support mac change while it's up and running?
It's virtual ethernet, so there is no hardware limitation, only what is acceptable for the remote side to receive.
>
> >+};
> >+
> >+static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> >+ struct ethtool_drvinfo *info)
> >+{
> >+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> >+ strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> >+}
> >+
> >+static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> >+ "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> >+ "rx_frame_errors", "rx_fifo_errors",
> >+ "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> >+};
> >+
> >+static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
> >+{
> >+ return ARRAY_SIZE(ntb_nic_stats);
> >+}
> >+
> >+static int ntb_get_sset_count(struct net_device *dev, int sset)
> >+{
> >+ switch (sset) {
> >+ case ETH_SS_STATS:
> >+ return ntb_get_stats_count(dev);
> >+ default:
> >+ return -EOPNOTSUPP;
> >+ }
> >+}
> >+
> >+static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> >+ u32 sset, u8 *data)
> >+{
> >+ switch (sset) {
> >+ case ETH_SS_STATS:
> >+ memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> >+ }
> >+}
> >+
> >+static void
> >+ntb_get_ethtool_stats(struct net_device *dev,
> >+ __attribute__((unused)) struct ethtool_stats *stats,
> >+ u64 *data)
> >+{
> >+ int i = 0;
> >+
> >+ data[i++] = dev->stats.rx_packets;
> >+ data[i++] = dev->stats.rx_bytes;
> >+ data[i++] = dev->stats.rx_errors;
> >+ data[i++] = dev->stats.rx_dropped;
> >+ data[i++] = dev->stats.rx_length_errors;
> >+ data[i++] = dev->stats.rx_frame_errors;
> >+ data[i++] = dev->stats.rx_fifo_errors;
> >+ data[i++] = dev->stats.tx_packets;
> >+ data[i++] = dev->stats.tx_bytes;
> >+ data[i++] = dev->stats.tx_errors;
> >+ data[i++] = dev->stats.tx_dropped;
> >+}
> >+
> >+static const struct ethtool_ops ntb_ethtool_ops = {
> >+ .get_drvinfo = ntb_get_drvinfo,
> >+ .get_sset_count = ntb_get_sset_count,
> >+ .get_strings = ntb_get_strings,
> >+ .get_ethtool_stats = ntb_get_ethtool_stats,
> >+ .get_link = ethtool_op_get_link,
> >+};
> >+
> >+static int __init ntb_netdev_init_module(void)
> >+{
> >+ struct ntb_netdev *dev;
> >+ int rc;
> >+
> >+ pr_info("%s: Probe\n", KBUILD_MODNAME);
> >+
> >+ netdev = alloc_etherdev(sizeof(struct ntb_netdev));
>
> I might be missing something but this place (module init) does not seems
> like a good place to do alloc_etherdev(). Do you want to support only
> one netdevice instance?
>
> Anyway, I think that using "static netdev" should be avoided in any case.
>
It would fail the probe if there is no underlying ntb hardware, but it would make sense to check for that before allocing the etherdev.
Thanks for the comments!
> >+ if (!netdev)
> >+ return -ENOMEM;
> >+
> >+ dev = netdev_priv(netdev);
> >+ dev->ndev = netdev;
> >+ netdev->features = NETIF_F_HIGHDMA;
> >+
> >+ netdev->hw_features = netdev->features;
> >+ netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> >+
> >+ random_ether_addr(netdev->perm_addr);
> >+ memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> >+
> >+ netdev->netdev_ops = &ntb_netdev_ops;
> >+ SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
> >+
> >+ dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> >+ ntb_netdev_tx_handler,
> >+ ntb_netdev_event_handler);
> >+ if (!dev->qp) {
> >+ rc = -EIO;
> >+ goto err;
> >+ }
> >+
> >+ netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> >+
> >+ rc = register_netdev(netdev);
> >+ if (rc)
> >+ goto err1;
> >+
> >+ pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> >+ return 0;
> >+
> >+err1:
> >+ ntb_transport_free_queue(dev->qp);
> >+err:
> >+ free_netdev(netdev);
> >+ return rc;
> >+}
> >+module_init(ntb_netdev_init_module);
> >+
> >+static void __exit ntb_netdev_exit_module(void)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(netdev);
> >+
> >+ unregister_netdev(netdev);
> >+ ntb_transport_free_queue(dev->qp);
> >+ free_netdev(netdev);
> >+
> >+ pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> >+}
> >+module_exit(ntb_netdev_exit_module);
> >--
> >1.7.5.4
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe netdev" in
> >the body of a message to majordomo@...r.kernel.org
> >More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists