lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 05 Oct 2014 17:07:06 -0700
From:	John Fastabend <john.fastabend@...il.com>
To:	dborkman@...hat.com, fw@...len.de, gerlitz.or@...il.com,
	hannes@...essinduktion.org
Cc:	netdev@...r.kernel.org, john.ronciak@...el.com, amirv@...lanox.com,
	eric.dumazet@...il.com, danny.zhou@...el.com
Subject: [net-next PATCH v1 2/3] net: sched: add direct ring access via
 af_packet to ixgbe

This implements the necessary ndo ops to support the af_packet
interface to directly own and manipulate queues.

Signed-off-by: Danny Zhou <danny.zhou@...el.com>
Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |    3 
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   23 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  232 ++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |    1 
 4 files changed, 251 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 673d820..2f6eadf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -678,6 +678,9 @@ struct ixgbe_adapter {
 
 	struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];
 
+	/* Direct User Space Queues */
+	struct sock *sk_handles[MAX_RX_QUEUES];
+
 	/* DCB parameters */
 	struct ieee_pfc *ixgbe_ieee_pfc;
 	struct ieee_ets *ixgbe_ieee_ets;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index cff383b..01a6e55 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2581,12 +2581,17 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
 		return -EOPNOTSUPP;
 
+	if (fsp->ring_cookie > MAX_RX_QUEUES)
+		return -EINVAL;
+
 	/*
 	 * Don't allow programming if the action is a queue greater than
-	 * the number of online Rx queues.
+	 * the number of online Rx queues unless it is a user space
+	 * queue.
 	 */
 	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
-	    (fsp->ring_cookie >= adapter->num_rx_queues))
+	    (fsp->ring_cookie >= adapter->num_rx_queues) &&
+	    !adapter->sk_handles[fsp->ring_cookie])
 		return -EINVAL;
 
 	/* Don't allow indexes to exist outside of available space */
@@ -2663,12 +2668,18 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	/* apply mask and compute/store hash */
 	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
 
+	/* Set input action to reg_idx for driver owned queues otherwise
+	 * use the absolute index for user space queues.
+	 */
+	if (fsp->ring_cookie < adapter->num_rx_queues &&
+	    fsp->ring_cookie != IXGBE_FDIR_DROP_QUEUE)
+		input->action = adapter->rx_ring[input->action]->reg_idx;
+
 	/* program filters to filter memory */
 	err = ixgbe_fdir_write_perfect_filter_82599(hw,
-				&input->filter, input->sw_idx,
-				(input->action == IXGBE_FDIR_DROP_QUEUE) ?
-				IXGBE_FDIR_DROP_QUEUE :
-				adapter->rx_ring[input->action]->reg_idx);
+						    &input->filter,
+						    input->sw_idx,
+						    input->action);
 	if (err)
 		goto err_out_w_lock;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 06ef5a3..6506550 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -48,7 +48,9 @@
 #include <linux/if_macvlan.h>
 #include <linux/if_bridge.h>
 #include <linux/prefetch.h>
+#include <linux/mm.h>
 #include <scsi/fc/fc_fcoe.h>
+#include <linux/if_packet.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
@@ -70,6 +72,8 @@ const char ixgbe_driver_version[] = DRV_VERSION;
 static const char ixgbe_copyright[] =
 				"Copyright (c) 1999-2014 Intel Corporation.";
 
+static unsigned int *dummy_page_buf;
+
 static const struct ixgbe_info *ixgbe_info_tbl[] = {
 	[board_82598] = &ixgbe_82598_info,
 	[board_82599] = &ixgbe_82599_info,
@@ -3122,6 +3126,16 @@ static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
 	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
 }
 
+/* Report whether any entry of adapter->sk_handles is set, i.e. whether at
+ * least one queue index currently has a socket recorded against it.
+ */
+static bool ixgbe_have_user_queues(struct ixgbe_adapter *adapter)
+{
+	int q = 0;
+
+	while (q < MAX_RX_QUEUES) {
+		if (adapter->sk_handles[q] != NULL)
+			return true;
+		q++;
+	}
+
+	return false;
+}
+
 static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter,
 				  struct ixgbe_ring *ring)
 {
@@ -3156,7 +3170,8 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
 	 *  and performance reasons.
 	 */
 	if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
-	    !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) {
+	    !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en) ||
+	    ixgbe_have_user_queues(adapter)) {
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
 	} else {
@@ -7812,6 +7827,210 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	kfree(fwd_adapter);
 }
 
+/* Assign a contiguous range of queue pairs to a socket.
+ *
+ * @dev: net device whose private data is the ixgbe adapter
+ * @start_from: first queue pair index wanted, or PACKET_QPAIRS_START_ANY
+ *	to pick the first free run of @qpairs_num indices found at or
+ *	above adapter->num_rx_queues
+ * @qpairs_num: number of consecutive queue pairs requested
+ * @sk: socket stored in adapter->sk_handles for every assigned index
+ *
+ * Returns the first index of the assigned range on success, -EINVAL if
+ * the range is out of bounds or overlaps queues used by the driver, or
+ * -EBUSY if an index in the range is already assigned to a socket.
+ */
+static int ixgbe_ndo_split_queue_pairs(struct net_device *dev,
+				       unsigned int start_from,
+				       unsigned int qpairs_num,
+				       struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int qpair_index;
+	unsigned int end;
+
+	/* allocate whatever qpairs are available */
+	if (start_from == PACKET_QPAIRS_START_ANY) {
+		unsigned int count = 0;
+
+		/* NOTE(review): if no free run is found, start_from still
+		 * holds PACKET_QPAIRS_START_ANY and the request is only
+		 * rejected if that sentinel fails the range check below --
+		 * confirm against its definition.
+		 */
+		for (qpair_index = adapter->num_rx_queues;
+		     qpair_index < MAX_RX_QUEUES;
+		     qpair_index++) {
+			if (!adapter->sk_handles[qpair_index]) {
+				count++;
+				if (count == qpairs_num) {
+					start_from = qpair_index - count + 1;
+					break;
+				}
+			} else {
+				count = 0;
+			}
+		}
+	}
+
+	/* Validate the requested range. The subtraction form cannot wrap,
+	 * unlike start_from + qpairs_num, which overflows for a very large
+	 * qpairs_num and would let the request succeed with nothing
+	 * assigned. start_from is checked first so the subtraction itself
+	 * cannot underflow.
+	 */
+	if ((start_from > MAX_TX_QUEUES) ||
+	    (start_from > MAX_RX_QUEUES) ||
+	    (qpairs_num > MAX_TX_QUEUES - start_from) ||
+	    (qpairs_num > MAX_RX_QUEUES - start_from))
+		return -EINVAL;
+
+	end = start_from + qpairs_num;
+
+	/* If the qpairs are being used by the driver do not let user space
+	 * consume the queues. Also if a queue has already been allocated
+	 * to a socket, fail the request.
+	 */
+	for (qpair_index = start_from; qpair_index < end; qpair_index++) {
+		if ((qpair_index < adapter->num_tx_queues) ||
+		    (qpair_index < adapter->num_rx_queues))
+			return -EINVAL;
+
+		if (adapter->sk_handles[qpair_index] != NULL)
+			return -EBUSY;
+	}
+
+	/* remember the sk handle for each queue pair */
+	for (qpair_index = start_from; qpair_index < end; qpair_index++)
+		adapter->sk_handles[qpair_index] = sk;
+
+	return start_from;
+}
+
+/* Look up the queue pairs recorded for a socket.
+ *
+ * Scans adapter->sk_handles from adapter->num_tx_queues up to
+ * MAX_RX_QUEUES. *qpairs_num receives the number of entries equal to @sk
+ * and *start_from the index of the first such entry; *start_from is left
+ * untouched when no entry matches. Always returns 0.
+ */
+static int ixgbe_ndo_get_queue_pairs(struct net_device *dev,
+				     unsigned int *start_from,
+				     unsigned int *qpairs_num,
+				     struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int q;
+
+	*qpairs_num = 0;
+
+	for (q = adapter->num_tx_queues; q < MAX_RX_QUEUES; q++) {
+		if (adapter->sk_handles[q] != sk)
+			continue;
+
+		/* the first match marks the start of the socket's range */
+		if (!*qpairs_num)
+			*start_from = q;
+		(*qpairs_num)++;
+	}
+
+	return 0;
+}
+
+/* Release every queue pair recorded for a socket.
+ *
+ * Clears each adapter->sk_handles entry equal to @sk, scanning from
+ * adapter->num_tx_queues to the end of the array. Always returns 0.
+ */
+static int ixgbe_ndo_return_queue_pairs(struct net_device *dev, struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int qpair_index;
+
+	/* sk_handles is declared with MAX_RX_QUEUES entries, so bound the
+	 * scan by that size, matching ixgbe_ndo_get_queue_pairs().
+	 */
+	for (qpair_index = adapter->num_tx_queues;
+	     qpair_index < MAX_RX_QUEUES;
+	     qpair_index++) {
+		if (adapter->sk_handles[qpair_index] == sk)
+			adapter->sk_handles[qpair_index] = NULL;
+	}
+
+	return 0;
+}
+
+/* Rx descriptor starts from 0x1000 and Tx descriptor starts from 0x6000
+ * both the TX and RX descriptors use 4K pages.
+ */
+#define RX_DESC_ADDR_OFFSET		0x1000
+#define TX_DESC_ADDR_OFFSET		0x6000
+#define PAGE_SIZE_4K			4096
+
+/* Fill @info with the regions of BAR 0 offered for direct mapping: one
+ * page at RX_DESC_ADDR_OFFSET and one page at TX_DESC_ADDR_OFFSET. No
+ * system memory regions are reported. Always returns 0.
+ */
+static int
+ixgbe_ndo_qpair_map_region(struct net_device *dev,
+			   struct tpacket_dev_qpair_map_region_info *info)
+{
+	static const unsigned int desc_offset[2] = {
+		RX_DESC_ADDR_OFFSET,
+		TX_DESC_ADDR_OFFSET,
+	};
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int i;
+
+	/* no need to map system memory to userspace for ixgbe */
+	info->tp_dev_sysm_sz = 0;
+	info->tp_num_sysm_map_regions = 0;
+
+	info->tp_dev_bar_sz = pci_resource_len(adapter->pdev, 0);
+	info->tp_num_map_regions = 2;
+
+	/* NOTE(review): page_sz is reported as PAGE_SIZE while the offsets
+	 * above are described in terms of 4K pages -- confirm this is
+	 * intended when PAGE_SIZE is not 4K.
+	 */
+	for (i = 0; i < 2; i++) {
+		info->regions[i].page_offset = desc_offset[i];
+		info->regions[i].page_sz = PAGE_SIZE;
+		info->regions[i].page_cnt = 1;
+	}
+
+	return 0;
+}
+
+/* Fill @dev_info with the adapter's PCI identification, NUMA node, queue
+ * pair counts and descriptor layout. Always returns 0.
+ */
+static int ixgbe_ndo_get_device_desc_info(struct net_device *dev,
+					  struct tpacket_dev_info *dev_info)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* PCI identity of the device */
+	dev_info->tp_device_id = hw->device_id;
+	dev_info->tp_vendor_id = hw->vendor_id;
+	dev_info->tp_subsystem_device_id = hw->subsystem_device_id;
+	dev_info->tp_subsystem_vendor_id = hw->subsystem_vendor_id;
+	dev_info->tp_revision_id = hw->revision_id;
+	dev_info->tp_numa_node = dev_to_node(&dev->dev);
+
+	/* total is the smaller of the two queue limits; in use is the larger
+	 * of the Rx and Tx queue counts currently configured on the adapter
+	 */
+	dev_info->tp_num_total_qpairs = min(MAX_RX_QUEUES, MAX_TX_QUEUES);
+	dev_info->tp_num_inuse_qpairs = max(adapter->num_rx_queues,
+					    adapter->num_tx_queues);
+
+	/* descriptor sizes and versions */
+	dev_info->tp_rxdesc_size = sizeof(union ixgbe_adv_rx_desc);
+	dev_info->tp_rxdesc_ver = 1;
+	dev_info->tp_txdesc_size = sizeof(union ixgbe_adv_tx_desc);
+	dev_info->tp_txdesc_ver = 1;
+
+	return 0;
+}
+
+/* Populate a user space VMA with the queue pair descriptor pages.
+ *
+ * The VMA is walked in 4K steps. The step at offset RX_DESC_ADDR_OFFSET
+ * is backed by the BAR 0 page at that offset and the step at
+ * TX_DESC_ADDR_OFFSET by the BAR 0 page at that offset, both with
+ * non-cached protection. Every other step is backed by a shared dummy
+ * page filled with 0xdeadbeef, mapped with the VMA's original protection.
+ *
+ * Returns 0 on success, -ENOMEM if the dummy page cannot be allocated,
+ * or -EAGAIN if any remap_pfn_range() call fails.
+ */
+static int
+ixgbe_ndo_qpair_page_map(struct vm_area_struct *vma, struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	phys_addr_t phy_addr = pci_resource_start(adapter->pdev, 0);
+	unsigned long pfn_rx = (phy_addr + RX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
+	unsigned long pfn_tx = (phy_addr + TX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
+	unsigned long pfn_dummy;
+	pgprot_t pre_vm_page_prot;
+	unsigned long start;
+	unsigned int i;
+
+	/* the dummy page is allocated on first use and kept until the
+	 * module exits
+	 */
+	if (!dummy_page_buf) {
+		dummy_page_buf = kzalloc(PAGE_SIZE_4K, GFP_KERNEL);
+		if (!dummy_page_buf)
+			return -ENOMEM;
+
+		for (i = 0; i < PAGE_SIZE_4K / sizeof(unsigned int); i++)
+			dummy_page_buf[i] = 0xdeadbeef;
+	}
+
+	/* remap_pfn_range() takes a page frame number, so the physical
+	 * address must be shifted right by PAGE_SHIFT, not compared to it
+	 */
+	pfn_dummy = virt_to_phys(dummy_page_buf) >> PAGE_SHIFT;
+	pre_vm_page_prot = vma->vm_page_prot;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	/* assume the vm_start is 4K aligned address */
+	for (start = vma->vm_start;
+	     start < vma->vm_end;
+	     start += PAGE_SIZE_4K) {
+		unsigned long pfn;
+		pgprot_t prot;
+
+		if (start == vma->vm_start + RX_DESC_ADDR_OFFSET) {
+			pfn = pfn_rx;
+			prot = vma->vm_page_prot;
+		} else if (start == vma->vm_start + TX_DESC_ADDR_OFFSET) {
+			pfn = pfn_tx;
+			prot = vma->vm_page_prot;
+		} else {
+			pfn = pfn_dummy;
+			prot = pre_vm_page_prot;
+		}
+
+		if (remap_pfn_range(vma, start, pfn, PAGE_SIZE_4K, prot))
+			return -EAGAIN;
+	}
+	return 0;
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
@@ -7856,6 +8075,12 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
 	.ndo_dfwd_add_station	= ixgbe_fwd_add,
 	.ndo_dfwd_del_station	= ixgbe_fwd_del,
+	.ndo_split_queue_pairs	= ixgbe_ndo_split_queue_pairs,
+	.ndo_get_queue_pairs	= ixgbe_ndo_get_queue_pairs,
+	.ndo_return_queue_pairs = ixgbe_ndo_return_queue_pairs,
+	.ndo_get_device_qpair_map_region_info = ixgbe_ndo_qpair_map_region,
+	.ndo_get_device_desc_info  = ixgbe_ndo_get_device_desc_info,
+	.ndo_direct_qpair_page_map = ixgbe_ndo_qpair_page_map,
 };
 
 /**
@@ -8054,7 +8279,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	hw->back = adapter;
 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
-	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+	hw->pci_hw_addr = pci_resource_start(pdev, 0);
+
+	hw->hw_addr = ioremap(hw->pci_hw_addr,
 			      pci_resource_len(pdev, 0));
 	adapter->io_addr = hw->hw_addr;
 	if (!hw->hw_addr) {
@@ -8705,6 +8932,7 @@ module_init(ixgbe_init_module);
  **/
 static void __exit ixgbe_exit_module(void)
 {
+	kfree(dummy_page_buf);
 #ifdef CONFIG_IXGBE_DCA
 	dca_unregister_notify(&dca_notifier);
 #endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index dfd55d8..26e9163 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3022,6 +3022,7 @@ struct ixgbe_mbx_info {
 
 struct ixgbe_hw {
 	u8 __iomem			*hw_addr;
+	phys_addr_t			pci_hw_addr;
 	void				*back;
 	struct ixgbe_mac_info		mac;
 	struct ixgbe_addr_filter_info	addr_ctrl;

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists