lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Sun, 05 Oct 2014 17:07:06 -0700 From: John Fastabend <john.fastabend@...il.com> To: dborkman@...hat.com, fw@...len.de, gerlitz.or@...il.com, hannes@...essinduktion.org Cc: netdev@...r.kernel.org, john.ronciak@...el.com, amirv@...lanox.com, eric.dumazet@...il.com, danny.zhou@...el.com Subject: [net-next PATCH v1 2/3] net: sched: add direct ring access via af_packet to ixgbe This implements the necessary ndo ops to support the af_packet interface to directly own and manipulate queues. Signed-off-by: Danny Zhou <danny.zhou@...el.com> Signed-off-by: John Fastabend <john.r.fastabend@...el.com> --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 23 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 232 ++++++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 4 files changed, 251 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 673d820..2f6eadf 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -678,6 +678,9 @@ struct ixgbe_adapter { struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS]; + /* Direct User Space Queues */ + struct sock *sk_handles[MAX_RX_QUEUES]; + /* DCB parameters */ struct ieee_pfc *ixgbe_ieee_pfc; struct ieee_ets *ixgbe_ieee_ets; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index cff383b..01a6e55 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2581,12 +2581,17 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) return -EOPNOTSUPP; + if (fsp->ring_cookie != RX_CLS_FLOW_DISC && + fsp->ring_cookie >= MAX_RX_QUEUES) + return -EINVAL; + /* * Don't allow programming if the action is a queue greater than - * the number of online Rx queues. 
+ * the number of online Rx queues unless it is a user space + * queue. */ if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) && - (fsp->ring_cookie >= adapter->num_rx_queues)) + (fsp->ring_cookie >= adapter->num_rx_queues) && + !adapter->sk_handles[fsp->ring_cookie]) return -EINVAL; /* Don't allow indexes to exist outside of available space */ @@ -2663,12 +2668,18 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* apply mask and compute/store hash */ ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + /* Set input action to reg_idx for driver owned queues otherwise + * use the absolute index for user space queues. + */ + if (fsp->ring_cookie < adapter->num_rx_queues && + fsp->ring_cookie != IXGBE_FDIR_DROP_QUEUE) + input->action = adapter->rx_ring[input->action]->reg_idx; + /* program filters to filter memory */ err = ixgbe_fdir_write_perfect_filter_82599(hw, - &input->filter, input->sw_idx, - (input->action == IXGBE_FDIR_DROP_QUEUE) ? - IXGBE_FDIR_DROP_QUEUE : - adapter->rx_ring[input->action]->reg_idx); + &input->filter, + input->sw_idx, + input->action); if (err) goto err_out_w_lock; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 06ef5a3..6506550 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -48,7 +48,9 @@ #include <linux/if_macvlan.h> #include <linux/if_bridge.h> #include <linux/prefetch.h> +#include <linux/mm.h> #include <scsi/fc/fc_fcoe.h> +#include <linux/if_packet.h> #include "ixgbe.h" #include "ixgbe_common.h" @@ -70,6 +72,8 @@ const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2014 Intel Corporation."; +static unsigned int *dummy_page_buf; + static const struct ixgbe_info *ixgbe_info_tbl[] = { [board_82598] = &ixgbe_82598_info, [board_82599] = &ixgbe_82599_info, @@ -3122,6 +3126,16 @@ static void ixgbe_enable_rx_drop(struct ixgbe_adapter 
*adapter, IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl); } +static bool ixgbe_have_user_queues(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0; i < MAX_RX_QUEUES; i++) + if (adapter->sk_handles[i]) + return true; + return false; +} + static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring) { @@ -3156,7 +3170,8 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) * and performance reasons. */ if (adapter->num_vfs || (adapter->num_rx_queues > 1 && - !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) { + !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en) || + ixgbe_have_user_queues(adapter)) { for (i = 0; i < adapter->num_rx_queues; i++) ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]); } else { @@ -7812,6 +7827,210 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) kfree(fwd_adapter); } +static int ixgbe_ndo_split_queue_pairs(struct net_device *dev, + unsigned int start_from, + unsigned int qpairs_num, + struct sock *sk) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + unsigned int qpair_index; + + /* allocate whatever available qpairs */ + if (start_from == PACKET_QPAIRS_START_ANY) { + unsigned int count = 0; + + for (qpair_index = adapter->num_rx_queues; + qpair_index < MAX_RX_QUEUES; + qpair_index++) { + if (!adapter->sk_handles[qpair_index]) { + count++; + if (count == qpairs_num) { + start_from = qpair_index - count + 1; + break; + } + } else { + count = 0; + } + } + } + + /* otherwise the caller specified exact queues */ + if ((start_from > MAX_TX_QUEUES) || + (start_from > MAX_RX_QUEUES) || + (start_from + qpairs_num > MAX_TX_QUEUES) || + (start_from + qpairs_num > MAX_RX_QUEUES)) + return -EINVAL; + + /* If the qpairs are being used by the driver do not let user space + * consume the queues. Also if the queue has already been allocated + * to a socket, fail the request. 
+ */ + for (qpair_index = start_from; + qpair_index < start_from + qpairs_num; + qpair_index++) { + if ((qpair_index < adapter->num_tx_queues) || + (qpair_index < adapter->num_rx_queues)) + return -EINVAL; + + if (adapter->sk_handles[qpair_index] != NULL) + return -EBUSY; + } + + /* remember the sk handle for each queue pair */ + for (qpair_index = start_from; + qpair_index < start_from + qpairs_num; + qpair_index++) + adapter->sk_handles[qpair_index] = sk; + + return start_from; +} + +static int ixgbe_ndo_get_queue_pairs(struct net_device *dev, + unsigned int *start_from, + unsigned int *qpairs_num, + struct sock *sk) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + unsigned int qpair_index; + + *qpairs_num = 0; + + for (qpair_index = adapter->num_tx_queues; + qpair_index < MAX_RX_QUEUES; + qpair_index++) { + if (adapter->sk_handles[qpair_index] == sk) { + if (*qpairs_num == 0) + *start_from = qpair_index; + *qpairs_num = *qpairs_num + 1; + } + } + + return 0; +} + +static int ixgbe_ndo_return_queue_pairs(struct net_device *dev, struct sock *sk) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + unsigned int qpair_index; + + for (qpair_index = adapter->num_tx_queues; + qpair_index < MAX_TX_QUEUES; + qpair_index++) { + if (adapter->sk_handles[qpair_index] == sk) + adapter->sk_handles[qpair_index] = NULL; + } + + return 0; +} + +/* Rx descriptor starts from 0x1000 and Tx descriptor starts from 0x6000 + * both the TX and RX descriptors use 4K pages. 
+ */ +#define RX_DESC_ADDR_OFFSET 0x1000 +#define TX_DESC_ADDR_OFFSET 0x6000 +#define PAGE_SIZE_4K 4096 + +static int +ixgbe_ndo_qpair_map_region(struct net_device *dev, + struct tpacket_dev_qpair_map_region_info *info) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + /* no need to map system memory to userspace for ixgbe */ + info->tp_dev_sysm_sz = 0; + info->tp_num_sysm_map_regions = 0; + + info->tp_dev_bar_sz = pci_resource_len(adapter->pdev, 0); + info->tp_num_map_regions = 2; + + info->regions[0].page_offset = RX_DESC_ADDR_OFFSET; + info->regions[0].page_sz = PAGE_SIZE; + info->regions[0].page_cnt = 1; + info->regions[1].page_offset = TX_DESC_ADDR_OFFSET; + info->regions[1].page_sz = PAGE_SIZE; + info->regions[1].page_cnt = 1; + + return 0; +} + +static int ixgbe_ndo_get_device_desc_info(struct net_device *dev, + struct tpacket_dev_info *dev_info) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int max_queues; + + max_queues = max(adapter->num_rx_queues, adapter->num_tx_queues); + + dev_info->tp_device_id = adapter->hw.device_id; + dev_info->tp_vendor_id = adapter->hw.vendor_id; + dev_info->tp_subsystem_device_id = adapter->hw.subsystem_device_id; + dev_info->tp_subsystem_vendor_id = adapter->hw.subsystem_vendor_id; + dev_info->tp_revision_id = adapter->hw.revision_id; + dev_info->tp_numa_node = dev_to_node(&dev->dev); + + dev_info->tp_num_total_qpairs = min(MAX_RX_QUEUES, MAX_TX_QUEUES); + dev_info->tp_num_inuse_qpairs = max_queues; + + dev_info->tp_rxdesc_size = sizeof(union ixgbe_adv_rx_desc); + dev_info->tp_rxdesc_ver = 1; + dev_info->tp_txdesc_size = sizeof(union ixgbe_adv_tx_desc); + dev_info->tp_txdesc_ver = 1; + + return 0; +} + +static int +ixgbe_ndo_qpair_page_map(struct vm_area_struct *vma, struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + phys_addr_t phy_addr = pci_resource_start(adapter->pdev, 0); + unsigned long pfn_rx = (phy_addr + RX_DESC_ADDR_OFFSET) >> PAGE_SHIFT; + unsigned long pfn_tx = 
(phy_addr + TX_DESC_ADDR_OFFSET) >> PAGE_SHIFT; + unsigned long dummy_page_phy; + pgprot_t pre_vm_page_prot; + unsigned long start; + unsigned int i; + int err; + + if (!dummy_page_buf) { + dummy_page_buf = kzalloc(PAGE_SIZE_4K, GFP_KERNEL); + if (!dummy_page_buf) + return -ENOMEM; + + for (i = 0; i < PAGE_SIZE_4K / sizeof(unsigned int); i++) + dummy_page_buf[i] = 0xdeadbeef; + } + + dummy_page_phy = virt_to_phys(dummy_page_buf); + pre_vm_page_prot = vma->vm_page_prot; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + /* assume the vm_start is 4K aligned address */ + for (start = vma->vm_start; + start < vma->vm_end; + start += PAGE_SIZE_4K) { + if (start == vma->vm_start + RX_DESC_ADDR_OFFSET) { + err = remap_pfn_range(vma, start, pfn_rx, PAGE_SIZE_4K, + vma->vm_page_prot); + if (err) + return -EAGAIN; + } else if (start == vma->vm_start + TX_DESC_ADDR_OFFSET) { + err = remap_pfn_range(vma, start, pfn_tx, PAGE_SIZE_4K, + vma->vm_page_prot); + if (err) + return -EAGAIN; + } else { + unsigned long addr = dummy_page_phy >> PAGE_SHIFT; + + err = remap_pfn_range(vma, start, addr, PAGE_SIZE_4K, + pre_vm_page_prot); + if (err) + return -EAGAIN; + } + } + return 0; +} + static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, @@ -7856,6 +8075,12 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, .ndo_dfwd_add_station = ixgbe_fwd_add, .ndo_dfwd_del_station = ixgbe_fwd_del, + .ndo_split_queue_pairs = ixgbe_ndo_split_queue_pairs, + .ndo_get_queue_pairs = ixgbe_ndo_get_queue_pairs, + .ndo_return_queue_pairs = ixgbe_ndo_return_queue_pairs, + .ndo_get_device_qpair_map_region_info = ixgbe_ndo_qpair_map_region, + .ndo_get_device_desc_info = ixgbe_ndo_get_device_desc_info, + .ndo_direct_qpair_page_map = ixgbe_ndo_qpair_page_map, }; /** @@ -8054,7 +8279,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; 
adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); - hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + hw->pci_hw_addr = pci_resource_start(pdev, 0); + + hw->hw_addr = ioremap(hw->pci_hw_addr, pci_resource_len(pdev, 0)); adapter->io_addr = hw->hw_addr; if (!hw->hw_addr) { @@ -8705,6 +8932,7 @@ module_init(ixgbe_init_module); **/ static void __exit ixgbe_exit_module(void) { + kfree(dummy_page_buf); #ifdef CONFIG_IXGBE_DCA dca_unregister_notify(&dca_notifier); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index dfd55d8..26e9163 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3022,6 +3022,7 @@ struct ixgbe_mbx_info { struct ixgbe_hw { u8 __iomem *hw_addr; + phys_addr_t pci_hw_addr; void *back; struct ixgbe_mac_info mac; struct ixgbe_addr_filter_info addr_ctrl; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists