Al Viro (1): arm64: don't pull uaccess.h into *.S Chris Wilson (1): cpuhp Chun-Hao Lin (1): r8169: add support for RTL8168 series add-on card. Daniel Borkmann (1): net, sched: fix soft lockup in tc_classify Eric Dumazet (1): ipvlan: fix various issues in ipvlan_process_multicast() Florian Fainelli (1): net: korina: Fix NAPI versus resources freeing Haishuang Yan (1): ipv4: Namespaceify tcp_tw_reuse knob Jason Wang (1): net: xdp: remove unused bfp_warn_invalid_xdp_buffer() Jon Paul Maloy (1): tipc: don't send FIN message from connectionless socket Kweh, Hock Leong (1): net: stmmac: fix incorrect bit set in gmac4 mdio addr register Linus Torvalds (2): Merge git://git.kernel.org/.../davem/net Merge branch 'linus' of git://git.kernel.org/.../herbert/crypto-2.6 Mahesh Bandewar (1): ipvlan: fix multicast processing Romain Perier (1): crypto: marvell - Copy IVDIG before launching partial DMA ahash requests Sedat Dilek (3): perf/x86/amd/ibs: Fix typo after cleanup state names in cpu/hotplug Merge branch 'for-4.10/cpu-hotplug-fixes' into 4.10.0-rc1-3-iniza-small Merge branch 'for-4.10/cpuhp-fixes-ickle' into 4.10.0-rc1-3-iniza-small Tariq Toukan (1): net/mlx4_en: Fix user prio field in XDP forward Thomas Gleixner (2): smp/hotplug: Undo tglxs brainfart x86/mce/AMD: Make the init code more robust pravin shelar (1): openvswitch: upcall: Fix vlan handling. arch/arm64/include/asm/asm-uaccess.h | 65 +++++++++++++++++++++++ arch/arm64/include/asm/uaccess.h | 64 ---------------------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- arch/x86/events/amd/ibs.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 ++ drivers/crypto/marvell/cesa.h | 3 +- drivers/crypto/marvell/hash.c | 34 +++++++++++- drivers/crypto/marvell/tdma.c | 9 +++- drivers/net/ethernet/korina.c | 8 +-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 +- drivers/net/ethernet/realtek/r8169.c | 1 + drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 4 +- drivers/net/ipvlan/ipvlan.h | 5 ++ drivers/net/ipvlan/ipvlan_core.c | 60 +++++++++++++-------- drivers/net/ipvlan/ipvlan_main.c | 7 ++- include/linux/filter.h | 1 - include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - kernel/cpu.c | 14 +++-- net/core/filter.c | 6 --- net/ipv4/sysctl_net_ipv4.c | 14 ++--- net/ipv4/tcp_ipv4.c | 4 +- net/openvswitch/datapath.c | 1 - net/openvswitch/flow.c | 54 +++++++++---------- net/sched/cls_api.c | 4 +- net/tipc/socket.c | 24 +++++---- 32 files changed, 240 insertions(+), 166 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h new file mode 100644 index 000000000000..df411f3e083c --- /dev/null +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -0,0 +1,65 @@ +#ifndef __ASM_ASM_UACCESS_H +#define __ASM_ASM_UACCESS_H + +#include +#include +#include +#include + +/* + * User access enabling/disabling macros. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + .macro __uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro __uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_disable, tmp1 +alternative_if_not ARM64_HAS_PAN + __uaccess_ttbr0_disable \tmp1 +alternative_else_nop_endif + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + __uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else_nop_endif + .endm +#else + .macro uaccess_ttbr0_disable, tmp1 + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 + .endm +#endif + +/* + * These macros are no-ops when UAO is present. + */ + .macro uaccess_disable_not_uao, tmp1 + uaccess_ttbr0_disable \tmp1 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(1) +alternative_else_nop_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 + uaccess_ttbr0_enable \tmp1, \tmp2 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(0) +alternative_else_nop_endif + .endm + +#endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index d26750ca6e06..46da3ea638bb 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -22,8 +22,6 @@ #include #include -#ifndef __ASSEMBLY__ - /* * User space memory access functions */ @@ -424,66 +422,4 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#else /* __ASSEMBLY__ */ - -#include - -/* - * User access enabling/disabling macros. - */ -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - .macro __uaccess_ttbr0_disable, tmp1 - mrs \tmp1, ttbr1_el1 // swapper_pg_dir - add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir - msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb - .endm - - .macro __uaccess_ttbr0_enable, tmp1 - get_thread_info \tmp1 - ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 - msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 - isb - .endm - - .macro uaccess_ttbr0_disable, tmp1 -alternative_if_not ARM64_HAS_PAN - __uaccess_ttbr0_disable \tmp1 -alternative_else_nop_endif - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 -alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 -alternative_else_nop_endif - .endm -#else - .macro uaccess_ttbr0_disable, tmp1 - .endm - - .macro uaccess_ttbr0_enable, tmp1, tmp2 - .endm -#endif - -/* - * These macros are no-ops when UAO is present. - */ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(1) -alternative_else_nop_endif - .endm - - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 -alternative_if ARM64_ALT_PAN_NOT_UAO - SET_PSTATE_PAN(0) -alternative_else_nop_endif - .endm - -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7504f40d7ee..923841ffe4a9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index add4a1334085..e88fb99c1561 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,7 +17,7 @@ */ #include -#include +#include .text diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index fd6cd05593f9..4b5d826895ff 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy from user space to a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index d828540ded6f..47184c3a97da 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -19,7 +19,7 @@ #include #include -#include +#include /* * Copy from user space to user space (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 3e6ae2663b82..351f0766f7a6 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy to user space from a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 17f422a4dc55..83c27b6e6dca 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -23,7 +23,7 @@ #include #include #include -#include +#include /* * flush_icache_range(start,end) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 47cf3f9d89ff..947830a459d2 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,7 +49,7 @@ #include #include -#include +#include #include diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 05612a2529c8..496e60391fac 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1010,7 +1010,7 @@ static __init int amd_ibs_init(void) * all online cpus. */ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - "perf/x86/amd/ibs:STARTING", + "perf/x86/amd/ibs:starting", x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ffacfdcacb85..a5fd137417a2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1182,6 +1182,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) const char *name = get_name(bank, NULL); int err = 0; + if (!dev) + return -ENODEV; + if (is_shared_bank(bank)) { nb = node_to_amd_nb(amd_get_nb_id(cpu)); diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index a768da7138a1..b7872f62f674 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -273,7 +273,8 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_SRC_IN_SRAM BIT(30) #define CESA_TDMA_END_OF_REQ BIT(29) #define CESA_TDMA_BREAK_CHAIN BIT(28) -#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 317cf029c0cf..77c0fb936f47 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -280,13 +280,32 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) sreq->offset = 0; } +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. */ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + static void mv_cesa_ahash_step(struct crypto_async_request *req) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->base); + mv_cesa_ahash_dma_step(ahashreq); else mv_cesa_ahash_std_step(ahashreq); } @@ -584,12 +603,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; + bool set_state = false; int ret; u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); @@ -683,6 +706,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (type != CESA_TDMA_RESULT) basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. + */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4416b88eca70..c76375ff376d 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -109,7 +109,14 @@ void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, last->next = dreq->chain.first; engine->chain.last = dreq->chain.last; - if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) last->next_dma = dreq->chain.first->cur_dma; } } diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index cbeea915f026..8037426ec50f 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work) DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); + korina_free_ring(dev); + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); return; @@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev) tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; writel(tmp, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); cancel_work_sync(&lp->restart_task); + korina_free_ring(dev); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index bcd955339058..edbe200ac2fa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev) /* Configure tx cq's and rings */ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) { - u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1; + u8 num_tx_rings_p_up = t == TX ? + priv->num_tx_rings_p_up : priv->tx_ring_num[t]; for (i = 0; i < priv->tx_ring_num[t]; i++) { /* Configure cq */ diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index f9b97f5946f8..44389c90056a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -326,6 +326,7 @@ enum cfg_version { static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8129), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8136), 0, 0, RTL_CFG_2 }, + { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8161), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index fda01f770eff..b0344c213752 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -116,7 +116,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 value = MII_WRITE | MII_BUSY; + u32 value = MII_BUSY; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -126,6 +126,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, & priv->hw->mii.clk_csr_mask; if (priv->plat->has_gmac4) value |= MII_GMAC4_WRITE; + else + value |= MII_WRITE; /* Wait until any existing MII operation is complete */ if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address)) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 031093e1c25f..dbfbb33ac66c 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -99,6 +99,11 @@ struct ipvl_port { int count; }; +struct ipvl_skb_cb { + bool tx_pkt; +}; +#define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0])) + static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) { return rcu_dereference(d->rx_handler_data); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b4e990743e1d..83ce74acf82d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -198,7 +198,7 @@ void ipvlan_process_multicast(struct work_struct *work) unsigned int mac_hash; int ret; u8 pkt_type; - bool hlocal, dlocal; + bool tx_pkt; __skb_queue_head_init(&list); @@ -207,8 +207,11 @@ void ipvlan_process_multicast(struct work_struct *work) spin_unlock_bh(&port->backlog.lock); while ((skb = __skb_dequeue(&list)) != NULL) { + struct net_device *dev = skb->dev; + bool consumed = false; + ethh = eth_hdr(skb); - hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); + tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; mac_hash = ipvlan_mac_hash(ethh->h_dest); if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) @@ -216,41 +219,45 @@ void ipvlan_process_multicast(struct work_struct *work) else pkt_type = PACKET_MULTICAST; - dlocal = false; rcu_read_lock(); list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (hlocal && (ipvlan->dev == skb->dev)) { - dlocal = true; + if (tx_pkt && (ipvlan->dev == skb->dev)) continue; - } if (!test_bit(mac_hash, ipvlan->mac_filters)) continue; - + if (!(ipvlan->dev->flags & IFF_UP)) + continue; ret = NET_RX_DROP; len = skb->len + ETH_HLEN; nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - goto acct; - - nskb->pkt_type = pkt_type; - nskb->dev = ipvlan->dev; - if (hlocal) - ret = dev_forward_skb(ipvlan->dev, nskb); - else - ret = netif_rx(nskb); -acct: + local_bh_disable(); + if (nskb) { + consumed = true; + nskb->pkt_type = pkt_type; + nskb->dev = ipvlan->dev; + if (tx_pkt) + ret = dev_forward_skb(ipvlan->dev, nskb); + else + ret = netif_rx(nskb); + } ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + local_bh_enable(); } rcu_read_unlock(); - if (dlocal) { + if (tx_pkt) { /* If the packet originated here, send it out. */ skb->dev = port->dev; skb->pkt_type = pkt_type; dev_queue_xmit(skb); } else { - kfree_skb(skb); + if (consumed) + consume_skb(skb); + else + kfree_skb(skb); } + if (dev) + dev_put(dev); } } @@ -470,15 +477,24 @@ static int ipvlan_process_outbound(struct sk_buff *skb) } static void ipvlan_multicast_enqueue(struct ipvl_port *port, - struct sk_buff *skb) + struct sk_buff *skb, bool tx_pkt) { if (skb->protocol == htons(ETH_P_PAUSE)) { kfree_skb(skb); return; } + /* Record that the deferred packet is from TX or RX path. By + * looking at mac-addresses on packet will lead to erronus decisions. + * (This would be true for a loopback-mode on master device or a + * hair-pin mode of the switch.) + */ + IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; + spin_lock(&port->backlog.lock); if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { + if (skb->dev) + dev_hold(skb->dev); __skb_queue_tail(&port->backlog, skb); spin_unlock(&port->backlog.lock); schedule_work(&port->wq); @@ -537,7 +553,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) } else if (is_multicast_ether_addr(eth->h_dest)) { ipvlan_skb_crossing_ns(skb, NULL); - ipvlan_multicast_enqueue(ipvlan->port, skb); + ipvlan_multicast_enqueue(ipvlan->port, skb, true); return NET_XMIT_SUCCESS; } @@ -634,7 +650,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, */ if (nskb) { ipvlan_skb_crossing_ns(nskb, NULL); - ipvlan_multicast_enqueue(port, nskb); + ipvlan_multicast_enqueue(port, nskb, false); } } } else { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 693ec5b66222..8b0f99300cbc 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -135,6 +135,7 @@ static int ipvlan_port_create(struct net_device *dev) static void ipvlan_port_destroy(struct net_device *dev) { struct ipvl_port *port = ipvlan_port_get_rtnl(dev); + struct sk_buff *skb; dev->priv_flags &= ~IFF_IPVLAN_MASTER; if (port->mode == IPVLAN_MODE_L3S) { @@ -144,7 +145,11 @@ static void ipvlan_port_destroy(struct net_device *dev) } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); - __skb_queue_purge(&port->backlog); + while ((skb = __skb_dequeue(&port->backlog)) != NULL) { + if (skb->dev) + dev_put(skb->dev); + kfree_skb(skb); + } kfree(port); } diff --git a/include/linux/filter.h b/include/linux/filter.h index 702314253797..a0934e6c9bab 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -610,7 +610,6 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_buffer(void); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f0cf5a1b777e..0378e88f6fd3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -110,6 +110,7 @@ struct netns_ipv4 { int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_tw_reuse; int sysctl_igmp_max_memberships; int sysctl_igmp_max_msf; diff --git a/include/net/tcp.h b/include/net/tcp.h index 207147b4c6b2..6061963cca98 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -252,7 +252,6 @@ extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; -extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; diff --git a/kernel/cpu.c b/kernel/cpu.c index 042fd7e8e030..d131ca77a4d8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1330,10 +1330,7 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, state = ret; } sp = cpuhp_get_step(state); - if (name && sp->name) { - ret = -EBUSY; - goto out; - } + WARN_ON(name && sp->name); sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; @@ -1471,6 +1468,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, bool multi_instance) { int cpu, ret = 0; + bool dynstate; if (cpuhp_cb_check(state) || !name) return -EINVAL; @@ -1480,6 +1478,12 @@ int __cpuhp_setup_state(enum cpuhp_state state, ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); + dynstate = state == CPUHP_AP_ONLINE_DYN; + if (ret > 0 && dynstate) { + state = ret; + ret = 0; + } + if (ret || !invoke || !startup) goto out; @@ -1508,7 +1512,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, * If the requested state is CPUHP_AP_ONLINE_DYN, return the * dynamically allocated state in case of success. */ - if (!ret && state == CPUHP_AP_ONLINE_DYN) + if (!ret && dynstate) return state; return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index e6c412b94dec..1969b3f118c1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2972,12 +2972,6 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -void bpf_warn_invalid_xdp_buffer(void) -{ - WARN_ONCE(1, "Illegal XDP buffer encountered, expect throughput degradation\n"); -} -EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_buffer); - static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..22cbd61079b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -433,13 +433,6 @@ static struct ctl_table ipv4_table[] = { .extra2 = &tcp_adv_win_scale_max, }, { - .procname = "tcp_tw_reuse", - .data = &sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_tw_reuse", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30d81f533ada..fe9da4fb96bf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,7 +84,6 @@ #include #include -int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; #ifdef CONFIG_TCP_MD5SIG @@ -120,7 +119,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && + (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) @@ -2456,6 +2455,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + net->ipv4.sysctl_tcp_tw_reuse = 0; return 0; fail: diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2d4c4d3911c0..9c62b6325f7a 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -606,7 +606,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; - packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 08aa926cd5cf..2c0a00f7f1b7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -312,7 +312,8 @@ static bool icmp6hdr_ok(struct sk_buff *skb) * Returns 0 if it encounters a non-vlan or incomplete packet. * Returns 1 after successfully parsing vlan tag. */ -static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, + bool untag_vlan) { struct vlan_head *vh = (struct vlan_head *)skb->data; @@ -330,7 +331,20 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); key_vh->tpid = vh->tpid; - __skb_pull(skb, sizeof(struct vlan_head)); + if (unlikely(untag_vlan)) { + int offset = skb->data - skb_mac_header(skb); + u16 tci; + int err; + + __skb_push(skb, offset); + err = __skb_vlan_pop(skb, &tci); + __skb_pull(skb, offset); + if (err) + return err; + __vlan_hwaccel_put_tag(skb, key_vh->tpid, tci); + } else { + __skb_pull(skb, sizeof(struct vlan_head)); + } return 1; } @@ -351,13 +365,13 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) key->eth.vlan.tpid = skb->vlan_proto; } else { /* Parse outer vlan tag in the non-accelerated case. */ - res = parse_vlan_tag(skb, &key->eth.vlan); + res = parse_vlan_tag(skb, &key->eth.vlan, true); if (res <= 0) return res; } /* Parse inner vlan tag. */ - res = parse_vlan_tag(skb, &key->eth.cvlan); + res = parse_vlan_tag(skb, &key->eth.cvlan, false); if (res <= 0) return res; @@ -800,29 +814,15 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; - if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { - /* key_extract assumes that skb->protocol is set-up for - * layer 3 packets which is the case for other callers, - * in particular packets recieved from the network stack. - * Here the correct value can be set from the metadata - * extracted above. - */ - skb->protocol = key->eth.type; - } else { - struct ethhdr *eth; - - skb_reset_mac_header(skb); - eth = eth_hdr(skb); - - /* Normally, setting the skb 'protocol' field would be - * handled by a call to eth_type_trans(), but it assumes - * there's a sending device, which we may not have. - */ - if (eth_proto_is_802_3(eth->h_proto)) - skb->protocol = eth->h_proto; - else - skb->protocol = htons(ETH_P_802_2); - } + /* key_extract assumes that skb->protocol is set-up for + * layer 3 packets which is the case for other callers, + * in particular packets received from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + * For L2 packet key eth type would be zero. skb protocol + * would be set to correct value later during key-extact. + */ + skb->protocol = key->eth.type; return key_extract(skb, key); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3fbba79a4ef0..1ecdf809b5fa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -148,13 +148,15 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) unsigned long cl; unsigned long fh; int err; - int tp_created = 0; + int tp_created; if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: + tp_created = 0; + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); if (err < 0) return err; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 333c5dae0072..800caaa699a1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -441,15 +441,19 @@ static void __tipc_shutdown(struct socket *sock, int error) while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { if (TIPC_SKB_CB(skb)->bytes_read) { kfree_skb(skb); - } else { - if (!tipc_sk_type_connectionless(sk) && - sk->sk_state != TIPC_DISCONNECTING) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, error); + continue; + } + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); } + tipc_sk_respond(sk, skb, error); } + + if (tipc_sk_type_connectionless(sk)) + return; + if (sk->sk_state != TIPC_DISCONNECTING) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, @@ -457,10 +461,8 @@ static void __tipc_shutdown(struct socket *sock, int error) tsk->portid, error); if (skb) tipc_node_xmit_skb(net, skb, dnode, tsk->portid); - if (!tipc_sk_type_connectionless(sk)) { - tipc_node_remove_conn(net, dnode, tsk->portid); - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - } + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); } }