Message-ID: <1307769482.2872.62.camel@edumazet-laptop>
Date:	Sat, 11 Jun 2011 07:18:02 +0200
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Cc:	davem@...emloft.net, Vasu Dev <vasu.dev@...el.com>,
	netdev@...r.kernel.org, gospo@...hat.com
Subject: Re: [net-next 13/13] ixgbe: use per NUMA node lock for FCoE DDP

On Friday, June 10, 2011 at 20:02 -0700, Jeff Kirsher wrote:
> From: Vasu Dev <vasu.dev@...el.com>
> 
> Add a per-NUMA-node lock so that a CPU first passes through its NUMA
> node's lock before contending for the global DDP fcoe->lock during DDP
> setup; this reduces contention across NUMA nodes.
> 
> Allocate and initialize the per-NUMA-node locks in the new
> ixgbe_fcoe_lock_init(), then acquire the lock for the current CPU's
> numa_node_id() before taking the global fcoe->lock.
> 
> Each node lock is allocated on its own NUMA node using kzalloc_node().
> 
> Signed-off-by: Vasu Dev <vasu.dev@...el.com>
> Tested-by: Ross Brattain <ross.b.brattain@...el.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
> ---
>  drivers/net/ixgbe/ixgbe_fcoe.c |   50 ++++++++++++++++++++++++++++++++++++++-
>  drivers/net/ixgbe/ixgbe_fcoe.h |    1 +
>  2 files changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
> index f5f39ed..aadff4f 100644
> --- a/drivers/net/ixgbe/ixgbe_fcoe.c
> +++ b/drivers/net/ixgbe/ixgbe_fcoe.c
> @@ -109,6 +109,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
>  	len = ddp->len;
>  	/* if there an error, force to invalidate ddp context */
>  	if (ddp->err) {
> +		spin_lock(fcoe->node_lock[numa_node_id()]);
>  		spin_lock_bh(&fcoe->lock);
>  		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCFLT, 0);
>  		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCFLTRW,
> @@ -122,6 +123,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
>  				(xid | IXGBE_FCDMARW_RE));
>  		fcbuff = IXGBE_READ_REG(&adapter->hw, IXGBE_FCBUFF);
>  		spin_unlock_bh(&fcoe->lock);
> +		spin_unlock(fcoe->node_lock[numa_node_id()]);
>  		if (fcbuff & IXGBE_FCBUFF_VALID)
>  			udelay(100);
>  	}
> @@ -294,6 +296,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
>  
>  	/* program DMA context */
>  	hw = &adapter->hw;
> +	spin_lock(fcoe->node_lock[numa_node_id()]);
>  	spin_lock_bh(&fcoe->lock);
>  
>  	/* turn on last frame indication for target mode as FCP_RSPtarget is
> @@ -315,6 +318,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
>  	IXGBE_WRITE_REG(hw, IXGBE_FCFLTRW, fcfltrw);
>  
>  	spin_unlock_bh(&fcoe->lock);
> +	spin_unlock(fcoe->node_lock[numa_node_id()]);
>  
>  	return 1;
>  
> @@ -634,6 +638,42 @@ static void ixgbe_fcoe_ddp_pools_alloc(struct ixgbe_adapter *adapter)
>  	}
>  }
>  
> +static void ixgbe_fcoe_locks_free(struct ixgbe_fcoe *fcoe)
> +{
> +	int node;
> +
> +	if (!fcoe->node_lock)
> +		return;
> +
> +	for_each_node_with_cpus(node)
> +			kfree(fcoe->node_lock[node]);
> +
> +	kfree(fcoe->node_lock);
> +	fcoe->node_lock = NULL;
> +}
> +
> +static void ixgbe_fcoe_lock_init(struct ixgbe_fcoe *fcoe)
> +{
> +	int node;
> +	spinlock_t *node_lock;
> +
> +	fcoe->node_lock = kzalloc(sizeof(node_lock) * num_possible_nodes(),
> +				  GFP_KERNEL);

Hmm...

1) Think of what happens if a machine has 3 possible nodes: 0, 2, 3.

	-> You should use nr_node_ids instead of num_possible_nodes(), since
	   the array is indexed by node id and the highest node id can exceed
	   the node count when node ids are sparse.

2) Make sure this block can't suffer false sharing: allocate at least a
full cache line. On a typical 2-node machine you currently allocate only
16 bytes, and this small block could end up sharing a contended cache
line with unrelated, frequently written data (a sketch covering both
points follows).
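
Something along these lines would address both points (an untested
sketch only; rounding the allocation up to L1_CACHE_BYTES is just one
way to give the block a private cache line):

	/* size by nr_node_ids, round up to a full cache line */
	fcoe->node_lock = kzalloc(max_t(size_t,
					nr_node_ids * sizeof(*fcoe->node_lock),
					L1_CACHE_BYTES),
				  GFP_KERNEL);
	if (!fcoe->node_lock)
		return;

The same rounding could arguably be applied to each kzalloc_node() below,
so that two nodes' spinlocks never land on the same line either.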


> +	if (!fcoe->node_lock)
> +		return;
> +
> +	for_each_node_with_cpus(node) {
> +		node_lock = kzalloc_node(sizeof(*node_lock) , GFP_KERNEL, node);
> +		if (!node_lock) {
> +			ixgbe_fcoe_locks_free(fcoe);
> +			return;
> +		}
> +		spin_lock_init(node_lock);
> +		fcoe->node_lock[node] = node_lock;
> +	}
> +	spin_lock_init(&fcoe->lock);
> +}
> +

...

>  
>  /**
> diff --git a/drivers/net/ixgbe/ixgbe_fcoe.h b/drivers/net/ixgbe/ixgbe_fcoe.h
> index d876e7a..8618892 100644
> --- a/drivers/net/ixgbe/ixgbe_fcoe.h
> +++ b/drivers/net/ixgbe/ixgbe_fcoe.h
> @@ -69,6 +69,7 @@ struct ixgbe_fcoe {
>  	struct pci_pool **pool;
>  	atomic_t refcnt;
>  	spinlock_t lock;
> +	struct spinlock **node_lock;

Won't this read-mostly pointer sit in an often-modified cache line?
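
One way it could be avoided (a sketch only; fields abridged to those
visible in the quoted hunk, and ____cacheline_aligned_in_smp is just one
option) is to move the pointer out of the hot area and onto its own line:

	struct ixgbe_fcoe {
		struct pci_pool **pool;
		atomic_t refcnt;
		spinlock_t lock;		/* frequently written */
		struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
		unsigned char *extra_ddp_buffer;
		dma_addr_t extra_ddp_buffer_dma;
		/* read-mostly once set up: keep it off the line that
		 * ->lock / ->refcnt writers keep dirtying */
		spinlock_t **node_lock ____cacheline_aligned_in_smp;
	};

Or simply group it with other read-mostly members of the struct.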

>  	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
>  	unsigned char *extra_ddp_buffer;
>  	dma_addr_t extra_ddp_buffer_dma;


