Date:	Thu, 01 Apr 2010 10:16:40 +0800
From:	Miao Xie <miaox@...fujitsu.com>
To:	David Rientjes <rientjes@...gle.com>
CC:	Nick Piggin <npiggin@...e.de>,
	Lee Schermerhorn <lee.schermerhorn@...com>,
	Paul Menage <menage@...gle.com>,
	Linux-Kernel <linux-kernel@...r.kernel.org>,
	Linux-MM <linux-mm@...ck.org>, Li Zefan <lizf@...fujitsu.com>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [PATCH V2 4/4] cpuset,mm: update task's mems_allowed lazily

On 2010-3-31 18:34, David Rientjes wrote:
> On Wed, 31 Mar 2010, Miao Xie wrote:
> 
>> diff --git a/mm/mmzone.c b/mm/mmzone.c
>> index f5b7d17..43ac21b 100644
>> --- a/mm/mmzone.c
>> +++ b/mm/mmzone.c
>> @@ -58,6 +58,7 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
>>  					nodemask_t *nodes,
>>  					struct zone **zone)
>>  {
>> +	nodemask_t tmp_nodes;
>>  	/*
>>  	 * Find the next suitable zone to use for the allocation.
>>  	 * Only filter based on nodemask if it's set
>> @@ -65,10 +66,16 @@ struct zoneref *next_zones_zonelist(struct zoneref *z,
>>  	if (likely(nodes == NULL))
>>  		while (zonelist_zone_idx(z) > highest_zoneidx)
>>  			z++;
>> -	else
>> -		while (zonelist_zone_idx(z) > highest_zoneidx ||
>> -				(z->zone && !zref_in_nodemask(z, nodes)))
>> -			z++;
>> +	else {
>> +		tmp_nodes = *nodes;
>> +		if (nodes_empty(tmp_nodes))
>> +			while (zonelist_zone_idx(z) > highest_zoneidx)
>> +				z++;
>> +		else
>> +			while (zonelist_zone_idx(z) > highest_zoneidx ||
>> +				(z->zone && !zref_in_nodemask(z, &tmp_nodes)))
>> +				z++;
>> +	}
>>  
>>  	*zone = zonelist_zone(z);
>>  	return z;
> 
> Unfortunately, you can't allocate a nodemask_t on the stack here because 
> this is used in the iteration for get_page_from_freelist() which can occur 
> very deep in the stack already and there's a probability of overflow.  
> Dynamically allocating a nodemask_t simply wouldn't scale here, either, 
> since it would allocate on every iteration of a zonelist.
> 
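For a sense of the numbers behind that objection: nodemask_t is a fixed-size
bitmap of MAX_NUMNODES bits, so each on-stack copy grows with the node
config. A minimal userspace sketch, not kernel code, assuming
CONFIG_NODES_SHIFT=10 (the value is illustrative, not taken from this
thread):

#include <stdio.h>
#include <limits.h>

#define NODES_SHIFT	10	/* assumed config value */
#define MAX_NUMNODES	(1 << NODES_SHIFT)
#define BITS_PER_LONG	(CHAR_BIT * sizeof(long))
#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* toy stand-in for the kernel's nodemask_t: a bitmap of MAX_NUMNODES bits */
typedef struct { unsigned long bits[BITS_TO_LONGS(MAX_NUMNODES)]; } nodemask_t;

int main(void)
{
	/* 1024 bits -> 16 longs -> 128 bytes per copy on 64-bit */
	printf("sizeof(nodemask_t) = %zu bytes\n", sizeof(nodemask_t));
	return 0;
}

With 64-bit longs that prints 128 bytes, a significant addition to a call
frame that can already sit deep under get_page_from_freelist().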

Maybe it is better to fix this problem by checking the function's return
value, because the function returns NULL when it sees an empty nodemask.
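The helpers share one convention: first_node()/next_node() return
MAX_NUMNODES when the mask is exhausted, and the zonelist iterators leave a
NULL zone, so each caller can fall back to the local node instead of using a
bogus result. A minimal userspace sketch of that fallback pattern (toy
stand-ins, not the kernel implementations):

#include <stdio.h>

#define MAX_NUMNODES	32	/* assumed small config for the demo */

typedef unsigned long nodemask_t;	/* toy stand-in for the kernel type */

/* Same convention as the kernel helper: MAX_NUMNODES means "no node". */
static int next_node(int n, nodemask_t mask)
{
	for (n++; n < MAX_NUMNODES; n++)
		if (mask & (1UL << n))
			return n;
	return MAX_NUMNODES;
}

static int first_node(nodemask_t mask)
{
	return next_node(-1, mask);
}

static int numa_node_id(void)
{
	return 0;	/* pretend the caller runs on node 0 */
}

int main(void)
{
	nodemask_t mems = 0;	/* empty mask, as seen mid-rebind */
	int node = next_node(2, mems);

	if (node == MAX_NUMNODES)
		node = first_node(mems);	/* still MAX_NUMNODES when empty */
	if (node == MAX_NUMNODES)
		node = numa_node_id();		/* the fallback the patch adds */

	printf("allocating on node %d\n", node);
	return 0;
}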

The new patch is attached below.
---
 kernel/cpuset.c |   11 +++++++++--
 mm/mempolicy.c  |   28 +++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d109467..fbb1f1c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -952,8 +952,6 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 static void cpuset_change_task_nodemask(struct task_struct *tsk,
 					nodemask_t *newmems)
 {
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, &tsk->mems_allowed);
 	mpol_rebind_task(tsk, newmems);
 	tsk->mems_allowed = *newmems;
 }
@@ -2442,6 +2440,15 @@ int cpuset_mem_spread_node(void)
 	node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
 	if (node == MAX_NUMNODES)
 		node = first_node(current->mems_allowed);
+
+	/*
+	 * If node is still MAX_NUMNODES, that means the kernel allocator saw
+	 * an empty nodemask. In that case, the kernel allocator will allocate
+	 * memory on the current node.
+	 */
+	if (unlikely(node == MAX_NUMNODES))
+		node = numa_node_id();
+
 	current->cpuset_mem_spread_rotor = node;
 	return node;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8034abd..0905b84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1449,8 +1449,16 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 		 * the first node in the mask instead.
 		 */
 		if (unlikely(gfp & __GFP_THISNODE) &&
-				unlikely(!node_isset(nd, policy->v.nodes)))
+				unlikely(!node_isset(nd, policy->v.nodes))) {
 			nd = first_node(policy->v.nodes);
+			/*
+			 * When rebinding task->mempolicy, the kernel allocator
+			 * may see an empty nodemask, and first_node() returns
+			 * MAX_NUMNODES. In that case, we use the current node.
+			 */
+			if (unlikely(nd == MAX_NUMNODES))
+				nd = numa_node_id();
+		}
 		break;
 	case MPOL_INTERLEAVE: /* should not happen */
 		break;
@@ -1510,6 +1518,14 @@ unsigned slab_node(struct mempolicy *policy)
 		(void)first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes,
 							&zone);
+		/*
+		 * When rebinding task->mempolicy, the kernel allocator
+		 * may see an empty nodemask, and first_zones_zonelist()
+		 * returns a NULL zone. In that case, we use the current
+		 * node.
+		 */
+		if (unlikely(zone == NULL))
+			return numa_node_id();
 		return zone->node;
 	}
 
@@ -1529,10 +1545,18 @@ static unsigned offset_il_node(struct mempolicy *pol,
 
 	if (!nnodes)
 		return numa_node_id();
+
 	target = (unsigned int)off % nnodes;
 	c = 0;
 	do {
 		nid = next_node(nid, pol->v.nodes);
+		/*
+		 * When rebinding task->mempolicy, the kernel allocator
+		 * may see an empty nodemask, and next_node() returns
+		 * MAX_NUMNODES. In that case, we use the current node.
+		 */
+		if (unlikely(nid == MAX_NUMNODES))
+			return numa_node_id();
 		c++;
 	} while (c <= target);
 	return nid;
@@ -1631,7 +1655,9 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
+		task_lock(current);
 		*mask =  mempolicy->v.nodes;
+		task_unlock(current);
 		break;
 
 	default:
-- 
1.6.5.2
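One note on the last hunk: once the rebind happens lazily, a reader of
mempolicy->v.nodes can race with mpol_rebind_task(), which is why
init_nodemask_of_mempolicy() now snapshots the mask under task_lock(). A
minimal pthread sketch of that snapshot-under-lock pattern (the names and
the single-word mask are illustrative, not kernel API; build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long policy_nodes = 0x5;	/* stand-in for v.nodes */

/* Writer: rebind the policy. Without the lock, a reader could observe
 * the transient empty mask between the two stores. */
static void *rebind(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&task_lock);
	policy_nodes = 0;	/* transient state during the rebind */
	policy_nodes = 0xa;	/* new mask */
	pthread_mutex_unlock(&task_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	unsigned long snapshot;

	pthread_create(&t, NULL, rebind, NULL);

	/* Reader: snapshot under the same lock, like the task_lock(current)
	 * added around *mask = mempolicy->v.nodes above. */
	pthread_mutex_lock(&task_lock);
	snapshot = policy_nodes;
	pthread_mutex_unlock(&task_lock);

	pthread_join(t, NULL);
	printf("mask snapshot: %#lx\n", snapshot);	/* 0x5 or 0xa, never 0 */
	return 0;
}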


