lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220804130342.63355-1-feng.tang@intel.com>
Date:   Thu,  4 Aug 2022 21:03:42 +0800
From:   Feng Tang <feng.tang@...el.com>
To:     Michal Hocko <mhocko@...e.com>,
        Muchun Song <songmuchun@...edance.com>,
        Mike Kravetz <mike.kravetz@...cle.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Cc:     Dave Hansen <hansen.dave@...el.com>,
        Ben Widawsky <bwidawsk@...nel.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, Feng Tang <feng.tang@...el.com>
Subject: [PATCH] mm/hugetlb: add dedicated func to get 'allowed' nodemask for current process

Muchun Song found that after MPOL_PREFERRED_MANY policy was introduced
in commit b27abaccf8e8 ("mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes")
[1], the policy_nodemask_current()'s semantics for this new policy
has been changed, which returns 'preferred' nodes instead of 'allowed'
nodes, and could hurt the usage of its caller in hugetlb:
allowed_mems_nr().

Michal found the policy_nodemask_current() is only used by hugetlb,
and suggested to move it to hugetlb code with more explicit name to
enforce the 'allowed' semantics for which only MPOL_BIND policy
matters.

One note for the new policy_mbind_nodemask() is, the cross check
from MPOL_BIND, gfp flags and cpuset configuration can lead to
a no available node case, which is considered to be broken
configuration, and 'NULL' (equals all nodes) will be returned.

apply_policy_zone() is made extern to be called in hugetlb code
and its return value is changed to bool.

[1]. https://lore.kernel.org/lkml/20220801084207.39086-1-songmuchun@bytedance.com/t/
Reported-by: Muchun Song <songmuchun@...edance.com>
Suggested-by: Michal Hocko <mhocko@...e.com>
Signed-off-by: Feng Tang <feng.tang@...el.com>
---
 include/linux/mempolicy.h | 13 +------------
 mm/hugetlb.c              | 24 ++++++++++++++++++++----
 mm/mempolicy.c            |  2 +-
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 668389b4b53d..d232de7cdc56 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -151,13 +151,6 @@ extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
 				const nodemask_t *mask);
 extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	struct mempolicy *mpol = get_task_policy(current);
-
-	return policy_nodemask(gfp, mpol);
-}
-
 extern unsigned int mempolicy_slab_node(void);
 
 extern enum zone_type policy_zone;
@@ -189,6 +182,7 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 	return  (pol->mode == MPOL_PREFERRED_MANY);
 }
 
+extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
 
 #else
 
@@ -294,11 +288,6 @@ static inline void mpol_put_task_policy(struct task_struct *task)
 {
 }
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	return NULL;
-}
-
 static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return  false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a18c071c294e..ad84bb85b6de 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4330,18 +4330,34 @@ static int __init default_hugepagesz_setup(char *s)
 }
 __setup("default_hugepagesz=", default_hugepagesz_setup);
 
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+	struct mempolicy *mpol = get_task_policy(current);
+
+	/*
+	 * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+	 * (from policy_nodemask) specifically for hugetlb case
+	 */
+	if (mpol->mode == MPOL_BIND &&
+		(apply_policy_zone(mpol, gfp_zone(gfp)) &&
+		 cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+		return &mpol->nodes;
+#endif
+	return NULL;
+}
+
 static unsigned int allowed_mems_nr(struct hstate *h)
 {
 	int node;
 	unsigned int nr = 0;
-	nodemask_t *mpol_allowed;
+	nodemask_t *mbind_nodemask;
 	unsigned int *array = h->free_huge_pages_node;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
 
-	mpol_allowed = policy_nodemask_current(gfp_mask);
-
+	mbind_nodemask = policy_mbind_nodemask(gfp_mask);
 	for_each_node_mask(node, cpuset_current_mems_allowed) {
-		if (!mpol_allowed || node_isset(node, *mpol_allowed))
+		if (!mbind_nodemask || node_isset(node, *mbind_nodemask))
 			nr += array[node];
 	}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d39b01fd52fe..9f15bc533601 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1805,7 +1805,7 @@ bool vma_policy_mof(struct vm_area_struct *vma)
 	return pol->flags & MPOL_F_MOF;
 }
 
-static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
+bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
 {
 	enum zone_type dynamic_policy_zone = policy_zone;
 
-- 
2.27.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ