lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <44577b73-2e2d-5571-4c8b-3233e3776a52@alibaba-inc.com>
Date:   Fri, 27 Oct 2017 00:15:17 +0800
From:   "Yang Shi" <yang.s@...baba-inc.com>
To:     Michal Hocko <mhocko@...nel.org>
Cc:     cl@...ux.com, penberg@...nel.org, rientjes@...gle.com,
        iamjoonsoo.kim@....com, akpm@...ux-foundation.org,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm: oom: dump single excessive slab cache when oom



On 10/26/17 7:53 AM, Michal Hocko wrote:
> On Thu 26-10-17 06:49:00, Yang Shi wrote:
>> Per the discussion with David [1], it looks more reasonable to just dump
> 
> Please try to avoid external references in the changelog as much as
> possible.

OK.

> 
>> the single excessive slab cache instead of dumping all slab caches when
>> oom.
> 
> You meant to say
> "to just dump all slab caches which excess 10% of the total memory."
> 
> While we are at it. Abusing calc_mem_size seems to be rather clumsy and
> tt is not nodemask aware so you the whole thing is dubious for NUMA
> constrained OOMs.

Since we just need the total memory size of the node for NUMA 
constrained OOM, we should be able to use show_mem_node_skip() to bring 
in nodemask.

> 
> The more I think about this the more I am convinced that this is just
> fiddling with the code without a good reason and without much better
> outcome.

I don't get you. Do you mean the benefit is not that much with just 
dumping excessive slab caches?

Thanks,
Yang


>   
>> Dump single excessive slab cache if its size is > 10% of total system
> 
> s@...gle@all@
> 
>> memory size when oom regardless it is unreclaimable.
>>
>> [1] https://marc.info/?l=linux-mm&m=150819933626604&w=2
>>
>> Suggested-by: David Rientjes <rientjes@...gle.com>
>> Signed-off-by: Yang Shi <yang.s@...baba-inc.com>
>> ---
>>   mm/oom_kill.c    | 22 +---------------------
>>   mm/slab.h        |  4 ++--
>>   mm/slab_common.c | 21 ++++++++++++++++-----
>>   3 files changed, 19 insertions(+), 28 deletions(-)
>>
>> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
>> index 26add8a..f996f29 100644
>> --- a/mm/oom_kill.c
>> +++ b/mm/oom_kill.c
>> @@ -162,25 +162,6 @@ static bool oom_unkillable_task(struct task_struct *p,
>>   	return false;
>>   }
>>   
>> -/*
>> - * Print out unreclaimble slabs info when unreclaimable slabs amount is greater
>> - * than all user memory (LRU pages)
>> - */
>> -static bool is_dump_unreclaim_slabs(void)
>> -{
>> -	unsigned long nr_lru;
>> -
>> -	nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
>> -		 global_node_page_state(NR_INACTIVE_ANON) +
>> -		 global_node_page_state(NR_ACTIVE_FILE) +
>> -		 global_node_page_state(NR_INACTIVE_FILE) +
>> -		 global_node_page_state(NR_ISOLATED_ANON) +
>> -		 global_node_page_state(NR_ISOLATED_FILE) +
>> -		 global_node_page_state(NR_UNEVICTABLE);
>> -
>> -	return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru);
>> -}
>> -
>>   /**
>>    * oom_badness - heuristic function to determine which candidate task to kill
>>    * @p: task struct of which task we should calculate
>> @@ -443,8 +424,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p)
>>   		mem_cgroup_print_oom_info(oc->memcg, p);
>>   	else {
>>   		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
>> -		if (is_dump_unreclaim_slabs())
>> -			dump_unreclaimable_slab();
>> +		dump_slab_cache();
>>   	}
>>   	if (sysctl_oom_dump_tasks)
>>   		dump_tasks(oc->memcg, oc->nodemask);
>> diff --git a/mm/slab.h b/mm/slab.h
>> index 6a86025..818b569 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -507,9 +507,9 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
>>   int memcg_slab_show(struct seq_file *m, void *p);
>>   
>>   #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
>> -void dump_unreclaimable_slab(void);
>> +void dump_slab_cache(void);
>>   #else
>> -static inline void dump_unreclaimable_slab(void)
>> +static inline void dump_slab_cache(void)
>>   {
>>   }
>>   #endif
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index 1b14fe0..e5bfa07 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1311,7 +1311,18 @@ static int slab_show(struct seq_file *m, void *p)
>>   	return 0;
>>   }
>>   
>> -void dump_unreclaimable_slab(void)
>> +static bool inline is_dump_slabs(struct kmem_cache *s, struct slabinfo *sinfo)
>> +{
>> +	unsigned long total = 0, reserved = 0, highmem = 0;
>> +	unsigned long slab_size = sinfo->num_objs * s->size;
>> +
>> +	calc_mem_size(&total, &reserved, &highmem);
>> +
>> +	/* Check if single slab > 10% of total memory size */
>> +	return (slab_size > (total * PAGE_SIZE / 10));
>> +}
>> +
>> +void dump_slab_cache(void)
>>   {
>>   	struct kmem_cache *s, *s2;
>>   	struct slabinfo sinfo;
>> @@ -1324,20 +1335,20 @@ void dump_unreclaimable_slab(void)
>>   	 * without acquiring the mutex.
>>   	 */
>>   	if (!mutex_trylock(&slab_mutex)) {
>> -		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
>> +		pr_warn("excessive slab cache but cannot dump stats\n");
>>   		return;
>>   	}
>>   
>> -	pr_info("Unreclaimable slab info:\n");
>> +	pr_info("The list of excessive single slab cache:\n");
>>   	pr_info("Name                      Used          Total\n");
>>   
>>   	list_for_each_entry_safe(s, s2, &slab_caches, list) {
>> -		if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
>> +		if (!is_root_cache(s))
>>   			continue;
>>   
>>   		get_slabinfo(s, &sinfo);
>>   
>> -		if (sinfo.num_objs > 0)
>> +		if (is_dump_slabs(s, &sinfo))
>>   			pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
>>   				(sinfo.active_objs * s->size) / 1024,
>>   				(sinfo.num_objs * s->size) / 1024);
>> -- 
>> 1.8.3.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ