linux-kernel - Re: mm/memory.c: Add update local tlb for smp race

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <c5cec2be-c6ae-3c1f-d3b5-2e5e68e2dc2b@loongson.cn>
Date:   Mon, 18 May 2020 11:33:25 +0800
From:   maobibo <maobibo@...ngson.cn>
To:     David Hildenbrand <david@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        linux-mips@...r.kernel.org
Subject: Re: mm/memory.c: Add update local tlb for smp race



On 05/16/2020 05:34 PM, maobibo wrote:
> 
> 
> On 05/15/2020 09:50 PM, David Hildenbrand wrote:
>> On 14.05.20 08:50, Bibo Mao wrote:
>>> If there are two threads hitting page fault at the address, one
>>> thread updates pte entry and local tlb, the other thread can update
>>> local tlb also, rather than give up and let page fault happening
>>> again.
>>
>> Let me suggest
>>
>> "mm/memory: optimize concurrent page faults at same address
>>
>> If two threads concurrently fault at the same address, the thread that
>> won the race updates the PTE and its local TLB. For now, the other
>> thread gives up, simply does nothing, and continues.
>>
>> It could happen that this second thread triggers another fault, whereby
>> it only updates its local TLB while handling the fault. Instead of
>> triggering another fault, let's directly update the local TLB of the
>> second thread.
>> "
>>
>> If I got the intention of this patch correctly.
>>
>> Are there any performance numbers to support this patch?
>>
>> (I can't say too much about the correctness and/or usefulness of this patch)
> 
> yes, that is the situation. On MIPS platform software can update TLB,
> so update_mmu_cache is used here. This does not happen frequently, and with the three series patches in later mail. I test lat_pagefault in lmbench, here is is result:
> 
> with these three series patches, 
> # ./lat_pagefault  -N 10  /tmp/1 
> Pagefaults on /tmp/1: 1.4973 microseconds
> # ./lat_pagefault -P 4 -N 10  /tmp/1 
> Pagefaults on /tmp/1: 1.5716 microseconds
> 
> original version, without these three series patch
> #  ./lat_pagefault  -N 10  /tmp/1 
> Pagefaults on /tmp/1: 1.6489 microseconds
> # ./lat_pagefault -P 4 -N 10  /tmp/1
> Pagefaults on /tmp/1: 1.7214 microseconds
> 

I tested the three patches one by one, there is no obvious improvement with
lat_pagefault case, I guess that it happens seldom where multiple threads access
the same page at the same time. 

The improvement is because of another modification where pte_mkyoung is added
to get readable privilege on MIPS system.

regards
bibo, mao

>>>
>>> 	modified:   mm/memory.c
>>
>> This does not belong into a patch description.
> 
> well, I will modify the patch description.
> 
> regards
> bibo,mao
> 
> 
>>
>>
>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>> ---
>>>  mm/memory.c | 30 ++++++++++++++++++++++--------
>>>  1 file changed, 22 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index f703fe8..3a741ce 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -2436,11 +2436,10 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
>>>  		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>>>  			/*
>>>  			 * Other thread has already handled the fault
>>> -			 * and we don't need to do anything. If it's
>>> -			 * not the case, the fault will be triggered
>>> -			 * again on the same address.
>>> +			 * and update local tlb only
>>>  			 */
>>>  			ret = false;
>>> +			update_mmu_cache(vma, addr, vmf->pte);
>>>  			goto pte_unlock;
>>>  		}
>>>  
>>> @@ -2463,8 +2462,9 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
>>>  		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
>>>  		locked = true;
>>>  		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>>> -			/* The PTE changed under us. Retry page fault. */
>>> +			/* The PTE changed under us. update local tlb */
>>>  			ret = false;
>>> +			update_mmu_cache(vma, addr, vmf->pte);
>>>  			goto pte_unlock;
>>>  		}
>>>  
>>> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>>>  		}
>>>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>>  		entry = mk_pte(new_page, vma->vm_page_prot);
>>> +		entry = pte_mkyoung(entry);
>>>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>>  		/*
>>>  		 * Clear the pte entry and flush it first, before updating the
>>> @@ -2752,6 +2753,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>>>  		new_page = old_page;
>>>  		page_copied = 1;
>>>  	} else {
>>> +		update_mmu_cache(vma, vmf->address, vmf->pte);
>>>  		mem_cgroup_cancel_charge(new_page, memcg, false);
>>>  	}
>>>  
>>> @@ -2812,6 +2814,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
>>>  	 * pte_offset_map_lock.
>>>  	 */
>>>  	if (!pte_same(*vmf->pte, vmf->orig_pte)) {
>>> +		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
>>>  		pte_unmap_unlock(vmf->pte, vmf->ptl);
>>>  		return VM_FAULT_NOPAGE;
>>>  	}
>>> @@ -2936,6 +2939,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>>>  			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
>>>  					vmf->address, &vmf->ptl);
>>>  			if (!pte_same(*vmf->pte, vmf->orig_pte)) {
>>> +				update_mmu_cache(vma, vmf->address, vmf->pte);
>>>  				unlock_page(vmf->page);
>>>  				pte_unmap_unlock(vmf->pte, vmf->ptl);
>>>  				put_page(vmf->page);
>>> @@ -3341,8 +3345,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>>>  						vma->vm_page_prot));
>>>  		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
>>>  				vmf->address, &vmf->ptl);
>>> -		if (!pte_none(*vmf->pte))
>>> +		if (!pte_none(*vmf->pte)) {
>>> +			update_mmu_cache(vma, vmf->address, vmf->pte);
>>>  			goto unlock;
>>> +		}
>>>  		ret = check_stable_address_space(vma->vm_mm);
>>>  		if (ret)
>>>  			goto unlock;
>>> @@ -3373,13 +3379,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>>>  	__SetPageUptodate(page);
>>>  
>>>  	entry = mk_pte(page, vma->vm_page_prot);
>>> +	entry = pte_mkyoung(entry);
>>>  	if (vma->vm_flags & VM_WRITE)
>>>  		entry = pte_mkwrite(pte_mkdirty(entry));
>>>  
>>>  	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
>>>  			&vmf->ptl);
>>> -	if (!pte_none(*vmf->pte))
>>> +	if (!pte_none(*vmf->pte)) {
>>> +		update_mmu_cache(vma, vmf->address, vmf->pte);
>>>  		goto release;
>>> +	}
>>>  
>>>  	ret = check_stable_address_space(vma->vm_mm);
>>>  	if (ret)
>>> @@ -3646,11 +3655,14 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>>>  	}
>>>  
>>>  	/* Re-check under ptl */
>>> -	if (unlikely(!pte_none(*vmf->pte)))
>>> +	if (unlikely(!pte_none(*vmf->pte))) {
>>> +		update_mmu_cache(vma, vmf->address, vmf->pte);
>>>  		return VM_FAULT_NOPAGE;
>>> +	}
>>>  
>>>  	flush_icache_page(vma, page);
>>>  	entry = mk_pte(page, vma->vm_page_prot);
>>> +	entry = pte_mkyoung(entry);
>>>  	if (write)
>>>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>>  	/* copy-on-write page */
>>> @@ -4224,8 +4236,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>>>  	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
>>>  	spin_lock(vmf->ptl);
>>>  	entry = vmf->orig_pte;
>>> -	if (unlikely(!pte_same(*vmf->pte, entry)))
>>> +	if (unlikely(!pte_same(*vmf->pte, entry))) {
>>> +		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
>>>  		goto unlock;
>>> +	}
>>>  	if (vmf->flags & FAULT_FLAG_WRITE) {
>>>  		if (!pte_write(entry))
>>>  			return do_wp_page(vmf);
>>>
>>
>>