[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ff12ec30-b0df-aedd-a713-4fb77a4e092a@loongson.cn>
Date: Wed, 17 Sep 2025 09:08:49 +0800
From: Bibo Mao <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: WANG Xuerui <kernel@...0n.name>, kvm@...r.kernel.org,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] LoongArch: KVM: Fix VM migration failure with PTW enabled
On 2025/9/16 下午10:21, Huacai Chen wrote:
> On Mon, Sep 15, 2025 at 9:22 AM Bibo Mao <maobibo@...ngson.cn> wrote:
>>
>>
>>
>> On 2025/9/14 上午9:57, Huacai Chen wrote:
>>> Hi, Bibo,
>>>
>>> On Wed, Sep 10, 2025 at 3:14 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>
>>>> On systems with PTW disabled, the Dirty bit is the HW bit for page writing;
>>>> however, on systems with PTW enabled, the Write bit is the HW bit for page
>>>> writing. Previously the Write bit was treated as a SW bit to record the page
>>>> writable attribute for fast page fault handling in the secondary MMU, but on
>>>> PTW enabled machines this bit is already used by HW.
>>>>
>>>> Here define KVM_PAGE_SOFT_WRITE with the SW bit _PAGE_MODIFIED, so that it
>>>> can work on both PTW disabled and PTW enabled machines. And for the HW write
>>>> bit, both the Dirty and Write bits are set or cleared together.
>>>>
>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>>> ---
>>>> arch/loongarch/include/asm/kvm_mmu.h | 20 ++++++++++++++++----
>>>> arch/loongarch/kvm/mmu.c | 8 ++++----
>>>> 2 files changed, 20 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/arch/loongarch/include/asm/kvm_mmu.h b/arch/loongarch/include/asm/kvm_mmu.h
>>>> index 099bafc6f797..efcd593c42b1 100644
>>>> --- a/arch/loongarch/include/asm/kvm_mmu.h
>>>> +++ b/arch/loongarch/include/asm/kvm_mmu.h
>>>> @@ -16,6 +16,13 @@
>>>> */
>>>> #define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1)
>>>>
>>>> +/*
>>>> + * _PAGE_MODIFIED is SW pte bit, it records page ever written on host
>>>> + * kernel, on secondary MMU it records page writable in order to fast
>>>> + * path handling
>>>> + */
>>>> +#define KVM_PAGE_SOFT_WRITE _PAGE_MODIFIED
>>> KVM_PAGE_WRITEABLE is more suitable.
>> both are ok for me.
>>>
>>>> +
>>>> #define _KVM_FLUSH_PGTABLE 0x1
>>>> #define _KVM_HAS_PGMASK 0x2
>>>> #define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
>>>> @@ -52,11 +59,16 @@ static inline void kvm_set_pte(kvm_pte_t *ptep, kvm_pte_t val)
>>>> WRITE_ONCE(*ptep, val);
>>>> }
>>>>
>>>> -static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; }
>>>> -static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; }
>>>> +static inline int kvm_pte_soft_write(kvm_pte_t pte) { return pte & KVM_PAGE_SOFT_WRITE; }
>>> The same, kvm_pte_mkwriteable() is more suitable.
>> kvm_pte_writable() here? and kvm_pte_mkwriteable() for the following
>> sentence.
>>
>> If so, that is ok, both are ok for me.
> Yes.
>
>>>
>>>> +static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & __WRITEABLE; }
>>> _PAGE_DIRTY and _PAGE_WRITE are always set/cleared at the same time,
>>> so the old version still works.
>> Although it is workable, I still want to remove single bit _PAGE_DIRTY
>> checking here.
> I want to check a single bit because "kvm_pte_write() return
> _PAGE_WRITE and kvm_pte_dirty() return _PAGE_DIRTY" looks more
> natural.
kvm_pte_write() is not needed any more and is removed here. There is only
kvm_pte_writable() to check the software writable bit, and kvm_pte_dirty()
to check the HW write bit.
There is no reason to check a single bit with _PAGE_WRITE or _PAGE_DIRTY,
since these bits have different meanings on machines with/without HW PTW.
Regards
Bibo Mao
>
> You may argue that kvm_pte_mkdirty() set both _PAGE_WRITE and
> _PAGE_DIRTY so kvm_pte_dirty() should also return both. But I think
> kvm_pte_mkdirty() in this patch is just a "reasonable optimization".
> Because strictly speaking, we need both kvm_pte_mkdirty() and
> kvm_pte_mkwrite() and call the pair when needed.
>
> Huacai
>
>>
>> Regards
>> Bibo Mao
>>>
>>>> static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; }
>>>> static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; }
>>>>
>>>> +static inline kvm_pte_t kvm_pte_mksoft_write(kvm_pte_t pte)
>>>> +{
>>>> + return pte | KVM_PAGE_SOFT_WRITE;
>>>> +}
>>>> +
>>>> static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
>>>> {
>>>> return pte | _PAGE_ACCESSED;
>>>> @@ -69,12 +81,12 @@ static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t pte)
>>>>
>>>> static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
>>>> {
>>>> - return pte | _PAGE_DIRTY;
>>>> + return pte | __WRITEABLE;
>>>> }
>>>>
>>>> static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
>>>> {
>>>> - return pte & ~_PAGE_DIRTY;
>>>> + return pte & ~__WRITEABLE;
>>>> }
>>>>
>>>> static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
>>>> diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
>>>> index ed956c5cf2cc..68749069290f 100644
>>>> --- a/arch/loongarch/kvm/mmu.c
>>>> +++ b/arch/loongarch/kvm/mmu.c
>>>> @@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ
>>>> /* Track access to pages marked old */
>>>> new = kvm_pte_mkyoung(*ptep);
>>>> if (write && !kvm_pte_dirty(new)) {
>>>> - if (!kvm_pte_write(new)) {
>>>> + if (!kvm_pte_soft_write(new)) {
>>>> ret = -EFAULT;
>>>> goto out;
>>>> }
>>>> @@ -856,9 +856,9 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>> prot_bits |= _CACHE_SUC;
>>>>
>>>> if (writeable) {
>>>> - prot_bits |= _PAGE_WRITE;
>>>> + prot_bits = kvm_pte_mksoft_write(prot_bits);
>>>> if (write)
>>>> - prot_bits |= __WRITEABLE;
>>>> + prot_bits = kvm_pte_mkdirty(prot_bits);
>>>> }
>>>>
>>>> /* Disable dirty logging on HugePages */
>>>> @@ -904,7 +904,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>> kvm_release_faultin_page(kvm, page, false, writeable);
>>>> spin_unlock(&kvm->mmu_lock);
>>>>
>>>> - if (prot_bits & _PAGE_DIRTY)
>>>> + if (kvm_pte_dirty(prot_bits))
>>>> mark_page_dirty_in_slot(kvm, memslot, gfn);
>>>>
>>>> out:
>>> To save time, I just changed the whole patch like this, you can confirm
>>> whether it works:
>>>
>>> diff --git a/arch/loongarch/include/asm/kvm_mmu.h
>>> b/arch/loongarch/include/asm/kvm_mmu.h
>>> index 099bafc6f797..882f60c72b46 100644
>>> --- a/arch/loongarch/include/asm/kvm_mmu.h
>>> +++ b/arch/loongarch/include/asm/kvm_mmu.h
>>> @@ -16,6 +16,13 @@
>>> */
>>> #define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1)
>>>
>>> +/*
>>> + * _PAGE_MODIFIED is SW pte bit, it records page ever written on host
>>> + * kernel, on secondary MMU it records page writable in order to fast
>>> + * path handling
>>> + */
>>> +#define KVM_PAGE_WRITEABLE _PAGE_MODIFIED
>>> +
>>> #define _KVM_FLUSH_PGTABLE 0x1
>>> #define _KVM_HAS_PGMASK 0x2
>>> #define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) |
>>> pgprot_val(prot))
>>> @@ -56,6 +63,7 @@ static inline int kvm_pte_write(kvm_pte_t pte) {
>>> return pte & _PAGE_WRITE; }
>>> static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte &
>>> _PAGE_DIRTY; }
>>> static inline int kvm_pte_young(kvm_pte_t pte) { return pte &
>>> _PAGE_ACCESSED; }
>>> static inline int kvm_pte_huge(kvm_pte_t pte) { return pte &
>>> _PAGE_HUGE; }
>>> +static inline int kvm_pte_writeable(kvm_pte_t pte) { return pte &
>>> KVM_PAGE_WRITEABLE; }
>>>
>>> static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
>>> {
>>> @@ -69,12 +77,12 @@ static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t
>>> pte)
>>>
>>> static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
>>> {
>>> - return pte | _PAGE_DIRTY;
>>> + return pte | __WRITEABLE;
>>> }
>>>
>>> static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
>>> {
>>> - return pte & ~_PAGE_DIRTY;
>>> + return pte & ~__WRITEABLE;
>>> }
>>>
>>> static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
>>> @@ -87,6 +95,11 @@ static inline kvm_pte_t kvm_pte_mksmall(kvm_pte_t
>>> pte)
>>> return pte & ~_PAGE_HUGE;
>>> }
>>>
>>> +static inline kvm_pte_t kvm_pte_mkwriteable(kvm_pte_t pte)
>>> +{
>>> + return pte | KVM_PAGE_WRITEABLE;
>>> +}
>>> +
>>> static inline int kvm_need_flush(kvm_ptw_ctx *ctx)
>>> {
>>> return ctx->flag & _KVM_FLUSH_PGTABLE;
>>> diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
>>> index ed956c5cf2cc..7c8143e79c12 100644
>>> --- a/arch/loongarch/kvm/mmu.c
>>> +++ b/arch/loongarch/kvm/mmu.c
>>> @@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_vcpu
>>> *vcpu, unsigned long gpa, bool writ
>>> /* Track access to pages marked old */
>>> new = kvm_pte_mkyoung(*ptep);
>>> if (write && !kvm_pte_dirty(new)) {
>>> - if (!kvm_pte_write(new)) {
>>> + if (!kvm_pte_writeable(new)) {
>>> ret = -EFAULT;
>>> goto out;
>>> }
>>> @@ -856,9 +856,9 @@ static int kvm_map_page(struct kvm_vcpu *vcpu,
>>> unsigned long gpa, bool write)
>>> prot_bits |= _CACHE_SUC;
>>>
>>> if (writeable) {
>>> - prot_bits |= _PAGE_WRITE;
>>> + prot_bits = kvm_pte_mkwriteable(prot_bits);
>>> if (write)
>>> - prot_bits |= __WRITEABLE;
>>> + prot_bits = kvm_pte_mkdirty(prot_bits);
>>> }
>>>
>>> /* Disable dirty logging on HugePages */
>>> @@ -904,7 +904,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu,
>>> unsigned long gpa, bool write)
>>> kvm_release_faultin_page(kvm, page, false, writeable);
>>> spin_unlock(&kvm->mmu_lock);
>>>
>>> - if (prot_bits & _PAGE_DIRTY)
>>> + if (kvm_pte_dirty(prot_bits))
>>> mark_page_dirty_in_slot(kvm, memslot, gfn);
>>>
>>> out:
>>>
>>> Huacai
>>>
>>>>
>>>> base-commit: 9dd1835ecda5b96ac88c166f4a87386f3e727bd9
>>>> --
>>>> 2.39.3
>>>>
>>>>
>>
Powered by blists - more mailing lists