Message-ID: <dc654dc5-e00a-9599-2238-40366942cc9c@loongson.cn>
Date: Fri, 19 Sep 2025 09:37:37 +0800
From: Bibo Mao <maobibo@...ngson.cn>
To: Huacai Chen <chenhuacai@...nel.org>
Cc: WANG Xuerui <kernel@...0n.name>, kvm@...r.kernel.org,
loongarch@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] LoongArch: KVM: Fix VM migration failure with PTW enabled
On 2025/9/18 6:54 PM, Huacai Chen wrote:
> On Wed, Sep 17, 2025 at 9:11 AM Bibo Mao <maobibo@...ngson.cn> wrote:
>>
>>
>>
>> On 2025/9/16 10:21 PM, Huacai Chen wrote:
>>> On Mon, Sep 15, 2025 at 9:22 AM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2025/9/14 9:57 AM, Huacai Chen wrote:
>>>>> Hi, Bibo,
>>>>>
>>>>> On Wed, Sep 10, 2025 at 3:14 PM Bibo Mao <maobibo@...ngson.cn> wrote:
>>>>>>
>>>>>> On a PTW-disabled system, the Dirty bit is the HW bit for page writing;
>>>>>> on a PTW-enabled system, the Write bit is the HW bit for page writing.
>>>>>> Previously the Write bit was treated as a SW bit to record the page
>>>>>> writable attribute for fast page fault handling in the secondary MMU,
>>>>>> but on a PTW-enabled machine this bit is already used by HW.
>>>>>>
>>>>>> Define KVM_PAGE_SOFT_WRITE as the SW bit _PAGE_MODIFIED, so that it works
>>>>>> on both PTW-disabled and PTW-enabled machines. For the HW write
>>>>>> permission, the Dirty and Write bits are set or cleared together.
>>>>>>
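Condensed, the end state this patch aims for looks roughly like the following
(a sketch only: _PAGE_* and __WRITEABLE come from pgtable-bits.h, __WRITEABLE
covers both the Write and Dirty bits, and the review below renames
KVM_PAGE_SOFT_WRITE/kvm_pte_soft_write() to KVM_PAGE_WRITEABLE/kvm_pte_writeable()):

/* SW-only bit on both PTW-disabled and PTW-enabled machines */
#define KVM_PAGE_SOFT_WRITE	_PAGE_MODIFIED

/* "may be made writable" -- checks only the SW bit */
static inline int kvm_pte_soft_write(kvm_pte_t pte)
{
	return pte & KVM_PAGE_SOFT_WRITE;
}

/* "has been written" -- the HW Write and Dirty bits are set and cleared
 * together, so __WRITEABLE is tested and set as one unit */
static inline int kvm_pte_dirty(kvm_pte_t pte)
{
	return pte & __WRITEABLE;
}

static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
{
	return pte | __WRITEABLE;
}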
>>>>>> Signed-off-by: Bibo Mao <maobibo@...ngson.cn>
>>>>>> ---
>>>>>> arch/loongarch/include/asm/kvm_mmu.h | 20 ++++++++++++++++----
>>>>>> arch/loongarch/kvm/mmu.c | 8 ++++----
>>>>>> 2 files changed, 20 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/loongarch/include/asm/kvm_mmu.h b/arch/loongarch/include/asm/kvm_mmu.h
>>>>>> index 099bafc6f797..efcd593c42b1 100644
>>>>>> --- a/arch/loongarch/include/asm/kvm_mmu.h
>>>>>> +++ b/arch/loongarch/include/asm/kvm_mmu.h
>>>>>> @@ -16,6 +16,13 @@
>>>>>> */
>>>>>> #define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1)
>>>>>>
>>>>>> +/*
>>>>>> + * _PAGE_MODIFIED is a SW pte bit; the host kernel uses it to record
>>>>>> + * that a page has ever been written, while the secondary MMU uses it
>>>>>> + * to record that a page is writable, for fast-path fault handling.
>>>>>> + */
>>>>>> +#define KVM_PAGE_SOFT_WRITE _PAGE_MODIFIED
>>>>> KVM_PAGE_WRITEABLE is more suitable.
>>>> Both are OK for me.
>>>>>
>>>>>> +
>>>>>> #define _KVM_FLUSH_PGTABLE 0x1
>>>>>> #define _KVM_HAS_PGMASK 0x2
>>>>>> #define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
>>>>>> @@ -52,11 +59,16 @@ static inline void kvm_set_pte(kvm_pte_t *ptep, kvm_pte_t val)
>>>>>> WRITE_ONCE(*ptep, val);
>>>>>> }
>>>>>>
>>>>>> -static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; }
>>>>>> -static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; }
>>>>>> +static inline int kvm_pte_soft_write(kvm_pte_t pte) { return pte & KVM_PAGE_SOFT_WRITE; }
>>>>> Likewise, kvm_pte_mkwriteable() is more suitable.
>>>> kvm_pte_writable() here? And kvm_pte_mkwriteable() for the sentence
>>>> below.
>>>>
>>>> If so, that is fine; both are OK for me.
>>> Yes.
>>>
>>>>>
>>>>>> +static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & __WRITEABLE; }
>>>>> _PAGE_DIRTY and _PAGE_WRITE are always set/cleared at the same time,
>>>>> so the old version still works.
>>>> Although it is workable, I still want to remove the single-bit
>>>> _PAGE_DIRTY check here.
>>> I want to check a single bit because "kvm_pte_write() returns
>>> _PAGE_WRITE and kvm_pte_dirty() returns _PAGE_DIRTY" looks more
>>> natural.
>> kvm_pte_write() is not needed any more and is removed here. There is only
>> kvm_pte_writable() to check the software writable bit, and kvm_pte_dirty()
>> to check the HW write bit.
>>
>> There is no reason to check a single bit such as _PAGE_WRITE or _PAGE_DIRTY,
>> since these bits have different meanings on machines with and without HW PTW.
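The difference can be modelled in a few lines of userspace C. This is a sketch
only: the bit values below are invented for illustration (the real definitions
are in arch/loongarch/include/asm/pgtable-bits.h), and the kvm_pte_mkdirty()
call at the end of the fast-path branch is assumed from context, since that
part of kvm_map_page_fast() is not quoted above.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t kvm_pte_t;

/* Illustrative bit positions only -- not the real LoongArch layout */
#define _PAGE_DIRTY		(1ULL << 1)	/* HW write bit when PTW is disabled */
#define _PAGE_WRITE		(1ULL << 8)	/* HW write bit when PTW is enabled */
#define _PAGE_MODIFIED		(1ULL << 10)	/* SW-only bit in both cases */
#define __WRITEABLE		(_PAGE_DIRTY | _PAGE_WRITE)
#define KVM_PAGE_WRITEABLE	_PAGE_MODIFIED

static int kvm_pte_writeable(kvm_pte_t pte) { return !!(pte & KVM_PAGE_WRITEABLE); }
static int kvm_pte_dirty(kvm_pte_t pte)     { return !!(pte & __WRITEABLE); }
static kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte) { return pte | __WRITEABLE; }

/* Write-fault fast path, condensed from kvm_map_page_fast() */
static int fast_write_fault(kvm_pte_t *ptep)
{
	kvm_pte_t new = *ptep;

	if (!kvm_pte_dirty(new)) {
		if (!kvm_pte_writeable(new))
			return -1;		/* -EFAULT: take the slow path */
		new = kvm_pte_mkdirty(new);	/* set Write and Dirty together */
	}
	*ptep = new;
	return 0;
}

int main(void)
{
	/* Mapped clean but writable per the memslot: only the SW bit is set */
	kvm_pte_t pte = KVM_PAGE_WRITEABLE;

	printf("fast path ret %d, dirty now %d\n",
	       fast_write_fault(&pte), kvm_pte_dirty(pte));
	return 0;
}

Whether or not PTW is enabled, the SW check stays on _PAGE_MODIFIED and the HW
write grant always touches Write and Dirty as a pair, which is the point of the
patch.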
> Applied together with other patches; you can test it:
> https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-next
Yes, migration passes with this branch on PTW-enabled machines such as the 3C6000.
Regards
Bibo Mao
>
> Huacai
>
>>
>> Regards
>> Bibo Mao
>>>
>>> You may argue that kvm_pte_mkdirty() sets both _PAGE_WRITE and
>>> _PAGE_DIRTY, so kvm_pte_dirty() should also check both. But I think
>>> kvm_pte_mkdirty() in this patch is just a "reasonable optimization",
>>> because strictly speaking we need both kvm_pte_mkdirty() and
>>> kvm_pte_mkwrite(), and should call the pair when needed.
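For reference, the strict variant described above would look something like the
sketch below (not part of either patch; kvm_pte_mkwrite() follows the name used
in the paragraph above):

/* Strict variant: one helper per bit, write-fault paths call the pair */
static inline kvm_pte_t kvm_pte_mkwrite(kvm_pte_t pte)
{
	return pte | _PAGE_WRITE;
}

static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
{
	return pte | _PAGE_DIRTY;
}

/* e.g. on a write fault: new = kvm_pte_mkwrite(kvm_pte_mkdirty(new));
 * The applied patch instead folds the pair into a single kvm_pte_mkdirty()
 * returning pte | __WRITEABLE, since on the secondary MMU the two bits are
 * always set and cleared together. */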
>>>
>>> Huacai
>>>
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>>
>>>>>> static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; }
>>>>>> static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; }
>>>>>>
>>>>>> +static inline kvm_pte_t kvm_pte_mksoft_write(kvm_pte_t pte)
>>>>>> +{
>>>>>> + return pte | KVM_PAGE_SOFT_WRITE;
>>>>>> +}
>>>>>> +
>>>>>> static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
>>>>>> {
>>>>>> return pte | _PAGE_ACCESSED;
>>>>>> @@ -69,12 +81,12 @@ static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t pte)
>>>>>>
>>>>>> static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
>>>>>> {
>>>>>> - return pte | _PAGE_DIRTY;
>>>>>> + return pte | __WRITEABLE;
>>>>>> }
>>>>>>
>>>>>> static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
>>>>>> {
>>>>>> - return pte & ~_PAGE_DIRTY;
>>>>>> + return pte & ~__WRITEABLE;
>>>>>> }
>>>>>>
>>>>>> static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
>>>>>> diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
>>>>>> index ed956c5cf2cc..68749069290f 100644
>>>>>> --- a/arch/loongarch/kvm/mmu.c
>>>>>> +++ b/arch/loongarch/kvm/mmu.c
>>>>>> @@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ
>>>>>> /* Track access to pages marked old */
>>>>>> new = kvm_pte_mkyoung(*ptep);
>>>>>> if (write && !kvm_pte_dirty(new)) {
>>>>>> - if (!kvm_pte_write(new)) {
>>>>>> + if (!kvm_pte_soft_write(new)) {
>>>>>> ret = -EFAULT;
>>>>>> goto out;
>>>>>> }
>>>>>> @@ -856,9 +856,9 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>>>> prot_bits |= _CACHE_SUC;
>>>>>>
>>>>>> if (writeable) {
>>>>>> - prot_bits |= _PAGE_WRITE;
>>>>>> + prot_bits = kvm_pte_mksoft_write(prot_bits);
>>>>>> if (write)
>>>>>> - prot_bits |= __WRITEABLE;
>>>>>> + prot_bits = kvm_pte_mkdirty(prot_bits);
>>>>>> }
>>>>>>
>>>>>> /* Disable dirty logging on HugePages */
>>>>>> @@ -904,7 +904,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>>>> kvm_release_faultin_page(kvm, page, false, writeable);
>>>>>> spin_unlock(&kvm->mmu_lock);
>>>>>>
>>>>>> - if (prot_bits & _PAGE_DIRTY)
>>>>>> + if (kvm_pte_dirty(prot_bits))
>>>>>> mark_page_dirty_in_slot(kvm, memslot, gfn);
>>>>>>
>>>>>> out:
>>>>> To save time, I just changed the whole patch like this; you can confirm
>>>>> whether it works:
>>>>>
>>>>> diff --git a/arch/loongarch/include/asm/kvm_mmu.h b/arch/loongarch/include/asm/kvm_mmu.h
>>>>> index 099bafc6f797..882f60c72b46 100644
>>>>> --- a/arch/loongarch/include/asm/kvm_mmu.h
>>>>> +++ b/arch/loongarch/include/asm/kvm_mmu.h
>>>>> @@ -16,6 +16,13 @@
>>>>> */
>>>>> #define KVM_MMU_CACHE_MIN_PAGES (CONFIG_PGTABLE_LEVELS - 1)
>>>>>
>>>>> +/*
>>>>> + * _PAGE_MODIFIED is a SW pte bit; the host kernel uses it to record
>>>>> + * that a page has ever been written, while the secondary MMU uses it
>>>>> + * to record that a page is writable, for fast-path fault handling.
>>>>> + */
>>>>> +#define KVM_PAGE_WRITEABLE _PAGE_MODIFIED
>>>>> +
>>>>> #define _KVM_FLUSH_PGTABLE 0x1
>>>>> #define _KVM_HAS_PGMASK 0x2
>>>>> #define kvm_pfn_pte(pfn, prot) (((pfn) << PFN_PTE_SHIFT) | pgprot_val(prot))
>>>>> @@ -56,6 +63,7 @@ static inline int kvm_pte_write(kvm_pte_t pte) { return pte & _PAGE_WRITE; }
>>>>> static inline int kvm_pte_dirty(kvm_pte_t pte) { return pte & _PAGE_DIRTY; }
>>>>> static inline int kvm_pte_young(kvm_pte_t pte) { return pte & _PAGE_ACCESSED; }
>>>>> static inline int kvm_pte_huge(kvm_pte_t pte) { return pte & _PAGE_HUGE; }
>>>>> +static inline int kvm_pte_writeable(kvm_pte_t pte) { return pte & KVM_PAGE_WRITEABLE; }
>>>>>
>>>>> static inline kvm_pte_t kvm_pte_mkyoung(kvm_pte_t pte)
>>>>> {
>>>>> @@ -69,12 +77,12 @@ static inline kvm_pte_t kvm_pte_mkold(kvm_pte_t pte)
>>>>>
>>>>> static inline kvm_pte_t kvm_pte_mkdirty(kvm_pte_t pte)
>>>>> {
>>>>> - return pte | _PAGE_DIRTY;
>>>>> + return pte | __WRITEABLE;
>>>>> }
>>>>>
>>>>> static inline kvm_pte_t kvm_pte_mkclean(kvm_pte_t pte)
>>>>> {
>>>>> - return pte & ~_PAGE_DIRTY;
>>>>> + return pte & ~__WRITEABLE;
>>>>> }
>>>>>
>>>>> static inline kvm_pte_t kvm_pte_mkhuge(kvm_pte_t pte)
>>>>> @@ -87,6 +95,11 @@ static inline kvm_pte_t kvm_pte_mksmall(kvm_pte_t pte)
>>>>> return pte & ~_PAGE_HUGE;
>>>>> }
>>>>>
>>>>> +static inline kvm_pte_t kvm_pte_mkwriteable(kvm_pte_t pte)
>>>>> +{
>>>>> + return pte | KVM_PAGE_WRITEABLE;
>>>>> +}
>>>>> +
>>>>> static inline int kvm_need_flush(kvm_ptw_ctx *ctx)
>>>>> {
>>>>> return ctx->flag & _KVM_FLUSH_PGTABLE;
>>>>> diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
>>>>> index ed956c5cf2cc..7c8143e79c12 100644
>>>>> --- a/arch/loongarch/kvm/mmu.c
>>>>> +++ b/arch/loongarch/kvm/mmu.c
>>>>> @@ -569,7 +569,7 @@ static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool writ
>>>>> /* Track access to pages marked old */
>>>>> new = kvm_pte_mkyoung(*ptep);
>>>>> if (write && !kvm_pte_dirty(new)) {
>>>>> - if (!kvm_pte_write(new)) {
>>>>> + if (!kvm_pte_writeable(new)) {
>>>>> ret = -EFAULT;
>>>>> goto out;
>>>>> }
>>>>> @@ -856,9 +856,9 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>>> prot_bits |= _CACHE_SUC;
>>>>>
>>>>> if (writeable) {
>>>>> - prot_bits |= _PAGE_WRITE;
>>>>> + prot_bits = kvm_pte_mkwriteable(prot_bits);
>>>>> if (write)
>>>>> - prot_bits |= __WRITEABLE;
>>>>> + prot_bits = kvm_pte_mkdirty(prot_bits);
>>>>> }
>>>>>
>>>>> /* Disable dirty logging on HugePages */
>>>>> @@ -904,7 +904,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
>>>>> kvm_release_faultin_page(kvm, page, false, writeable);
>>>>> spin_unlock(&kvm->mmu_lock);
>>>>>
>>>>> - if (prot_bits & _PAGE_DIRTY)
>>>>> + if (kvm_pte_dirty(prot_bits))
>>>>> mark_page_dirty_in_slot(kvm, memslot, gfn);
>>>>>
>>>>> out:
>>>>>
>>>>> Huacai
>>>>>
>>>>>>
>>>>>> base-commit: 9dd1835ecda5b96ac88c166f4a87386f3e727bd9
>>>>>> --
>>>>>> 2.39.3
>>>>>>
>>>>>>
>>>>
>>
>>