Date:	Wed, 10 Aug 2016 13:52:50 +0800
From:	Yang Zhang <yang.zhang.wz@...il.com>
To:	Wincy Van <fanwenyi0529@...il.com>
Cc:	Radim Krčmář <rkrcmar@...hat.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"kvm@...r.kernel.org" <kvm@...r.kernel.org>,
	Jim Mattson <jmattson@...gle.com>,
	Paolo Bonzini <pbonzini@...hat.com>,
	Bandan Das <bsd@...hat.com>
Subject: Re: [PATCH 1/2] KVM: nVMX: fix msr bitmaps to prevent L2 from
 accessing L0 x2APIC

On 2016/8/9 18:19, Wincy Van wrote:
> On Tue, Aug 9, 2016 at 5:32 PM, Yang Zhang <yang.zhang.wz@...il.com> wrote:
>> On 2016/8/9 2:16, Radim Krčmář wrote:
>>>
>>> msr bitmap can be used to avoid a VM exit (interception) on guest MSR
>>> accesses.  In some configurations of VMX controls, the guest can even
>>> directly access host's x2APIC MSRs.  See SDM 29.5 VIRTUALIZING MSR-BASED
>>> APIC ACCESSES.
>>>
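(For context, the layout the SDM gives for this structure: the MSR bitmap is one
4-KByte page with separate read and write halves, each split into a "low"
(0x0-0x1fff) and "high" (0xc0000000-0xc0001fff) MSR range, and a clear bit means
the access does not cause a VM exit.  A rough sketch of the lookup using kernel
bitops follows; the helper name is illustrative and not part of the patch.)

/* Sketch only: how a 4-KByte MSR bitmap page is consulted.  Uses test_bit()
 * from <linux/bitops.h>.  The x2APIC MSRs 0x800-0x8ff fall in the low range.
 */
static bool msr_access_intercepted(unsigned long *bitmap, u32 msr, bool write)
{
	int base = write ? 2048 : 0;		/* write bitmaps start at byte 2048 */

	if (msr <= 0x1fff)			/* "low" MSRs */
		return test_bit(msr, bitmap + base / sizeof(long));
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)	/* "high" MSRs */
		return test_bit(msr & 0x1fff,
				bitmap + (base + 1024) / sizeof(long));
	return true;				/* anything else always exits */
}
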
>>> L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
>>> To do so, L1 would first trick KVM to disable all possible interceptions
>>> by enabling APICv features and then would turn those features off;
>>> nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would
>>> not intercept previously enabled MSRs even though they were not safe
>>> with the new configuration.
>>>
>>> Correctly re-enabling interceptions is not enough as a second bug would
>>> still allow L1+L2 to access host's MSRs: msr bitmap was shared for all
>>> VMCSs, so L1 could trigger a race to get the desired combination of msr
>>> bitmap and VMX controls.
>>>
>>> This fix allocates a msr bitmap for every L1 VCPU, allows only safe
>>> x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would
>>> have to intercept everything anyway.
>>>
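(A minimal sketch of the merge rule described above, reduced to the read bit of a
single low-range MSR; the helper name is made up for illustration, and the real
work is done by nested_vmx_disable_intercept_for_msr() in the hunks below.)

/* Start from "intercept everything" (the memset(..., 0xff, ...) below) and
 * punch a hole only where L1's own bitmap already lets L2 through and L0
 * considers the MSR safe (TPR, EOI, SELF_IPI, x2APIC reads).
 */
static void allow_if_l1_allows(unsigned long *msr_bitmap_l0,
			       unsigned long *msr_bitmap_l1, u32 msr)
{
	if (!test_bit(msr, msr_bitmap_l1))		/* L1 does not intercept it... */
		__clear_bit(msr, msr_bitmap_l0);	/* ...so L0 need not either */
}
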
>>> Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap")
>>> Reported-by: Jim Mattson <jmattson@...gle.com>
>>> Suggested-by: Wincy Van <fanwenyi0529@...il.com>
>>> Signed-off-by: Radim Krčmář <rkrcmar@...hat.com>
>>> ---
>>>  arch/x86/kvm/vmx.c | 107 ++++++++++++++++++++++-------------------------------
>>>  1 file changed, 44 insertions(+), 63 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index a45d8580f91e..c66ac2c70d22 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -435,6 +435,8 @@ struct nested_vmx {
>>>         bool pi_pending;
>>>         u16 posted_intr_nv;
>>>
>>> +       unsigned long *msr_bitmap;
>>> +
>>>         struct hrtimer preemption_timer;
>>>         bool preemption_timer_expired;
>>>
>>> @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
>>>  static unsigned long *vmx_msr_bitmap_longmode;
>>>  static unsigned long *vmx_msr_bitmap_legacy_x2apic;
>>>  static unsigned long *vmx_msr_bitmap_longmode_x2apic;
>>> -static unsigned long *vmx_msr_bitmap_nested;
>>>  static unsigned long *vmx_vmread_bitmap;
>>>  static unsigned long *vmx_vmwrite_bitmap;
>>>
>>> @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>>>         unsigned long *msr_bitmap;
>>>
>>>         if (is_guest_mode(vcpu))
>>> -               msr_bitmap = vmx_msr_bitmap_nested;
>>> +               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
>>>         else if (cpu_has_secondary_exec_ctrls() &&
>>>                  (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
>>>                   SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
>>> @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void)
>>>         if (!vmx_msr_bitmap_longmode_x2apic)
>>>                 goto out4;
>>>
>>> -       if (nested) {
>>> -               vmx_msr_bitmap_nested =
>>> -                       (unsigned long *)__get_free_page(GFP_KERNEL);
>>> -               if (!vmx_msr_bitmap_nested)
>>> -                       goto out5;
>>> -       }
>>> -
>>>         vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
>>>         if (!vmx_vmread_bitmap)
>>>                 goto out6;
>>> @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void)
>>>
>>>         memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
>>>         memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
>>> -       if (nested)
>>> -               memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
>>>
>>>         if (setup_vmcs_config(&vmcs_config) < 0) {
>>>                 r = -EIO;
>>> @@ -6529,9 +6521,6 @@ out8:
>>>  out7:
>>>         free_page((unsigned long)vmx_vmread_bitmap);
>>>  out6:
>>> -       if (nested)
>>> -               free_page((unsigned long)vmx_msr_bitmap_nested);
>>> -out5:
>>>         free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
>>>  out4:
>>>         free_page((unsigned long)vmx_msr_bitmap_longmode);
>>> @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void)
>>>         free_page((unsigned long)vmx_io_bitmap_a);
>>>         free_page((unsigned long)vmx_vmwrite_bitmap);
>>>         free_page((unsigned long)vmx_vmread_bitmap);
>>> -       if (nested)
>>> -               free_page((unsigned long)vmx_msr_bitmap_nested);
>>>
>>>         free_kvm_area();
>>>  }
>>> @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>>>                 return 1;
>>>         }
>>>
>>> +       if (cpu_has_vmx_msr_bitmap()) {
>>> +               vmx->nested.msr_bitmap =
>>> +                               (unsigned long *)__get_free_page(GFP_KERNEL);
>>> +               if (!vmx->nested.msr_bitmap)
>>> +                       goto out_msr_bitmap;
>>> +       }
>>> +
>>
>>
>> We export msr_bitmap to the guest even if it is not supported by hardware, so we
>> need to allocate msr_bitmap for L1 unconditionally.
>>
>>
>
> Nice to see you, Yang :-)

Nice to see you too.

>
> I think vmx->nested.msr_bitmap is filled from both L0's and L1's settings, and
> is what the hardware uses.
> L1's msr_bitmap is managed by L1 itself; L1 writes the MSR_BITMAP field into
> vmcs12->msr_bitmap, so L0 can pick up L1's settings to emulate MSR_BITMAP.
>
> Did I miss something?

You are correct!
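
To restate the same point in code (a simplified sketch that just mirrors
vmx_set_msr_bitmap() from the patch; the function name is illustrative): while L2
runs, the hardware only ever consults the per-vCPU merged page, and L1's own
bitmap is just guest memory that L0 reads through vmcs12->msr_bitmap.

/* Which bitmap the CPU consults (mirrors vmx_set_msr_bitmap() above; the
 * various L1-only bitmaps are collapsed into one case for brevity):
 */
static unsigned long *hw_msr_bitmap(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu))			/* running L2 */
		return to_vmx(vcpu)->nested.msr_bitmap;	/* merged L0+L1 page */
	return vmx_msr_bitmap_legacy;			/* one of the L1 bitmaps */
}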

>
> Thanks,
> Wincy
>
>>>         vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
>>>         if (!vmx->nested.cached_vmcs12)
>>> -               return -ENOMEM;
>>> +               goto out_cached_vmcs12;
>>>
>>>         if (enable_shadow_vmcs) {
>>>                 shadow_vmcs = alloc_vmcs();
>>> -               if (!shadow_vmcs) {
>>> -                       kfree(vmx->nested.cached_vmcs12);
>>> -                       return -ENOMEM;
>>> -               }
>>> +               if (!shadow_vmcs)
>>> +                       goto out_shadow_vmcs;
>>>                 /* mark vmcs as shadow */
>>>                 shadow_vmcs->revision_id |= (1u << 31);
>>>                 /* init shadow vmcs */
>>> @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>>>         skip_emulated_instruction(vcpu);
>>>         nested_vmx_succeed(vcpu);
>>>         return 1;
>>> +
>>> +out_shadow_vmcs:
>>> +       kfree(vmx->nested.cached_vmcs12);
>>> +
>>> +out_cached_vmcs12:
>>> +       free_page((unsigned long)vmx->nested.msr_bitmap);
>>> +
>>> +out_msr_bitmap:
>>> +       return -ENOMEM;
>>>  }
>>>
>>>  /*
>>> @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx)
>>>         vmx->nested.vmxon = false;
>>>         free_vpid(vmx->nested.vpid02);
>>>         nested_release_vmcs12(vmx);
>>> +       if (vmx->nested.msr_bitmap) {
>>> +               free_page((unsigned long)vmx->nested.msr_bitmap);
>>> +               vmx->nested.msr_bitmap = NULL;
>>> +       }
>>>         if (enable_shadow_vmcs)
>>>                 free_vmcs(vmx->nested.current_shadow_vmcs);
>>>         kfree(vmx->nested.cached_vmcs12);
>>> @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>>  {
>>>         int msr;
>>>         struct page *page;
>>> -       unsigned long *msr_bitmap;
>>> +       unsigned long *msr_bitmap_l1;
>>> +       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
>>>
>>> +       /* This shortcut is ok because we support only x2APIC MSRs so far. */
>>>         if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>>>                 return false;
>>>
>>> @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>>>                 WARN_ON(1);
>>>                 return false;
>>>         }
>>> -       msr_bitmap = (unsigned long *)kmap(page);
>>> -       if (!msr_bitmap) {
>>> +       msr_bitmap_l1 = (unsigned long *)kmap(page);
>>> +       if (!msr_bitmap_l1) {
>>>                 nested_release_page_clean(page);
>>>                 WARN_ON(1);
>>>                 return false;
>>>         }
>>>
>>> +       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
>>> +
>>>         if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
>>>                 if (nested_cpu_has_apic_reg_virt(vmcs12))
>>>                         for (msr = 0x800; msr <= 0x8ff; msr++)
>>>                                 nested_vmx_disable_intercept_for_msr(
>>> -                                       msr_bitmap,
>>> -                                       vmx_msr_bitmap_nested,
>>> +                                       msr_bitmap_l1, msr_bitmap_l0,
>>>                                         msr, MSR_TYPE_R);
>>> -               /* TPR is allowed */
>>> -               nested_vmx_disable_intercept_for_msr(msr_bitmap,
>>> -                               vmx_msr_bitmap_nested,
>>> +
>>> +               nested_vmx_disable_intercept_for_msr(
>>> +                               msr_bitmap_l1, msr_bitmap_l0,
>>>                                 APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>>>                                 MSR_TYPE_R | MSR_TYPE_W);
>>> +
>>>                 if (nested_cpu_has_vid(vmcs12)) {
>>> -                       /* EOI and self-IPI are allowed */
>>>                         nested_vmx_disable_intercept_for_msr(
>>> -                               msr_bitmap,
>>> -                               vmx_msr_bitmap_nested,
>>> +                               msr_bitmap_l1, msr_bitmap_l0,
>>>                                 APIC_BASE_MSR + (APIC_EOI >> 4),
>>>                                 MSR_TYPE_W);
>>>                         nested_vmx_disable_intercept_for_msr(
>>> -                               msr_bitmap,
>>> -                               vmx_msr_bitmap_nested,
>>> +                               msr_bitmap_l1, msr_bitmap_l0,
>>>                                 APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
>>>                                 MSR_TYPE_W);
>>>                 }
>>> -       } else {
>>> -               /*
>>> -                * Enable reading intercept of all the x2apic
>>> -                * MSRs. We should not rely on vmcs12 to do any
>>> -                * optimizations here, it may have been modified
>>> -                * by L1.
>>> -                */
>>> -               for (msr = 0x800; msr <= 0x8ff; msr++)
>>> -                       __vmx_enable_intercept_for_msr(
>>> -                               vmx_msr_bitmap_nested,
>>> -                               msr,
>>> -                               MSR_TYPE_R);
>>> -
>>> -               __vmx_enable_intercept_for_msr(
>>> -                               vmx_msr_bitmap_nested,
>>> -                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>>> -                               MSR_TYPE_W);
>>> -               __vmx_enable_intercept_for_msr(
>>> -                               vmx_msr_bitmap_nested,
>>> -                               APIC_BASE_MSR + (APIC_EOI >> 4),
>>> -                               MSR_TYPE_W);
>>> -               __vmx_enable_intercept_for_msr(
>>> -                               vmx_msr_bitmap_nested,
>>> -                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
>>> -                               MSR_TYPE_W);
>>>         }
>>>         kunmap(page);
>>>         nested_release_page_clean(page);
>>> @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>>         }
>>>
>>>         if (cpu_has_vmx_msr_bitmap() &&
>>> -           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
>>> -               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
>>> -               /* MSR_BITMAP will be set by following vmx_set_efer. */
>>> -       } else
>>> +           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
>>> +           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
>>> +               ; /* MSR_BITMAP will be set by following vmx_set_efer. */
>>> +       else
>>>                 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
>>>
>>>         /*
>>>
>>
>>
>> --
>> Yang
>> Alibaba Cloud Computing


-- 
Yang
Alibaba Cloud Computing
