lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zmomoj-PngmXHlxQ@google.com>
Date: Wed, 12 Jun 2024 15:52:18 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Xin Li <xin3.li@...el.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org, 
	linux-doc@...r.kernel.org, linux-kselftest@...r.kernel.org, 
	pbonzini@...hat.com, corbet@....net, tglx@...utronix.de, mingo@...hat.com, 
	bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com, 
	shuah@...nel.org, vkuznets@...hat.com, peterz@...radead.org, 
	ravi.v.shankar@...el.com, xin@...or.com
Subject: Re: [PATCH v2 12/25] KVM: VMX: Handle FRED event data

On Wed, Feb 07, 2024, Xin Li wrote:
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 4889754415b5..6b796c5c9c2b 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -256,8 +256,12 @@ enum vmcs_field {
>  	PID_POINTER_TABLE_HIGH		= 0x00002043,
>  	SECONDARY_VM_EXIT_CONTROLS	= 0x00002044,
>  	SECONDARY_VM_EXIT_CONTROLS_HIGH	= 0x00002045,
> +	INJECTED_EVENT_DATA		= 0x00002052,
> +	INJECTED_EVENT_DATA_HIGH	= 0x00002053,
>  	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
>  	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
> +	ORIGINAL_EVENT_DATA		= 0x00002404,
> +	ORIGINAL_EVENT_DATA_HIGH	= 0x00002405,

Are these the actual names from the SDM?  E.g. is there no FRED_ prefix to clue
in readers that they are FRED specific? (unless they aren't FRED specific?)

>  	VMCS_LINK_POINTER               = 0x00002800,
>  	VMCS_LINK_POINTER_HIGH          = 0x00002801,
>  	GUEST_IA32_DEBUGCTL             = 0x00002802,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ee61d2c25cb0..f622fb90a098 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1871,9 +1871,29 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu)
>  		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
>  			     vmx->vcpu.arch.event_exit_inst_len);
>  		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
> -	} else
> +	} else {
>  		intr_info |= INTR_TYPE_HARD_EXCEPTION;
>  
> +		if (kvm_is_fred_enabled(vcpu)) {
> +			u64 event_data = 0;
> +
> +			if (is_debug(intr_info))
> +				/*
> +				 * Compared to DR6, FRED #DB event data saved on
> +				 * the stack frame have bits 4 ~ 11 and 16 ~ 31
> +				 * inverted, i.e.,
> +				 *   fred_db_event_data = dr6 ^ 0xFFFF0FF0UL
> +				 */
> +				event_data = vcpu->arch.dr6 ^ DR6_RESERVED;
> +			else if (is_page_fault(intr_info))
> +				event_data = vcpu->arch.cr2;
> +			else if (is_nm_fault(intr_info))
> +				event_data = to_vmx(vcpu)->fred_xfd_event_data;
> +
> +			vmcs_write64(INJECTED_EVENT_DATA, event_data);
> +		}
> +	}
> +
>  	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
>  
>  	vmx_clear_hlt(vcpu);
> @@ -7082,8 +7102,11 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
>  	 *
>  	 * Queuing exception is done in vmx_handle_exit. See comment there.
>  	 */
> -	if (vcpu->arch.guest_fpu.fpstate->xfd)
> +	if (vcpu->arch.guest_fpu.fpstate->xfd) {
>  		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
> +		to_vmx(vcpu)->fred_xfd_event_data = vcpu->arch.cr0 & X86_CR0_TS

kvm_is_cr0_bit_set(), don't read vcpu->arch.cr0 directly.

> +			? 0 : vcpu->arch.guest_fpu.xfd_err;

Maybe this?

		if (kvm_is_cr0_bit_set(vcpu, X86_CR0_TS))
			to_vmx(vcpu)->fred_xfd_event_data = 0;
		else
			to_vmx(vcpu)->fred_xfd_event_data = vcpu->arch.guest_fpu.xfd_err;

Hmm, but why does this need to be cached _now_?  I.e. why does fred_xfd_event_data
need to exist?  Wouldn't it be simpler and more robust to use vcpu->arch.guest_fpu.xfd_err
directly in vmx_inject_exception()?

> +	}
>  }
>  
>  static void handle_exception_irqoff(struct vcpu_vmx *vmx)
> @@ -7199,29 +7222,28 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
>  					      vmx->loaded_vmcs->entry_time));
>  }
>  
> -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
> -				      u32 idt_vectoring_info,
> -				      int instr_len_field,
> -				      int error_code_field)
> +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, bool vectoring)
>  {
> -	u8 vector;
> -	int type;
> -	bool idtv_info_valid;
> -
> -	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
> +	u32 event_id = vectoring ? to_vmx(vcpu)->idt_vectoring_info
> +				 : vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);


Preferred style for ternary operators is:

	u32 event_id = vectoring ? to_vmx(vcpu)->idt_vectoring_info :
				   vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

That said, I don't think this is a net positive versus passing in all params.
The bare true/false is somewhat inscrutable, and in this code, it's hard to
understand why KVM looks at X instead of Y without the context of the caller.

> +	int instr_len_field = vectoring ? VM_EXIT_INSTRUCTION_LEN
> +					: VM_ENTRY_INSTRUCTION_LEN;
> +	int error_code_field = vectoring ? IDT_VECTORING_ERROR_CODE
> +					 : VM_ENTRY_EXCEPTION_ERROR_CODE;
> +	int event_data_field = vectoring ? ORIGINAL_EVENT_DATA
> +					 : INJECTED_EVENT_DATA;
> +	u8 vector = event_id & INTR_INFO_VECTOR_MASK;
> +	int type = event_id & INTR_INFO_INTR_TYPE_MASK;
>  
>  	vcpu->arch.nmi_injected = false;
>  	kvm_clear_exception_queue(vcpu);
>  	kvm_clear_interrupt_queue(vcpu);
>  
> -	if (!idtv_info_valid)
> +	if (!(event_id & INTR_INFO_VALID_MASK))
>  		return;
>  
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
> -	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
> -	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
> -
>  	switch (type) {
>  	case INTR_TYPE_NMI_INTR:
>  		vcpu->arch.nmi_injected = true;
> @@ -7236,10 +7258,31 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
>  		vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
>  		fallthrough;
>  	case INTR_TYPE_HARD_EXCEPTION:
> -		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
> -			u32 err = vmcs_read32(error_code_field);
> -			kvm_requeue_exception_e(vcpu, vector, err);
> -		} else
> +		if (kvm_is_fred_enabled(vcpu)) {
> +			/* Save event data for being used as injected-event data */
> +			u64 event_data = vmcs_read64(event_data_field);
> +
> +			switch (vector) {
> +			case DB_VECTOR:
> +				/* %dr6 should be equal to (event_data ^ DR6_RESERVED) */

DR6, no need to use assembly syntax, but I'd just drop this comment, as well as
the CR2 comment.  They add no insight beyond what the code literally does.

> +				vcpu->arch.dr6 = event_data ^ DR6_RESERVED;
> +				break;
> +			case NM_VECTOR:
> +				to_vmx(vcpu)->fred_xfd_event_data = event_data;
> +				break;
> +			case PF_VECTOR:
> +				/* %cr2 should be equal to event_data */
> +				vcpu->arch.cr2 = event_data;
> +				break;
> +			default:
> +				WARN_ON(event_data != 0);
> +				break;
> +			}
> +		}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ