lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1e9ed17e6778b396927cfce5b3cdde889a61c305.camel@redhat.com>
Date:   Mon, 18 Apr 2022 14:02:55 +0300
From:   Maxim Levitsky <mlevitsk@...hat.com>
To:     Suravee Suthikulpanit <suravee.suthikulpanit@....com>,
        linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc:     pbonzini@...hat.com, seanjc@...gle.com, joro@...tes.org,
        jon.grimm@....com, wei.huang2@....com, terry.bowman@....com
Subject: Re: [PATCH 1/2] KVM: SVM: Introduce avic_kick_target_vcpus_fast()

On Thu, 2022-04-14 at 00:11 -0500, Suravee Suthikulpanit wrote:
> Currently, an AVIC-enabled VM suffers from performance bottleneck
> when scaling to large number of vCPUs for I/O intensive workloads.
> 
> In such case, a vCPU often executes halt instruction to get into idle state
> waiting for interrupts, in which KVM would de-schedule the vCPU from
> physical CPU.
> 
> When AVIC HW tries to deliver interrupt to the halting vCPU, it would
> result in AVIC incomplete IPI #vmexit to notify KVM to reschedule
> the target vCPU into running state.
> 
> Investigation has shown the main hotspot is in the kvm_apic_match_dest()
> in the following call stack where it tries to find target vCPUs
> corresponded to the information in the ICRH/ICRL registers.
> 
>   - handle_exit
>     - svm_invoke_exit_handler
>       - avic_incomplete_ipi_interception
>         - kvm_apic_match_dest
> 
> However, AVIC provides hints in the #vmexit info, which can be used to
> retrieve the destination guest physical APIC ID.
> 
> In addition, since QEMU defines guest physical APIC ID to be the same as
> vCPU ID, it can be used to quickly identify the target vCPU to deliver IPI,
> and avoid the overhead from searching through all vCPUs to match the target
> vCPU.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
> ---
>  arch/x86/kvm/svm/avic.c | 91 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 87 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index abcf761c0c53..92d8e0de1fb4 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -351,11 +351,94 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
>  	put_cpu();
>  }
>  
> -static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
> -				   u32 icrl, u32 icrh)
> +/*
> + * A fast-path version of avic_kick_target_vcpus(), which attempts to match
> + * destination APIC ID to vCPU without looping through all vCPUs.
> + */
> +static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
> +				       u32 icrl, u32 icrh, u32 index)
>  {
> +	u32 dest, apic_id;
>  	struct kvm_vcpu *vcpu;
> +	int dest_mode = icrl & APIC_DEST_MASK;
> +	int shorthand = icrl & APIC_SHORT_MASK;
> +	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
> +	u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
> +
> +	if (shorthand != APIC_DEST_NOSHORT)
> +		return -EINVAL;
> +
> +	/*
> +	 * The AVIC incomplete IPI #vmexit info provides index into
> +	 * the physical APIC ID table, which can be used to derive 
> +	 * guest physical APIC ID.
> +	 */
> +	if (dest_mode == APIC_DEST_PHYSICAL) {
> +		apic_id = index;
> +	} else {
> +		if (!apic_x2apic_mode(source)) {
> +			/* For xAPIC logical mode, the index is for logical APIC table. */
> +			apic_id = avic_logical_id_table[index] & 0x1ff;
> +		} else {
> +			/* For x2APIC logical mode, cannot leverage the index.
> +			 * Instead, calculate physical ID from logical ID in ICRH.
> +			 */
> +			int apic;
> +			int first = ffs(icrh & 0xffff);
> +			int last = fls(icrh & 0xffff);
> +			int cluster = (icrh & 0xffff0000) >> 16;
> +
> +			/*
> +			 * If the x2APIC logical ID sub-field (i.e. icrh[15:0]) contains zero
> +			 * or more than 1 bits, we cannot match just one vcpu to kick for
> +			 * fast path.
> +			 */
> +			if (!first || (first != last))
> +				return -EINVAL;
> +
> +			apic = first - 1;
> +			if ((apic < 0) || (apic > 15) || (cluster >= 0xfffff))
> +				return -EINVAL;
> +			apic_id = (cluster << 4) + apic;
> +		}
> +	}
> +
> +	/*
> +	 * Assuming vcpu ID is the same as physical apic ID,
> +	 * and use it to retrieve the target vCPU.
> +	 */
> +	vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
> +	if (!vcpu)
> +		return -EINVAL;
> +
> +	if (apic_x2apic_mode(vcpu->arch.apic))
> +		dest = icrh;
> +	else
> +		dest = GET_XAPIC_DEST_FIELD(icrh);
> +
> +	/*
> +	 * Try matching the destination APIC ID with the vCPU.
> +	 */
> +	if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
> +		vcpu->arch.apic->irr_pending = true;
> +		svm_complete_interrupt_delivery(vcpu,
> +						icrl & APIC_MODE_MASK,
> +						icrl & APIC_INT_LEVELTRIG,
> +						icrl & APIC_VECTOR_MASK);
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> +
> +static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
> +				   u32 icrl, u32 icrh, u32 index)
> +{
>  	unsigned long i;
> +	struct kvm_vcpu *vcpu;
> +
> +	if (!avic_kick_target_vcpus_fast(kvm, source, icrl, icrh, index))
> +		return;
>  
>  	/*
>  	 * Wake any target vCPUs that are blocking, i.e. waiting for a wake
> @@ -388,7 +471,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
>  	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
>  	u32 icrl = svm->vmcb->control.exit_info_1;
>  	u32 id = svm->vmcb->control.exit_info_2 >> 32;
> -	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
> +	u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  
>  	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
> @@ -415,7 +498,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
>  		 * set the appropriate IRR bits on the valid target
>  		 * vcpus. So, we just need to kick the appropriate vcpu.
>  		 */
> -		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
> +		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
>  		break;
>  	case AVIC_IPI_FAILURE_INVALID_TARGET:
>  		break;



Reviewed-by: Maxim Levitsky <mlevitsk@...hat.com>

Best regards,
	Maxim Levitsky

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ