Message-ID: <22dc9ceb-4b16-be68-ed8e-c6a222619032@amd.com>
Date: Wed, 8 May 2024 14:13:17 -0500
From: Tom Lendacky <thomas.lendacky@....com>
To: Borislav Petkov <bp@...en8.de>
Cc: linux-kernel@...r.kernel.org, x86@...nel.org, linux-coco@...ts.linux.dev,
svsm-devel@...onut-svsm.dev, Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>, Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Dan Williams <dan.j.williams@...el.com>, Michael Roth
<michael.roth@....com>, Ashish Kalra <ashish.kalra@....com>
Subject: Re: [PATCH v4 05/15] x86/sev: Use kernel provided SVSM Calling Areas
On 5/8/24 03:05, Borislav Petkov wrote:
> On Wed, Apr 24, 2024 at 10:58:01AM -0500, Tom Lendacky wrote:
>> +static int __svsm_msr_protocol(struct svsm_call *call)
>
> All those protocol functions need a verb in the name:
>
> __svsm_do_msr_protocol
> __svsm_do_ghcb_protocol
>
> or something slicker than the silly "do".
Ok, maybe __perform_svsm_msr_protocol or some such.
>
>> +{
>> + u64 val, resp;
>> + u8 pending;
>> +
>> + val = sev_es_rd_ghcb_msr();
>> +
>> + sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));
>> +
>> + pending = 0;
>
> Do that assignment above, at declaration.
Ok
>
>> + issue_svsm_call(call, &pending);
>> +
>> + resp = sev_es_rd_ghcb_msr();
>> +
>> + sev_es_wr_ghcb_msr(val);
>
> The MSR SVSM protocol is supposed to restore the MSR? A comment pls.
Ok, I'll put it above the read where val is assigned.
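Something along these lines, above where val is assigned (exact wording TBD):

	/*
	 * Save the current GHCB MSR value so it can be restored after the
	 * SVSM MSR protocol exchange, since the exchange itself is done
	 * through the GHCB MSR.
	 */
	val = sev_es_rd_ghcb_msr();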
>
>> +
>> + if (pending)
>> + return -EINVAL;
>> +
>> + if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
>> + return -EINVAL;
>> +
>> + if (GHCB_MSR_VMPL_RESP_VAL(resp) != 0)
>
> s/ != 0//
Ok
>
>> + return -EINVAL;
>> +
>> + return call->rax_out;
>
> rax_out is u64, your function returns int because of the negative
> values. But then it'll truncate the u64 in the success case.
I'll fix that: return 0 here and have the caller check the rax_out value on
return.
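Roughly (just a sketch, the final version may differ):

	if (GHCB_MSR_VMPL_RESP_VAL(resp))
		return -EINVAL;

	return 0;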
>
>> +}
>> +
>> +static int __svsm_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
>> +{
>> + struct es_em_ctxt ctxt;
>> + u8 pending;
>> +
>> + vc_ghcb_invalidate(ghcb);
>> +
>> + /* Fill in protocol and format specifiers */
>> + ghcb->protocol_version = ghcb_version;
>> + ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
>> +
>> + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
>> + ghcb_set_sw_exit_info_1(ghcb, 0);
>> + ghcb_set_sw_exit_info_2(ghcb, 0);
>> +
>> + sev_es_wr_ghcb_msr(__pa(ghcb));
>> +
>> + pending = 0;
>
> As above.
Yep
>
>> + issue_svsm_call(call, &pending);
>> +
>> + if (pending)
>> + return -EINVAL;
>> +
>> + switch (verify_exception_info(ghcb, &ctxt)) {
>> + case ES_OK:
>> + break;
>> + case ES_EXCEPTION:
>> + vc_forward_exception(&ctxt);
>> + fallthrough;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + return call->rax_out;
>
> As above.
Ditto.
>
>> +}
>> +
>> static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
>> struct es_em_ctxt *ctxt,
>> u64 exit_code, u64 exit_info_1,
>> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
>> index a500df807e79..21f3cc40d662 100644
>> --- a/arch/x86/kernel/sev.c
>> +++ b/arch/x86/kernel/sev.c
>> @@ -133,8 +133,13 @@ struct ghcb_state {
>> struct ghcb *ghcb;
>> };
>>
>> +/* For early boot SVSM communication */
>> +static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
>> +
>> static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
>> static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
>> +static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
>> +static DEFINE_PER_CPU(u64, svsm_caa_pa);
>>
>> struct sev_config {
>> __u64 debug : 1,
>> @@ -150,6 +155,15 @@ struct sev_config {
>> */
>> ghcbs_initialized : 1,
>>
>> + /*
>> + * A flag used to indicate when the per-CPU SVSM CA is to be
>
> s/A flag used //
>
> and above.
Sure
>
>> + * used instead of the boot SVSM CA.
>> + *
>> + * For APs, the per-CPU SVSM CA is created as part of the AP
>> + * bringup, so this flag can be used globally for the BSP and APs.
>> + */
>> + cas_initialized : 1,
>> +
>> __reserved : 62;
>> };
>>
>> @@ -572,9 +586,42 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si
>> return ES_EXCEPTION;
>> }
>>
>> +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
>> +{
>> + long error_code = ctxt->fi.error_code;
>> + int trapnr = ctxt->fi.vector;
>> +
>> + ctxt->regs->orig_ax = ctxt->fi.error_code;
>> +
>> + switch (trapnr) {
>> + case X86_TRAP_GP:
>> + exc_general_protection(ctxt->regs, error_code);
>> + break;
>> + case X86_TRAP_UD:
>> + exc_invalid_op(ctxt->regs);
>> + break;
>> + case X86_TRAP_PF:
>> + write_cr2(ctxt->fi.cr2);
>> + exc_page_fault(ctxt->regs, error_code);
>> + break;
>> + case X86_TRAP_AC:
>> + exc_alignment_check(ctxt->regs, error_code);
>> + break;
>> + default:
>> + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
>> + BUG();
>> + }
>> +}
>> +
>> /* Include code shared with pre-decompression boot stage */
>> #include "sev-shared.c"
>>
>> +static struct svsm_ca *__svsm_get_caa(void)
>
> Why the "__" prefix?
>
> I gon't see a svsm_get_caa() variant...
There probably was one at one point and I missed removing the "__" prefix.
I'll take care of that.
>
>> +{
>> + return sev_cfg.cas_initialized ? this_cpu_read(svsm_caa)
>> + : boot_svsm_caa;
>> +}
>> +
>> static noinstr void __sev_put_ghcb(struct ghcb_state *state)
>> {
>> struct sev_es_runtime_data *data;
>> @@ -600,6 +647,42 @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
>> }
>> }
>>
>> +static int svsm_protocol(struct svsm_call *call)
>
> svsm_issue_protocol_call() or whatnot...
>
> Btw, can all this svsm guest gunk live simply in a separate file? Or is
> it going to need a lot of sev.c stuff exported through an internal.h
> header?
>
> Either that or prefix all SVSM handling functions with "svsm_" so that
> there's at least some visibility which is which.
There's quite a bit of interaction so I'll make sure to prefix everything.
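So, tentatively, something like this (names not final):

	svsm_perform_call_protocol()	/* currently svsm_protocol() */
	svsm_perform_msr_protocol()	/* currently __svsm_msr_protocol() */
	svsm_perform_ghcb_protocol()	/* currently __svsm_ghcb_protocol() */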
>
>> +{
>> + struct ghcb_state state;
>> + unsigned long flags;
>> + struct ghcb *ghcb;
>> + int ret;
>> +
>> + /*
>> + * This can be called very early in the boot, use native functions in
>> + * order to avoid paravirt issues.
>> + */
>> + flags = native_save_fl();
>> + if (flags & X86_EFLAGS_IF)
>> + native_irq_disable();
>
> Uff, conditional locking.
>
> What's wrong with using local_irq_save/local_irq_restore?
The paravirt versions of local_irq_save and local_irq_restore can't be
used as early as this routine is called.
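I.e. the obvious

	unsigned long flags;

	local_irq_save(flags);
	...
	local_irq_restore(flags);

would go through the paravirt machinery, which isn't functional yet when
this is called during early boot, hence the open-coded X86_EFLAGS_IF check
around native_save_fl()/native_irq_disable()/native_irq_enable().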
>
>> +
>> + if (sev_cfg.ghcbs_initialized)
>> + ghcb = __sev_get_ghcb(&state);
>> + else if (boot_ghcb)
>> + ghcb = boot_ghcb;
>> + else
>> + ghcb = NULL;
>> +
>> + do {
>> + ret = ghcb ? __svsm_ghcb_protocol(ghcb, call)
>> + : __svsm_msr_protocol(call);
>> + } while (ret == SVSM_ERR_BUSY);
>> +
>> + if (sev_cfg.ghcbs_initialized)
>> + __sev_put_ghcb(&state);
>> +
>> + if (flags & X86_EFLAGS_IF)
>> + native_irq_enable();
>> +
>> + return ret;
>> +}
>
> ...
>
>> @@ -2095,6 +2188,50 @@ static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
>> return cc_info;
>> }
>>
>> +static __head void setup_svsm(struct cc_blob_sev_info *cc_info)
>
> svsm_setup
Yep
>
>> +{
>> + struct svsm_call call = {};
>> + int ret;
>> + u64 pa;
>> +
>> + /*
>> + * Record the SVSM Calling Area address (CAA) if the guest is not
>> + * running at VMPL0. The CA will be used to communicate with the
>> + * SVSM to perform the SVSM services.
>> + */
>> + setup_svsm_ca(cc_info);
>> +
>> + /* Nothing to do if not running under an SVSM. */
>> + if (!vmpl)
>> + return;
>
> You set up stuff above and now you bail out?
setup_svsm_ca() is what sets the vmpl variable. If the VMPL is zero, nothing
will have been set up, so we don't continue on.
>
> Judging by setup_svsm_ca() you don't really need that vmpl var but you
> can check
>
> if (!boot_svsm_caa)
> return;
>
> to determine whether a SVSM was detected.
Yes, but the vmpl var will be used for attestation requests, sysfs, etc.
>
>> +
>> + /*
>> + * It is very early in the boot and the kernel is running identity
>> + * mapped but without having adjusted the pagetables to where the
>> + * kernel was loaded (physbase), so the get the CA address using
>
> s/the //
hmmm... CA is Calling Area, so
"get the Calling Area address using RIP-relative"
"get Calling Area address using RIP-relative..."
The first one sounds more correct to me.
>
>> + * RIP-relative addressing.
>> + */
>> + pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
>> +
>> + /*
>> + * Switch over to the boot SVSM CA while the current CA is still
>> + * addressable. There is no GHCB at this point so use the MSR protocol.
>> + *
>> + * SVSM_CORE_REMAP_CA call:
>> + * RAX = 0 (Protocol=0, CallID=0)
>> + * RCX = New CA GPA
>> + */
>> + call.caa = __svsm_get_caa();
>> + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
>> + call.rcx = pa;
>> + ret = svsm_protocol(&call);
>> + if (ret != SVSM_SUCCESS)
>> + panic("Can't remap the SVSM CA, ret=%#x (%d)\n", ret, ret);
>> +
>> + boot_svsm_caa = (struct svsm_ca *)pa;
>> + boot_svsm_caa_pa = pa;
>
> Huh, setup_svsm_ca() already assigned those...
setup_svsm_ca() assigned the ones from the secrets page. The kernel now
switches to using its own CA.
>
>> bool __head snp_init(struct boot_params *bp)
>> {
>> struct cc_blob_sev_info *cc_info;
>> @@ -2108,12 +2245,7 @@ bool __head snp_init(struct boot_params *bp)
>>
>> setup_cpuid_table(cc_info);
>>
>> - /*
>> - * Record the SVSM Calling Area address (CAA) if the guest is not
>> - * running at VMPL0. The CA will be used to communicate with the
>> - * SVSM to perform the SVSM services.
>> - */
>> - setup_svsm_ca(cc_info);
>> + setup_svsm(cc_info);
>>
>> /*
>> * The CC blob will be used later to access the secrets page. Cache
>> @@ -2306,3 +2438,12 @@ void sev_show_status(void)
>> }
>> pr_cont("\n");
>> }
>> +
>> +void __init snp_remap_svsm_ca(void)
>> +{
>> + if (!vmpl)
>> + return;
>> +
>> + /* Update the CAA to a proper kernel address */
>> + boot_svsm_caa = &boot_svsm_ca_page;
>> +}
>> diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
>> index 422602f6039b..6155020e4d2d 100644
>> --- a/arch/x86/mm/mem_encrypt_amd.c
>> +++ b/arch/x86/mm/mem_encrypt_amd.c
>> @@ -2,7 +2,7 @@
>> /*
>> * AMD Memory Encryption Support
>> *
>> - * Copyright (C) 2016 Advanced Micro Devices, Inc.
>> + * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
>
> Are we doing that now?
Looks like a leftover change... will remove it.
Thanks,
Tom
>