linux-kernel - Re: [PATCH Part1 RFC v4 15/36] x86/mm: Add support to validate memory when changing C-bit

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YRvxZtLkVNda9xwX@zn.tnic>
Date:   Tue, 17 Aug 2021 19:27:02 +0200
From:   Borislav Petkov <bp@...en8.de>
To:     Brijesh Singh <brijesh.singh@....com>
Cc:     x86@...nel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        linux-efi@...r.kernel.org, platform-driver-x86@...r.kernel.org,
        linux-coco@...ts.linux.dev, linux-mm@...ck.org,
        linux-crypto@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Joerg Roedel <jroedel@...e.de>,
        Tom Lendacky <thomas.lendacky@....com>,
        "H. Peter Anvin" <hpa@...or.com>, Ard Biesheuvel <ardb@...nel.org>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Sergio Lopez <slp@...hat.com>, Peter Gonda <pgonda@...gle.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
        David Rientjes <rientjes@...gle.com>,
        Dov Murik <dovmurik@...ux.ibm.com>,
        Tobin Feldman-Fitzthum <tobin@....com>,
        Michael Roth <michael.roth@....com>,
        Vlastimil Babka <vbabka@...e.cz>, tony.luck@...el.com,
        npmccallum@...hat.com, brijesh.ksingh@...il.com
Subject: Re: [PATCH Part1 RFC v4 15/36] x86/mm: Add support to validate
 memory when changing C-bit

On Wed, Jul 07, 2021 at 01:14:45PM -0500, Brijesh Singh wrote:
> +struct __packed psc_hdr {
> +	u16 cur_entry;
> +	u16 end_entry;
> +	u32 reserved;
> +};
> +
> +struct __packed psc_entry {
> +	u64	cur_page	: 12,
> +		gfn		: 40,
> +		operation	: 4,
> +		pagesize	: 1,
> +		reserved	: 7;
> +};
> +
> +struct __packed snp_psc_desc {
> +	struct psc_hdr hdr;
> +	struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
> +};

The majority of kernel code puts __packed after the struct definition,
let's put it there too pls, out of the way.

...

> +static int vmgexit_psc(struct snp_psc_desc *desc)
> +{
> +	int cur_entry, end_entry, ret;
> +	struct snp_psc_desc *data;
> +	struct ghcb_state state;
> +	struct ghcb *ghcb;
> +	struct psc_hdr *hdr;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +
> +	ghcb = __sev_get_ghcb(&state);
> +	if (unlikely(!ghcb))
> +		panic("SEV-SNP: Failed to get GHCB\n");
> +
> +	/* Copy the input desc into GHCB shared buffer */
> +	data = (struct snp_psc_desc *)ghcb->shared_buffer;
> +	memcpy(ghcb->shared_buffer, desc, sizeof(*desc));
> +
> +	hdr = &data->hdr;
> +	cur_entry = hdr->cur_entry;
> +	end_entry = hdr->end_entry;
> +
> +	/*
> +	 * As per the GHCB specification, the hypervisor can resume the guest
> +	 * before processing all the entries. Checks whether all the entries
> +	 * are processed. If not, then keep retrying.
> +	 *
> +	 * The stragtegy here is to wait for the hypervisor to change the page
> +	 * state in the RMP table before guest access the memory pages. If the
> +	 * page state was not successful, then later memory access will result
> +	 * in the crash.
> +	 */
> +	while (hdr->cur_entry <= hdr->end_entry) {
> +		ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
> +
> +		ret = sev_es_ghcb_hv_call(ghcb, NULL, SVM_VMGEXIT_PSC, 0, 0);
> +
> +		/*
> +		 * Page State Change VMGEXIT can pass error code through
> +		 * exit_info_2.
> +		 */
> +		if (WARN(ret || ghcb->save.sw_exit_info_2,
> +			 "SEV-SNP: page state change failed ret=%d exit_info_2=%llx\n",
> +			 ret, ghcb->save.sw_exit_info_2))
> +			return 1;

Yikes, you return here and below with interrupts disabled.

All your returns need to be "goto out;" instead where you do

out:
        __sev_put_ghcb(&state);
        local_irq_restore(flags);

Yap, you very likely need to put the GHCB too.

> +		/*
> +		 * Lets do some sanity check that entry processing is not going
> +		 * backward. This will happen only if hypervisor is tricking us.
> +		 */
> +		if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry),
> +			"SEV-SNP: page state change processing going backward, end_entry "
> +			"(expected %d got %d) cur_entry (expected %d got %d)\n",
> +			end_entry, hdr->end_entry, cur_entry, hdr->cur_entry))
> +			return 1;

WARNING: quoted string split across lines
#293: FILE: arch/x86/kernel/sev.c:750:
+			"SEV-SNP: page state change processing going backward, end_entry "
+			"(expected %d got %d) cur_entry (expected %d got %d)\n",

If you're wondering what to do, yes, you can really stretch that string
and shorten it too:

                if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry),
"SEV-SNP: PSC processing going backwards, end_entry %d (got %d) cur_entry: %d (got %d)\n",
                         end_entry, hdr->end_entry, cur_entry, hdr->cur_entry))
                        return 1;

so that it fits on a single line and grepping can find it.

> +		/* Lets verify that reserved bit is not set in the header*/
> +		if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n"))

psc_entry has a ->reserved field too and since we're iterating over the
entries...

> +			return 1;
> +	}
> +
> +	__sev_put_ghcb(&state);
> +	local_irq_restore(flags);
> +
> +	return 0;
> +}
> +
> +static void __set_page_state(struct snp_psc_desc *data, unsigned long vaddr,
> +			     unsigned long vaddr_end, int op)
> +{
> +	struct psc_hdr *hdr;
> +	struct psc_entry *e;
> +	unsigned long pfn;
> +	int i;
> +
> +	hdr = &data->hdr;
> +	e = data->entries;
> +
> +	memset(data, 0, sizeof(*data));
> +	i = 0;
> +
> +	while (vaddr < vaddr_end) {
> +		if (is_vmalloc_addr((void *)vaddr))
> +			pfn = vmalloc_to_pfn((void *)vaddr);
> +		else
> +			pfn = __pa(vaddr) >> PAGE_SHIFT;
> +
> +		e->gfn = pfn;
> +		e->operation = op;
> +		hdr->end_entry = i;
> +
> +		/*
> +		 * The GHCB specification provides the flexibility to
> +		 * use either 4K or 2MB page size in the RMP table.
> +		 * The current SNP support does not keep track of the
> +		 * page size used in the RMP table. To avoid the
> +		 * overlap request, use the 4K page size in the RMP
> +		 * table.
> +		 */
> +		e->pagesize = RMP_PG_SIZE_4K;
> +
> +		vaddr = vaddr + PAGE_SIZE;
> +		e++;
> +		i++;
> +	}
> +
> +	/* Terminate the guest on page state change failure. */

That comment is kinda obvious :)

> +	if (vmgexit_psc(data))
> +		sev_es_terminate(1, GHCB_TERM_PSC);
> +}
> +
> +static void set_page_state(unsigned long vaddr, unsigned int npages, int op)
> +{
> +	unsigned long vaddr_end, next_vaddr;
> +	struct snp_psc_desc *desc;
> +
> +	vaddr = vaddr & PAGE_MASK;
> +	vaddr_end = vaddr + (npages << PAGE_SHIFT);
> +
> +	desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);

kzalloc() so that you don't have to memset() later in
__set_page_state().

> +	if (!desc)
> +		panic("failed to allocate memory");

Make that error message more distinctive so that *if* it happens, one
can pinpoint the place in the code where the panic comes from.

> +	while (vaddr < vaddr_end) {
> +		/*
> +		 * Calculate the last vaddr that can be fit in one
> +		 * struct snp_psc_desc.
> +		 */
> +		next_vaddr = min_t(unsigned long, vaddr_end,
> +				(VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
> +
> +		__set_page_state(desc, vaddr, next_vaddr, op);
> +
> +		vaddr = next_vaddr;
> +	}
> +
> +	kfree(desc);
> +}
> +

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette