[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <79be2e29-6487-dd60-9b6f-3daa48a2e93f@amd.com>
Date: Mon, 4 Nov 2024 10:03:22 -0600
From: Tom Lendacky <thomas.lendacky@....com>
To: Borislav Petkov <bp@...en8.de>
Cc: linux-kernel@...r.kernel.org, x86@...nel.org,
Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Michael Roth <michael.roth@....com>, Ashish Kalra <ashish.kalra@....com>,
Nikunj A Dadhania <nikunj@....com>, Neeraj Upadhyay <Neeraj.Upadhyay@....com>
Subject: Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a
single RMP segment
On 11/4/24 04:33, Borislav Petkov wrote:
> On Mon, Oct 28, 2024 at 02:32:41PM -0500, Tom Lendacky wrote:
>> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
>> index 5871c924c0b2..37ff4f98e8d1 100644
>> --- a/arch/x86/virt/svm/sev.c
>> +++ b/arch/x86/virt/svm/sev.c
>> @@ -18,6 +18,7 @@
>> #include <linux/cpumask.h>
>> #include <linux/iommu.h>
>> #include <linux/amd-iommu.h>
>> +#include <linux/nospec.h>
>
> What's that include for?
>
> For array_index_nospec() below?
>
> It builds without it or did you trigger a build error with some funky .config?
But the define is in linux/nospec.h, so I might only be a side effect of
another include.
>
>> #include <asm/sev.h>
>> #include <asm/processor.h>
>> @@ -77,12 +78,42 @@ struct rmpentry_raw {
>> */
>> #define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
>>
>> +/*
>> + * For a non-segmented RMP table, use the maximum physical addressing as the
>> + * segment size in order to always arrive at index 0 in the table.
>> + */
>> +#define RMPTABLE_NON_SEGMENTED_SHIFT 52
>> +
>> +struct rmp_segment_desc {
>> + struct rmpentry_raw *rmp_entry;
>> + u64 max_index;
>> + u64 size;
>> +};
>> +
>> +/*
>> + * Segmented RMP Table support.
>> + * - The segment size is used for two purposes:
>> + * - Identify the amount of memory covered by an RMP segment
>> + * - Quickly locate an RMP segment table entry for a physical address
>> + *
>> + * - The RMP segment table contains pointers to an RMP table that covers
>> + * a specific portion of memory. There can be up to 512 8-byte entries,
>> + * one pages worth.
>> + */
>> +static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
>> +static unsigned int rst_max_index __ro_after_init = 512;
>> +
>> +static u64 rmp_segment_size_max;
>
> This one is used in a single function. Generate it there pls.
It's used in two functions, alloc_rmp_segment_desc() and
set_rmp_segment_info().
>
>> +static unsigned int rmp_segment_coverage_shift;
>> +static u64 rmp_segment_coverage_size;
>> +static u64 rmp_segment_coverage_mask;
>
> That "_coverage_" in there looks redundant to me and could go and make those
> variables shorter.
Sure, I can remove the _coverage_ portion.
>
>> +#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
>> +#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
>> +
>> /* Mask to apply to a PFN to get the first PFN of a 2MB page */
>> #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>>
>> static u64 probed_rmp_base, probed_rmp_size;
>> -static struct rmpentry_raw *rmptable __ro_after_init;
>> -static u64 rmptable_max_pfn __ro_after_init;
>>
>> static LIST_HEAD(snp_leaked_pages_list);
>> static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
>> @@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
>> return true;
>> }
>>
>> +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
>> +{
>> + struct rmp_segment_desc *desc;
>> + void *rmp_segment;
>> + u64 rst_index;
>> +
>> + /* Validate the RMP segment size */
>> + if (segment_size > rmp_segment_size_max) {
>> + pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
>> + segment_size, rmp_segment_size_max);
>> + return false;
>> + }
>> +
>> + /* Validate the RMP segment table index */
>> + rst_index = RST_ENTRY_INDEX(pa);
>> + if (rst_index >= rst_max_index) {
>> + pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
>> + pa, rmp_segment_coverage_size);
>> + return false;
>> + }
>> + rst_index = array_index_nospec(rst_index, rst_max_index);
>
> Why are we doing this here? Are you expecting some out-of-bounds
> user-controlled values here?
>
> AFAICT, this is all read from the hw/fw so why are speculative accesses
> a problem?
Yeah, not needed here since this is before userspace has even started.
>
>> + if (rmp_segment_table[rst_index]) {
>> + pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
>> + return false;
>> + }
>> +
>> + /* Map the RMP entries */
>
> Kinda obvious...
>
>> + rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
>> + if (!rmp_segment) {
>> + pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
>> + segment_pa, segment_size);
>> + return false;
>> + }
>> +
>> + desc = kzalloc(sizeof(*desc), GFP_KERNEL);
>
> sizeof(struct rmp_segment_desc)
Hmmm... I prefer to keep this as sizeof(*desc). If any changes are made
to the structure name, then this line doesn't need to be changed.
>
>
>> + if (!desc) {
>> + memunmap(rmp_segment);
>> + return false;
>> + }
>> +
>> + desc->rmp_entry = rmp_segment;
>> + desc->max_index = segment_size / sizeof(*desc->rmp_entry);
>> + desc->size = segment_size;
>> +
>> + /* Add the segment descriptor to the table */
>> + rmp_segment_table[rst_index] = desc;
>> +
>> + return true;
>> +}
>
> ...
>
>> @@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
>>
>> /* Zero out the RMP bookkeeping area */
>> if (!init_rmptable_bookkeeping()) {
>> - memunmap(rmptable_start);
>> + free_rmp_segment_table();
>> goto nosnp;
>> }
>>
>> /* Zero out the RMP entries */
>> - memset(rmptable_start, 0, rmptable_size);
>> + for (i = 0; i < rst_max_index; i++) {
>> + struct rmp_segment_desc *desc;
>> +
>> + desc = rmp_segment_table[i];
>> + if (!desc)
>> + continue;
>> +
>> + memset(desc->rmp_entry, 0, desc->size);
>> + }
>
> Why isn't this zeroing out part of alloc_rmp_segment_table()?
If the SNP_EN bit is set in the SYSCFG MSR, the code needs to skip the
zeroing of the RMP table since it no longer has write access to the
table (this happens with kexec).
This keeps the code consistent with the prior code as to where the
zeroing occurs. I can add another argument to alloc_rmp_segment_desc()
that tells it whether to clear it or not, if you prefer.
>
>> @@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
>>
>> static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
>> {
>> - if (!rmptable)
>> + struct rmp_segment_desc *desc;
>> + u64 paddr, rst_index, segment_index;
>
> Reverse xmas tree order pls.
Right.
>
>> + if (!rmp_segment_table)
>> return ERR_PTR(-ENODEV);
>>
>> - if (unlikely(pfn > rmptable_max_pfn))
>> + paddr = pfn << PAGE_SHIFT;
>> +
>> + rst_index = RST_ENTRY_INDEX(paddr);
>> + if (unlikely(rst_index >= rst_max_index))
>> + return ERR_PTR(-EFAULT);
>> + rst_index = array_index_nospec(rst_index, rst_max_index);
>
> Same question as above.
This is where I was worried about the VMM/guest being able to get into
this routine with a bad PFN value.
This function is invoked from dump_rmpentry(), which can be invoked from:
rmpupdate() - I think this is safe because the adjust_direct_map() will
fail if the PFN isn't valid, before the RMP is accessed.
snp_leak_pages() - I think this is safe because the PFN is based on an
actual allocation.
snp_dump_hva_rmpentry() - This is called from the page fault handler. I
think this invocation is safe for now because because it is only called
if the fault type is an RMP fault type, which implies that the RMP is
involved. But as an external function, there's no guarantee as to the
situation it can be called from in the future.
I can remove it for now if you think it will be safe going forward.
Thanks,
Tom
>
>> +
>> + desc = rmp_segment_table[rst_index];
>> + if (unlikely(!desc))
>> return ERR_PTR(-EFAULT);
>>
>> - return rmptable + pfn;
>> + segment_index = RMP_ENTRY_INDEX(paddr);
>> + if (unlikely(segment_index >= desc->max_index))
>> + return ERR_PTR(-EFAULT);
>> + segment_index = array_index_nospec(segment_index, desc->max_index);
>> +
>> + return desc->rmp_entry + segment_index;
>> }
>
Powered by blists - more mailing lists