[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e86f71ec-94f7-46be-87fe-79ca26fa91d7@amd.com>
Date: Tue, 25 Mar 2025 17:40:45 +0530
From: Neeraj Upadhyay <Neeraj.Upadhyay@....com>
To: Thomas Gleixner <tglx@...utronix.de>, linux-kernel@...r.kernel.org
Cc: bp@...en8.de, mingo@...hat.com, dave.hansen@...ux.intel.com,
Thomas.Lendacky@....com, nikunj@....com, Santosh.Shukla@....com,
Vasant.Hegde@....com, Suravee.Suthikulpanit@....com, David.Kaplan@....com,
x86@...nel.org, hpa@...or.com, peterz@...radead.org, seanjc@...gle.com,
pbonzini@...hat.com, kvm@...r.kernel.org, kirill.shutemov@...ux.intel.com,
huibo.wang@....com, naveen.rao@....com
Subject: Re: [RFC v2 05/17] x86/apic: Add update_vector callback for Secure
AVIC
Hi Thomas,
On 3/21/2025 9:05 PM, Neeraj Upadhyay wrote:
...
>
>>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
>>> index 72fa4bb78f0a..e0c9505e05f8 100644
>>> --- a/arch/x86/kernel/apic/vector.c
>>> +++ b/arch/x86/kernel/apic/vector.c
>>> @@ -174,6 +174,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
>>> apicd->prev_cpu = apicd->cpu;
>>> WARN_ON_ONCE(apicd->cpu == newcpu);
>>> } else {
>>> + if (apic->update_vector)
>>> + apic->update_vector(apicd->cpu, apicd->vector, false);
>>> irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
>>> managed);
>>> }
>>> @@ -183,6 +185,8 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
>>> apicd->cpu = newcpu;
>>> BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
>>> per_cpu(vector_irq, newcpu)[newvec] = desc;
>>> + if (apic->update_vector)
>>> + apic->update_vector(apicd->cpu, apicd->vector, true);
>>
>> A trivial
>>
>> static inline void apic_update_vector(....)
>> {
>> if (apic->update_vector)
>> ....
>> }
>>
>> would be too easy to read and add not enough line count, right?
>>
>
> Yes.
>
As apic_update_vector() is already defined, I used apic_drv_update_vector().
>>> static void vector_assign_managed_shutdown(struct irq_data *irqd)
>>> @@ -528,11 +532,15 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
>>> if (irqd_is_activated(irqd)) {
>>> trace_vector_setup(virq, true, 0);
>>> apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
>>> + if (apic->update_vector)
>>> + apic->update_vector(apicd->cpu, apicd->vector, true);
>>> } else {
>>> /* Release the vector */
>>> apicd->can_reserve = true;
>>> irqd_set_can_reserve(irqd);
>>> clear_irq_vector(irqd);
>>> + if (apic->update_vector)
>>> + apic->update_vector(apicd->cpu, apicd->vector, false);
>>> realloc = true;
>>
>> This is as incomplete as it gets. None of the other code paths which
>> invoke clear_irq_vector() nor those which invoke free_moved_vector() are
>> mopping up the leftovers in the backing page.
>>
>> And no, you don't sprinkle this nonsense all over the call sites. There
>> is only a very limited number of functions which are involed in setting
>> up and tearing down a vector. Doing this at the call sites is a
>> guarantee for missing out as you demonstrated.
>>
>
> This is the part where I was looking for guidance. As ALLOWED_IRR (which
> tells if Hypervisor is allowed to inject a vector to guest vCPU) is per
> CPU, intent was to call it at places where vector's CPU affinity changes.
> I surely have missed cleaning up ALLOWED_IRR on previously affined CPU.
> I will follow your suggestion to do it during setup/teardown of vector (need
> to figure out those functions) and configure it for all CPUs in those
> functions.
>
I updated it like below. Can you share your opinion on this, if this
looks fine or I got it wrong?
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 736f62812f5c..fef6faffeed1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -139,6 +139,46 @@ static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
apicd->hw_irq_cfg.dest_apicid);
}
+static inline void apic_drv_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ if (apic->update_vector)
+ apic->update_vector(cpu, vector, set);
+}
+
+static int irq_alloc_vector(const struct cpumask *dest, bool resvd, unsigned int *cpu)
+{
+ int vector;
+
+ vector = irq_matrix_alloc(vector_matrix, dest, resvd, cpu);
+
+ if (vector < 0)
+ return vector;
+
+ apic_drv_update_vector(*cpu, vector, true);
+
+ return vector;
+}
+
+static int irq_alloc_managed_vector(unsigned int *cpu)
+{
+ int vector;
+
+ vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, cpu);
+
+ if (vector < 0)
+ return vector;
+
+ apic_drv_update_vector(*cpu, vector, true);
+
+ return vector;
+}
+
+static void irq_free_vector(unsigned int cpu, unsigned int vector, bool managed)
+{
+ apic_drv_update_vector(cpu, vector, false);
+ irq_matrix_free(vector_matrix, cpu, vector, managed);
+}
+
static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
unsigned int newcpu)
{
@@ -174,8 +214,7 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
apicd->prev_cpu = apicd->cpu;
WARN_ON_ONCE(apicd->cpu == newcpu);
} else {
- irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
- managed);
+ irq_free_vector(apicd->cpu, apicd->vector, managed);
}
setnew:
@@ -256,7 +295,7 @@ assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
return -EBUSY;
- vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
+ vector = irq_alloc_vector(dest, resvd, &cpu);
trace_vector_alloc(irqd->irq, vector, resvd, vector);
if (vector < 0)
return vector;
@@ -332,8 +371,7 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
/* set_affinity might call here for nothing */
if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
return 0;
- vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
- &cpu);
+ vector = irq_alloc_managed_vector(&cpu);
trace_vector_alloc_managed(irqd->irq, vector, vector);
if (vector < 0)
return vector;
@@ -357,7 +395,7 @@ static void clear_irq_vector(struct irq_data *irqd)
apicd->prev_cpu);
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+ irq_free_vector(apicd->cpu, vector, managed);
apicd->vector = 0;
/* Clean up move in progress */
@@ -366,7 +404,7 @@ static void clear_irq_vector(struct irq_data *irqd)
return;
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
- irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+ irq_free_vector(apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
hlist_del_init(&apicd->clist);
@@ -528,6 +566,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
if (irqd_is_activated(irqd)) {
trace_vector_setup(virq, true, 0);
apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
+ apic_drv_update_vector(apicd->cpu, apicd->vector, true);
} else {
/* Release the vector */
apicd->can_reserve = true;
@@ -949,7 +988,7 @@ static void free_moved_vector(struct apic_chip_data *apicd)
* affinity mask comes online.
*/
trace_vector_free_moved(apicd->irq, cpu, vector, managed);
- irq_matrix_free(vector_matrix, cpu, vector, managed);
+ irq_free_vector(cpu, vector, managed);
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
hlist_del_init(&apicd->clist);
apicd->prev_vector = 0;
...
>
>> Als I have no idea what SAVIC_ALLOWED_IRR_OFFSET means. Whether it's
>> something from the datashit or a made up thing does not matter. It's
>> patently non-informative.
>>
>
> Ok, I had tried to give some details in the cover letter. These APIC
> regs are at offset APIC_IRR(n) + 4 and are used by guest to configure the
> interrupt vectors which can be injected by Hypervisor to Guest.
>
>
>> Again:
>>
>> struct apic_page {
>> union {
>> u32 regs[NR_APIC_REGS];
>> u8 bytes[PAGE_SIZE];
>> };
>> };
>>
>> struct apic_page *ap = this_cpu_ptr(apic_page);
>> unsigned long *sirr;
>>
>> /*
>> * apic_page.regs[SAVIC_ALLOWED_IRR_OFFSET...] is an array of
>> * consecutive 32-bit registers, which represents a vector bitmap.
>> */
>> sirr = (unsigned long *) &ap->regs[SAVIC_ALLOWED_IRR_OFFSET];
>> if (set)
>> set_bit(sirr, vector);
>> else
>> clear_bit(sirr, vector);
>>
>> See how code suddenly becomes self explaining, obvious and
>> comprehensible?
>>
>
> Yes, thank you!
>
After checking more on this, set_bit(vector, ) cannot be used directly here, as
32-bit registers are not consecutive. Each register is aligned at 16 byte
boundary. So, I changed it to below:
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -19,6 +19,26 @@
/* APIC_EILVTn(3) is the last defined APIC register. */
#define NR_APIC_REGS (APIC_EILVTn(4) >> 2)
+/*
+ * APIC registers such as APIC_IRR, APIC_ISR, ... are mapped as
+ * 32-bit registers and are aligned at 16-byte boundary. For
+ * example, APIC_IRR registers mapping looks like below:
+ *
+ * #Offset #bits Description
+ * 0x200 31:0 vectors 0-31
+ * 0x210 31:0 vectors 32-63
+ * ...
+ * 0x270 31:0 vectors 224-255
+ *
+ * VEC_BIT_POS gives the bit position of a vector in the APIC
+ * reg containing its state.
+ */
+#define VEC_BIT_POS(v) ((v) & (32 - 1))
+/*
+ * VEC_REG_OFF gives the relative (from the start offset of that APIC
+ * register) offset of the APIC register containing state for a vector.
+ */
+#define VEC_REG_OFF(v) (((v) >> 5) << 4)
struct apic_page {
union {
@@ -185,6 +205,35 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in
__send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}
+static void x2apic_savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
+{
+ struct apic_page *ap = per_cpu_ptr(apic_backing_page, cpu);
+ unsigned long *sirr;
+ int vec_bit;
+ int reg_off;
+
+ /*
+ * ALLOWED_IRR registers are mapped in the apic_page at below byte
+ * offsets. Each register is a 32-bit register aligned at 16-byte
+ * boundary.
+ *
+ * #Offset #bits Description
+ * SAVIC_ALLOWED_IRR_OFFSET 31:0 Guest allowed vectors 0-31
+ * "" + 0x10 31:0 Guest allowed vectors 32-63
+ * ...
+ * "" + 0x70 31:0 Guest allowed vectors 224-255
+ *
+ */
+ reg_off = SAVIC_ALLOWED_IRR_OFFSET + VEC_REG_OFF(vector);
+ sirr = (unsigned long *) &ap->regs[reg_off >> 2];
+ vec_bit = VEC_BIT_POS(vector);
+
+ if (set)
+ set_bit(vec_bit, sirr);
+ else
+ clear_bit(vec_bit, sirr);
+}
+
static void init_backing_page(void *backing_page)
{
u32 apic_id;
@@ -272,6 +321,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
.eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
+
+ .update_vector = x2apic_savic_update_vector,
};
- Neeraj
> - Neeraj
>
>> Thanks,
>>
>> tglx
>
Powered by blists - more mailing lists