linux-kernel - Re: [PATCH v5 6/7] acpi/ghes: add support for generic error injection via QAPI

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240806163113.3bdc260a@imammedo.users.ipa.redhat.com>
Date: Tue, 6 Aug 2024 16:31:13 +0200
From: Igor Mammedov <imammedo@...hat.com>
To: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
Cc: Jonathan Cameron <Jonathan.Cameron@...wei.com>, Shiju Jose
 <shiju.jose@...wei.com>, "Michael S. Tsirkin" <mst@...hat.com>, Ani Sinha
 <anisinha@...hat.com>, Dongjiu Geng <gengdongjiu1@...il.com>,
 linux-kernel@...r.kernel.org, qemu-arm@...gnu.org, qemu-devel@...gnu.org
Subject: Re: [PATCH v5 6/7] acpi/ghes: add support for generic error
 injection via QAPI

On Fri,  2 Aug 2024 23:44:01 +0200
Mauro Carvalho Chehab <mchehab+huawei@...nel.org> wrote:

> Provide a generic interface for error injection via GHESv2.
> 
> This patch is co-authored:
>     - original ghes logic to inject a simple ARM record by Shiju Jose;
>     - generic logic to handle block addresses by Jonathan Cameron;
>     - generic GHESv2 error inject by Mauro Carvalho Chehab;
> 
> Co-authored-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
> Co-authored-by: Shiju Jose <shiju.jose@...wei.com>
> Co-authored-by: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
> Cc: Jonathan Cameron <Jonathan.Cameron@...wei.com>
> Cc: Shiju Jose <shiju.jose@...wei.com>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
> ---
>  hw/acpi/ghes.c         | 159 ++++++++++++++++++++++++++++++++++++++---
>  hw/acpi/ghes_cper.c    |   2 +-
>  include/hw/acpi/ghes.h |   3 +
>  3 files changed, 152 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index a745dcc7be5e..e125c9475773 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -395,23 +395,22 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
>      ags->present = true;
>  }
>  
> +static uint64_t ghes_get_state_start_address(void)

ghes_get_hardware_errors_address() might better reflect what address it will return

> +{
> +    AcpiGedState *acpi_ged_state =
> +        ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL));
> +    AcpiGhesState *ags = &acpi_ged_state->ghes_state;
> +
> +    return le64_to_cpu(ags->ghes_addr_le);
> +}
> +
>  int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
>  {
>      uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0;
> -    uint64_t start_addr;
> +    uint64_t start_addr = ghes_get_state_start_address();
>      bool ret = -1;
> -    AcpiGedState *acpi_ged_state;
> -    AcpiGhesState *ags;
> -
>      assert(source_id < ACPI_HEST_SRC_ID_RESERVED);
>  
> -    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
> -                                                       NULL));
> -    g_assert(acpi_ged_state);
> -    ags = &acpi_ged_state->ghes_state;
> -
> -    start_addr = le64_to_cpu(ags->ghes_addr_le);
> -
>      if (physical_address) {
>          start_addr += source_id * sizeof(uint64_t);

above should be a separate patch

>  
> @@ -448,9 +447,147 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
>      return ret;
>  }
>  
> +/*
> + * Error register block data layout
> + *
> + * | +---------------------+ ges.ghes_addr_le
> + * | |error_block_address0 |
> + * | +---------------------+
> + * | |error_block_address1 |
> + * | +---------------------+ --+--
> + * | |    .............    | GHES_ADDRESS_SIZE
> + * | +---------------------+ --+--
> + * | |error_block_addressN |
> + * | +---------------------+
> + * | | read_ack0           |
> + * | +---------------------+ --+--
> + * | | read_ack1           | GHES_ADDRESS_SIZE
> + * | +---------------------+ --+--
> + * | |   .............     |
> + * | +---------------------+
> + * | | read_ackN           |
> + * | +---------------------+ --+--
> + * | |      CPER           |   |
> + * | |      ....           | GHES_MAX_RAW_DATA_LENGT
> + * | |      CPER           |   |
> + * | +---------------------+ --+--
> + * | |    ..........       |
> + * | +---------------------+
> + * | |      CPER           |
> + * | |      ....           |
> + * | |      CPER           |
> + * | +---------------------+
> + */

no need to duplicate docs/specs/acpi_hest_ghes.rst,
I'd just refer to it and maybe add a short comment as to why it's mentioned.

> +/* Map from uint32_t notify to entry offset in GHES */
> +static const uint8_t error_source_to_index[] = { 0xff, 0xff, 0xff, 0xff,
> +                                                 0xff, 0xff, 0xff, 1, 0};
> +
> +static bool ghes_get_addr(uint32_t notify, uint64_t *error_block_addr,
> +                          uint64_t *read_ack_addr)
> +{
> +    uint64_t base;
> +
> +    if (notify >= ACPI_GHES_NOTIFY_RESERVED) {
> +        return false;
> +    }
> +
> +    /* Find and check the source id for this new CPER */
> +    if (error_source_to_index[notify] == 0xff) {
> +        return false;
> +    }
> +
> +    base = ghes_get_state_start_address();
> +
> +    *read_ack_addr = base +
> +        ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
> +        error_source_to_index[notify] * sizeof(uint64_t);
> +
> +    /* Could also be read back from the error_block_address register */
> +    *error_block_addr = base +
> +        ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
> +        ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t) +
> +        error_source_to_index[notify] * ACPI_GHES_MAX_RAW_DATA_LENGTH;
> +
> +    return true;
> +}

I don't like all this pointer math, which basically reverse-engineers
QEMU's actions on startup + the guest-provided etc/hardware_errors address.

For one, it assumes error_source_to_index[] matches the order in which HEST
error sources were described, which is fragile.

2nd: migration-wise it's a disaster, since old/new HEST/hardware_errors tables
in RAM migrated from an older version might not match the above assumptions
of the target QEMU.

I see 2 ways to rectify it:
  1st: preferred/cleanest would be to tell QEMU (via fw_cfg) address of HEST table
       in guest RAM, like we do with etc/hardware_errors, see
            build_ghes_error_table()
               ...
               tell firmware to write hardware_errors GPA into
       and then fetch from HEST table in RAM, the guest patched error/ack addresses
       for given source_id

       code-wise: relatively simple once one wraps one's head around
                 how this whole APEI thing works in QEMU;
                 the workflow described in docs/specs/acpi_hest_ghes.rst
                 looks to me sufficient to grasp it.
                 (but my view is very biased given my prior knowledge,
                  aka: docs/comments/examples wrt acpi patching are good enough)
                 (if it's not clear how to do it, ask me for pointers)

  2nd:  sort of hack based on build_ghes_v2() Error Status Address/Read Ack Register
        patching instructions
               bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,                
                   address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t),                      
                   ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t));
                                                 ^^^^^^^^^^^^^^^^^^^^^^^^^
        during build_ghes_v2() also store on a side mapping
             source_id -> error address offset : read ack address

        so when you are injecting an error, you'd at least use the offsets
        used at start time, to get rid of the risk where the injection code
        diverges from the HEST:etc/hardware_errors layout at start time.

        However, to make migration safe, one would need to add a fat
        comment not to change the order of GHES error sources in HEST _and_
        a dedicated unit test to make sure we catch it when that happens.
        bios_tables_test should be able to catch the change, but it won't
        say what's wrong, hence a test case that explicitly checks the order
        and loudly & clearly complains when we break the order assumptions.

        downside:
           * we are limiting the ways HEST could be composed/reshuffled in future
           * consumption of extra CI resources
           * and well, it relies on above duct tape holding all pieces together

>  NotifierList generic_error_notifiers =
>      NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
>  
> +void ghes_record_cper_errors(AcpiGhesCper *cper, Error **errp,
> +                             uint32_t notify)
> +{
> +    int read_ack = 0;
       ^^^
[...]
> +    cpu_physical_memory_read(read_ack_addr,
> +                             &read_ack, sizeof(uint64_t));
                                                  ^^^^
read_ack is declared as int, yet sizeof(uint64_t) bytes are read into it;
this looks like possible stack corruption, doesn't it?

> +    /* zero means OSPM does not acknowledge the error */
> +    if (!read_ack) {
> +        error_setg(errp,
> +                   "Last CPER record was not acknowledged yet");
> +        read_ack = 1;
> +        cpu_physical_memory_write(read_ack_addr,
> +                                  &read_ack, sizeof(uint64_t));
                                                        ^^^^^
and then who knows what we are writing back here

> +        return;
> +    }
> +
> +    read_ack = cpu_to_le64(0);
> +    cpu_physical_memory_write(read_ack_addr,
> +                              &read_ack, sizeof(uint64_t));
> +
> +    /* Build CPER record */
> +
> +    /*
> +     * Invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
> +     * Table 17-13 Generic Error Data Entry
> +     */
> +    QemuUUID fru_id = {};
> +
> +    block = g_array_new(false, true /* clear */, 1);
> +    data_length = ACPI_GHES_DATA_LENGTH + cper->data_len;
> +
> +    /*
> +        * It should not run out of the preallocated memory if
> +        * adding a new generic error data entry
> +        */
> +    assert((data_length + ACPI_GHES_GESB_SIZE) <=
> +            ACPI_GHES_MAX_RAW_DATA_LENGTH);
it's better to error out gracefully here instead of crashing
in case the script generated too long a record;
it's not the end of the world, but it's annoying to restart the guest
because of an external mistake.

PS:
looking at the code, ACPI_GHES_MAX_RAW_DATA_LENGTH is 1K
and it is the total size of an error block for an error source.

However acpi_hest_ghes.rst (3) says it should be 4K,
am I mistaken? 


> +    /* Build the new generic error status block header */
> +    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
> +                                    0, 0, data_length,
> +                                    ACPI_CPER_SEV_RECOVERABLE);
> +
> +    /* Build this new generic error data entry header */
> +    acpi_ghes_generic_error_data(block, cper->guid,
> +                                ACPI_CPER_SEV_RECOVERABLE, 0, 0,
> +                                cper->data_len, fru_id, 0);
> +

not that I mind, but I'd axe the above calls with their hardcoded
assumptions and make the script generate the whole error block;
it's more flexible wrt ACPI_CPER_SEV_RECOVERABLE/ACPI_GEBS_UNCORRECTABLE
and then one can drop cper->guid from the QAPI interface.

basically inject whatever user provided via QAPI without any other assumptions.

> +    /* Add CPER data */
> +    for (i = 0; i < cper->data_len; i++) {
> +        build_append_int_noprefix(block, cper->data[i], 1);
> +    }
> +
> +    /* Write the generic error data entry into guest memory */
> +    cpu_physical_memory_write(error_block_addr, block->data, block->len);
> +
> +    g_array_free(block, true);
> +
> +    notifier_list_notify(&generic_error_notifiers, NULL);
> +}
> +
>  bool acpi_ghes_present(void)
>  {
>      AcpiGedState *acpi_ged_state;
> diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
> index 7aa7e71e90dc..d7ff7debee74 100644
> --- a/hw/acpi/ghes_cper.c
> +++ b/hw/acpi/ghes_cper.c
> @@ -39,7 +39,7 @@ void qmp_ghes_cper(CommonPlatformErrorRecord *qmp_cper,
>          return;
>      }
>  
> -    /* TODO: call a function at ghes */
> +    ghes_record_cper_errors(&cper, errp, ACPI_GHES_NOTIFY_GPIO);
>  
>      g_free(cper.data);
>  }
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index 06a5b8820cd5..ee6f6cd96911 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -85,6 +85,9 @@ typedef struct AcpiGhesCper {
>      size_t data_len;
>  } AcpiGhesCper;
>  
> +void ghes_record_cper_errors(AcpiGhesCper *cper, Error **errp,
> +                             uint32_t notify);

maybe rename it to acpi_ghes_inject_error_block()

> +
>  /**
>   * acpi_ghes_present: Report whether ACPI GHES table is present
>   *