diff mbox series

[v4,1/2] x86: return modified setup_data only if read as memory, not as file

Message ID 20220913234135.255426-1-Jason@zx2c4.com (mailing list archive)
State New, archived
Headers show
Series [v4,1/2] x86: return modified setup_data only if read as memory, not as file | expand

Commit Message

Jason A. Donenfeld Sept. 13, 2022, 11:41 p.m. UTC
If setup_data is being read into a specific memory location, then
generally the setup_data address parameter is read first, so that the
caller knows where to read it into. In that case, we should return
setup_data containing the absolute addresses that are hard coded and
determined a priori. This is the case when kernels are loaded by BIOS,
for example. In contrast, when setup_data is read as a file, then we
shouldn't modify setup_data, since the absolute address will be wrong by
definition. This is the case when OVMF loads the image.

This allows setup_data to be used like normal, without crashing when EFI
tries to use it.

(As a small development note, strangely, fw_cfg_add_file_callback() was
exported but fw_cfg_add_bytes_callback() wasn't, so this makes that
consistent.)

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Suggested-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 hw/i386/x86.c             | 37 +++++++++++++++++++++++++++----------
 hw/nvram/fw_cfg.c         | 12 ++++++------
 include/hw/nvram/fw_cfg.h | 22 ++++++++++++++++++++++
 3 files changed, 55 insertions(+), 16 deletions(-)

Comments

Ard Biesheuvel Sept. 16, 2022, 7:15 a.m. UTC | #1
On Wed, 14 Sept 2022 at 01:42, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> If setup_data is being read into a specific memory location, then
> generally the setup_data address parameter is read first, so that the
> caller knows where to read it into. In that case, we should return
> setup_data containing the absolute addresses that are hard coded and
> determined a priori. This is the case when kernels are loaded by BIOS,
> for example. In contrast, when setup_data is read as a file, then we
> shouldn't modify setup_data, since the absolute address will be wrong by
> definition. This is the case when OVMF loads the image.
>
> This allows setup_data to be used like normal, without crashing when EFI
> tries to use it.
>
> (As a small development note, strangely, fw_cfg_add_file_callback() was
> exported but fw_cfg_add_bytes_callback() wasn't, so this makes that
> consistent.)
>
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Laurent Vivier <laurent@vivier.eu>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Suggested-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>

This is still somewhat of a crutch, but at least we can now
disambiguate between loaders that treat the setup data as a file
(OVMF) and ones that treat it as an object that lives at a fixed
address in memory (SeaBIOS).

I'll note that this also addresses the existing issue with -dtb on
x86, which currently breaks the OVMF direct kernel boot in the same
way as the RNG seed does.

> ---
>  hw/i386/x86.c             | 37 +++++++++++++++++++++++++++----------
>  hw/nvram/fw_cfg.c         | 12 ++++++------
>  include/hw/nvram/fw_cfg.h | 22 ++++++++++++++++++++++
>  3 files changed, 55 insertions(+), 16 deletions(-)
>
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 050eedc0c8..933bbdd836 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -764,6 +764,18 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>
> +struct setup_data_fixup {
> +    void *pos;
> +    hwaddr val;
> +    uint32_t addr;
> +};
> +
> +static void fixup_setup_data(void *opaque)
> +{
> +    struct setup_data_fixup *fixup = opaque;
> +    stq_p(fixup->pos, fixup->val);
> +}
> +
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -1088,8 +1100,11 @@ void x86_load_linux(X86MachineState *x86ms,
>          qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
>      }
>
> -    /* Offset 0x250 is a pointer to the first setup_data link. */
> -    stq_p(header + 0x250, first_setup_data);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> +    sev_load_ctx.kernel_data = (char *)kernel;
> +    sev_load_ctx.kernel_size = kernel_size;
>
>      /*
>       * If we're starting an encrypted VM, it will be OVMF based, which uses the
> @@ -1099,16 +1114,18 @@ void x86_load_linux(X86MachineState *x86ms,
>       * file the user passed in.
>       */
>      if (!sev_enabled()) {
> +        struct setup_data_fixup *fixup = g_malloc(sizeof(*fixup));
> +
>          memcpy(setup, header, MIN(sizeof(header), setup_size));
> +        /* Offset 0x250 is a pointer to the first setup_data link. */
> +        fixup->pos = setup + 0x250;
> +        fixup->val = first_setup_data;
> +        fixup->addr = real_addr;
> +        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> +                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> +    } else {
> +        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    sev_load_ctx.kernel_data = (char *)kernel;
> -    sev_load_ctx.kernel_size = kernel_size;
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index d605f3f45a..564bda3395 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -692,12 +692,12 @@ static const VMStateDescription vmstate_fw_cfg = {
>      }
>  };
>
> -static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> -                                      FWCfgCallback select_cb,
> -                                      FWCfgWriteCallback write_cb,
> -                                      void *callback_opaque,
> -                                      void *data, size_t len,
> -                                      bool read_only)
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only)
>  {
>      int arch = !!(key & FW_CFG_ARCH_LOCAL);
>
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index 0e7a8bc7af..e4fef393be 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -117,6 +117,28 @@ struct FWCfgMemState {
>   */
>  void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
>
> +/**
> + * fw_cfg_add_bytes_callback:
> + * @s: fw_cfg device being modified
> + * @key: selector key value for new fw_cfg item
> + * @select_cb: callback function when selecting
> + * @write_cb: callback function after a write
> + * @callback_opaque: argument to be passed into callback function
> + * @data: pointer to start of item data
> + * @len: size of item data
> + * @read_only: is file read only
> + *
> + * Add a new fw_cfg item, available by selecting the given key, as a raw
> + * "blob" of the given size. The data referenced by the starting pointer
> + * is only linked, NOT copied, into the data structure of the fw_cfg device.
> + */
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only);
> +
>  /**
>   * fw_cfg_add_string:
>   * @s: fw_cfg device being modified
> --
> 2.37.3
>
Paolo Bonzini Sept. 21, 2022, 8:59 a.m. UTC | #2
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 050eedc0c8..933bbdd836 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -764,6 +764,18 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>
> +struct setup_data_fixup {
> +    void *pos;
> +    hwaddr val;
> +    uint32_t addr;
> +};

Just a small comment, addr should be little-endian (see
fw_cfg_add_i32).  It's not used outside x86_load_linux, so it is
possible to just use cpu_to_le32 there.

Also I think it's cleaner if a reset callback puts the value back to
zero. fw_cfg already has fw_cfg_machine_reset, so perhaps the easiest
way is to add a FWCfgCallback reset_cb argument to just
fw_cfg_add_bytes_callback. If I am missing something and it's not
necessary I can do the cpu_to_le32 change myself or wait for you; in
any case I'll wait for either your ack or a v5.

By the way, does this supersede v1..v3 that use the new protocol (I'd
guess so from the presence of the same 2/2 patch), or are the two
patches doing belts-and-suspenders?

Thanks,

Paolo

> +static void fixup_setup_data(void *opaque)
> +{
> +    struct setup_data_fixup *fixup = opaque;
> +    stq_p(fixup->pos, fixup->val);
> +}
> +
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -1088,8 +1100,11 @@ void x86_load_linux(X86MachineState *x86ms,
>          qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
>      }
>
> -    /* Offset 0x250 is a pointer to the first setup_data link. */
> -    stq_p(header + 0x250, first_setup_data);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> +    sev_load_ctx.kernel_data = (char *)kernel;
> +    sev_load_ctx.kernel_size = kernel_size;
>
>      /*
>       * If we're starting an encrypted VM, it will be OVMF based, which uses the
> @@ -1099,16 +1114,18 @@ void x86_load_linux(X86MachineState *x86ms,
>       * file the user passed in.
>       */
>      if (!sev_enabled()) {
> +        struct setup_data_fixup *fixup = g_malloc(sizeof(*fixup));
> +
>          memcpy(setup, header, MIN(sizeof(header), setup_size));
> +        /* Offset 0x250 is a pointer to the first setup_data link. */
> +        fixup->pos = setup + 0x250;
> +        fixup->val = first_setup_data;
> +        fixup->addr = real_addr;
> +        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> +                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> +    } else {
> +        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    sev_load_ctx.kernel_data = (char *)kernel;
> -    sev_load_ctx.kernel_size = kernel_size;
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index d605f3f45a..564bda3395 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -692,12 +692,12 @@ static const VMStateDescription vmstate_fw_cfg = {
>      }
>  };
>
> -static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> -                                      FWCfgCallback select_cb,
> -                                      FWCfgWriteCallback write_cb,
> -                                      void *callback_opaque,
> -                                      void *data, size_t len,
> -                                      bool read_only)
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only)
>  {
>      int arch = !!(key & FW_CFG_ARCH_LOCAL);
>
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index 0e7a8bc7af..e4fef393be 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -117,6 +117,28 @@ struct FWCfgMemState {
>   */
>  void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
>
> +/**
> + * fw_cfg_add_bytes_callback:
> + * @s: fw_cfg device being modified
> + * @key: selector key value for new fw_cfg item
> + * @select_cb: callback function when selecting
> + * @write_cb: callback function after a write
> + * @callback_opaque: argument to be passed into callback function
> + * @data: pointer to start of item data
> + * @len: size of item data
> + * @read_only: is file read only
> + *
> + * Add a new fw_cfg item, available by selecting the given key, as a raw
> + * "blob" of the given size. The data referenced by the starting pointer
> + * is only linked, NOT copied, into the data structure of the fw_cfg device.
> + */
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only);
> +
>  /**
>   * fw_cfg_add_string:
>   * @s: fw_cfg device being modified
> --
> 2.37.3
>
Jason A. Donenfeld Sept. 21, 2022, 9:04 a.m. UTC | #3
Hi Paolo,

On Wed, Sep 21, 2022 at 10:59 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
> Just a small comment, addr should be little-endian (see
> fw_cfg_add_i32).  It's not used outside x86_load_linux, so it is
> possible to just use cpu_to_le32 there.

Oh, shucks: I thought about this and then forgot to do it. Thanks for
catching it.

> Also I think it's cleaner if a reset callback puts the value back to
> zero. fw_cfg already has fw_cfg_machine_reset, so perhaps the easiest
> way is to add a FWCfgCallback reset_cb argument to just
> fw_cfg_add_bytes_callback. If I am missing something and it's not
> necessary I can do the cpu_to_le32 change myself or wait for you; in
> any case I'll wait for either your ack or a v5.

Actually, the idea is for the change to be permanent, since that
represents how the system has actually been booted. Are there
substantial changes possible to the firmware configuration on
fw_cfg_machine_reset() that setting this back how it was would make a
difference? Or do we benefit from having some consistency?

> By the way, does this supersede v1..v3 that use the new protocol (I'd
> guess so from the presence of the same 2/2 patch), or are the two
> patches doing belts-and-suspenders?

This v4 supersedes everything else.

Jason
Jason A. Donenfeld Sept. 21, 2022, 9:12 a.m. UTC | #4
On Wed, Sep 21, 2022 at 11:04:17AM +0200, Jason A. Donenfeld wrote:
> > Also I think it's cleaner if a reset callback puts the value back to
> > zero. fw_cfg already has fw_cfg_machine_reset, so perhaps the easiest
> > way is to add a FWCfgCallback reset_cb argument to just
> > fw_cfg_add_bytes_callback. If I am missing something and it's not
> > necessary I can do the cpu_to_le32 change myself or wait for you; in
> > any case I'll wait for either your ack or a v5.
> 
> Actually, the idea is for the change to be permanent, since that
> represents how the system has actually been booted. Are there
> substantial changes possible to the firmware configuration on
> fw_cfg_machine_reset() that setting this back how it was would make a
> difference? Or do we benefit from having some consistency?

Looking at this more, I think your suggestion makes sense. I also have a
very straightforward way of implementing it. I'll send a v5 in a
minute.

Jason
Michael S. Tsirkin Sept. 21, 2022, 9:15 a.m. UTC | #5
On Wed, Sep 14, 2022 at 12:41:34AM +0100, Jason A. Donenfeld wrote:
> If setup_data is being read into a specific memory location, then
> generally the setup_data address parameter is read first, so that the
> caller knows where to read it into. In that case, we should return
> setup_data containing the absolute addresses that are hard coded and
> determined a priori. This is the case when kernels are loaded by BIOS,
> for example. In contrast, when setup_data is read as a file, then we
> shouldn't modify setup_data, since the absolute address will be wrong by
> definition. This is the case when OVMF loads the image.
> 
> This allows setup_data to be used like normal, without crashing when EFI
> tries to use it.
> 
> (As a small development note, strangely, fw_cfg_add_file_callback() was
> exported but fw_cfg_add_bytes_callback() wasn't, so this makes that
> consistent.)
> 
> Cc: Gerd Hoffmann <kraxel@redhat.com>
> Cc: Laurent Vivier <laurent@vivier.eu>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Cc: Richard Henderson <richard.henderson@linaro.org>
> Suggested-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  hw/i386/x86.c             | 37 +++++++++++++++++++++++++++----------
>  hw/nvram/fw_cfg.c         | 12 ++++++------
>  include/hw/nvram/fw_cfg.h | 22 ++++++++++++++++++++++
>  3 files changed, 55 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 050eedc0c8..933bbdd836 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -764,6 +764,18 @@ static bool load_elfboot(const char *kernel_filename,
>      return true;
>  }
>  
> +struct setup_data_fixup {
> +    void *pos;
> +    hwaddr val;
> +    uint32_t addr;
> +};
> +

btw

	typedef struct SetupDataFixup {
	    void *pos;
	    hwaddr val;
	    uint32_t addr;
	} SetupDataFixup;


and use typedef everywhere.

Yes I know setup_data is like this but that probably should be
fixed too.

> +static void fixup_setup_data(void *opaque)
> +{
> +    struct setup_data_fixup *fixup = opaque;
> +    stq_p(fixup->pos, fixup->val);
> +}
> +
>  void x86_load_linux(X86MachineState *x86ms,
>                      FWCfgState *fw_cfg,
>                      int acpi_data_size,
> @@ -1088,8 +1100,11 @@ void x86_load_linux(X86MachineState *x86ms,
>          qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
>      }
>  
> -    /* Offset 0x250 is a pointer to the first setup_data link. */
> -    stq_p(header + 0x250, first_setup_data);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> +    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> +    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> +    sev_load_ctx.kernel_data = (char *)kernel;
> +    sev_load_ctx.kernel_size = kernel_size;
>  
>      /*
>       * If we're starting an encrypted VM, it will be OVMF based, which uses the
> @@ -1099,16 +1114,18 @@ void x86_load_linux(X86MachineState *x86ms,
>       * file the user passed in.
>       */
>      if (!sev_enabled()) {
> +        struct setup_data_fixup *fixup = g_malloc(sizeof(*fixup));
> +
>          memcpy(setup, header, MIN(sizeof(header), setup_size));
> +        /* Offset 0x250 is a pointer to the first setup_data link. */
> +        fixup->pos = setup + 0x250;
> +        fixup->val = first_setup_data;
> +        fixup->addr = real_addr;
> +        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
> +                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
> +    } else {
> +        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      }
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
> -    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
> -    sev_load_ctx.kernel_data = (char *)kernel;
> -    sev_load_ctx.kernel_size = kernel_size;
> -
> -    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>      sev_load_ctx.setup_data = (char *)setup;
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index d605f3f45a..564bda3395 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -692,12 +692,12 @@ static const VMStateDescription vmstate_fw_cfg = {
>      }
>  };
>  
> -static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> -                                      FWCfgCallback select_cb,
> -                                      FWCfgWriteCallback write_cb,
> -                                      void *callback_opaque,
> -                                      void *data, size_t len,
> -                                      bool read_only)
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only)
>  {
>      int arch = !!(key & FW_CFG_ARCH_LOCAL);
>  
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index 0e7a8bc7af..e4fef393be 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -117,6 +117,28 @@ struct FWCfgMemState {
>   */
>  void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
>  
> +/**
> + * fw_cfg_add_bytes_callback:
> + * @s: fw_cfg device being modified
> + * @key: selector key value for new fw_cfg item
> + * @select_cb: callback function when selecting
> + * @write_cb: callback function after a write
> + * @callback_opaque: argument to be passed into callback function
> + * @data: pointer to start of item data
> + * @len: size of item data
> + * @read_only: is file read only
> + *
> + * Add a new fw_cfg item, available by selecting the given key, as a raw
> + * "blob" of the given size. The data referenced by the starting pointer
> + * is only linked, NOT copied, into the data structure of the fw_cfg device.
> + */
> +void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
> +                               FWCfgCallback select_cb,
> +                               FWCfgWriteCallback write_cb,
> +                               void *callback_opaque,
> +                               void *data, size_t len,
> +                               bool read_only);
> +
>  /**
>   * fw_cfg_add_string:
>   * @s: fw_cfg device being modified
> -- 
> 2.37.3
Jason A. Donenfeld Sept. 21, 2022, 9:15 a.m. UTC | #6
On Wed, Sep 21, 2022 at 11:15 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Sep 14, 2022 at 12:41:34AM +0100, Jason A. Donenfeld wrote:
> > If setup_data is being read into a specific memory location, then
> > generally the setup_data address parameter is read first, so that the
> > caller knows where to read it into. In that case, we should return
> > setup_data containing the absolute addresses that are hard coded and
> > determined a priori. This is the case when kernels are loaded by BIOS,
> > for example. In contrast, when setup_data is read as a file, then we
> > shouldn't modify setup_data, since the absolute address will be wrong by
> > definition. This is the case when OVMF loads the image.
> >
> > This allows setup_data to be used like normal, without crashing when EFI
> > tries to use it.
> >
> > (As a small development note, strangely, fw_cfg_add_file_callback() was
> > exported but fw_cfg_add_bytes_callback() wasn't, so this makes that
> > consistent.)
> >
> > Cc: Gerd Hoffmann <kraxel@redhat.com>
> > Cc: Laurent Vivier <laurent@vivier.eu>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Peter Maydell <peter.maydell@linaro.org>
> > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
> > Cc: Richard Henderson <richard.henderson@linaro.org>
> > Suggested-by: Ard Biesheuvel <ardb@kernel.org>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > ---
> >  hw/i386/x86.c             | 37 +++++++++++++++++++++++++++----------
> >  hw/nvram/fw_cfg.c         | 12 ++++++------
> >  include/hw/nvram/fw_cfg.h | 22 ++++++++++++++++++++++
> >  3 files changed, 55 insertions(+), 16 deletions(-)
> >
> > diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> > index 050eedc0c8..933bbdd836 100644
> > --- a/hw/i386/x86.c
> > +++ b/hw/i386/x86.c
> > @@ -764,6 +764,18 @@ static bool load_elfboot(const char *kernel_filename,
> >      return true;
> >  }
> >
> > +struct setup_data_fixup {
> > +    void *pos;
> > +    hwaddr val;
> > +    uint32_t addr;
> > +};
> > +
>
> btw
>
>         typedef struct SetupDataFixup {
>             void *pos;
>             hwaddr val;
>             uint32_t addr;
>         } SetupDataFixup;
>
>
> and use typedef everywhere.

Okay no problem. Will do for v5.

Jason
Paolo Bonzini Sept. 21, 2022, 9:35 a.m. UTC | #7
On Wed, Sep 21, 2022 at 11:12 AM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > Also I think it's cleaner if a reset callback puts the value back to
> > zero. fw_cfg already has fw_cfg_machine_reset, so perhaps the easiest
> > way is to add a FWCfgCallback reset_cb argument to just
> > fw_cfg_add_bytes_callback. If I am missing something and it's not
> > necessary I can do the cpu_to_le32 change myself or wait for you; in
> > any case I'll wait for either your ack or a v5.
>
> Actually, the idea is for the change to be permanent, since that
> represents how the system has actually been booted. Are there
> substantial changes possible to the firmware configuration on
> fw_cfg_machine_reset() that setting this back how it was would make a
> difference? Or do we benefit from having some consistency?

It's not a very practical thing to happen but I guess you could boot
UEFI twice, but the second time go to a CSM which could use the
setup_data. But really, as you say, it's just more consistent if reset
brings everything back to the pristine state, unless there's a good
reason not to do so (which you agreed in the next message there isn't).

I'll queue v5, thanks!

Paolo
diff mbox series

Patch

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 050eedc0c8..933bbdd836 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -764,6 +764,18 @@  static bool load_elfboot(const char *kernel_filename,
     return true;
 }
 
+struct setup_data_fixup {
+    void *pos;
+    hwaddr val;
+    uint32_t addr;
+};
+
+static void fixup_setup_data(void *opaque)
+{
+    struct setup_data_fixup *fixup = opaque;
+    stq_p(fixup->pos, fixup->val);
+}
+
 void x86_load_linux(X86MachineState *x86ms,
                     FWCfgState *fw_cfg,
                     int acpi_data_size,
@@ -1088,8 +1100,11 @@  void x86_load_linux(X86MachineState *x86ms,
         qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
     }
 
-    /* Offset 0x250 is a pointer to the first setup_data link. */
-    stq_p(header + 0x250, first_setup_data);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
+    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
+    sev_load_ctx.kernel_data = (char *)kernel;
+    sev_load_ctx.kernel_size = kernel_size;
 
     /*
      * If we're starting an encrypted VM, it will be OVMF based, which uses the
@@ -1099,16 +1114,18 @@  void x86_load_linux(X86MachineState *x86ms,
      * file the user passed in.
      */
     if (!sev_enabled()) {
+        struct setup_data_fixup *fixup = g_malloc(sizeof(*fixup));
+
         memcpy(setup, header, MIN(sizeof(header), setup_size));
+        /* Offset 0x250 is a pointer to the first setup_data link. */
+        fixup->pos = setup + 0x250;
+        fixup->val = first_setup_data;
+        fixup->addr = real_addr;
+        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
+                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
+    } else {
+        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
     }
-
-    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
-    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
-    fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
-    sev_load_ctx.kernel_data = (char *)kernel;
-    sev_load_ctx.kernel_size = kernel_size;
-
-    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
     fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
     sev_load_ctx.setup_data = (char *)setup;
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index d605f3f45a..564bda3395 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -692,12 +692,12 @@  static const VMStateDescription vmstate_fw_cfg = {
     }
 };
 
-static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
-                                      FWCfgCallback select_cb,
-                                      FWCfgWriteCallback write_cb,
-                                      void *callback_opaque,
-                                      void *data, size_t len,
-                                      bool read_only)
+void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
+                               FWCfgCallback select_cb,
+                               FWCfgWriteCallback write_cb,
+                               void *callback_opaque,
+                               void *data, size_t len,
+                               bool read_only)
 {
     int arch = !!(key & FW_CFG_ARCH_LOCAL);
 
diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
index 0e7a8bc7af..e4fef393be 100644
--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -117,6 +117,28 @@  struct FWCfgMemState {
  */
 void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
 
+/**
+ * fw_cfg_add_bytes_callback:
+ * @s: fw_cfg device being modified
+ * @key: selector key value for new fw_cfg item
+ * @select_cb: callback function when selecting
+ * @write_cb: callback function after a write
+ * @callback_opaque: argument to be passed into callback function
+ * @data: pointer to start of item data
+ * @len: size of item data
+ * @read_only: is file read only
+ *
+ * Add a new fw_cfg item, available by selecting the given key, as a raw
+ * "blob" of the given size. The data referenced by the starting pointer
+ * is only linked, NOT copied, into the data structure of the fw_cfg device.
+ */
+void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
+                               FWCfgCallback select_cb,
+                               FWCfgWriteCallback write_cb,
+                               void *callback_opaque,
+                               void *data, size_t len,
+                               bool read_only);
+
 /**
  * fw_cfg_add_string:
  * @s: fw_cfg device being modified