diff mbox

arm64: Relocate screen_info.lfb_base on PCI BAR allocation

Message ID 1461795744-28837-1-git-send-email-agraf@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Graf April 27, 2016, 10:22 p.m. UTC
When booting with efifb, we get a frame buffer address passed into the system.
This address can be backed by any device, including PCI devices.

PCI devices can have their BARs mapped to various places inside the PCI window
though. Linux makes use of that on early boot and usually maps PCI BARs wherever
it thinks makes sense.

If we now load the efifb driver after that BAR map has happened, the frame
buffer address we received may be invalid, because it was in a BAR map before
Linux modified it.

To work around that issue, this patch introduces a BAR mapping callback that
gets called every time Linux (re)allocates a BAR. That way our arm64 efi code
can check whether the frame buffer is inside the old map and adjust it to
the new one.

With this and the efifb patches applied, I can successfully see efifb output
even after Linux remapped BARs.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/arm64/kernel/efi.c | 40 +++++++++++++++++++++++++++++++++++++++-
 drivers/pci/setup-res.c | 29 +++++++++++++++++++++++++++++
 include/linux/pci.h     |  8 ++++++++
 3 files changed, 76 insertions(+), 1 deletion(-)

Comments

Bjorn Helgaas April 28, 2016, 4:20 p.m. UTC | #1
On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
> When booting with efifb, we get a frame buffer address passed into the system.
> This address can be backed by any device, including PCI devices.

I guess we get the frame buffer address via EFI, but it doesn't tell
us what PCI device it's connected to?

This same thing could happen on any EFI arch, I guess.  Maybe even on
non-EFI arches, if there's a way to discover the frame buffer address
as a bare address rather than a "offset X into BAR Y of PCI device Z"
sort of thing.

> PCI devices can have their BARs mapped to various places inside the PCI window
> though. Linux makes use of that on early boot and usually maps PCI BARs wherever
> it thinks makes sense.
> 
> If we now load the efifb driver after that BAR map has happened, the frame
> buffer address we received may be invalid, because it was in a BAR map before
> Linux modified it.
> 
> To work around that issue, this patch introduces a BAR mapping callback that
> gets called every time Linux (re)allocates a BAR. That way our arm64 efi code
> can check whether the frame buffer is inside the old map and adjust it to
> the new one.
> 
> With this and the efifb patches applied, I can successfully see efifb output
> even after Linux remapped BARs.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> ---
>  arch/arm64/kernel/efi.c | 40 +++++++++++++++++++++++++++++++++++++++-
>  drivers/pci/setup-res.c | 29 +++++++++++++++++++++++++++++
>  include/linux/pci.h     |  8 ++++++++
>  3 files changed, 76 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
> index 56a76b6..3612110 100644
> --- a/arch/arm64/kernel/efi.c
> +++ b/arch/arm64/kernel/efi.c
> @@ -27,6 +27,7 @@
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
>  #include <linux/platform_device.h>
> +#include <linux/pci.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/efi.h>
> @@ -213,6 +214,41 @@ static __init void reserve_regions(void)
>  	set_bit(EFI_MEMMAP, &efi.flags);
>  }
>  
> +#ifdef CONFIG_PCI
> +static bool efi_pci_overlaps_efifb(struct pci_bar_update_info *update_info)
> +{
> +	/* is the screen_info frame buffer inside the pci BAR? */
> +	if (screen_info.lfb_base >= update_info->old_start &&
> +	    (screen_info.lfb_base + screen_info.lfb_size) <=
> +	     (update_info->old_start + update_info->size))
> +		return true;
> +
> +	return false;
> +}
> +
> +static int efi_pci_notifier(struct notifier_block *self,
> +			    unsigned long cmd, void *v)
> +{
> +	struct pci_bar_update_info *update_info = v;
> +
> +	/*
> +	 * When we reallocate a BAR that contains our frame buffer, set the
> +	 * screen_info base to where it belongs
> +	 */
> +	if (efi_pci_overlaps_efifb(update_info)) {
> +		u64 diff = (update_info->new_start - update_info->old_start);
> +		screen_info.lfb_base += diff;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +static struct notifier_block efi_pci_notifier_block = {
> +	.notifier_call = efi_pci_notifier,
> +};
> +#else
> +#define pci_notify_on_update_resource(a)
> +#endif
> +
>  void __init efi_init_fdt(void *fdt)
>  {
>  	struct efi_fdt_params params;
> @@ -246,8 +282,10 @@ void __init efi_init_fdt(void *fdt)
>  	reserve_regions();
>  	early_memunmap(memmap.map, params.mmap_size);
>  
> -	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI)
> +	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) {
> +		pci_notify_on_update_resource(&efi_pci_notifier_block);
>  		memblock_reserve(screen_info.lfb_base, screen_info.lfb_size);
> +	}
>  }
>  
>  static int __init register_gop_device(void)
> diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
> index 604011e..d5c24fc 100644
> --- a/drivers/pci/setup-res.c
> +++ b/drivers/pci/setup-res.c
> @@ -23,8 +23,10 @@
>  #include <linux/ioport.h>
>  #include <linux/cache.h>
>  #include <linux/slab.h>
> +#include <linux/notifier.h>
>  #include "pci.h"
>  
> +static RAW_NOTIFIER_HEAD(bar_update_chain);
>  
>  void pci_update_resource(struct pci_dev *dev, int resno)
>  {
> @@ -35,6 +37,9 @@ void pci_update_resource(struct pci_dev *dev, int resno)
>  	int reg;
>  	enum pci_bar_type type;
>  	struct resource *res = dev->resource + resno;
> +	struct pci_bar_update_info update_info;
> +	struct pci_bus_region update_reg;
> +	struct resource update_res;
>  
>  	if (dev->is_virtfn) {
>  		dev_warn(&dev->dev, "can't update VF BAR%d\n", resno);
> @@ -77,6 +82,22 @@ void pci_update_resource(struct pci_dev *dev, int resno)
>  	}
>  
>  	/*
> +	 * Fetch the old BAR location from the device, so we can notify
> +	 * users of that BAR that its location is changing.
> +	 */
> +	pci_read_config_dword(dev, reg, &check);
> +	update_reg.start = check & PCI_BASE_ADDRESS_MEM_MASK;
> +	if (check & PCI_BASE_ADDRESS_MEM_TYPE_64) {
> +		pci_read_config_dword(dev, reg, &check);
> +		update_reg.start |= ((u64)check) << 32;
> +	}
> +	update_info.size = region.end - region.start;
> +	update_reg.end = update_reg.start + update_info.size;
> +	pcibios_bus_to_resource(dev->bus, &update_res, &update_reg);
> +	update_info.old_start = update_res.start;
> +	update_info.new_start = res->start;
> +
> +	/*
>  	 * We can't update a 64-bit BAR atomically, so when possible,
>  	 * disable decoding so that a half-updated BAR won't conflict
>  	 * with another device.
> @@ -108,6 +129,14 @@ void pci_update_resource(struct pci_dev *dev, int resno)
>  
>  	if (disable)
>  		pci_write_config_word(dev, PCI_COMMAND, cmd);
> +
> +	/* Tell interested parties that the BAR mapping changed */
> +	raw_notifier_call_chain(&bar_update_chain, 0, &update_info);
> +}
> +
> +int pci_notify_on_update_resource(struct notifier_block *nb)
> +{
> +        return raw_notifier_chain_register(&bar_update_chain, nb);
>  }
>  
>  int pci_claim_resource(struct pci_dev *dev, int resource)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index c061250..04a430e 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -30,6 +30,7 @@
>  #include <linux/device.h>
>  #include <linux/io.h>
>  #include <linux/resource_ext.h>
> +#include <linux/notifier.h>
>  #include <uapi/linux/pci.h>
>  
>  #include <linux/pci_ids.h>
> @@ -1037,6 +1038,13 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags);
>  bool pci_device_is_present(struct pci_dev *pdev);
>  void pci_ignore_hotplug(struct pci_dev *dev);
>  
> +struct pci_bar_update_info {
> +	u64 old_start;
> +	u64 new_start;
> +	u64 size;
> +};
> +int pci_notify_on_update_resource(struct notifier_block *nb);
> +
>  /* ROM control related routines */
>  int pci_enable_rom(struct pci_dev *pdev);
>  void pci_disable_rom(struct pci_dev *pdev);
> -- 
> 1.8.5.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 28, 2016, 4:41 p.m. UTC | #2
On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
> On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
>> When booting with efifb, we get a frame buffer address passed into the system.
>> This address can be backed by any device, including PCI devices.
> I guess we get the frame buffer address via EFI, but it doesn't tell
> us what PCI device it's connected to?

Pretty much, yes. We can get the frame buffer address from a multitude 
of sources using various boot protocols, but the case where I ran into 
this was with efi on arm64.

> This same thing could happen on any EFI arch, I guess.  Maybe even on

Yes and no :). I would've put it into whatever code "owns" screen_info, 
but I couldn't find any. So instead I figured I'd make the approach as 
generic as I could and implemented the calculation for the case where I 
saw it break.

The reason we don't see this on x86 (if I understand all pieces of the 
puzzle correctly) is that we get the BAR allocation from firmware using 
_CRS attributes in ACPI, so firmware tells the OS where to put the BARs. 
In the device tree case (which is what I'm running on arm64) we however 
allocate BARs dynamically.

> non-EFI arches, if there's a way to discover the frame buffer address
> as a bare address rather than a "offset X into BAR Y of PCI device Z"
> sort of thing.

It'd be perfectly doable today - we do get a cpu physical address and 
use that in the notifier. All we would need to do is move the code that 
I added in arm64/efi.c to something more generic that "owns" the frame 
buffer address. Then any boot protocol that passes a screen_info in 
would get the frame buffer relocated on BAR remap. Drivers like vesafb 
might benefit from this as well - though apparently x86 fixed this using 
ACPI.

I'm not sure if offb could potentially also break. At the end of the 
day, it might, depending on how it's backed. For that we would then need 
another callback, since it doesn't use screen_info.

I don't feel incredibly comfortable preemptively fixing issues people 
haven't seen yet. There's just a good chance we end up breaking more 
than we fix :). But if you like and can point me to a better place for 
the screen_info modification, I'm happy to move it there.


Alex
Bjorn Helgaas April 28, 2016, 6:06 p.m. UTC | #3
On Thu, Apr 28, 2016 at 06:41:42PM +0200, Alexander Graf wrote:
> On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
> >On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
> >>When booting with efifb, we get a frame buffer address passed into the system.
> >>This address can be backed by any device, including PCI devices.
> >I guess we get the frame buffer address via EFI, but it doesn't tell
> >us what PCI device it's connected to?
> 
> Pretty much, yes. We can get the frame buffer address from a
> multitude of sources using various boot protocols, but the case
> where I ran into this was with efi on arm64.
> 
> >This same thing could happen on any EFI arch, I guess.  Maybe even on
> 
> Yes and no :). I would've put it into whatever code "owns"
> screen_info, but I couldn't find any. So instead I figured I'd make
> the approach as generic as I could and implemented the calculation
> for the case where I saw it break.
> 
> The reason we don't see this on x86 (if I understand all pieces of
> the puzzle correctly) is that we get the BAR allocation from
> firmware using _CRS attributes in ACPI, so firmware tells the OS
> where to put the BARs. 

I think the real reason is that on x86, firmware typically assigns all
the BARs and Linux typically doesn't change them.  PCI host bridges
have _CRS, which tells us where the host bridge windows are.  PCI
devices themselves don't normally have _CRS; we just make sure their
BARs are inside the ranges of an upstream _CRS.  If/when we get x86
boxes where firmware doesn't assign all the BARs, we should see the
same problem there.

> In the device tree case (which is what I'm
> running on arm64) we however allocate BARs dynamically.
> 
> >non-EFI arches, if there's a way to discover the frame buffer address
> >as a bare address rather than a "offset X into BAR Y of PCI device Z"
> >sort of thing.
> 
> It'd be perfectly doable today - we do get a cpu physical address
> and use that in the notifier. All we would need to do is move the
> code that I added in arm64/efi.c to something more generic that
> "owns" the frame buffer address. Then any boot protocol that passes
> a screen_info in would get the frame buffer relocated on BAR remap.

We could consider a quirk that would mark any BAR that happened to
contain the frame buffer address as IORESOURCE_PCI_FIXED.  That would
(in theory, anyway) keep the PCI core from moving it.

Is there any run-time EFI (or other firmware) dependency on the frame
buffer address?  If there is, things will break when we move it, even
if we have your notifier to tell efifb about it.

> Drivers like vesafb might benefit from this as well - though
> apparently x86 fixed this using ACPI.

Where is this x86 vesafb ACPI fix?  I don't see anything ACPI-related
in drivers/video/fbdev/vesafb.c.  I'm just curious what this fix looks
like.

> I'm not sure if offb could potentially also break. At the end of the
> day, it might, depending on how it's backed. For that we would then
> need another callback, since it doesn't use screen_info.

If firmware is giving us a bare address of something, that seems like
a clue that it might depend on that address staying the same.

Bjorn
Alexander Graf April 28, 2016, 9:39 p.m. UTC | #4
On 28.04.16 20:06, Bjorn Helgaas wrote:
> On Thu, Apr 28, 2016 at 06:41:42PM +0200, Alexander Graf wrote:
>> On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
>>> On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
>>>> When booting with efifb, we get a frame buffer address passed into the system.
>>>> This address can be backed by any device, including PCI devices.
>>> I guess we get the frame buffer address via EFI, but it doesn't tell
>>> us what PCI device it's connected to?
>>
>> Pretty much, yes. We can get the frame buffer address from a
>> multitude of sources using various boot protocols, but the case
>> where I ran into this was with efi on arm64.
>>
>>> This same thing could happen on any EFI arch, I guess.  Maybe even on
>>
>> Yes and no :). I would've put it into whatever code "owns"
>> screen_info, but I couldn't find any. So instead I figured I'd make
>> the approach as generic as I could and implemented the calculation
>> for the case where I saw it break.
>>
>> The reason we don't see this on x86 (if I understand all pieces of
>> the puzzle correctly) is that we get the BAR allocation from
>> firmware using _CRS attributes in ACPI, so firmware tells the OS
>> where to put the BARs. 
> 
> I think the real reason is that on x86, firmware typically assigns all
> the BARs and Linux typically doesn't change them.  PCI host bridges

Can you point me to the code that "doesn't change them"? I couldn't find
it, but I haven't see Linux reallocate BARs on x86.

The thing is that if a BAR is already allocated, we could as well not
remap it on arm as well - but how do we know?

> have _CRS, which tells us where the host bridge windows are.  PCI
> devices themselves don't normally have _CRS; we just make sure their
> BARs are inside the ranges of an upstream _CRS.  If/when we get x86
> boxes where firmware doesn't assign all the BARs, we should see the
> same problem there.

So the check is whether all BARs get assigned by firmware?

> 
>> In the device tree case (which is what I'm
>> running on arm64) we however allocate BARs dynamically.
>>
>>> non-EFI arches, if there's a way to discover the frame buffer address
>>> as a bare address rather than a "offset X into BAR Y of PCI device Z"
>>> sort of thing.
>>
>> It'd be perfectly doable today - we do get a cpu physical address
>> and use that in the notifier. All we would need to do is move the
>> code that I added in arm64/efi.c to something more generic that
>> "owns" the frame buffer address. Then any boot protocol that passes
>> a screen_info in would get the frame buffer relocated on BAR remap.
> 
> We could consider a quirk that would mark any BAR that happened to
> contain the frame buffer address as IORESOURCE_PCI_FIXED.  That would
> (in theory, anyway) keep the PCI core from moving it.

That's what I thought I should do at first. Then I realized that we
could have a PCIe GPU in the system that provides a really big BAR which
we would need to map into an mmio64 region to make full use of it.
Firmware however - because of limitations - only maps it into the mmio32
space though.

That means we now break a case that would work without efifb, right?

> Is there any run-time EFI (or other firmware) dependency on the frame
> buffer address?  If there is, things will break when we move it, even
> if we have your notifier to tell efifb about it.

Simple answer is no :).

>> Drivers like vesafb might benefit from this as well - though
>> apparently x86 fixed this using ACPI.
> 
> Where is this x86 vesafb ACPI fix?  I don't see anything ACPI-related
> in drivers/video/fbdev/vesafb.c.  I'm just curious what this fix looks
> like.

I don't know of any - I haven't found the code that would actually
prevent the same thing from happening on x86. Ard pointed to ACPI as the
reason it works there. I couldn't really identify why though.

>> I'm not sure if offb could potentially also break. At the end of the
>> day, it might, depending on how it's backed. For that we would then
>> need another callback, since it doesn't use screen_info.
> 
> If firmware is giving us a bare address of something, that seems like
> a clue that it might depend on that address staying the same.

Well, I'd look at it from the other side. It gives us a correct address
on entry with the system configured at exactly the state it's in on
entry. If Linux changes the system, some guarantees obviously don't work
anymore.

Whether something is still valid for runtime services is a different
question and should get handled differently IMHO. In the EFI case, we
know which memory regions are marked as runtime required. We could reuse
the notifier there too to change the virtual runtime efi page tables to
point to the new BAR address if an RTS MMIO region falls inside a BAR.


Alex
Lorenzo Pieralisi April 29, 2016, 10:03 a.m. UTC | #5
On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
> 
> 
> On 28.04.16 20:06, Bjorn Helgaas wrote:
> > On Thu, Apr 28, 2016 at 06:41:42PM +0200, Alexander Graf wrote:
> >> On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
> >>> On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
> >>>> When booting with efifb, we get a frame buffer address passed into the system.
> >>>> This address can be backed by any device, including PCI devices.
> >>> I guess we get the frame buffer address via EFI, but it doesn't tell
> >>> us what PCI device it's connected to?
> >>
> >> Pretty much, yes. We can get the frame buffer address from a
> >> multitude of sources using various boot protocols, but the case
> >> where I ran into this was with efi on arm64.
> >>
> >>> This same thing could happen on any EFI arch, I guess.  Maybe even on
> >>
> >> Yes and no :). I would've put it into whatever code "owns"
> >> screen_info, but I couldn't find any. So instead I figured I'd make
> >> the approach as generic as I could and implemented the calculation
> >> for the case where I saw it break.
> >>
> >> The reason we don't see this on x86 (if I understand all pieces of
> >> the puzzle correctly) is that we get the BAR allocation from
> >> firmware using _CRS attributes in ACPI, so firmware tells the OS
> >> where to put the BARs. 
> > 
> > I think the real reason is that on x86, firmware typically assigns all
> > the BARs and Linux typically doesn't change them.  PCI host bridges
> 
> Can you point me to the code that "doesn't change them"? I couldn't find
> it, but I haven't see Linux reallocate BARs on x86.
> 
> The thing is that if a BAR is already allocated, we could as well not
> remap it on arm as well - but how do we know?

We don't. Long story short: if I understand X86 code correctly,
on X86 PCI resources are claimed at boot:

arch/x86/pci/i386.c (pcibios_resource_survey())

which means that if the BARs are set-up in a way that passes the resource
claiming validation tests (ie the resource fits into the resource tree),
the BAR resources are inserted into the resource tree and are not touched
by the code that reassigns the PCI resources.

Ergo, FW set-up is kept intact, that's my understanding of X86 code.

The other way of preventing a PCI resource to be moved is by marking it
IORESOURCE_PCI_FIXED, I am not sure that's what X86 does in your specific
case though.

> > have _CRS, which tells us where the host bridge windows are.  PCI
> > devices themselves don't normally have _CRS; we just make sure their
> > BARs are inside the ranges of an upstream _CRS.  If/when we get x86
> > boxes where firmware doesn't assign all the BARs, we should see the
> > same problem there.
> 
> So the check is whether all BARs get assigned by firmware?

Eheh, the problem, and I am glad that you raised the point, is how
do we know that FW assigned the BARs ? The only thing we can do is
we try to claim the BAR resource, if it fits into the resource tree
we successfully claim the resource and the kernel won't reassign it.

On ARM PCI resources are never claimed, they are always reassigned,
and that's the reason why you are experiencing these failures.

> >> In the device tree case (which is what I'm
> >> running on arm64) we however allocate BARs dynamically.
> >>
> >>> non-EFI arches, if there's a way to discover the frame buffer address
> >>> as a bare address rather than a "offset X into BAR Y of PCI device Z"
> >>> sort of thing.
> >>
> >> It'd be perfectly doable today - we do get a cpu physical address
> >> and use that in the notifier. All we would need to do is move the
> >> code that I added in arm64/efi.c to something more generic that
> >> "owns" the frame buffer address. Then any boot protocol that passes
> >> a screen_info in would get the frame buffer relocated on BAR remap.
> > 
> > We could consider a quirk that would mark any BAR that happened to
> > contain the frame buffer address as IORESOURCE_PCI_FIXED.  That would
> > (in theory, anyway) keep the PCI core from moving it.
> 
> That's what I thought I should do at first. Then I realized that we
> could have a PCIe GPU in the system that provides a really big BAR which
> we would need to map into an mmio64 region to make full use of it.
> Firmware however - because of limitations - only maps it into the mmio32
> space though.
> 
> That means we now break a case that would work without efifb, right?
> 
> > Is there any run-time EFI (or other firmware) dependency on the frame
> > buffer address?  If there is, things will break when we move it, even
> > if we have your notifier to tell efifb about it.
> 
> Simple answer is no :).
> 
> >> Drivers like vesafb might benefit from this as well - though
> >> apparently x86 fixed this using ACPI.
> > 
> > Where is this x86 vesafb ACPI fix?  I don't see anything ACPI-related
> > in drivers/video/fbdev/vesafb.c.  I'm just curious what this fix looks
> > like.
> 
> I don't know of any - I haven't found the code that would actually
> prevent the same thing from happening on x86. Ard pointed to ACPI as the
> reason it works there. I couldn't really identify why though.

See above, please let me know how you get along.

Thanks !
Lorenzo
Bjorn Helgaas April 29, 2016, 1:41 p.m. UTC | #6
On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
> On 28.04.16 20:06, Bjorn Helgaas wrote:
> > On Thu, Apr 28, 2016 at 06:41:42PM +0200, Alexander Graf wrote:
> >> On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
> >>> On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
> >>>> When booting with efifb, we get a frame buffer address passed into the system.
> >>>> This address can be backed by any device, including PCI devices.
> >>> I guess we get the frame buffer address via EFI, but it doesn't tell
> >>> us what PCI device it's connected to?
> >>
> >> Pretty much, yes. We can get the frame buffer address from a
> >> multitude of sources using various boot protocols, but the case
> >> where I ran into this was with efi on arm64.
> >>
> >>> This same thing could happen on any EFI arch, I guess.  Maybe even on
> >>
> >> Yes and no :). I would've put it into whatever code "owns"
> >> screen_info, but I couldn't find any. So instead I figured I'd make
> >> the approach as generic as I could and implemented the calculation
> >> for the case where I saw it break.
> >>
> >> The reason we don't see this on x86 (if I understand all pieces of
> >> the puzzle correctly) is that we get the BAR allocation from
> >> firmware using _CRS attributes in ACPI, so firmware tells the OS
> >> where to put the BARs. 
> > 
> > I think the real reason is that on x86, firmware typically assigns all
> > the BARs and Linux typically doesn't change them.  PCI host bridges
> 
> Can you point me to the code that "doesn't change them"? I couldn't find
> it, but I haven't see Linux reallocate BARs on x86.

Lorenzo already answered this, I think.  I'll just reiterate that all
we can really do is check whether a BAR's current value is inside the
upstream bridge aperture.  If it is, we assume the BAR has been
assigned and we try to use that assignment unchanged.  Zero is a valid
BAR value, so we can't just check for something non-zero.

> >> In the device tree case (which is what I'm
> >> running on arm64) we however allocate BARs dynamically.

Side note, from a PCI core point of view, this is not a DT vs. ACPI
issue.  It's just a question of whether the BARs have been assigned
already, which might appear to correlate with DT or ACPI, but AFAIK
it's outside the scope of those specs.

> >>> ... if there's a way to discover the frame buffer address
> >>> as a bare address rather than a "offset X into BAR Y of PCI device Z"
> >>> sort of thing.
> >>
> >> It'd be perfectly doable today - we do get a cpu physical address
> >> and use that in the notifier. All we would need to do is move the
> >> code that I added in arm64/efi.c to something more generic that
> >> "owns" the frame buffer address. Then any boot protocol that passes
> >> a screen_info in would get the frame buffer relocated on BAR remap.
> > 
> > We could consider a quirk that would mark any BAR that happened to
> > contain the frame buffer address as IORESOURCE_PCI_FIXED.  That would
> > (in theory, anyway) keep the PCI core from moving it.
> 
> That's what I thought I should do at first. Then I realized that we
> could have a PCIe GPU in the system that provides a really big BAR which
> we would need to map into an mmio64 region to make full use of it.
> Firmware however - because of limitations - only maps it into the mmio32
> space though.
> 
> That means we now break a case that would work without efifb, right?

I'm not sure I understand.  Are you saying you might have, say, a 2GB
BAR, and firmware might put it in an mmio32 1GB host bridge aperture?
I guess you *could* program the BAR that way, but obviously a driver
would only be able to see the first 1GB of the BAR.

Linux would consider that invalid because the BAR doesn't fit in the
aperture and would reassign it.  But I don't think I understand the
whole picture.

> > If firmware is giving us a bare address of something, that seems like
> > a clue that it might depend on that address staying the same.
> 
> Well, I'd look at it from the other side. It gives us a correct address
> on entry with the system configured at exactly the state it's in on
> entry. If Linux changes the system, some guarantees obviously don't work
> anymore.

Can you point me to the part of the EFI spec that communicates this?
I'm curious what the intent is and whether there's any indication that
EFI expects the OS to preserve some configuration.  I don't think it's
reasonable for the OS to preserve this sort of configuration because
it limits how well we can support hotplug.

I wonder if we're using this frame buffer address as more than what
EFI intended.  For example, maybe it was intended for use by an early
console driver, but there's some other mechanism we should be using
after that.

Bjorn
Ard Biesheuvel April 29, 2016, 1:51 p.m. UTC | #7
On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
>> On 28.04.16 20:06, Bjorn Helgaas wrote:
>> > On Thu, Apr 28, 2016 at 06:41:42PM +0200, Alexander Graf wrote:
>> >> On 04/28/2016 06:20 PM, Bjorn Helgaas wrote:
>> >>> On Thu, Apr 28, 2016 at 12:22:24AM +0200, Alexander Graf wrote:
>> >>>> When booting with efifb, we get a frame buffer address passed into the system.
>> >>>> This address can be backed by any device, including PCI devices.
>> >>> I guess we get the frame buffer address via EFI, but it doesn't tell
>> >>> us what PCI device it's connected to?
>> >>
>> >> Pretty much, yes. We can get the frame buffer address from a
>> >> multitude of sources using various boot protocols, but the case
>> >> where I ran into this was with efi on arm64.
>> >>
>> >>> This same thing could happen on any EFI arch, I guess.  Maybe even on
>> >>
>> >> Yes and no :). I would've put it into whatever code "owns"
>> >> screen_info, but I couldn't find any. So instead I figured I'd make
>> >> the approach as generic as I could and implemented the calculation
>> >> for the case where I saw it break.
>> >>
>> >> The reason we don't see this on x86 (if I understand all pieces of
>> >> the puzzle correctly) is that we get the BAR allocation from
>> >> firmware using _CRS attributes in ACPI, so firmware tells the OS
>> >> where to put the BARs.
>> >
>> > I think the real reason is that on x86, firmware typically assigns all
>> > the BARs and Linux typically doesn't change them.  PCI host bridges
>>
>> Can you point me to the code that "doesn't change them"? I couldn't find
>> it, but I haven't see Linux reallocate BARs on x86.
>
> Lorenzo already answered this, I think.  I'll just reiterate that all
> we can really do is check whether a BAR's current value is inside the
> upstream bridge aperture.  If it is, we assume the BAR has been
> assigned and we try to use that assignment unchanged.  Zero is a valid
> BAR value, so we can't just check for something non-zero.
>
>> >> In the device tree case (which is what I'm
>> >> running on arm64) we however allocate BARs dynamically.
>
> Side note, from a PCI core point of view, this is not a DT vs. ACPI
> issue.  It's just a question of whether the BARs have been assigned
> already, which might appear to correlate with DT or ACPI, but AFAIK
> it's outside the scope of those specs.
>

That is true, but the distinction is made because ACPI implies UEFI on
arm64, so there we know there is firmware that executes before the
kernel.

A DT kernel could theoretically boot almost straight out of reset
(i.e., on a system which does not implement the security extensions),
which is most likely the historic explanation of why PCI on ARM
assumes that the PCI subsystem needs to be configured from scratch.

>> >>> ... if there's a way to discover the frame buffer address


>> >>> as a bare address rather than a "offset X into BAR Y of PCI device Z"
>> >>> sort of thing.
>> >>
>> >> It'd be perfectly doable today - we do get a cpu physical address
>> >> and use that in the notifier. All we would need to do is move the
>> >> code that I added in arm64/efi.c to something more generic that
>> >> "owns" the frame buffer address. Then any boot protocol that passes
>> >> a screen_info in would get the frame buffer relocated on BAR remap.
>> >
>> > We could consider a quirk that would mark any BAR that happened to
>> > contain the frame buffer address as IORESOURCE_PCI_FIXED.  That would
>> > (in theory, anyway) keep the PCI core from moving it.
>>
>> That's what I thought I should do at first. Then I realized that we
>> could have a PCIe GPU in the system that provides a really big BAR which
>> we would need to map into an mmio64 region to make full use of it.
>> Firmware however - because of limitations - only maps it into the mmio32
>> space though.
>>
>> That means we now break a case that would work without efifb, right?
>
> I'm not sure I understand.  Are you saying you might have, say, a 2GB
> BAR, and firmware might put it in an mmio32 1GB host bridge aperture?
> I guess you *could* program the BAR that way, but obviously a driver
> would only be able to see the first 1GB of the BAR.
>
> Linux would consider that invalid because the BAR doesn't fit in the
> aperture and would reassign it.  But I don't think I understand the
> whole picture.
>
>> > If firmware is giving us a bare address of something, that seems like
>> > a clue that it might depend on that address staying the same.
>>
>> Well, I'd look at it from the other side. It gives us a correct address
>> on entry with the system configured at exactly the state it's in on
>> entry. If Linux changes the system, some guarantees obviously don't work
>> anymore.
>
> Can you point me to the part of the EFI spec that communicates this?
> I'm curious what the intent is and whether there's any indication that
> EFI expects the OS to preserve some configuration.  I don't think it's
> reasonable for the OS to preserve this sort of configuration because
> it limits how well we can support hotplug.
>
> I wonder if we're using this frame buffer address as more than what
> EFI intended.  For example, maybe it was intended for use by an early
> console driver, but there's some other mechanism we should be using
> after that.
>

The UEFI spec describes this as follows (UEFIv2.6 section 11.9)

"""
Graphics output may also be required as part of the startup of an
operating system. There are
potentially times in modern operating systems prior to the loading of
a high performance OS
graphics driver where access to graphics output device is required.
The Graphics Output Protocol
supports this capability by providing the EFI OS loader access to a
hardware frame buffer and
enough information to allow the OS to draw directly to the graphics
output device.
"""

So the intent is to provide minimal framebuffer services until the
'real' driver takes over.

The GOP protocol only describes the base and size of the framebuffer,
and the pixel format. At boot time, the early UEFI code in the kernel
could potentially figure out which PCI device it is related to, if
necessary, but i am not sure if this would solve the x86 case as well.
Bjorn Helgaas April 29, 2016, 8:06 p.m. UTC | #8
On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
> >> On 28.04.16 20:06, Bjorn Helgaas wrote:

> >> > If firmware is giving us a bare address of something, that seems like
> >> > a clue that it might depend on that address staying the same.
> >>
> >> Well, I'd look at it from the other side. It gives us a correct address
> >> on entry with the system configured at exactly the state it's in on
> >> entry. If Linux changes the system, some guarantees obviously don't work
> >> anymore.
> >
> > Can you point me to the part of the EFI spec that communicates this?
> > I'm curious what the intent is and whether there's any indication that
> > EFI expects the OS to preserve some configuration.  I don't think it's
> > reasonable for the OS to preserve this sort of configuration because
> > it limits how well we can support hotplug.
> >
> > I wonder if we're using this frame buffer address as more than what
> > EFI intended.  For example, maybe it was intended for use by an early
> > console driver, but there's some other mechanism we should be using
> > after that.
> >
> 
> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
> 
> """
> Graphics output may also be required as part of the startup of an
> operating system. There are
> potentially times in modern operating systems prior to the loading of
> a high performance OS
> graphics driver where access to graphics output device is required.
> The Graphics Output Protocol
> supports this capability by providing the EFI OS loader access to a
> hardware frame buffer and
> enough information to allow the OS to draw directly to the graphics
> output device.
> """
> 
> So the intent is to provide minimal framebuffer services until the
> 'real' driver takes over.

Makes sense.  A 'real' driver for a PCI device would use
pci_register_driver() and use pci_resource_start() or similar to
locate the framebuffer, which would avoid the problem because the PCI
core doesn't change BARs while a driver owns the device.

> The GOP protocol only describes the base and size of the framebuffer,
> and the pixel format. At boot time, the early UEFI code in the kernel
> could potentially figure out which PCI device it is related to, if
> necessary, but i am not sure if this would solve the x86 case as well.

Does drivers/video/fbdev/efifb.c support only a single framebuffer
device?  If a system has several, how does it decide which to use?  I
assume UEFI would provide GOP for all the framebuffers?

If we could fix this by making efifb claim a PCI device, I think that
would be cleaner.  I don't know how to figure out the correct device,
but that would solve the "BAR changed" problem, and it would have
cleaner ownership semantics, too.

It looks like the current situation is that a device-specific driver
(radeon, i915, etc.) could claim the device via the usual
pci_register_driver() path, and the efifb driver could think it owns
the device at the same time.  This seems like too obvious a problem,
so maybe there's some ad hoc mechanism that resolves this?
Ard Biesheuvel April 29, 2016, 8:25 p.m. UTC | #9
> On 29 apr. 2016, at 22:06, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
>> On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
>>> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
>>>> On 28.04.16 20:06, Bjorn Helgaas wrote:
> 
>>>>> If firmware is giving us a bare address of something, that seems like
>>>>> a clue that it might depend on that address staying the same.
>>>> 
>>>> Well, I'd look at it from the other side. It gives us a correct address
>>>> on entry with the system configured at exactly the state it's in on
>>>> entry. If Linux changes the system, some guarantees obviously don't work
>>>> anymore.
>>> 
>>> Can you point me to the part of the EFI spec that communicates this?
>>> I'm curious what the intent is and whether there's any indication that
>>> EFI expects the OS to preserve some configuration.  I don't think it's
>>> reasonable for the OS to preserve this sort of configuration because
>>> it limits how well we can support hotplug.
>>> 
>>> I wonder if we're using this frame buffer address as more than what
>>> EFI intended.  For example, maybe it was intended for use by an early
>>> console driver, but there's some other mechanism we should be using
>>> after that.
>> 
>> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
>> 
>> """
>> Graphics output may also be required as part of the startup of an
>> operating system. There are
>> potentially times in modern operating systems prior to the loading of
>> a high performance OS
>> graphics driver where access to graphics output device is required.
>> The Graphics Output Protocol
>> supports this capability by providing the EFI OS loader access to a
>> hardware frame buffer and
>> enough information to allow the OS to draw directly to the graphics
>> output device.
>> """
>> 
>> So the intent is to provide minimal framebuffer services until the
>> 'real' driver takes over.
> 
> Makes sense.  A 'real' driver for a PCI device would use
> pci_register_driver() and use pci_resource_start() or similar to
> locate the framebuffer, which would avoid the problem because the PCI
> core doesn't change BARs while a driver owns the device.
> 
>> The GOP protocol only describes the base and size of the framebuffer,
>> and the pixel format. At boot time, the early UEFI code in the kernel
>> could potentially figure out which PCI device it is related to, if
>> necessary, but i am not sure if this would solve the x86 case as well.
> 
> Does drivers/video/fbdev/efifb.c support only a single framebuffer
> device?  If a system has several, how does it decide which to use?  I
> assume UEFI would provide GOP for all the framebuffers?
> 

Yes. The early efi code iterates over all gop instances to figure out which one is connected to the efi console.

> If we could fix this by making efifb claim a PCI device, I think that
> would be cleaner.  I don't know how to figure out the correct device,
> but that would solve the "BAR changed" problem, and it would have
> cleaner ownership semantics, too.

If the bus layout is static, the efi init code can record the segment/bus/device and match it with the kernel's view. is that guaranteed to be sufficient for all implementations of pci?

> It looks like the current situation is that a device-specific driver
> (radeon, i915, etc.) could claim the device via the usual
> pci_register_driver() path, and the efifb driver could think it owns
> the device at the same time.  This seems like too obvious a problem,
> so maybe there's some ad hoc mechanism that resolves this?
Alexander Graf April 29, 2016, 8:51 p.m. UTC | #10
On 29.04.16 22:25, Ard Biesheuvel wrote:
> 
>> On 29 apr. 2016, at 22:06, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>
>>> On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
>>>> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>>> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
>>>>> On 28.04.16 20:06, Bjorn Helgaas wrote:
>>
>>>>>> If firmware is giving us a bare address of something, that seems like
>>>>>> a clue that it might depend on that address staying the same.
>>>>>
>>>>> Well, I'd look at it from the other side. It gives us a correct address
>>>>> on entry with the system configured at exactly the state it's in on
>>>>> entry. If Linux changes the system, some guarantees obviously don't work
>>>>> anymore.
>>>>
>>>> Can you point me to the part of the EFI spec that communicates this?
>>>> I'm curious what the intent is and whether there's any indication that
>>>> EFI expects the OS to preserve some configuration.  I don't think it's
>>>> reasonable for the OS to preserve this sort of configuration because
>>>> it limits how well we can support hotplug.
>>>>
>>>> I wonder if we're using this frame buffer address as more than what
>>>> EFI intended.  For example, maybe it was intended for use by an early
>>>> console driver, but there's some other mechanism we should be using
>>>> after that.
>>>
>>> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
>>>
>>> """
>>> Graphics output may also be required as part of the startup of an
>>> operating system. There are
>>> potentially times in modern operating systems prior to the loading of
>>> a high performance OS
>>> graphics driver where access to graphics output device is required.
>>> The Graphics Output Protocol
>>> supports this capability by providing the EFI OS loader access to a
>>> hardware frame buffer and
>>> enough information to allow the OS to draw directly to the graphics
>>> output device.
>>> """
>>>
>>> So the intent is to provide minimal framebuffer services until the
>>> 'real' driver takes over.
>>
>> Makes sense.  A 'real' driver for a PCI device would use
>> pci_register_driver() and use pci_resource_start() or similar to
>> locate the framebuffer, which would avoid the problem because the PCI
>> core doesn't change BARs while a driver owns the device.
>>
>>> The GOP protocol only describes the base and size of the framebuffer,
>>> and the pixel format. At boot time, the early UEFI code in the kernel
>>> could potentially figure out which PCI device it is related to, if
>>> necessary, but i am not sure if this would solve the x86 case as well.
>>
>> Does drivers/video/fbdev/efifb.c support only a single framebuffer
>> device?  If a system has several, how does it decide which to use?  I
>> assume UEFI would provide GOP for all the framebuffers?
>>
> 
> Yes. The early efi code iterates over all gop instances to figure out which one is connected to the efi console.
> 
>> If we could fix this by making efifb claim a PCI device, I think that
>> would be cleaner.  I don't know how to figure out the correct device,
>> but that would solve the "BAR changed" problem, and it would have
>> cleaner ownership semantics, too.
> 
> If the bus layout is static, the efi init code can record the segment/bus/device and match it with the kernel's view. is that guaranteed to be sufficient for all implementations of pci?

I think the only thing we can really take as granted is the physical
address we receive. Any efi device topology may be as implementation
dependent as your firmware vendor wants to it to be.

So couldn't we just try to see whether the efifb is within a pci mmio
window? We could then go through all devices on it, search for
overlapping BAR allocations and know the device. From there it would
just be a matter of officially reserving the region - or setting the
resource to FIXED, right?


Alex
Bjorn Helgaas April 29, 2016, 9:20 p.m. UTC | #11
On Fri, Apr 29, 2016 at 10:25:13PM +0200, Ard Biesheuvel wrote:
> 
> > On 29 apr. 2016, at 22:06, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > 
> >> On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
> >>> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>>> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
> >>>> On 28.04.16 20:06, Bjorn Helgaas wrote:
> > 
> >>>>> If firmware is giving us a bare address of something, that seems like
> >>>>> a clue that it might depend on that address staying the same.
> >>>> 
> >>>> Well, I'd look at it from the other side. It gives us a correct address
> >>>> on entry with the system configured at exactly the state it's in on
> >>>> entry. If Linux changes the system, some guarantees obviously don't work
> >>>> anymore.
> >>> 
> >>> Can you point me to the part of the EFI spec that communicates this?
> >>> I'm curious what the intent is and whether there's any indication that
> >>> EFI expects the OS to preserve some configuration.  I don't think it's
> >>> reasonable for the OS to preserve this sort of configuration because
> >>> it limits how well we can support hotplug.
> >>> 
> >>> I wonder if we're using this frame buffer address as more than what
> >>> EFI intended.  For example, maybe it was intended for use by an early
> >>> console driver, but there's some other mechanism we should be using
> >>> after that.
> >> 
> >> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
> >> 
> >> """
> >> Graphics output may also be required as part of the startup of an
> >> operating system. There are
> >> potentially times in modern operating systems prior to the loading of
> >> a high performance OS
> >> graphics driver where access to graphics output device is required.
> >> The Graphics Output Protocol
> >> supports this capability by providing the EFI OS loader access to a
> >> hardware frame buffer and
> >> enough information to allow the OS to draw directly to the graphics
> >> output device.
> >> """
> >> 
> >> So the intent is to provide minimal framebuffer services until the
> >> 'real' driver takes over.
> > 
> > Makes sense.  A 'real' driver for a PCI device would use
> > pci_register_driver() and use pci_resource_start() or similar to
> > locate the framebuffer, which would avoid the problem because the PCI
> > core doesn't change BARs while a driver owns the device.
> > 
> >> The GOP protocol only describes the base and size of the framebuffer,
> >> and the pixel format. At boot time, the early UEFI code in the kernel
> >> could potentially figure out which PCI device it is related to, if
> >> necessary, but i am not sure if this would solve the x86 case as well.
> > 
> > Does drivers/video/fbdev/efifb.c support only a single framebuffer
> > device?  If a system has several, how does it decide which to use?  I
> > assume UEFI would provide GOP for all the framebuffers?
> > 
> 
> Yes. The early efi code iterates over all gop instances to figure out which one is connected to the efi console.
> 
> > If we could fix this by making efifb claim a PCI device, I think that
> > would be cleaner.  I don't know how to figure out the correct device,
> > but that would solve the "BAR changed" problem, and it would have
> > cleaner ownership semantics, too.
> 
> If the bus layout is static, the efi init code can record the segment/bus/device and match it with the kernel's view. is that guaranteed to be sufficient for all implementations of pci?

In theory, no.  The kernel can reassign bus numbers to make space for
hotplug devices.  In practice, they're like BARs: if the firmware sets
them up, we don't normally change them unless they're obviously
broken.
Bjorn Helgaas April 29, 2016, 9:37 p.m. UTC | #12
On Fri, Apr 29, 2016 at 10:51:34PM +0200, Alexander Graf wrote:
> 
> 
> On 29.04.16 22:25, Ard Biesheuvel wrote:
> > 
> >> On 29 apr. 2016, at 22:06, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>
> >>> On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
> >>>> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>>>> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
> >>>>> On 28.04.16 20:06, Bjorn Helgaas wrote:
> >>
> >>>>>> If firmware is giving us a bare address of something, that seems like
> >>>>>> a clue that it might depend on that address staying the same.
> >>>>>
> >>>>> Well, I'd look at it from the other side. It gives us a correct address
> >>>>> on entry with the system configured at exactly the state it's in on
> >>>>> entry. If Linux changes the system, some guarantees obviously don't work
> >>>>> anymore.
> >>>>
> >>>> Can you point me to the part of the EFI spec that communicates this?
> >>>> I'm curious what the intent is and whether there's any indication that
> >>>> EFI expects the OS to preserve some configuration.  I don't think it's
> >>>> reasonable for the OS to preserve this sort of configuration because
> >>>> it limits how well we can support hotplug.
> >>>>
> >>>> I wonder if we're using this frame buffer address as more than what
> >>>> EFI intended.  For example, maybe it was intended for use by an early
> >>>> console driver, but there's some other mechanism we should be using
> >>>> after that.
> >>>
> >>> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
> >>>
> >>> """
> >>> Graphics output may also be required as part of the startup of an
> >>> operating system. There are
> >>> potentially times in modern operating systems prior to the loading of
> >>> a high performance OS
> >>> graphics driver where access to graphics output device is required.
> >>> The Graphics Output Protocol
> >>> supports this capability by providing the EFI OS loader access to a
> >>> hardware frame buffer and
> >>> enough information to allow the OS to draw directly to the graphics
> >>> output device.
> >>> """
> >>>
> >>> So the intent is to provide minimal framebuffer services until the
> >>> 'real' driver takes over.
> >>
> >> Makes sense.  A 'real' driver for a PCI device would use
> >> pci_register_driver() and use pci_resource_start() or similar to
> >> locate the framebuffer, which would avoid the problem because the PCI
> >> core doesn't change BARs while a driver owns the device.
> >>
> >>> The GOP protocol only describes the base and size of the framebuffer,
> >>> and the pixel format. At boot time, the early UEFI code in the kernel
> >>> could potentially figure out which PCI device it is related to, if
> >>> necessary, but i am not sure if this would solve the x86 case as well.
> >>
> >> Does drivers/video/fbdev/efifb.c support only a single framebuffer
> >> device?  If a system has several, how does it decide which to use?  I
> >> assume UEFI would provide GOP for all the framebuffers?
> >>
> > 
> > Yes. The early efi code iterates over all gop instances to figure out which one is connected to the efi console.

How does it match the EFI console to the GOP instance?  Sorry, I'm
pretty EFI-illiterate, and "find . -name \*efi\*.c | xargs grep -i gop"
shows nothing :)

> >> If we could fix this by making efifb claim a PCI device, I think that
> >> would be cleaner.  I don't know how to figure out the correct device,
> >> but that would solve the "BAR changed" problem, and it would have
> >> cleaner ownership semantics, too.
> > 
> > If the bus layout is static, the efi init code can record the segment/bus/device and match it with the kernel's view. is that guaranteed to be sufficient for all implementations of pci?
> 
> I think the only thing we can really take as granted is the physical
> address we receive. Any efi device topology may be as implementation
> dependent as your firmware vendor wants to it to be.
> 
> So couldn't we just try to see whether the efifb is within a pci mmio
> window? We could then go through all devices on it, search for
> overlapping BAR allocations and know the device. From there it would
> just be a matter of officially reserving the region - or setting the
> resource to FIXED, right?

You could do something like a quirk that ran on every device after
reading the BARs.  Then you could see if any of the device's resources
contain the framebuffer address, and if so, set the FIXED bit for that
resource.  Or you could save the pci_dev and the BAR number and make
the framebuffer driver actually claim that device.  That would have
the advantage of cleaner ownership and allowing the core to reassign
it if necessary.

It would be a lot nicer if UEFI told us which device was the console.
We can't figure this out from the ConOut variable or the HCDP or PCDP
table or something?

Bjorn
Alexander Graf April 29, 2016, 9:52 p.m. UTC | #13
On 29.04.16 23:37, Bjorn Helgaas wrote:
> On Fri, Apr 29, 2016 at 10:51:34PM +0200, Alexander Graf wrote:
>>
>>
>> On 29.04.16 22:25, Ard Biesheuvel wrote:
>>>
>>>> On 29 apr. 2016, at 22:06, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>>
>>>>> On Fri, Apr 29, 2016 at 03:51:49PM +0200, Ard Biesheuvel wrote:
>>>>>> On 29 April 2016 at 15:41, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>>>>> On Thu, Apr 28, 2016 at 11:39:35PM +0200, Alexander Graf wrote:
>>>>>>> On 28.04.16 20:06, Bjorn Helgaas wrote:
>>>>
>>>>>>>> If firmware is giving us a bare address of something, that seems like
>>>>>>>> a clue that it might depend on that address staying the same.
>>>>>>>
>>>>>>> Well, I'd look at it from the other side. It gives us a correct address
>>>>>>> on entry with the system configured at exactly the state it's in on
>>>>>>> entry. If Linux changes the system, some guarantees obviously don't work
>>>>>>> anymore.
>>>>>>
>>>>>> Can you point me to the part of the EFI spec that communicates this?
>>>>>> I'm curious what the intent is and whether there's any indication that
>>>>>> EFI expects the OS to preserve some configuration.  I don't think it's
>>>>>> reasonable for the OS to preserve this sort of configuration because
>>>>>> it limits how well we can support hotplug.
>>>>>>
>>>>>> I wonder if we're using this frame buffer address as more than what
>>>>>> EFI intended.  For example, maybe it was intended for use by an early
>>>>>> console driver, but there's some other mechanism we should be using
>>>>>> after that.
>>>>>
>>>>> The UEFI spec describes this as follows (UEFIv2.6 section 11.9)
>>>>>
>>>>> """
>>>>> Graphics output may also be required as part of the startup of an
>>>>> operating system. There are
>>>>> potentially times in modern operating systems prior to the loading of
>>>>> a high performance OS
>>>>> graphics driver where access to graphics output device is required.
>>>>> The Graphics Output Protocol
>>>>> supports this capability by providing the EFI OS loader access to a
>>>>> hardware frame buffer and
>>>>> enough information to allow the OS to draw directly to the graphics
>>>>> output device.
>>>>> """
>>>>>
>>>>> So the intent is to provide minimal framebuffer services until the
>>>>> 'real' driver takes over.
>>>>
>>>> Makes sense.  A 'real' driver for a PCI device would use
>>>> pci_register_driver() and use pci_resource_start() or similar to
>>>> locate the framebuffer, which would avoid the problem because the PCI
>>>> core doesn't change BARs while a driver owns the device.
>>>>
>>>>> The GOP protocol only describes the base and size of the framebuffer,
>>>>> and the pixel format. At boot time, the early UEFI code in the kernel
>>>>> could potentially figure out which PCI device it is related to, if
>>>>> necessary, but i am not sure if this would solve the x86 case as well.
>>>>
>>>> Does drivers/video/fbdev/efifb.c support only a single framebuffer
>>>> device?  If a system has several, how does it decide which to use?  I
>>>> assume UEFI would provide GOP for all the framebuffers?
>>>>
>>>
>>> Yes. The early efi code iterates over all gop instances to figure out which one is connected to the efi console.
> 
> How does it match the EFI console to the GOP instance?  Sorry, I'm
> pretty EFI-illiterate, and "find . -name \*efi\*.c | xargs grep -i gop"
> shows nothing :)
> 
>>>> If we could fix this by making efifb claim a PCI device, I think that
>>>> would be cleaner.  I don't know how to figure out the correct device,
>>>> but that would solve the "BAR changed" problem, and it would have
>>>> cleaner ownership semantics, too.
>>>
>>> If the bus layout is static, the efi init code can record the segment/bus/device and match it with the kernel's view. is that guaranteed to be sufficient for all implementations of pci?
>>
>> I think the only thing we can really take as granted is the physical
>> address we receive. Any efi device topology may be as implementation
>> dependent as your firmware vendor wants to it to be.
>>
>> So couldn't we just try to see whether the efifb is within a pci mmio
>> window? We could then go through all devices on it, search for
>> overlapping BAR allocations and know the device. From there it would
>> just be a matter of officially reserving the region - or setting the
>> resource to FIXED, right?
> 
> You could do something like a quirk that ran on every device after
> reading the BARs.  Then you could see if any of the device's resources
> contain the framebuffer address, and if so, set the FIXED bit for that
> resource.  Or you could save the pci_dev and the BAR number and make
> the framebuffer driver actually claim that device.  That would have
> the advantage of cleaner ownership and allowing the core to reassign
> it if necessary.

Unfortunately the framebuffer driver gets initialized after the BARs got
reassigned, so that wouldn't work :).

Can you point me to the code that does "read[ing] the BARs"? So far my
impression was that we don't even try to read out the current BAR values
from pci config space, but I can't claim I fully grasp the Linux pci
code quite yet.

> It would be a lot nicer if UEFI told us which device was the console.
> We can't figure this out from the ConOut variable or the HCDP or PCDP
> table or something?

I guess you could try to look at the device path for the GOP handle, but
I'm not sure any layout of the device paths is mandated by the spec. For
all I know a valid implementation could just claim the GOP interface
fell from the skies and it'd be legitimate. But I'm happy to stand
corrected if someone points me to the respective part of the spec.

I suppose HCDP and PCDP are ACPI tables? We can't rely on those :).
AArch64 systems can (and some do) boot perfectly fine without any ACPI
tables exposed via uEFI. And the less we have to marry ACPI with uEFI
the happier I am - uEFI seems like a pretty nice path towards
standardized booting while ACPI was quite a nightmare every time I
looked at it so far ;).


Alex
Alexander Graf April 29, 2016, 10:03 p.m. UTC | #14
On 29.04.16 23:52, Alexander Graf wrote:
> 
> 
> On 29.04.16 23:37, Bjorn Helgaas wrote:
>
> Can you point me to the code that does "read[ing] the BARs"? So far my
> impression was that we don't even try to read out the current BAR values
> from pci config space, but I can't claim I fully grasp the Linux pci
> code quite yet.

I should've probably read Lorenzos email more closely before - it
contains the pointer.


Alex
Bjorn Helgaas April 29, 2016, 11:31 p.m. UTC | #15
[+cc Matt, because I'm out of my depth in this UEFI stuff]

On Fri, Apr 29, 2016 at 11:52:47PM +0200, Alexander Graf wrote:
> On 29.04.16 23:37, Bjorn Helgaas wrote:
> > On Fri, Apr 29, 2016 at 10:51:34PM +0200, Alexander Graf wrote:

> >> I think the only thing we can really take as granted is the physical
> >> address we receive. Any efi device topology may be as implementation
> >> dependent as your firmware vendor wants to it to be.
> >>
> >> So couldn't we just try to see whether the efifb is within a pci mmio
> >> window? We could then go through all devices on it, search for
> >> overlapping BAR allocations and know the device. From there it would
> >> just be a matter of officially reserving the region - or setting the
> >> resource to FIXED, right?
> > 
> > You could do something like a quirk that ran on every device after
> > reading the BARs.  Then you could see if any of the device's resources
> > contain the framebuffer address, and if so, set the FIXED bit for that
> > resource.  Or you could save the pci_dev and the BAR number and make
> > the framebuffer driver actually claim that device.  That would have
> > the advantage of cleaner ownership and allowing the core to reassign
> > it if necessary.
> 
> Unfortunately the framebuffer driver gets initialized after the BARs got
> reassigned, so that wouldn't work :).

Quirks (fixups) can run at several different times, including before
we read the BARs, after we read them but before any reassignment, etc.
I'll attach a call graph of the enumeration process (somewhat out of
date now, but enough to show the fixup points).  If we went this
route, a header quirk (DECLARE_PCI_FIXUP_HEADER) would do what we
need.

> Can you point me to the code that does "read[ing] the BARs"? So far my
> impression was that we don't even try to read out the current BAR values
> from pci config space, but I can't claim I fully grasp the Linux pci
> code quite yet.

The efifb.c driver doesn't do anything at all with PCI (it includes
linux/pci.h, but probably doesn't need it).  That's part of what I'm
suggesting -- if it *did* register as a PCI device driver, then it
would look at pci_dev->resource[n], which is populated by the PCI
core based on the BAR values.

> > It would be a lot nicer if UEFI told us which device was the console.
> > We can't figure this out from the ConOut variable or the HCDP or PCDP
> > table or something?
> 
> I guess you could try to look at the device path for the GOP handle, but
> I'm not sure any layout of the device paths is mandated by the spec. For
> all I know a valid implementation could just claim the GOP interface
> fell from the skies and it'd be legitimate. But I'm happy to stand
> corrected if someone points me to the respective part of the spec.

Is ConOut what you're after?  I.e., is the whole point of this
exercise to get a framebuffer driver attached to the device that was
the firmware console?  I would think the ConOut path should be
decodable -- it has to tell you how to navigate the interconnect from
the CPU to the device.  But I don't know how to do it.

It looks like on x86, at least, setup_gop32()/setup_gop64() might be
extracting the framebuffer address from the ConOut device and stuffing
it into screen_info, which is what efifb.c later looks at (maybe this
is what Ard was referring to).

> I suppose HCDP and PCDP are ACPI tables? We can't rely on those :).
> AArch64 systems can (and some do) boot perfectly fine without any ACPI
> tables exposed via uEFI. And the less we have to marry ACPI with uEFI
> the happier I am - uEFI seems like a pretty nice path towards
> standardized booting while ACPI was quite a nightmare every time I
> looked at it so far ;).

HCDP/PCDP was from DIG64, originally for ia64, but I can't find a copy
of the spec anymore.  The http://www.dig64.org/specifications/ pointer
in pcdp.h appears broken.  So forget I brought this up; this looks dead.

Bjorn


PCI enumeration call graph:

  pci_scan_root_bus
    pci_create_root_bus
      pci_alloc_host_bridge
      device_register(pci_host_bridge)
      device_register(pci_bus)
      pcibios_add_bus(pci_bus)                  # pcibios

    pci_scan_child_bus
      pci_scan_slot
        pci_scan_single_device
          pci_scan_device
            pci_alloc_dev
            pci_setup_device
              printk("[%04x:%04x] type ...")
              pci_fixup_device(pci_fixup_early) # fixup
              PCI_HEADER_TYPE_NORMAL:
                pci_read_irq
                pci_read_bases                  # <-- read BARs
              PCI_HEADER_TYPE_BRIDGE:
                pci_read_irq
                pci_read_bases
              PCI_HEADER_TYPE_CARDBUS:
                pci_read_irq
                pci_read_bases
          pci_device_add
            pci_fixup_device(pci_fixup_header)  # fixup
            pci_reassigndev_resource_alignment
            pci_init_capabilities
            pcibios_add_device                  # pcibios
            device_add
        pcie_aspm_init_link_state

      for (pass = 0; pass < 2; pass++)
        list_for_each_entry
          pci_scan_bridge
            pci_add_new_bus
              pci_alloc_child_bus
                pci_alloc_bus
                dev_set_name(pci_bus)
                device_register(pci_bus)
                pcibios_add_bus                 # pcibios
                pci_create_legacy_files
            pci_scan_child_bus                  # recursive

    pci_bus_add_devices
      list_for_each_entry(dev, &bus->devices, ...)
        pci_bus_add_device
          pci_fixup_device(pci_fixup_final)             # fixup
          pci_create_sysfs_dev_files
          pci_proc_attach_device
      list_for_each_entry(dev, &bus->devices, ...)
        pci_bus_add_devices(dev->subordinate)           # recursive

  pci_assign_unassigned_resources                       # from arch code
    list_for_each_entry(..., &pci_root_buses, ...)
      pci_assign_unassigned_root_bus_resources          # <-- assignment
        __pci_bus_size_bridges
          pci_bridge_check_ranges
            pci_read_config_word(..., PCI_IO_BASE, ...)
            pci_read_config_word(..., PCI_PREF_MEMORY_BASE, ...)

  <driver>
    pci_enable_device
      pci_enable_device_flags(MEM | IO)
        pci_enable_bridge(pci_upstream_bridge)
          pci_enable_bridge(pci_upstream_bridge)          # recursive
          pci_enable_device
          pci_set_master
        do_pci_enable_device
          pci_set_power_state(D0)
          pcibios_enable_device                           # pcibios
          pci_fixup_device(pci_fixup_enable)              # fixup
Matt Fleming April 30, 2016, 9:17 p.m. UTC | #16
(Pulling in efifb maintainer, Peter)

On Fri, 29 Apr, at 06:31:19PM, Bjorn Helgaas wrote:
> 
> The efifb.c driver doesn't do anything at all with PCI (it includes
> linux/pci.h, but probably doesn't need it).  That's part of what I'm
> suggesting -- if it *did* register as a PCI device driver, then it
> would look at pci_dev->resource[n], which is populated by the PCI
> core based on the BAR values.
 
This discussion came up recently here,

  https://lkml.kernel.org/r/20160216151859.GB11373@redhat.com

There's nothing PCI-specific about the EFI framebuffer per se, but in
practice it's always a PCI device.

> Is ConOut what you're after?  I.e., is the whole point of this
> exercise to get a framebuffer driver attached to the device that was
> the firmware console?  I would think the ConOut path should be
> decodable -- it has to tell you how to navigate the interconnect from
> the CPU to the device.  But I don't know how to do it.
> 
> It looks like on x86, at least, setup_gop32()/setup_gop64() might be
> extracting the framebuffer address from the ConOut device and stuffing
> it into screen_info, which is what efifb.c later looks at (maybe this
> is what Ard was referring to).
 
Matthew Garrett wrote the x86 code for guessing where the console is.
We look for the ConOut protocol with a fallback to first GOP device if
we can't find it. I think the heuristic was based on reading the
implementation in EDK2. See commit 38cb5ef4473c ("X86: Improve GOP
detection in the EFI boot stub").
diff mbox

Patch

diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 56a76b6..3612110 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -27,6 +27,7 @@ 
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
+#include <linux/pci.h>
 
 #include <asm/cacheflush.h>
 #include <asm/efi.h>
@@ -213,6 +214,41 @@  static __init void reserve_regions(void)
 	set_bit(EFI_MEMMAP, &efi.flags);
 }
 
+#ifdef CONFIG_PCI
+static bool efi_pci_overlaps_efifb(struct pci_bar_update_info *update_info)
+{
+	/* is the screen_info frame buffer inside the pci BAR? */
+	if (screen_info.lfb_base >= update_info->old_start &&
+	    (screen_info.lfb_base + screen_info.lfb_size) <=
+	     (update_info->old_start + update_info->size))
+		return true;
+
+	return false;
+}
+
+static int efi_pci_notifier(struct notifier_block *self,
+			    unsigned long cmd, void *v)
+{
+	struct pci_bar_update_info *update_info = v;
+
+	/*
+	 * When we reallocate a BAR that contains our frame buffer, set the
+	 * screen_info base to where it belongs
+	 */
+	if (efi_pci_overlaps_efifb(update_info)) {
+		u64 diff = (update_info->new_start - update_info->old_start);
+		screen_info.lfb_base += diff;
+	}
+
+	return NOTIFY_OK;
+}
+static struct notifier_block efi_pci_notifier_block = {
+	.notifier_call = efi_pci_notifier,
+};
+#else
+#define pci_notify_on_update_resource(a)
+#endif
+
 void __init efi_init_fdt(void *fdt)
 {
 	struct efi_fdt_params params;
@@ -246,8 +282,10 @@  void __init efi_init_fdt(void *fdt)
 	reserve_regions();
 	early_memunmap(memmap.map, params.mmap_size);
 
-	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI)
+	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) {
+		pci_notify_on_update_resource(&efi_pci_notifier_block);
 		memblock_reserve(screen_info.lfb_base, screen_info.lfb_size);
+	}
 }
 
 static int __init register_gop_device(void)
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 604011e..d5c24fc 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -23,8 +23,10 @@ 
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include <linux/notifier.h>
 #include "pci.h"
 
+static RAW_NOTIFIER_HEAD(bar_update_chain);
 
 void pci_update_resource(struct pci_dev *dev, int resno)
 {
@@ -35,6 +37,9 @@  void pci_update_resource(struct pci_dev *dev, int resno)
 	int reg;
 	enum pci_bar_type type;
 	struct resource *res = dev->resource + resno;
+	struct pci_bar_update_info update_info;
+	struct pci_bus_region update_reg;
+	struct resource update_res;
 
 	if (dev->is_virtfn) {
 		dev_warn(&dev->dev, "can't update VF BAR%d\n", resno);
@@ -77,6 +82,22 @@  void pci_update_resource(struct pci_dev *dev, int resno)
 	}
 
 	/*
+	 * Fetch the old BAR location from the device, so we can notify
+	 * users of that BAR that its location is changing.
+	 */
+	pci_read_config_dword(dev, reg, &check);
+	update_reg.start = check & PCI_BASE_ADDRESS_MEM_MASK;
+	if (check & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+		pci_read_config_dword(dev, reg, &check);
+		update_reg.start |= ((u64)check) << 32;
+	}
+	update_info.size = region.end - region.start;
+	update_reg.end = update_reg.start + update_info.size;
+	pcibios_bus_to_resource(dev->bus, &update_res, &update_reg);
+	update_info.old_start = update_res.start;
+	update_info.new_start = res->start;
+
+	/*
 	 * We can't update a 64-bit BAR atomically, so when possible,
 	 * disable decoding so that a half-updated BAR won't conflict
 	 * with another device.
@@ -108,6 +129,14 @@  void pci_update_resource(struct pci_dev *dev, int resno)
 
 	if (disable)
 		pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+	/* Tell interested parties that the BAR mapping changed */
+	raw_notifier_call_chain(&bar_update_chain, 0, &update_info);
+}
+
+int pci_notify_on_update_resource(struct notifier_block *nb)
+{
+        return raw_notifier_chain_register(&bar_update_chain, nb);
 }
 
 int pci_claim_resource(struct pci_dev *dev, int resource)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c061250..04a430e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -30,6 +30,7 @@ 
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/resource_ext.h>
+#include <linux/notifier.h>
 #include <uapi/linux/pci.h>
 
 #include <linux/pci_ids.h>
@@ -1037,6 +1038,13 @@  int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 bool pci_device_is_present(struct pci_dev *pdev);
 void pci_ignore_hotplug(struct pci_dev *dev);
 
+struct pci_bar_update_info {
+	u64 old_start;
+	u64 new_start;
+	u64 size;
+};
+int pci_notify_on_update_resource(struct notifier_block *nb);
+
 /* ROM control related routines */
 int pci_enable_rom(struct pci_dev *pdev);
 void pci_disable_rom(struct pci_dev *pdev);