[PATCH 4/6][SRU][Jammy] x86/PCI: Add kernel cmdline options to use/ignore E820 reserved regions

AceLan Kao acelan.kao at canonical.com
Mon Aug 1 13:38:29 UTC 2022


From: Hans de Goede <hdegoede at redhat.com>

BugLink: https://bugs.launchpad.net/bugs/1884232

Some firmware supplies PCI host bridge _CRS that includes address space
unusable by PCI devices, e.g., space occupied by host bridge registers or
used by hidden PCI devices.

To avoid this unusable space, Linux currently excludes E820 reserved
regions from _CRS windows; see 4dc2287c1805 ("x86: avoid E820 regions when
allocating address space").

However, this use of E820 reserved regions to clip things out of _CRS is
not supported by ACPI, UEFI, or PCI Firmware specs, and some systems have
E820 reserved regions that cover the entire memory window from _CRS.
4dc2287c1805 clips the entire window, leaving no space for hot-added or
uninitialized PCI devices.

For example, from a Lenovo IdeaPad 3 15IIL 81WE:

  BIOS-e820: [mem 0x4bc50000-0xcfffffff] reserved
  pci_bus 0000:00: root bus resource [mem 0x65400000-0xbfffffff window]
  pci 0000:00:15.0: BAR 0: [mem 0x00000000-0x00000fff 64bit]
  pci 0000:00:15.0: BAR 0: no space for [mem size 0x00001000 64bit]

Future patches will add quirks to enable/disable E820 clipping
automatically.

Add a "pci=no_e820" kernel command line option to disable clipping with
E820 reserved regions.  Also add a matching "pci=use_e820" option to enable
clipping with E820 reserved regions if that has been disabled by default by
further patches in this patch-set.

Both options taint the kernel because they are intended for debugging and
workaround purposes until a quirk can set them automatically.

[bhelgaas: commit log, add printk]
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899 Lenovo IdeaPad 3
Link: https://lore.kernel.org/r/20220519152150.6135-2-hdegoede@redhat.com
Signed-off-by: Hans de Goede <hdegoede at redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas at google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
Cc: Benoit Grégoire <benoitg at coeus.ca>
Cc: Hui Wang <hui.wang at canonical.com>
(cherry picked from commit fa6dae5d82081e8d9f8e6a2baf7149442a6c1ba5)
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao at canonical.com>
---
 .../admin-guide/kernel-parameters.txt          |  9 +++++++++
 arch/x86/include/asm/pci_x86.h                 |  2 ++
 arch/x86/pci/acpi.c                            | 18 ++++++++++++++++--
 arch/x86/pci/common.c                          |  8 ++++++++
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c73168dbb050..d414b5fef594 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4000,6 +4000,15 @@
 				please report a bug.
 		nocrs		[X86] Ignore PCI host bridge windows from ACPI.
 				If you need to use this, please report a bug.
+		use_e820	[X86] Use E820 reservations to exclude parts of
+				PCI host bridge windows. This is a workaround
+				for BIOS defects in host bridge _CRS methods.
+				If you need to use this, please report a bug to
+				<linux-pci at vger.kernel.org>.
+		no_e820		[X86] Ignore E820 reservations for PCI host
+				bridge windows. This is the default on modern
+				hardware. If you need to use this, please report
+				a bug to <linux-pci at vger.kernel.org>.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
 				so this option is a temporary workaround
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 490411dba438..695ef363c352 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -39,6 +39,8 @@ do {						\
 #define PCI_ROOT_NO_CRS		0x100000
 #define PCI_NOASSIGN_BARS	0x200000
 #define PCI_BIG_ROOT_WINDOW	0x400000
+#define PCI_USE_E820		0x800000
+#define PCI_NO_E820		0x1000000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index cffbef0884c0..0e48151c1fdc 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -20,6 +20,7 @@ struct pci_root_info {
 #endif
 };
 
+static bool pci_use_e820 = true;
 static bool pci_use_crs = true;
 static bool pci_ignore_seg = false;
 
@@ -161,6 +162,17 @@ void __init pci_acpi_crs_quirks(void)
 	       "if necessary, use \"pci=%s\" and report a bug\n",
 	       pci_use_crs ? "Using" : "Ignoring",
 	       pci_use_crs ? "nocrs" : "use_crs");
+
+	/* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */
+	if (pci_probe & PCI_NO_E820)
+		pci_use_e820 = false;
+	else if (pci_probe & PCI_USE_E820)
+		pci_use_e820 = true;
+
+	printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n",
+	       pci_use_e820 ? "Using" : "Ignoring");
+	if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
+		printk(KERN_INFO "PCI: Please notify linux-pci at vger.kernel.org so future kernels can this automatically\n");
 }
 
 #ifdef	CONFIG_PCI_MMCONFIG
@@ -301,8 +313,10 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
 
 	status = acpi_pci_probe_root_resources(ci);
 
-	resource_list_for_each_entry(entry, &ci->resources)
-		remove_e820_regions(&device->dev, entry->res);
+	if (pci_use_e820) {
+		resource_list_for_each_entry(entry, &ci->resources)
+			remove_e820_regions(&device->dev, entry->res);
+	}
 
 	if (pci_use_crs) {
 		resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 13b0dd380aad..2cc65a94a5b7 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -596,6 +596,14 @@ char *__init pcibios_setup(char *str)
 	} else if (!strcmp(str, "nocrs")) {
 		pci_probe |= PCI_ROOT_NO_CRS;
 		return NULL;
+	} else if (!strcmp(str, "use_e820")) {
+		pci_probe |= PCI_USE_E820;
+		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+		return NULL;
+	} else if (!strcmp(str, "no_e820")) {
+		pci_probe |= PCI_NO_E820;
+		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+		return NULL;
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 	} else if (!strcmp(str, "big_root_window")) {
 		pci_probe |= PCI_BIG_ROOT_WINDOW;
-- 
2.25.1




More information about the kernel-team mailing list