diff mbox

[v34,04/14] arm64: kdump: reserve memory for crash dump kernel

Message ID 20170328065130.16019-2-takahiro.akashi@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

AKASHI Takahiro March 28, 2017, 6:51 a.m. UTC
"crashkernel=" kernel parameter specifies the size (and optionally
the start address) of the system ram to be used by crash dump kernel.
reserve_crashkernel() will allocate and reserve that memory at boot time
of primary kernel.

The memory range will be exposed to userspace as a resource named
"Crash kernel" in /proc/iomem.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Pratyush Anand <panand@redhat.com>
Reviewed-by: James Morse <james.morse@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/setup.c |  7 ++++-
 arch/arm64/mm/init.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 1 deletion(-)

Comments

Ard Biesheuvel March 28, 2017, 9:52 a.m. UTC | #1
On 28 March 2017 at 07:51, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> "crashkernel=" kernel parameter specifies the size (and optionally
> the start address) of the system ram to be used by crash dump kernel.
> reserve_crashkernel() will allocate and reserve that memory at boot time
> of primary kernel.
>
> The memory range will be exposed to userspace as a resource named
> "Crash kernel" in /proc/iomem.
>
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> Signed-off-by: Mark Salter <msalter@redhat.com>
> Signed-off-by: Pratyush Anand <panand@redhat.com>
> Reviewed-by: James Morse <james.morse@arm.com>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

> ---
>  arch/arm64/kernel/setup.c |  7 ++++-
>  arch/arm64/mm/init.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 72 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 42274bda0ccb..28855ec1be95 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -31,7 +31,6 @@
>  #include <linux/screen_info.h>
>  #include <linux/init.h>
>  #include <linux/kexec.h>
> -#include <linux/crash_dump.h>
>  #include <linux/root_dev.h>
>  #include <linux/cpu.h>
>  #include <linux/interrupt.h>
> @@ -226,6 +225,12 @@ static void __init request_standard_resources(void)
>                 if (kernel_data.start >= res->start &&
>                     kernel_data.end <= res->end)
>                         request_resource(res, &kernel_data);
> +#ifdef CONFIG_KEXEC_CORE
> +               /* Userspace will find "Crash kernel" region in /proc/iomem. */
> +               if (crashk_res.end && crashk_res.start >= res->start &&
> +                   crashk_res.end <= res->end)
> +                       request_resource(res, &crashk_res);
> +#endif
>         }
>  }
>
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 290794b1a0f1..09d19207362d 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -30,6 +30,7 @@
>  #include <linux/gfp.h>
>  #include <linux/memblock.h>
>  #include <linux/sort.h>
> +#include <linux/of.h>
>  #include <linux/of_fdt.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/dma-contiguous.h>
> @@ -37,6 +38,7 @@
>  #include <linux/swiotlb.h>
>  #include <linux/vmalloc.h>
>  #include <linux/mm.h>
> +#include <linux/kexec.h>
>
>  #include <asm/boot.h>
>  #include <asm/fixmap.h>
> @@ -77,6 +79,67 @@ static int __init early_initrd(char *p)
>  early_param("initrd", early_initrd);
>  #endif
>
> +#ifdef CONFIG_KEXEC_CORE
> +/*
> + * reserve_crashkernel() - reserves memory for crash kernel
> + *
> + * This function reserves memory area given in "crashkernel=" kernel command
> + * line parameter. The memory reserved is used by dump capture kernel when
> + * primary kernel is crashing.
> + */
> +static void __init reserve_crashkernel(void)
> +{
> +       unsigned long long crash_base, crash_size;
> +       int ret;
> +
> +       ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +                               &crash_size, &crash_base);
> +       /* no crashkernel= or invalid value specified */
> +       if (ret || !crash_size)
> +               return;
> +
> +       crash_size = PAGE_ALIGN(crash_size);
> +
> +       if (crash_base == 0) {
> +               /* Current arm64 boot protocol requires 2MB alignment */
> +               crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
> +                               crash_size, SZ_2M);
> +               if (crash_base == 0) {
> +                       pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> +                               crash_size);
> +                       return;
> +               }
> +       } else {
> +               /* User specifies base address explicitly. */
> +               if (!memblock_is_region_memory(crash_base, crash_size)) {
> +                       pr_warn("cannot reserve crashkernel: region is not memory\n");
> +                       return;
> +               }
> +
> +               if (memblock_is_region_reserved(crash_base, crash_size)) {
> +                       pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
> +                       return;
> +               }
> +
> +               if (!IS_ALIGNED(crash_base, SZ_2M)) {
> +                       pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
> +                       return;
> +               }
> +       }
> +       memblock_reserve(crash_base, crash_size);
> +
> +       pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> +               crash_base, crash_base + crash_size, crash_size >> 20);
> +
> +       crashk_res.start = crash_base;
> +       crashk_res.end = crash_base + crash_size - 1;
> +}
> +#else
> +static void __init reserve_crashkernel(void)
> +{
> +}
> +#endif /* CONFIG_KEXEC_CORE */
> +
>  /*
>   * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
>   * currently assumes that for memory starting above 4G, 32-bit devices will
> @@ -332,6 +395,9 @@ void __init arm64_memblock_init(void)
>                 arm64_dma_phys_limit = max_zone_dma_phys();
>         else
>                 arm64_dma_phys_limit = PHYS_MASK + 1;
> +
> +       reserve_crashkernel();
> +
>         dma_contiguous_reserve(arm64_dma_phys_limit);
>
>         memblock_allow_resize();
> --
> 2.11.1
>
David Woodhouse April 3, 2017, 8:18 a.m. UTC | #2
On Tue, 2017-03-28 at 15:51 +0900, AKASHI Takahiro wrote:
> 
> +       if (crash_base == 0) {
> +               /* Current arm64 boot protocol requires 2MB alignment */
> +               crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
> +                               crash_size, SZ_2M);
> +               if (crash_base == 0) {
> +                       pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> +                               crash_size);
> +                       return;
> +               }
> +       } else {
> +               /* User specifies base address explicitly. */
> +               if (!memblock_is_region_memory(crash_base, crash_size)) {
> +                       pr_warn("cannot reserve crashkernel: region is not memory\n");
> +                       return;
> +               }
> +
> +               if (memblock_is_region_reserved(crash_base, crash_size)) {
> +                       pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
> +                       return;
> +               }
> +
> +               if (!IS_ALIGNED(crash_base, SZ_2M)) {
> +                       pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
> +                       return;
> +               }

You still have typos here.
AKASHI Takahiro April 4, 2017, 5:41 a.m. UTC | #3
On Mon, Apr 03, 2017 at 09:18:12AM +0100, David Woodhouse wrote:
> 
> You still have typos here.

I'd like to defer to the maintainers whether we prefer MiB over MB.

Thanks,
-Takahiro AKASHI
David Woodhouse April 4, 2017, 6:14 a.m. UTC | #4
> On Mon, Apr 03, 2017 at 09:18:12AM +0100, David Woodhouse wrote:
>>
>> You still have typos here.
>
> I'd like to defer to the maintainers whether we prefer MiB over MB.

It is not really a matter of preference. One is correct; the other is not.

While simple errors can of course be forgiven, I cannot understand why you
would deliberately repeat an error once it has been pointed out to you.
AKASHI Takahiro April 4, 2017, 7:35 a.m. UTC | #5
On Tue, Apr 04, 2017 at 06:14:55AM -0000, David Woodhouse wrote:
> 
> > On Mon, Apr 03, 2017 at 09:18:12AM +0100, David Woodhouse wrote:
> >>
> >> You still have typos here.
> >
> > I'd like to defer to the maintainers whether we prefer MiB over MB.
> 
> It is not really a matter of preference. One is correct; the other is not.
> 
> While simple errors can of course be forgiven, I cannot understand why you
> would deliberately repeat an error once it has been pointed out to you.

Because I think that people sometimes use those two interchangeably.
So I said I would defer to the maintainers.

-Takahiro AKASHI

> 
> 
> -- 
> dwmw2
>
Ard Biesheuvel April 4, 2017, 7:39 a.m. UTC | #6
On 4 April 2017 at 08:35, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> On Tue, Apr 04, 2017 at 06:14:55AM -0000, David Woodhouse wrote:
>>
>> > On Mon, Apr 03, 2017 at 09:18:12AM +0100, David Woodhouse wrote:
>> >>
>> >> You still have typos here.
>> >
>> > I'd like to defer to the maintainers whether we prefer MiB over MB.
>>
>> It is not really a matter of preference. One is correct; the other is not.
>>
>> While simple errors can of course be forgiven, I cannot understand why you
>> would deliberately repeat an error once it has been pointed out to you.
>
> Because I think that people sometimes use those two interchangeably.
> So I said I would defer to the maintainers.
>

I have to agree with Akashi-san here: while you are technically
correct, the reality is that the MiB is not as widely adopted as you
suggest, and there is no ambiguity whatsoever in this particular case
(i.e., when referring to blocks of RAM), so I feel it is somewhat
counterproductive to confuse reviewers by stating 'You still have
typos here.' without specifying that it is MiB vs MB that you are
actually referring to.

These patches are complex enough as they are, so could we *please*
focus on the things that matter?
David Woodhouse April 4, 2017, 7:44 a.m. UTC | #7
On Tue, 2017-04-04 at 16:35 +0900, AKASHI Takahiro wrote:
> 
> Because I think that people sometimes use those two interchangeably.
> So I said I would defer to the maintainers.

Sometimes they do, yes. Just as sometimes people use "their",
"they're", and "there" interchangeably.

Rarely in a professional context, though. And even more rarely when
their error has already been pointed out to them.

There are no good reasons to *deliberately* get it wrong.

I've heard it suggested that 'MiB' would confuse people who have never
seen it before. And that it was ugly. Those arguments were fairly
specious when they were first made, and they're even sillier now — more
than 20 years since the binary prefixes were introduced.

The alleged confusion, and the perceived ugliness, are purely due to
unfamiliarity and will pass.

The correctness, and the lack of ambiguity, will not.
Will Deacon April 4, 2017, 9:26 a.m. UTC | #8
Guys, we were supposed to stop discussing this three days ago.

On Tue, Apr 04, 2017 at 09:44:04AM +0200, David Woodhouse wrote:
> On Tue, 2017-04-04 at 16:35 +0900, AKASHI Takahiro wrote:
> > 
> > Because I think that people sometimes use those two interchangeably.
> > So I said I would defer to the maintainers.
> 
> Sometimes they do, yes. Just as sometimes people use "their",
> "they're", and "there" interchangeably.
> 
> Rarely in a professional context, though. And even more rarely when
> their error has already been pointed out to them.
> 
> There are no good reasons to *deliberately* get it wrong.
> 
> I've heard it suggested that 'MiB' would confuse people who have never
> seen it before. And that it was ugly. Those arguments were fairly
> specious when they were first made, and they're even sillier now — more
> than 20 years since the binary prefixes were introduced.
> 
> The alleged confusion, and the perceived ugliness, are purely due to
> unfamiliarity and will pass.
> 
> The correctness, and the lack of ambiguity, will not.

I think consistency comes into play here. We (arm64) and the rest of the
kernel get this wrong all the time, so if we're going to fix it then we
should look at the wider codebase and I'd rather not do that as part of the
kdump series. Also, why stop at the suffixes? We don't have any occurences
of 'mebibyte' in the kernel sources, but plenty of busted 'megabytes'. A
patch making arm64 consistent could be discussed separately, otherwise kdump
becomes the pedantic ISO guy trying to lead by example, but really everybody
ignores him because it's completely inconsequential and they also know he
went 35 versions without giving a monkey's.

David, since you seem to be the most outraged, fancy sending a patch? ;)

Alternatively, who fancies burning some dictionaries?

Will
David Woodhouse April 13, 2017, 12:15 p.m. UTC | #9
On Tue, 2017-04-04 at 10:26 +0100, Will Deacon wrote:
>  A
> patch making arm64 consistent could be discussed separately, otherwise kdump
> becomes the pedantic ISO guy trying to lead by example, but really everybody
> ignores him because it's completely inconsequential and they also know he
> went 35 versions without giving a monkey's.

I still don't see the logic there for *wanting* kdump to be wrong.

Sure, kdump getting it right doesn't necessarily make a big difference
in itself.

But given that the error has been pointed out, what is the motivation
for *wanting* the error to remain in the kdump code, instead of just
fixing it? "Consistency" isn't an answer because we are *already*
inconsistent — some code gets it right, and other code doesn't.

We should converge towards *correctness* rather than deliberately
adding more incorrect code.

I've used my 'i' key more times just in typing this email than it would
have taken to just fix the problem the first time it was pointed out.

> David, since you seem to be the most outraged, fancy sending a patch? ;)

Coming up. In two parts — user-visible messages, followed by cosmetic
and less relevant stuff.
diff mbox

Patch

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 42274bda0ccb..28855ec1be95 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -31,7 +31,6 @@ 
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -226,6 +225,12 @@  static void __init request_standard_resources(void)
 		if (kernel_data.start >= res->start &&
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
+#ifdef CONFIG_KEXEC_CORE
+		/* Userspace will find "Crash kernel" region in /proc/iomem. */
+		if (crashk_res.end && crashk_res.start >= res->start &&
+		    crashk_res.end <= res->end)
+			request_resource(res, &crashk_res);
+#endif
 	}
 }
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 290794b1a0f1..09d19207362d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@ 
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/sort.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/dma-mapping.h>
 #include <linux/dma-contiguous.h>
@@ -37,6 +38,7 @@ 
 #include <linux/swiotlb.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <linux/kexec.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -77,6 +79,67 @@  static int __init early_initrd(char *p)
 early_param("initrd", early_initrd);
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long crash_base, crash_size;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+				&crash_size, &crash_base);
+	/* no crashkernel= or invalid value specified */
+	if (ret || !crash_size)
+		return;
+
+	crash_size = PAGE_ALIGN(crash_size);
+
+	if (crash_base == 0) {
+		/* Current arm64 boot protocol requires 2MB alignment */
+		crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+				crash_size, SZ_2M);
+		if (crash_base == 0) {
+			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+				crash_size);
+			return;
+		}
+	} else {
+		/* User specifies base address explicitly. */
+		if (!memblock_is_region_memory(crash_base, crash_size)) {
+			pr_warn("cannot reserve crashkernel: region is not memory\n");
+			return;
+		}
+
+		if (memblock_is_region_reserved(crash_base, crash_size)) {
+			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
+			return;
+		}
+
+		if (!IS_ALIGNED(crash_base, SZ_2M)) {
+			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
+			return;
+		}
+	}
+	memblock_reserve(crash_base, crash_size);
+
+	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+		crash_base, crash_base + crash_size, crash_size >> 20);
+
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+}
+#else
+static void __init reserve_crashkernel(void)
+{
+}
+#endif /* CONFIG_KEXEC_CORE */
+
 /*
  * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  * currently assumes that for memory starting above 4G, 32-bit devices will
@@ -332,6 +395,9 @@  void __init arm64_memblock_init(void)
 		arm64_dma_phys_limit = max_zone_dma_phys();
 	else
 		arm64_dma_phys_limit = PHYS_MASK + 1;
+
+	reserve_crashkernel();
+
 	dma_contiguous_reserve(arm64_dma_phys_limit);
 
 	memblock_allow_resize();