diff mbox

[v5sub1,7/8] arm64: move kernel image to base of vmalloc area

Message ID 56C34204.60605@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrey Ryabinin Feb. 16, 2016, 3:36 p.m. UTC
On 02/16/2016 06:17 PM, Ard Biesheuvel wrote:
> On 16 February 2016 at 13:59, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>>
>>
>> On 02/15/2016 09:59 PM, Catalin Marinas wrote:
>>> On Mon, Feb 15, 2016 at 05:28:02PM +0300, Andrey Ryabinin wrote:
>>>> On 02/12/2016 07:06 PM, Catalin Marinas wrote:
>>>>> So far, we have:
>>>>>
>>>>> KASAN+for-next/kernmap goes wrong
>>>>> KASAN+UBSAN goes wrong
>>>>>
>>>>> Enabled individually, KASAN, UBSAN and for-next/kernmap seem fine. I may
>>>>> have to trim for-next/core down until we figure out where the problem
>>>>> is.
>>>>>
>>>>> BUG: KASAN: stack-out-of-bounds in find_busiest_group+0x164/0x16a0 at addr ffffffc93665bc8c
>>>>
>>>> Can it be related to TLB conflicts, which supposed to be fixed in
>>>> "arm64: kasan: avoid TLB conflicts" patch from "arm64: mm: rework page
>>>> table creation" series ?
>>>
>>> I can very easily reproduce this with a vanilla 4.5-rc1 series by
>>> enabling inline instrumentation (maybe Mark's theory is true w.r.t.
>>> image size).
>>>
>>> Some information, maybe you can shed some light on this. It seems to
>>> happen only for secondary CPUs on the swapper stack (I think allocated
>>> via fork_idle()). The code generated looks sane to me, so KASAN should
>>> not complain but maybe there is some uninitialised shadow, hence the
>>> error.
>>>
>>> The report:
>>>
>>
>> Actually, the first report is a bit more useful. It shows that shadow memory was corrupted:
>>
>>   ffffffc93665bc00: f1 f1 f1 f1 00 f4 f4 f4 f2 f2 f2 f2 00 00 f1 f1
>>> ffffffc93665bc80: f1 f1 00 00 00 00 f3 f3 00 f4 f4 f4 f3 f3 f3 f3
>>                       ^
>> F1 - left redzone, it indicates start of stack frame
>> F3 - right redzone, it should be the end of stack frame.
>>
>> But here we have the second set of F1s without F3s which should close the first set of F1s.
>> Also those two F3s in the middle cannot be right.
>>
>> So shadow is corrupted.
>> Some hypotheses:
>>
>> 1) We share stack between several tasks (e.g. stack overflow, somehow corrupted SP).
>>     But this probably should cause kernel crash later, after kasan reports.
>>
>> 2) Shadow memory wasn't cleared. GCC poison memory on function entrance and unpoisons it before return.
>>      If we use some tricky way to exit from function this could cause false-positives like that.
>>      E.g. some hand-written assembly return code.
>>
>> 3) Screwed shadow mapping. I think the patch below should uncover such problem.
>> It boot-tested on qemu and didn't show any problem
>>
> 
> I think this patch gives false positive warnings in some cases:
> 
>>
>> ---
>>  arch/arm64/mm/kasan_init.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 55 insertions(+)
>>
>> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
>> index cf038c7..25d685c 100644
>> --- a/arch/arm64/mm/kasan_init.c
>> +++ b/arch/arm64/mm/kasan_init.c
>> @@ -117,6 +117,59 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
>>         : "r" (ttbr1));
>>  }
>>
>> +static void verify_shadow(void)
>> +{
>> +       struct memblock_region *reg;
>> +       int i = 0;
>> +
>> +       for_each_memblock(memory, reg) {
>> +               void *start = (void *)__phys_to_virt(reg->base);
>> +               void *end = (void *)__phys_to_virt(reg->base + reg->size);
>> +               int *shadow_start, *shadow_end;
>> +
>> +               if (start >= end)
>> +                       break;
>> +               shadow_start = (int *)((unsigned long)kasan_mem_to_shadow(start) & ~(PAGE_SIZE - 1));
>> +               shadow_end =  (int *)kasan_mem_to_shadow(end);
> 
> shadow_start and shadow_end can refer to the same page as in the
> previous iteration. For instance, I have these two regions
> 
>   0x00006e090000-0x00006e0adfff [Conventional Memory|   |  |  |  |  |
> |   |WB|WT|WC|UC]
>   0x00006e0ae000-0x00006e0affff [Loader Data        |   |  |  |  |  |
> |   |WB|WT|WC|UC]
> 
> which are covered by different memblocks since the second one is
> marked as MEMBLOCK_NOMAP, due to the fact that it contains the UEFI
> memory map.
> 
> I get the following output
> 
> kasan: screwed shadow mapping 23575, 23573
> 
> which I think is simply a result from the fact the shadow_start refers
> to the same page as in the previous iteration(s)
> 

You are right. 
So we should write 'shadow_start' instead of 'i'.

---
 arch/arm64/mm/kasan_init.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

Comments

Mark Rutland Feb. 16, 2016, 4:42 p.m. UTC | #1
On Tue, Feb 16, 2016 at 06:36:36PM +0300, Andrey Ryabinin wrote:
> 
> On 02/16/2016 06:17 PM, Ard Biesheuvel wrote:
> > On 16 February 2016 at 13:59, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> >> +static void verify_shadow(void)
> >> +{
> >> +       struct memblock_region *reg;
> >> +       int i = 0;
> >> +
> >> +       for_each_memblock(memory, reg) {
> >> +               void *start = (void *)__phys_to_virt(reg->base);
> >> +               void *end = (void *)__phys_to_virt(reg->base + reg->size);
> >> +               int *shadow_start, *shadow_end;
> >> +
> >> +               if (start >= end)
> >> +                       break;
> >> +               shadow_start = (int *)((unsigned long)kasan_mem_to_shadow(start) & ~(PAGE_SIZE - 1));
> >> +               shadow_end =  (int *)kasan_mem_to_shadow(end);
> > 
> > shadow_start and shadow_end can refer to the same page as in the
> > previous iteration. For instance, I have these two regions
> > 
> >   0x00006e090000-0x00006e0adfff [Conventional Memory|   |  |  |  |  |
> > |   |WB|WT|WC|UC]
> >   0x00006e0ae000-0x00006e0affff [Loader Data        |   |  |  |  |  |
> > |   |WB|WT|WC|UC]
> > 
> > which are covered by different memblocks since the second one is
> > marked as MEMBLOCK_NOMAP, due to the fact that it contains the UEFI
> > memory map.
> > 
> > I get the following output
> > 
> > kasan: screwed shadow mapping 23575, 23573
> > 
> > which I think is simply a result from the fact the shadow_start refers
> > to the same page as in the previous iteration(s)
> > 
> 
> You are right. 
> So we should write 'shadow_start' instead of 'i'.

FWIW with the below patch I don't see any "screwed shadow mapping"
warnings on my board, and still later see a tonne of KASAN splats in the
scheduler.

Mark.

> ---
>  arch/arm64/mm/kasan_init.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> index cf038c7..ee035c2 100644
> --- a/arch/arm64/mm/kasan_init.c
> +++ b/arch/arm64/mm/kasan_init.c
> @@ -117,6 +117,55 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
>  	: "r" (ttbr1));
>  }
>  
> +static void verify_shadow(void)
> +{
> +	struct memblock_region *reg;
> +
> +	for_each_memblock(memory, reg) {
> +		void *start = (void *)__phys_to_virt(reg->base);
> +		void *end = (void *)__phys_to_virt(reg->base + reg->size);
> +		unsigned long *shadow_start, *shadow_end;
> +
> +		if (start >= end)
> +			break;
> +		shadow_start = (unsigned long *)kasan_mem_to_shadow(start);
> +		shadow_end =  (unsigned long *)kasan_mem_to_shadow(end);
> +		for (; shadow_start < shadow_end; shadow_start += PAGE_SIZE/sizeof(unsigned long)) {
> +			*shadow_start = (unsigned long)shadow_start;
> +		}
> +	}
> +
> +	for_each_memblock(memory, reg) {
> +		void *start = (void *)__phys_to_virt(reg->base);
> +		void *end = (void *)__phys_to_virt(reg->base + reg->size);
> +		unsigned long *shadow_start, *shadow_end;
> +
> +		if (start >= end)
> +			break;
> +		shadow_start = (unsigned long *)kasan_mem_to_shadow(start);
> +		shadow_end =  (unsigned long *)kasan_mem_to_shadow(end);
> +		for (; shadow_start < shadow_end; shadow_start += PAGE_SIZE/sizeof(unsigned long)) {
> +			if (*shadow_start != (unsigned long)shadow_start) {
> +				pr_err("screwed shadow mapping %lx, %lx\n", *shadow_start, (unsigned long)shadow_start);
> +				goto clear;
> +			}
> +		}
> +	}
> +clear:
> +	for_each_memblock(memory, reg) {
> +		void *start = (void *)__phys_to_virt(reg->base);
> +		void *end = (void *)__phys_to_virt(reg->base + reg->size);
> +		unsigned long shadow_start, shadow_end;
> +
> +		if (start >= end)
> +			break;
> +		shadow_start =  (unsigned long)kasan_mem_to_shadow(start);
> +		shadow_end =  (unsigned long)kasan_mem_to_shadow(end);
> +		memset((void *)shadow_start, 0, shadow_end - shadow_start);
> +	}
> +
> +}
> +
>  void __init kasan_init(void)
>  {
>  	struct memblock_region *reg;
> @@ -159,6 +208,8 @@ void __init kasan_init(void)
>  	cpu_set_ttbr1(__pa(swapper_pg_dir));
>  	flush_tlb_all();
>  
> +	verify_shadow();
> +
>  	/* At this point kasan is fully initialized. Enable error messages */
>  	init_task.kasan_depth = 0;
>  	pr_info("KernelAddressSanitizer initialized\n");
> -- 
> 
>
Andrey Ryabinin Feb. 17, 2016, 9:15 a.m. UTC | #2
On 02/16/2016 07:42 PM, Mark Rutland wrote:
> On Tue, Feb 16, 2016 at 06:36:36PM +0300, Andrey Ryabinin wrote:
>>
>> On 02/16/2016 06:17 PM, Ard Biesheuvel wrote:
>>> On 16 February 2016 at 13:59, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>>>> +static void verify_shadow(void)
>>>> +{
>>>> +       struct memblock_region *reg;
>>>> +       int i = 0;
>>>> +
>>>> +       for_each_memblock(memory, reg) {
>>>> +               void *start = (void *)__phys_to_virt(reg->base);
>>>> +               void *end = (void *)__phys_to_virt(reg->base + reg->size);
>>>> +               int *shadow_start, *shadow_end;
>>>> +
>>>> +               if (start >= end)
>>>> +                       break;
>>>> +               shadow_start = (int *)((unsigned long)kasan_mem_to_shadow(start) & ~(PAGE_SIZE - 1));
>>>> +               shadow_end =  (int *)kasan_mem_to_shadow(end);
>>>
>>> shadow_start and shadow_end can refer to the same page as in the
>>> previous iteration. For instance, I have these two regions
>>>
>>>   0x00006e090000-0x00006e0adfff [Conventional Memory|   |  |  |  |  |
>>> |   |WB|WT|WC|UC]
>>>   0x00006e0ae000-0x00006e0affff [Loader Data        |   |  |  |  |  |
>>> |   |WB|WT|WC|UC]
>>>
>>> which are covered by different memblocks since the second one is
>>> marked as MEMBLOCK_NOMAP, due to the fact that it contains the UEFI
>>> memory map.
>>>
>>> I get the following output
>>>
>>> kasan: screwed shadow mapping 23575, 23573
>>>
>>> which I think is simply a result from the fact the shadow_start refers
>>> to the same page as in the previous iteration(s)
>>>
>>
>> You are right. 
>> So we should write 'shadow_start' instead of 'i'.
> 
> FWIW with the below patch I don't see any "screwed shadow mapping"
> warnings on my board, and still later see a tonne of KASAN splats in the
> scheduler.
> 

It is possible that I missed something, but I think it means that shadow is alright.

I wonder whether this happens on 4.4. If not, than something in 4.5-rc1 caused this, and the obvious suspect
here is irq stack.
Catalin Marinas Feb. 17, 2016, 10:18 a.m. UTC | #3
On Wed, Feb 17, 2016 at 12:15:15PM +0300, Andrey Ryabinin wrote:
> On 02/16/2016 07:42 PM, Mark Rutland wrote:
> > FWIW with the below patch I don't see any "screwed shadow mapping"
> > warnings on my board, and still later see a tonne of KASAN splats in the
> > scheduler.
> 
> It is possible that I missed something, but I think it means that
> shadow is alright.
> 
> I wonder whether this happens on 4.4. If not, than something in
> 4.5-rc1 caused this, and the obvious suspect here is irq stack.

It doesn't seem to happen on 4.4, it starts somewhere before 4.5-rc1. I
tested the arm64 branch that we pushed upstream with the irq stack
changes but it didn't trigger. It could as well be a combination of
multiple change or just something else.

We'll do some bisecting, though it's not that fun going through the
merging window commits, especially since many are based on 4.4-rcX (we
could try on merges only first, there are fewer).
Mark Rutland Feb. 17, 2016, 10:48 a.m. UTC | #4
On Wed, Feb 17, 2016 at 10:18:03AM +0000, Catalin Marinas wrote:
> On Wed, Feb 17, 2016 at 12:15:15PM +0300, Andrey Ryabinin wrote:
> > On 02/16/2016 07:42 PM, Mark Rutland wrote:
> > > FWIW with the below patch I don't see any "screwed shadow mapping"
> > > warnings on my board, and still later see a tonne of KASAN splats in the
> > > scheduler.
> > 
> > It is possible that I missed something, but I think it means that
> > shadow is alright.
> > 
> > I wonder whether this happens on 4.4. If not, than something in
> > 4.5-rc1 caused this, and the obvious suspect here is irq stack.
> 
> It doesn't seem to happen on 4.4, it starts somewhere before 4.5-rc1. I
> tested the arm64 branch that we pushed upstream with the irq stack
> changes but it didn't trigger. It could as well be a combination of
> multiple change or just something else.
> 
> We'll do some bisecting, though it's not that fun going through the
> merging window commits, especially since many are based on 4.4-rcX (we
> could try on merges only first, there are fewer).

FWIW I did that bisect last night, and that fingered commit
f11aef69b235bc30 ("Merge branch 'pm-cpuidle'") as the first bad commit.

Either there's some subtle interaction, or it's less reproducible than I
thought and the "good" commits are only "possibly-good". I'll try to dig
into that.

FWIW, my bisect log is:

git bisect start
# bad: [92e963f50fc74041b5e9e744c330dca48e04f08d] Linux 4.5-rc1
git bisect bad 92e963f50fc74041b5e9e744c330dca48e04f08d
# good: [afd2ff9b7e1b367172f18ba7f693dfb62bdcb2dc] Linux 4.4
git bisect good afd2ff9b7e1b367172f18ba7f693dfb62bdcb2dc
# good: [4dffbfc48d65e5d8157a634fd670065d237a9377] arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP
git bisect good 4dffbfc48d65e5d8157a634fd670065d237a9377
# good: [1289ace5b4f70f1e68ce785735b82c7e483de863] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
git bisect good 1289ace5b4f70f1e68ce785735b82c7e483de863
# good: [984065055e6e39f8dd812529e11922374bd39352] Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
git bisect good 984065055e6e39f8dd812529e11922374bd39352
# good: [6d1c244803f2c013fb9c31b0904c01f1830b73ab] Merge tag 'armsoc-dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
git bisect good 6d1c244803f2c013fb9c31b0904c01f1830b73ab
# bad: [0a13daedf7ffc71b0c374a036355da7fddb20d6d] Merge branch 'for-4.5/lightnvm' of git://git.kernel.dk/linux-block
git bisect bad 0a13daedf7ffc71b0c374a036355da7fddb20d6d
# bad: [278e5acae1321978686e85ca92906054a36aa19b] Merge tag 'for-4.5' of git://git.osdn.jp/gitroot/uclinux-h8/linux
git bisect bad 278e5acae1321978686e85ca92906054a36aa19b
# good: [f9cd69fe5eb6347b4de56458d0378bc0fa44bce9] Merge tag 'armsoc-defconfig' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
git bisect good f9cd69fe5eb6347b4de56458d0378bc0fa44bce9
# good: [9638685e32af961943b679fcb72d4ddd458eb18f] Merge tag 'armsoc-drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc
git bisect good 9638685e32af961943b679fcb72d4ddd458eb18f
# good: [fa8bb4518771b19460a318fbab3eb36c81db3a50] Merge branch 'pm-devfreq'
git bisect good fa8bb4518771b19460a318fbab3eb36c81db3a50
# bad: [30f05309bde49295e02e45c7e615f73aa4e0ccc2] Merge tag 'pm+acpi-4.5-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
git bisect bad 30f05309bde49295e02e45c7e615f73aa4e0ccc2
# good: [5bb1729cbdfbe974ad6385be94b14afbac97e19f] cpuidle: menu: Avoid pointless checks in menu_select()
git bisect good 5bb1729cbdfbe974ad6385be94b14afbac97e19f
# bad: [db2b52f75250c88ee3c6ba3d91bef38f3f1a1e8c] Merge branch 'pm-tools'
git bisect bad db2b52f75250c88ee3c6ba3d91bef38f3f1a1e8c
# good: [38cb76a307821f76c7f9dff7449f73aeb014d5cc] cpupower: Fix build error in cpufreq-info
git bisect good 38cb76a307821f76c7f9dff7449f73aeb014d5cc
# bad: [f11aef69b235bc30c323776d75ac23b43aac45bb] Merge branch 'pm-cpuidle'
git bisect bad f11aef69b235bc30c323776d75ac23b43aac45bb
# first bad commit: [f11aef69b235bc30c323776d75ac23b43aac45bb] Merge branch 'pm-cpuidle'

Thanks,
Mark.
diff mbox

Patch

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..ee035c2 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -117,6 +117,55 @@  static void __init cpu_set_ttbr1(unsigned long ttbr1)
 	: "r" (ttbr1));
 }
 
+static void verify_shadow(void)
+{
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		void *start = (void *)__phys_to_virt(reg->base);
+		void *end = (void *)__phys_to_virt(reg->base + reg->size);
+		unsigned long *shadow_start, *shadow_end;
+
+		if (start >= end)
+			break;
+		shadow_start = (unsigned long *)kasan_mem_to_shadow(start);
+		shadow_end =  (unsigned long *)kasan_mem_to_shadow(end);
+		for (; shadow_start < shadow_end; shadow_start += PAGE_SIZE/sizeof(unsigned long)) {
+			*shadow_start = (unsigned long)shadow_start;
+		}
+	}
+
+	for_each_memblock(memory, reg) {
+		void *start = (void *)__phys_to_virt(reg->base);
+		void *end = (void *)__phys_to_virt(reg->base + reg->size);
+		unsigned long *shadow_start, *shadow_end;
+
+		if (start >= end)
+			break;
+		shadow_start = (unsigned long *)kasan_mem_to_shadow(start);
+		shadow_end =  (unsigned long *)kasan_mem_to_shadow(end);
+		for (; shadow_start < shadow_end; shadow_start += PAGE_SIZE/sizeof(unsigned long)) {
+			if (*shadow_start != (unsigned long)shadow_start) {
+				pr_err("screwed shadow mapping %lx, %lx\n", *shadow_start, (unsigned long)shadow_start);
+				goto clear;
+			}
+		}
+	}
+clear:
+	for_each_memblock(memory, reg) {
+		void *start = (void *)__phys_to_virt(reg->base);
+		void *end = (void *)__phys_to_virt(reg->base + reg->size);
+		unsigned long shadow_start, shadow_end;
+
+		if (start >= end)
+			break;
+		shadow_start =  (unsigned long)kasan_mem_to_shadow(start);
+		shadow_end =  (unsigned long)kasan_mem_to_shadow(end);
+		memset((void *)shadow_start, 0, shadow_end - shadow_start);
+	}
+
+}
+
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
@@ -159,6 +208,8 @@  void __init kasan_init(void)
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();
 
+	verify_shadow();
+
 	/* At this point kasan is fully initialized. Enable error messages */
 	init_task.kasan_depth = 0;
 	pr_info("KernelAddressSanitizer initialized\n");