From patchwork Tue Aug 14 17:52:11 2012
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Catalin Marinas <catalin.marinas@arm.com>
X-Patchwork-Id: 1322321
Return-Path: 
 <linux-arm-kernel-bounces+patchwork-linux-arm=patchwork.kernel.org@lists.infradead.org>
X-Original-To: patchwork-linux-arm@patchwork.kernel.org
Delivered-To: patchwork-process-083081@patchwork2.kernel.org
Received: from merlin.infradead.org (merlin.infradead.org [205.233.59.134])
	by patchwork2.kernel.org (Postfix) with ESMTP id D573EDF266
	for <patchwork-linux-arm@patchwork.kernel.org>;
	Tue, 14 Aug 2012 17:58:07 +0000 (UTC)
Received: from localhost ([::1] helo=merlin.infradead.org)
	by merlin.infradead.org with esmtp (Exim 4.76 #1 (Red Hat Linux))
	id 1T1LJd-00073B-NS; Tue, 14 Aug 2012 17:54:05 +0000
Received: from service87.mimecast.com ([91.220.42.44])
	by merlin.infradead.org with esmtp (Exim 4.76 #1 (Red Hat Linux))
	id 1T1LIi-0006ow-AL for linux-arm-kernel@lists.infradead.org;
	Tue, 14 Aug 2012 17:53:17 +0000
Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com
	[217.140.96.21]) by service87.mimecast.com;
	Tue, 14 Aug 2012 18:53:06 +0100
Received: from e102109-lin.cambridge.arm.com ([10.1.255.212]) by
	cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.0);
	Tue, 14 Aug 2012 18:54:49 +0100
From: Catalin Marinas <catalin.marinas@arm.com>
To: linux-arch@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2 10/31] arm64: TLB maintenance functionality
Date: Tue, 14 Aug 2012 18:52:11 +0100
Message-Id: <1344966752-16102-11-git-send-email-catalin.marinas@arm.com>
X-Mailer: git-send-email 1.7.9.111.gf3fb0
In-Reply-To: <1344966752-16102-1-git-send-email-catalin.marinas@arm.com>
References: <1344966752-16102-1-git-send-email-catalin.marinas@arm.com>
X-OriginalArrivalTime: 14 Aug 2012 17:54:49.0447 (UTC)
	FILETIME=[E60BAF70:01CD7A45]
X-MC-Unique: 112081418530614601
X-Spam-Note: CRM114 invocation failed
X-Spam-Score: -2.6 (--)
X-Spam-Report: SpamAssassin version 3.3.2 on merlin.infradead.org summary:
	Content analysis details:   (-2.6 points)
	pts rule name              description
	---- ----------------------
	--------------------------------------------------
	-0.7 RCVD_IN_DNSWL_LOW RBL: Sender listed at http://www.dnswl.org/,
	low trust [91.220.42.44 listed in list.dnswl.org]
	-0.0 SPF_PASS               SPF: sender matches SPF record
	-1.9 BAYES_00               BODY: Bayes spam probability is 0 to 1%
	[score: 0.0000]
Cc: Will Deacon <will.deacon@arm.com>, linux-kernel@vger.kernel.org,
	Arnd Bergmann <arnd@arndb.de>
X-BeenThere: linux-arm-kernel@lists.infradead.org
X-Mailman-Version: 2.1.14
Precedence: list
List-Id: <linux-arm-kernel.lists.infradead.org>
List-Unsubscribe: 
 <http://lists.infradead.org/mailman/options/linux-arm-kernel>,
	<mailto:linux-arm-kernel-request@lists.infradead.org?subject=unsubscribe>
List-Archive: <http://lists.infradead.org/pipermail/linux-arm-kernel/>
List-Post: <mailto:linux-arm-kernel@lists.infradead.org>
List-Help: <mailto:linux-arm-kernel-request@lists.infradead.org?subject=help>
List-Subscribe: 
 <http://lists.infradead.org/mailman/listinfo/linux-arm-kernel>,
	<mailto:linux-arm-kernel-request@lists.infradead.org?subject=subscribe>
MIME-Version: 1.0
Sender: linux-arm-kernel-bounces@lists.infradead.org
Errors-To: 
 linux-arm-kernel-bounces+patchwork-linux-arm=patchwork.kernel.org@lists.infradead.org

This patch adds the TLB maintenance functions. There is no distinction
made between the I and D TLBs. TLB maintenance operations are
automatically broadcast between CPUs in hardware. The inner-shareable
operations are always present, even on UP systems.

NOTE: Large part of this patch to be dropped once Peter Z's generic
mmu_gather patches are merged.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlb.h      |  190 +++++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/tlbflush.h |  123 ++++++++++++++++++++++++
 arch/arm64/mm/tlb.S               |   71 ++++++++++++++
 3 files changed, 384 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm64/include/asm/tlb.h
 create mode 100644 arch/arm64/include/asm/tlbflush.h
 create mode 100644 arch/arm64/mm/tlb.S

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
new file mode 100644
index 0000000..654f096
--- /dev/null
+++ b/arch/arm64/include/asm/tlb.h
@@ -0,0 +1,190 @@
+/*
+ * Based on arch/arm/include/asm/tlb.h
+ *
+ * Copyright (C) 2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TLB_H
+#define __ASM_TLB_H
+
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+#define MMU_GATHER_BUNDLE	8
+
+/*
+ * TLB handling.  This allows us to remove pages from the page
+ * tables, and efficiently handle the TLB issues.
+ */
+struct mmu_gather {
+	struct mm_struct	*mm;
+	unsigned int		fullmm;
+	struct vm_area_struct	*vma;
+	unsigned long		range_start;
+	unsigned long		range_end;
+	unsigned int		nr;
+	unsigned int		max;
+	struct page		**pages;
+	struct page		*local[MMU_GATHER_BUNDLE];
+};
+
+/*
+ * This is unnecessarily complex.  There's three ways the TLB shootdown
+ * code is used:
+ *  1. Unmapping a range of vmas.  See zap_page_range(), unmap_region().
+ *     tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
+ *     tlb->vma will be non-NULL.
+ *  2. Unmapping all vmas.  See exit_mmap().
+ *     tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
+ *     tlb->vma will be non-NULL.  Additionally, page tables will be freed.
+ *  3. Unmapping argument pages.  See shift_arg_pages().
+ *     tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
+ *     tlb->vma will be NULL.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	if (tlb->fullmm || !tlb->vma)
+		flush_tlb_mm(tlb->mm);
+	else if (tlb->range_end > 0) {
+		flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
+		tlb->range_start = TASK_SIZE;
+		tlb->range_end = 0;
+	}
+}
+
+static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
+{
+	if (!tlb->fullmm) {
+		if (addr < tlb->range_start)
+			tlb->range_start = addr;
+		if (addr + PAGE_SIZE > tlb->range_end)
+			tlb->range_end = addr + PAGE_SIZE;
+	}
+}
+
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	}
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush(tlb);
+	free_pages_and_swap_cache(tlb->pages, tlb->nr);
+	tlb->nr = 0;
+	if (tlb->pages == tlb->local)
+		__tlb_alloc_page(tlb);
+}
+
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+{
+	tlb->mm = mm;
+	tlb->fullmm = fullmm;
+	tlb->vma = NULL;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
+	tlb->nr = 0;
+	__tlb_alloc_page(tlb);
+}
+
+static inline void
+tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+{
+	tlb_flush_mmu(tlb);
+
+	/* keep the page table cache within bounds */
+	check_pgt_cache();
+
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
+}
+
+/*
+ * Memorize the range for the TLB flush.
+ */
+static inline void
+tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+}
+
+/*
+ * In the case of tlb vma handling, we can optimise these away in the
+ * case where we're doing a full MM flush.  When we're doing a munmap,
+ * the vmas are adjusted to only cover the region to be torn down.
+ */
+static inline void
+tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+	if (!tlb->fullmm) {
+		tlb->vma = vma;
+		tlb->range_start = TASK_SIZE;
+		tlb->range_end = 0;
+	}
+}
+
+static inline void
+tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+	if (!tlb->fullmm)
+		tlb_flush(tlb);
+}
+
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	tlb->pages[tlb->nr++] = page;
+	VM_BUG_ON(tlb->nr > tlb->max);
+	return tlb->max - tlb->nr;
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+	unsigned long addr)
+{
+	pgtable_page_dtor(pte);
+	tlb_add_flush(tlb, addr);
+	tlb_remove_page(tlb, pte);
+}
+
+#ifndef CONFIG_ARM64_64K_PAGES
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+				  unsigned long addr)
+{
+	tlb_add_flush(tlb, addr);
+	tlb_remove_page(tlb, virt_to_page(pmdp));
+}
+#endif
+
+#define pte_free_tlb(tlb, ptep, addr)	__pte_free_tlb(tlb, ptep, addr)
+#define pmd_free_tlb(tlb, pmdp, addr)	__pmd_free_tlb(tlb, pmdp, addr)
+#define pud_free_tlb(tlb, pudp, addr)	pud_free((tlb)->mm, pudp)
+
+#define tlb_migrate_finish(mm)		do { } while (0)
+
+#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
new file mode 100644
index 0000000..615d131
--- /dev/null
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -0,0 +1,123 @@
+/*
+ * Based on arch/arm/include/asm/tlbflush.h
+ *
+ * Copyright (C) 1999-2003 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_TLBFLUSH_H
+#define __ASM_TLBFLUSH_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/sched.h>
+#include <asm/cputype.h>
+
+extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
+extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
+
+extern struct cpu_tlb_fns cpu_tlb;
+
+/*
+ *	TLB Management
+ *	==============
+ *
+ *	The arch/arm/mm/tlb-*.S files implement these methods.
+ *
+ *	The TLB specific code is expected to perform whatever tests it
+ *	needs to determine if it should invalidate the TLB for each
+ *	call.  Start addresses are inclusive and end addresses are
+ *	exclusive; it is safe to round these addresses down.
+ *
+ *	flush_tlb_all()
+ *
+ *		Invalidate the entire TLB.
+ *
+ *	flush_tlb_mm(mm)
+ *
+ *		Invalidate all TLB entries in a particular address
+ *		space.
+ *		- mm	- mm_struct describing address space
+ *
+ *	flush_tlb_range(mm,start,end)
+ *
+ *		Invalidate a range of TLB entries in the specified
+ *		address space.
+ *		- mm	- mm_struct describing address space
+ *		- start - start address (may not be aligned)
+ *		- end	- end address (exclusive, may not be aligned)
+ *
+ *	flush_tlb_page(vaddr,vma)
+ *
+ *		Invalidate the specified page in the specified address range.
+ *		- vaddr - virtual address (may not be aligned)
+ *		- vma	- vma_struct describing address range
+ *
+ *	flush_kern_tlb_page(kaddr)
+ *
+ *		Invalidate the TLB entry for the specified page.  The address
+ *		will be in the kernels virtual memory space.  Current uses
+ *		only require the D-TLB to be invalidated.
+ *		- kaddr - Kernel virtual memory address
+ */
+static inline void flush_tlb_all(void)
+{
+	dsb();
+	asm("tlbi	vmalle1is");
+	dsb();
+	isb();
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long asid = (unsigned long)ASID(mm) << 48;
+
+	dsb();
+	asm("tlbi	aside1is, %0" : : "r" (asid));
+	dsb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long uaddr)
+{
+	unsigned long addr = uaddr >> 12 |
+		((unsigned long)ASID(vma->vm_mm) << 48);
+
+	dsb();
+	asm("tlbi	vae1is, %0" : : "r" (addr));
+	dsb();
+}
+
+/*
+ * Convert calls to our calling convention.
+ */
+#define flush_tlb_range(vma,start,end)	__cpu_flush_user_tlb_range(start,end,vma)
+#define flush_tlb_kernel_range(s,e)	__cpu_flush_kern_tlb_range(s,e)
+
+/*
+ * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep)
+{
+	/*
+	 * set_pte() does not have a DSB, so make sure that the page table
+	 * write is visible.
+	 */
+	dsb();
+}
+
+#endif
+
+#endif
diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S
new file mode 100644
index 0000000..8ae80a1
--- /dev/null
+++ b/arch/arm64/mm/tlb.S
@@ -0,0 +1,71 @@
+/*
+ * Based on arch/arm/mm/tlb.S
+ *
+ * Copyright (C) 1997-2002 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+/*
+ *	__cpu_flush_user_tlb_range(start, end, vma)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- vma   - vma_struct describing address range
+ */
+ENTRY(__cpu_flush_user_tlb_range)
+	vma_vm_mm x3, x2			// get vma->vm_mm
+	mmid	x3, x3				// get vm_mm->context.id
+	dsb	sy
+	lsr	x0, x0, #12			// align address
+	lsr	x1, x1, #12
+	bfi	x0, x3, #48, #16		// start VA and ASID
+	bfi	x1, x3, #48, #16		// end VA and ASID
+1:	tlbi	vae1is, x0			// TLB invalidate by address and ASID
+	add	x0, x0, #1
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__cpu_flush_user_tlb_range)
+
+/*
+ *	__cpu_flush_kern_tlb_range(start,end)
+ *
+ *	Invalidate a range of kernel TLB entries.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ */
+ENTRY(__cpu_flush_kern_tlb_range)
+	dsb	sy
+	lsr	x0, x0, #12			// align address
+	lsr	x1, x1, #12
+1:	tlbi	vaae1is, x0			// TLB invalidate by address
+	add	x0, x0, #1
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	isb
+	ret
+ENDPROC(__cpu_flush_kern_tlb_range)