From: Russell King Date: Wed, 8 Jun 2005 14:28:24 +0000 (+0100) Subject: [PATCH] ARM: Fix Xscale copy_page implementation X-Git-Tag: v2.6.12~72^2 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=f8f98a9335db4a7d6285b785180fad720bf22864;p=linux-2.6-omap-h63xx.git [PATCH] ARM: Fix Xscale copy_page implementation The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking from the copypage functions which broke the XScale implementation. This patch fixes the locking on XScale and removes the now unneeded minicache code. Signed-off-by: Russell King Checked-by: Richard Purdie --- diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 48bac7da8c7..ade0e2222f5 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -228,7 +228,6 @@ config CPU_SA1100 select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_TLB_V4WB - select CPU_MINICACHE # XScale config CPU_XSCALE @@ -239,7 +238,6 @@ config CPU_XSCALE select CPU_ABRT_EV5T select CPU_CACHE_VIVT select CPU_TLB_V4WBI - select CPU_MINICACHE # ARMv6 config CPU_V6 @@ -345,11 +343,6 @@ config CPU_TLB_V4WBI config CPU_TLB_V6 bool -config CPU_MINICACHE - bool - help - Processor has a minicache. - comment "Processor Features" config ARM_THUMB diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ccf316c11e0..59f47d4c2df 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o -obj-$(CONFIG_CPU_MINICACHE) += minicache.o - obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o diff --git a/arch/arm/mm/copypage-xscale.S b/arch/arm/mm/copypage-xscale.S deleted file mode 100644 index bb277316ef5..00000000000 --- a/arch/arm/mm/copypage-xscale.S +++ /dev/null @@ -1,113 +0,0 @@ -/* - * linux/arch/arm/lib/copypage-xscale.S - * - * Copyright (C) 2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - -/* - * General note: - * We don't really want write-allocate cache behaviour for these functions - * since that will just eat through 8K of the cache. - */ - - .text - .align 5 -/* - * XScale optimised copy_user_page - * r0 = destination - * r1 = source - * r2 = virtual user address of ultimate destination page - * - * The source page may have some clean entries in the cache already, but we - * can safely ignore them - break_cow() will flush them out of the cache - * if we eventually end up using our copied page. - * - * What we could do is use the mini-cache to buffer reads from the source - * page. We rely on the mini-cache being smaller than one page, so we'll - * cycle through the complete cache anyway. - */ -ENTRY(xscale_mc_copy_user_page) - stmfd sp!, {r4, r5, lr} - mov r5, r0 - mov r0, r1 - bl map_page_minicache - mov r1, r5 - mov lr, #PAGE_SZ/64-1 - - /* - * Strangely enough, best performance is achieved - * when prefetching destination as well. (NP) - */ - pld [r0, #0] - pld [r0, #32] - pld [r1, #0] - pld [r1, #32] - -1: pld [r0, #64] - pld [r0, #96] - pld [r1, #64] - pld [r1, #96] - -2: ldrd r2, [r0], #8 - ldrd r4, [r0], #8 - mov ip, r1 - strd r2, [r1], #8 - ldrd r2, [r0], #8 - strd r4, [r1], #8 - ldrd r4, [r0], #8 - strd r2, [r1], #8 - strd r4, [r1], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - ldrd r2, [r0], #8 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - ldrd r4, [r0], #8 - mov ip, r1 - strd r2, [r1], #8 - ldrd r2, [r0], #8 - strd r4, [r1], #8 - ldrd r4, [r0], #8 - strd r2, [r1], #8 - strd r4, [r1], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - subs lr, lr, #1 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - bgt 1b - beq 2b - - ldmfd sp!, {r4, r5, pc} - - .align 5 -/* - * XScale optimised clear_user_page - * r0 = destination - * r1 = virtual user address of ultimate destination page - */ -ENTRY(xscale_mc_clear_user_page) - mov r1, #PAGE_SZ/32 - mov r2, #0 - mov r3, #0 -1: mov ip, r0 - strd r2, [r0], #8 - strd r2, [r0], #8 - strd r2, [r0], #8 - strd r2, [r0], #8 - mcr p15, 0, ip, c7, c10, 1 @ clean D line - subs r1, r1, #1 - mcr p15, 0, ip, c7, c6, 1 @ invalidate D line - bne 1b - mov pc, lr - - __INITDATA - - .type xscale_mc_user_fns, #object -ENTRY(xscale_mc_user_fns) - .long xscale_mc_clear_user_page - .long xscale_mc_copy_user_page - .size xscale_mc_user_fns, . - xscale_mc_user_fns diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c new file mode 100644 index 00000000000..42a6ee255ce --- /dev/null +++ b/arch/arm/mm/copypage-xscale.c @@ -0,0 +1,131 @@ +/* + * linux/arch/arm/lib/copypage-xscale.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include +#include + +#include +#include +#include + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently. + */ +#define COPYPAGE_MINICACHE 0xffff8000 + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_CACHEABLE) + +#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) + +static DEFINE_SPINLOCK(minicache_lock); + +/* + * XScale mini-dcache optimised copy_user_page + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + */ +static void __attribute__((naked)) +mc_copy_user_page(void *from, void *to) +{ + /* + * Strangely enough, best performance is achieved + * when prefetching destination as well. (NP) + */ + asm volatile( + "stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + pld [r0, #0] \n\ + pld [r0, #32] \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r0, #64] \n\ + pld [r0, #96] \n\ + pld [r1, #64] \n\ + pld [r1, #96] \n\ +2: ldrd r2, [r0], #8 \n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + ldrd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs lr, lr, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bgt 1b \n\ + beq 2b \n\ + ldmfd sp!, {r4, r5, pc} " + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); +} + +void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +{ + spin_lock(&minicache_lock); + + set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); + flush_tlb_kernel_page(COPYPAGE_MINICACHE); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + spin_unlock(&minicache_lock); +} + +/* + * XScale optimised clear_user_page + */ +void __attribute__((naked)) +xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) +{ + asm volatile( + "mov r1, %0 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mov ip, r0 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + strd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs r1, r1, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bne 1b \n\ + mov pc, lr" + : + : "I" (PAGE_SIZE / 32)); +} + +struct cpu_user_fns xscale_mc_user_fns __initdata = { + .cpu_clear_user_page = xscale_mc_clear_user_page, + .cpu_copy_user_page = xscale_mc_copy_user_page, +}; diff --git a/arch/arm/mm/minicache.c b/arch/arm/mm/minicache.c index dedf2ab01b2..e69de29bb2d 100644 --- a/arch/arm/mm/minicache.c +++ b/arch/arm/mm/minicache.c @@ -1,73 +0,0 @@ -/* - * linux/arch/arm/mm/minicache.c - * - * Copyright (C) 2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This handles the mini data cache, as found on SA11x0 and XScale - * processors. When we copy a user page page, we map it in such a way - * that accesses to this page will not touch the main data cache, but - * will be cached in the mini data cache. This prevents us thrashing - * the main data cache on page faults. - */ -#include -#include - -#include -#include -#include - -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. - */ -#define minicache_address (0xffff8000) -#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) - -static pte_t *minicache_pte; - -/* - * Note that this is intended to be called only from the copy_user_page - * asm code; anything else will require special locking to prevent the - * mini-cache space being re-used. (Note: probably preempt unsafe). - * - * We rely on the fact that the minicache is 2K, and we'll be pushing - * 4K of data through it, so we don't actually have to specifically - * flush the minicache when we change the mapping. - * - * Note also: assert(PAGE_OFFSET <= virt < high_memory). - * Unsafe: preempt, kmap. - */ -unsigned long map_page_minicache(unsigned long virt) -{ - set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot)); - flush_tlb_kernel_page(minicache_address); - - return minicache_address; -} - -static int __init minicache_init(void) -{ - pgd_t *pgd; - pmd_t *pmd; - - spin_lock(&init_mm.page_table_lock); - - pgd = pgd_offset_k(minicache_address); - pmd = pmd_alloc(&init_mm, pgd, minicache_address); - if (!pmd) - BUG(); - minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address); - if (!minicache_pte) - BUG(); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -core_initcall(minicache_init);