From 923f7f6970bd448b8e88b9e4be10fd01fc7106a4 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 15 Oct 2008 22:05:13 -0700 Subject: [PATCH] GRU driver: minor updates A few minor updates for the GRU driver. - documentation changes found in code reviews - changes to #ifdefs to make them recognized by "unifdef" (used in simulator testing) - change GRU context load/unload to prefetch data [akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-gru/gru.h | 4 ++-- drivers/misc/sgi-gru/gru_instructions.h | 10 +++++---- drivers/misc/sgi-gru/grufault.c | 11 +++++----- drivers/misc/sgi-gru/grufile.c | 8 ++++++- drivers/misc/sgi-gru/gruhandles.h | 5 ----- drivers/misc/sgi-gru/grukservices.c | 1 + drivers/misc/sgi-gru/grumain.c | 29 +++++++++++++++---------- 7 files changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index 40df7cb3f0a..f93f03a9e6e 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -30,9 +30,9 @@ /* * Size used to map GRU GSeg */ -#if defined CONFIG_IA64 +#if defined(CONFIG_IA64) #define GRU_GSEG_PAGESIZE (256 * 1024UL) -#elif defined CONFIG_X86_64 +#elif defined(CONFIG_X86_64) #define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ #else #error "Unsupported architecture" diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 0dc36225c7c..48762e7b98b 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -26,7 +26,7 @@ * Architecture dependent functions */ -#if defined CONFIG_IA64 +#if defined(CONFIG_IA64) #include #include #define __flush_cache(p) ia64_fc(p) @@ -36,7 +36,7 @@ barrier(); \ *((volatile int *)(p)) = v; /* force st.rel */ \ } while (0) -#elif defined CONFIG_X86_64 +#elif defined(CONFIG_X86_64) #define __flush_cache(p) clflush(p) #define gru_ordered_store_int(p,v) \ do { \ @@ -299,6 +299,7 @@ static inline void gru_flush_cache(void *p) static inline void gru_start_instruction(struct gru_instruction *ins, int op32) { gru_ordered_store_int(ins, op32); + gru_flush_cache(ins); } @@ -604,8 +605,9 @@ static inline int gru_get_cb_substatus(void *cb) static inline int gru_check_status(void *cb) { struct gru_control_block_status *cbs = (void *)cb; - int ret = cbs->istatus; + int ret; + ret = cbs->istatus; if (ret == CBS_CALL_OS) ret = gru_check_status_proc(cb); return ret; @@ -617,7 +619,7 @@ static inline int gru_check_status(void *cb) static inline int gru_wait(void *cb) { struct gru_control_block_status *cbs = (void *)cb; - int ret = cbs->istatus;; + int ret = cbs->istatus; if (ret != CBS_IDLE) ret = gru_wait_proc(cb); diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 3d33015bbf3..8c389d606c3 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -214,12 +214,14 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, } /* - * * atomic_pte_lookup * * Convert a user virtual address to a physical address * Only supports Intel large pages (2MB only) on x86_64. * ZZZ - hugepage support is incomplete + * + * NOTE: mmap_sem is already held on entry to this function. This + * guarantees existence of the page tables. */ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, int write, unsigned long *paddr, int *pageshift) @@ -229,9 +231,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, pud_t *pudp; pte_t pte; - WARN_ON(irqs_disabled()); /* ZZZ debug */ - - local_irq_disable(); pgdp = pgd_offset(vma->vm_mm, vaddr); if (unlikely(pgd_none(*pgdp))) goto err; @@ -250,8 +249,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, #endif pte = *pte_offset_kernel(pmdp, vaddr); - local_irq_enable(); - if (unlikely(!pte_present(pte) || (write && (!pte_write(pte) || !pte_dirty(pte))))) return 1; @@ -324,6 +321,7 @@ static int gru_try_dropin(struct gru_thread_state *gts, * Atomic lookup is faster & usually works even if called in non-atomic * context. */ + rmb(); /* Must/check ms_range_active before loading PTEs */ ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift); if (ret) { if (!cb) @@ -543,6 +541,7 @@ int gru_get_exception_detail(unsigned long arg) ucbnum = get_cb_number((void *)excdet.cb); cbrnum = thread_cbr_number(gts, ucbnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + prefetchw(cbe); /* Harmless on hardware, required for emulator */ excdet.opc = cbe->opccpy; excdet.exopc = cbe->exopccpy; excdet.ecause = cbe->ecause; diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index d61cee796ef..5c027b6b4e5 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -113,7 +113,7 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) return -EPERM; if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || - vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) return -EINVAL; vma->vm_flags |= @@ -398,6 +398,12 @@ static int __init gru_init(void) irq = get_base_irq(); for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { ret = request_irq(irq + chip, gru_intr, 0, id, NULL); + /* TODO: fix irq handling on x86. For now ignore failures because + * interrupts are not required & not yet fully supported */ + if (ret) { + printk("!!!WARNING: GRU ignoring request failure!!!\n"); + ret = 0; + } if (ret) { printk(KERN_ERR "%s: request_irq failed\n", GRU_DRIVER_ID_STR); diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index d16031d6267..b63018d60fe 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -91,12 +91,7 @@ #define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) /* Convert an arbitrary handle address to the beginning of the GRU segment */ -#ifndef __PLUGIN__ #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) -#else -extern void *gmu_grubase(void *h); -#define GRUBASE(h) gmu_grubase(h) -#endif /* General addressing macros. */ static inline void *get_gseg_base_address(void *base, int ctxnum) diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 08d29cd756b..880c55dfb66 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -122,6 +122,7 @@ int gru_get_cb_exception_detail(void *cb, struct gru_control_block_extended *cbe; cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); + prefetchw(cbe); /* Harmless on hardware, required for emulator */ excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 0eeb8dddd2f..e11e1ac5090 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -432,29 +432,35 @@ static inline long gru_copy_handle(void *d, void *s) return GRU_HANDLE_BYTES; } -/* rewrite in assembly & use lots of prefetch */ -static void gru_load_context_data(void *save, void *grubase, int ctxnum, - unsigned long cbrmap, unsigned long dsrmap) +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap, + unsigned long length) { - void *gseg, *cb, *cbe; - unsigned long length; int i, scr; - gseg = grubase + ctxnum * GRU_GSEG_STRIDE; - length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, GRU_CACHE_LINE_BYTES); - cb = gseg + GRU_CB_BASE; - cbe = grubase + GRU_CBE_BASE; for_each_cbr_in_allocation_map(i, &cbrmap, scr) { prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, GRU_CACHE_LINE_BYTES); cb += GRU_HANDLE_STRIDE; } +} + +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { save += gru_copy_handle(cb, save); save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save); @@ -472,15 +478,16 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum, int i, scr; gseg = grubase + ctxnum * GRU_GSEG_STRIDE; - cb = gseg + GRU_CB_BASE; cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { save += gru_copy_handle(save, cb); save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); cb += GRU_HANDLE_STRIDE; } - length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; memcpy(save, gseg + GRU_DS_BASE, length); } -- 2.41.1