From: Christoph Lameter Date: Fri, 23 Sep 2005 04:44:02 +0000 (-0700) Subject: [PATCH] slab: fix handling of pages from foreign NUMA nodes X-Git-Tag: v2.6.14-rc3~51^2~51 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=ff69416e6323fe9d38c42a06ebdefeb58bbe9336;p=linux-2.6-omap-h63xx.git [PATCH] slab: fix handling of pages from foreign NUMA nodes The NUMA slab allocator may allocate pages from foreign nodes onto the lists for a particular node if a node runs out of memory. Inspecting the slab->nodeid field will not reflect that the page is now in use for the slabs of another node. This patch fixes that issue by adding a node parameter to free_block so that the caller can indicate which node currently uses a slab. Also removes the check for the current node from kmem_cache_alloc_node since the process may shift later to another node which may lead to an allocation on a node other than the intended one. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/mm/slab.c b/mm/slab.c index cf19ff2ab5e..c9adfce0040 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -640,7 +640,7 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(kmem_cache_t* cachep, void** objpp, int len); +static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); static void enable_cpucache (kmem_cache_t *cachep); static void cache_reap (void *unused); static int __node_shrink(kmem_cache_t *cachep, int node); @@ -805,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache if (ac->avail) { spin_lock(&rl3->list_lock); - free_block(cachep, ac->entry, ac->avail); + free_block(cachep, ac->entry, ac->avail, node); ac->avail = 0; spin_unlock(&rl3->list_lock); } @@ -926,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, nc->entry, nc->avail); + 
free_block(cachep, nc->entry, nc->avail, node); if (!cpus_empty(mask)) { spin_unlock(&l3->list_lock); @@ -935,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (l3->shared) { free_block(cachep, l3->shared->entry, - l3->shared->avail); + l3->shared->avail, node); kfree(l3->shared); l3->shared = NULL; } @@ -1883,12 +1883,13 @@ static void do_drain(void *arg) { kmem_cache_t *cachep = (kmem_cache_t*)arg; struct array_cache *ac; + int node = numa_node_id(); check_irq_off(); ac = ac_data(cachep); - spin_lock(&cachep->nodelists[numa_node_id()]->list_lock); - free_block(cachep, ac->entry, ac->avail); - spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock); + spin_lock(&cachep->nodelists[node]->list_lock); + free_block(cachep, ac->entry, ac->avail, node); + spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } @@ -2609,7 +2610,7 @@ done: /* * Caller needs to acquire correct kmem_list's list_lock */ -static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) +static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) { int i; struct kmem_list3 *l3; @@ -2618,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) void *objp = objpp[i]; struct slab *slabp; unsigned int objnr; - int nodeid = 0; slabp = GET_PAGE_SLAB(virt_to_page(objp)); - nodeid = slabp->nodeid; - l3 = cachep->nodelists[nodeid]; + l3 = cachep->nodelists[node]; list_del(&slabp->list); objnr = (objp - slabp->s_mem) / cachep->objsize; - check_spinlock_acquired_node(cachep, nodeid); + check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -2665,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) { int batchcount; struct kmem_list3 *l3; + int node = numa_node_id(); batchcount = ac->batchcount; #if DEBUG BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->nodelists[numa_node_id()]; + l3 = 
cachep->nodelists[node]; spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; @@ -2687,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) } } - free_block(cachep, ac->entry, batchcount); + free_block(cachep, ac->entry, batchcount, node); free_done: #if STATS { @@ -2752,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) } else { spin_lock(&(cachep->nodelists[nodeid])-> list_lock); - free_block(cachep, &objp, 1); + free_block(cachep, &objp, 1, nodeid); spin_unlock(&(cachep->nodelists[nodeid])-> list_lock); } @@ -2845,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i unsigned long save_flags; void *ptr; - if (nodeid == numa_node_id() || nodeid == -1) + if (nodeid == -1) return __cache_alloc(cachep, flags); if (unlikely(!cachep->nodelists[nodeid])) { @@ -3080,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep) if ((nc = cachep->nodelists[node]->shared)) free_block(cachep, nc->entry, - nc->avail); + nc->avail, node); l3->shared = new; if (!cachep->nodelists[node]->alien) { @@ -3161,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail); + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } @@ -3241,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep, if (tofree > ac->avail) { tofree = (ac->avail+1)/2; } - free_block(cachep, ac->entry, tofree); + free_block(cachep, ac->entry, tofree, node); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), sizeof(void*)*ac->avail);