Linux内核中内存cache的实现
作者: 来源:zz 发表时间:2007-05-02
浏览次数:
字号:大 中 小
4.3 分配单元kmem_cache_(z)alloc()
有kmem_cache_alloc()和kmem_cache_zalloc()两个函数,后者只是增加将分配出的单元空间清零的
操作。这两个函数返回分配好的cache单元空间
/* mm/slab.c */
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
*
* Allocate an object from this cache. The flags are only relevant
* if the cache has no available objects.
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
return __cache_alloc(cachep, flags, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc);
/**
* kmem_cache_zalloc - Allocate an object. The memory is set to zero.
* @cache: The cache to allocate from.
* @flags: See kmalloc().
*
* Allocate an object from this cache and set the allocated memory to zero.
* The flags are only relevant if the cache has no available objects.
*/
void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
{
void *ret = __cache_alloc(cache, flags, __builtin_return_address(0));
if (ret)
memset(ret, 0, obj_size(cache));
return ret;
}
EXPORT_SYMBOL(kmem_cache_zalloc);
这两个函数核心都是调用__cahce_alloc函数来分配cache:
// 两个下划线的cache_alloc
// 在内核配置了CONFIG_NUMA时NUMA_BUILD为1,否则为0
static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
gfp_t flags, void *caller)
{
unsigned long save_flags;
void *objp = NULL;
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
if (unlikely(NUMA_BUILD &&
current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
// 进入此处的可能性比较小
objp = alternate_node_alloc(cachep, flags);
if (!objp)
// 主要是进入该函数分配,这个是4下划线的cache_cache
objp = ____cache_alloc(cachep, flags);
/*
* We may just have run out of memory on the local node.
* __cache_alloc_node() knows how to locate memory on other nodes
*/
if (NUMA_BUILD && !objp)
objp = __cache_alloc_node(cachep, flags, numa_node_id());
local_irq_restore(save_flags);
// 实际为objp=objp, 没啥操作
objp = cache_alloc_debugcheck_after(cachep, flags, objp,
caller);
prefetchw(objp);
return objp;
}
// 重点还是这个四个下划线的cache_alloc
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
struct array_cache *ac;
check_irq_off();
// 每个cpu对应的cache数组
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
// 当前cache单元空间中有元素,不用重新分配,将缓冲的cache返回
STATS_INC_ALLOCHIT(cachep);
ac->touched = 1;
objp = ac->entry[--ac->avail];
} else {
// 否则新分配cache单元
STATS_INC_ALLOCMISS(cachep);
objp = cache_alloc_refill(cachep, flags);
}
return objp;
}
// 分配cache单元
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
int batchcount;
struct kmem_list3 *l3;
struct array_cache *ac;
int node;
// cpu到node值的转换
node = numa_node_id();
check_irq_off();
// 每个cpu对应的cache数组
ac = cpu_cache_get(cachep);
retry:
// 一次批量分配的数量, 分配是批量进行, 这样不用每次请求都分配操作一次
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
/*
* If there was little recent activity on this cache, then
* perform only a partial refill. Otherwise we could generate
* refill bouncing.
*/
batchcount = BATCHREFILL_LIMIT;
}
// 和CPU对应的具体list3链表
l3 = cachep->nodelists[node];
BUG_ON(ac->avail > 0 || !l3);
spin_lock(&l3->list_lock);
/* See if we can refill from the shared array */
// 可从共享的数组中获取空间
if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
goto alloc_done;
// 批量循环
while (batchcount > 0) {
struct list_head *entry;
struct slab *slabp;
/* Get slab alloc is to come from. */
// 从部分使用的slab链表中获取链表元素
entry = l3->slabs_partial.next;
if (entry == &l3->slabs_partial) {
// 已经到链表头,说明该部分使用的slab链表已经都用完了
// 得从空闲slab链表中找空间了
l3->free_touched = 1;
entry = l3->slabs_free.next;
if (entry == &l3->slabs_free)
// 空闲slab链表也用完了, 整个cache该增加了
goto must_grow;
}
// 获取可用的slab指针
slabp = list_entry(entry, struct slab, list);
check_slabp(cachep, slabp);
check_spinlock_acquired(cachep);
// 从该slab块中批量提取可用的对象数
while (slabp->inuse < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
// avail记录了实际分配出的对象数
ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
node);
}
check_slabp(cachep, slabp);
/* move slabp to correct slabp list: */
// 把该slab先从所在链表断开
list_del(&slabp->list);
// 根据是否slab中的对象已经用完,将slab挂到全部使用链表或部分使用链表
if (slabp->free == BUFCTL_END)
list_add(&slabp->list, &l3->slabs_full);
else
list_add(&slabp->list, &l3->slabs_partial);
}
must_grow:
// 已经分配了一些对象出去, 减少空闲对象数
l3->free_objects -= ac->avail;
alloc_done:
spin_unlock(&l3->list_lock);
if (unlikely(!ac->avail)) {
// avail为0, 表示没有可分配的对象了, cache必须增大了
int x;
// 增加cache中内存,增加slab数
x = cache_grow(cachep, flags, node);
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
if (!x && ac->avail == 0) /* no objects in sight? abort */
return NULL;
if (!ac->avail) /* objects refilled by interrupt? */
goto retry;
}
ac->touched = 1;
// 返回对象指针
return ac->entry[--ac->avail];
}
4.4 释放cache单元kmem_cache_free
其实不是真正完全释放, 只是将对象空间添回cache的空闲slab链表中而已
/**
* kmem_cache_free - Deallocate an object
* @cachep: The cache the allocation was from.
* @objp: The previously allocated object.
*
* Free an object which was previously allocated from this
* cache.
*/
// 其实只是__cache_free()的包裹函数
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
BUG_ON(virt_to_cache(objp) != cachep);
local_irq_save(flags);
__cache_free(cachep, objp);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kmem_cache_free);
/*
* Release an obj back to its cache. If the obj has a constructed state, it must
* be in this state _before_ it is released. Called with disabled ints.
*/
static inline void __cache_free(struct kmem_cache *cachep, void *objp)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
if (cache_free_alien(cachep, objp))
return;
if (likely(ac->avail < ac->limit)) {
// 空闲值小于限值
STATS_INC_FREEHIT(cachep);
// 只是简单将要释放的cache单元添加到空闲单元数组中
// avail增加表示可用对象增加
ac->entry[ac->avail++] = objp;
return;
} else {
// 空闲数大于等于限值
STATS_INC_FREEMISS(cachep);
// 释放一些节点
cache_flusharray(cachep, ac);
// 再将要释放的cache单元添加到空闲单元数组中
// avail增加表示可用对象增加
ac->entry[ac->avail++] = objp;
}
}
4.5 摧毁cache结构
这个一般是在模块退出函数中进行清理工作时调用的,如果已经编到内核了, 那这个函数基本不会被
调用:
/**
* kmem_cache_destroy - delete a cache
* @cachep: the cache to destroy
*
* Remove a struct kmem_cache object from the slab cache.
*
* It is expected this function will be called by a module when it is
* unloaded. This will remove the cache completely, and avoid a duplicate
* cache being allocated each time a module is loaded and unloaded, if the
* module doesn't have persistent in-kernel storage across loads and unloads.
*
* The cache must be empty before calling this function.
*
* The caller must guarantee that noone will allocate memory from the cache
* during the kmem_cache_destroy().
*/
void kmem_cache_destroy(struct kmem_cache *cachep)
{
BUG_ON(!cachep || in_interrupt());
/* Don't let CPUs to come and go */
lock_cpu_hotplug();
/* Find the cache in the chain of caches. */
mutex_lock(&cache_chain_mutex);
/*
* the chain is never empty, cache_cache is never destroyed
*/
// cache的第一个元素cache_cache是静态量,该链表永远不会空
// 从cache链表中删除cache
list_del(&cachep->next);
mutex_unlock(&cache_chain_mutex);
// 尽可能释放cache中的slab单元块
if (__cache_shrink(cachep)) {
slab_error(cachep, "Can't free all objects");
mutex_lock(&cache_chain_mutex);
list_add(&cachep->next, &cache_chain);
mutex_unlock(&cache_chain_mutex);
unlock_cpu_hotplug();
return;
}
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
synchronize_rcu();
// 释放cache
__kmem_cache_destroy(cachep);
unlock_cpu_hotplug();
}
EXPORT_SYMBOL(kmem_cache_destroy);
// 真正的摧毁cache函数
static void __kmem_cache_destroy(struct kmem_cache *cachep)
{
int i;
struct kmem_list3 *l3;
// 释放cache中所有CPU的数组
for_each_online_cpu(i)
kfree(cachep->array[i]);
/* NUMA: free the list3 structures */
// 释放list3中的所有内存
for_each_online_node(i) {
l3 = cachep->nodelists[i];
if (l3) {
kfree(l3->shared);
free_alien_cache(l3->alien);
kfree(l3);
}
}
// 释放cache本身
kmem_cache_free(&cache_cache, cachep);
}
4.6 缩减cache
该函数尽可能地释放cache中的slab块, 当cache空闲空间太多时会释放掉一些内存供其他内核部分使
用.
/**
* kmem_cache_shrink - Shrink a cache.
* @cachep: The cache to shrink.
*
* Releases as many slabs as possible for a cache.
* To help debugging, a zero exit status indicates all slabs were released.
*/
// 只是一个包裹函数
int kmem_cache_shrink(struct kmem_cache *cachep)
{
BUG_ON(!cachep || in_interrupt());
return __cache_shrink(cachep);
}
EXPORT_SYMBOL(kmem_cache_shrink);
static int __cache_shrink(struct kmem_cache *cachep)
{
int ret = 0, i = 0;
struct kmem_list3 *l3;
// 释放cache中每个CPU对应的空间
drain_cpu_caches(cachep);
check_irq_on();
for_each_online_node(i) {
// 释放每个节点的list3
l3 = cachep->nodelists[i];
if (!l3)
continue;
// 将slab从slab_free中释放
drain_freelist(cachep, l3, l3->free_objects);
ret += !list_empty(&l3->slabs_full) ||
!list_empty(&l3->slabs_partial);
}
return (ret ? 1 : 0);
}
static void drain_cpu_caches(struct kmem_cache *cachep)
{
struct kmem_list3 *l3;
int node;
on_each_cpu(do_drain, cachep, 1, 1);
check_irq_on();
for_each_online_node(node) {
l3 = cachep->nodelists[node];
if (l3 && l3->alien)
// 释放cache的list3的alien部分
drain_alien_cache(cachep, l3->alien);
}
for_each_online_node(node) {
l3 = cachep->nodelists[node];
if (l3)
// 释放list3的数组空间
drain_array(cachep, l3, l3->shared, 1, node);
}
}
/*
* Remove slabs from the list of free slabs.
* Specify the number of slabs to drain in tofree.
*
* Returns the actual number of slabs released.
*/
static int drain_freelist(struct kmem_cache *cache,
struct kmem_list3 *l3, int tofree)
{
struct list_head *p;
int nr_freed;
struct slab *slabp;
nr_freed = 0;
// 从slabs_free链表释放
while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
spin_lock_irq(&l3->list_lock);
p = l3->slabs_free.prev;
if (p == &l3->slabs_free) {
spin_unlock_irq(&l3->list_lock);
goto out;
}
// 获取slab
slabp = list_entry(p, struct slab, list);
#if DEBUG
BUG_ON(slabp->inuse);
#endif
// 将slab从链表中删除
list_del(&slabp->list);
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
*/
// 空闲对象数减少一个slab中的对象数
l3->free_objects -= cache->num;
spin_unlock_irq(&l3->list_lock);
// 释放slab
slab_destroy(cache, slabp);
nr_freed++;
}
out:
return nr_freed;
}
4.7 kmalloc和kfree
kmalloc也是通过cache来实现的, 只不过每此kmalloc的大小不同, 因此是从不同的cache中分配:
/* include/linux/slab.h */
// 注意kmalloc是在头文件中定义的
static inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
// 以下是找一个对象大小刚好大于等于size的cache
int i = 0;
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include "kmalloc_sizes.h"
#undef CACHE
{
extern void __you_cannot_kmalloc_that_much(void);
__you_cannot_kmalloc_that_much();
}
found:
// 实际还是通过kmem_cache_alloc来分配内存空间, 因此也是cache
return kmem_cache_alloc((flags & GFP_DMA) ?
malloc_sizes[i].cs_dmacachep :
malloc_sizes[i].cs_cachep, flags);
}
// 通过该函数最后也是由__cache_alloc()函数来分配空间
return __kmalloc(size, flags);
}
// 这是kmalloc_sizes.h文件内容, 实际就是定义CACHE中可用的对象大小
// 普通情况下最大是128K, 也就是kmalloc能分配的最大内存量
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU)
CACHE(262144)
#endif
#ifndef CONFIG_MMU
CACHE(524288)
CACHE(1048576)
#ifdef CONFIG_LARGE_ALLOCS
CACHE(2097152)
CACHE(4194304)
CACHE(8388608)
CACHE(16777216)
CACHE(33554432)
#endif /* CONFIG_LARGE_ALLOCS */
#endif /* CONFIG_MMU
/* mm/slab.c */
// kfree实际也是调用__cache_free来释放空间
void kfree(const void *objp)
{
struct kmem_cache *c;
unsigned long flags;
if (unlikely(!objp))
return;
local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kfree);
5. 结论
cache的使用使得在频繁增加删除对象的处理效率得到提高, 这也就是为什么普通情况下
从/proc/meminfo中看Linux的空闲内存不多的原因,因为很多内存都是cache的,没有真正释放