众所周知 Objective-C 在查找方法的 imp 时会先查找缓存,那么缓存是如何实现的呢?本文就记录下阅读 runtime 源码的过程。
objc-runtime-new.h
先从 objc-runtime-new.h 中 objc_class 的定义开始。
代码语言:javascript复制struct objc_class : objc_object {
// Class ISA;
Class superclass;
cache_t cache;
// ...
}
struct cache_t {
private:
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
// ...
}
在 objc_class 中有一个结构体 cache_t,这个就是方法的缓存。而 cache_t 的第一个成员变量是 _bucketsAndMaybeMask
,熟悉数据结构相关知识的同学应该会马上联想到哈希表。没错,方法缓存就是通过哈希表实现的。
再仔细看看 cache_t ,根据CACHE_MASK_STORAGE
宏定义了不同的字段,而 CACHE_MASK_STORAGE
定义在 objc-config.h 中。
#if defined(__arm64__) && __LP64__
#if TARGET_OS_EXCLAVEKIT
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
#elif TARGET_OS_OSX || TARGET_OS_SIMULATOR
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
#else
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#endif
#elif defined(__arm64__) && !__LP64__
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED
#endif
代码语言:javascript复制#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
//...
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
//...
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// ...
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// ...
#else
#error Unknown cache mask storage type.
#endif
以 __arm64__ && __LP64__
为例:
定义了成员变量:
maskShift
mask 偏移量 48maskZeroBits
mask 后方必须为 0 的 bits 4maxMask
mask 最大值 1111 1111 1111 1111,即 16 bits- 1 << (64 - maskShift) 表示 1 左移 16 位,结果为 1 0000 0000 0000 0000
- 1 0000 0000 0000 0000 - 1 ,结果为 0 1111 1111 1111 1111
bucketsMask
buckets 的 mask 1111 ... 1111 (44 个 1)- 1 << (maskShift - maskZeroBits) 表示 1 左移 44 位,结果为 1 0000 ... 0000 (44 个 0)
- 1 0000 ... 0000 (44 个 0) - 1, 结果为 1111 ... 1111 (44 个 1)
定义了以下方法:
代码语言:javascript复制private:
mask_t mask() const;
static bucket_t *allocateBuckets(mask_t newCapacity);
public:
unsigned capacity() const;
struct bucket_t *buckets() const;
Class cls() const;
void insert(SEL sel, IMP imp, id receiver);
下面到 objc-cache.mm 看看具体实现
objc-cache.mm
从最主要 insert 方法开始,大致分为三个部分。
Part 1 initialize 判断
代码语言:javascript复制void cache_t::insert(SEL sel, IMP imp, id receiver)
{
// Never cache before initialize is done
if (slowpath(!cls()->isInitialized())) {
return;
}
ASSERT(sel != 0 && cls()->isInitialized());
// Part 2
// Part 3
}
第一部分是对 cls
initialize
方法是否执行完成的判断,如果没有则直接 return
Part 2 初始化及扩容
代码语言:javascript复制void cache_t::insert(SEL sel, IMP imp, id receiver)
{
// Part 1
// Use the cache as-is if until we exceed our expected fill ratio.
mask_t newOccupied = occupied() 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;
reallocate(oldCapacity, capacity, /* freeOld */false);
}
else if (fastpath(newOccupied CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
// Cache is less than 3/4 or 7/8 full. Use it as-is.
}
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true);
}
// Part 3
}
isConstantEmptyCache
首先会获取哈希表当前的元素个数 occupied 以及容量 capacity, 然后通过isConstantEmptyCache
判断哈希表是否为空
bool cache_t::isConstantEmptyCache() const
{
return
occupied() == 0 &&
buckets() == emptyBucketsForCapacity(capacity(), false);
}
struct bucket_t *cache_t::buckets() const
{
uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
return (bucket_t *)(addr & bucketsMask);
}
其中通过 buckets 函数获取了当前桶数组,是将_bucketsAndMaybeMask
指针与 bucketsMask
做位与,前面我们知道了bucketsMask
的值为 44,所以 _bucketsAndMaybeMask
中的低 44 位存储的是 buckets 数组的地址。
INIT_CACHE_SIZE
再回到 insert 中,如果哈希表为空则进行初始化分配空间,大小为 INIT_CACHE_SIZE
, INIT_CACHE_SIZE
为枚举值
/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
// When we have a cache end marker it fills a bucket slot, so having a
// initial cache size of 2 buckets would not be efficient when one of the
// slots is always filled with the end marker. So start with a cache size
// 4 buckets.
INIT_CACHE_SIZE_LOG2 = 2,
#else
// Allow an initial bucket size of 2 buckets, since a large number of
// classes, especially metaclasses, have very few imps, and we support
// the ability to fill 100% of the cache before resizing.
INIT_CACHE_SIZE_LOG2 = 1,
#endif
INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2),
};
这里又引入了一个新的宏定义 CACHE_END_MARKER
#if __arm__ || __x86_64__ || __i386__
// objc_msgSend has few registers available.
// Cache scan increments and wraps at special end-marking bucket.
#define CACHE_END_MARKER 1
#elif __arm64__ && !__LP64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
#elif __arm64__ && __LP64__
// objc_msgSend has lots of registers available.
// Cache scan decrements. No end marker needed.
#define CACHE_END_MARKER 0
#else
#error unknown architecture
#endif
从注释和命名可以得知,这是用于标记缓存的终止位置,在 __arm64__ && __LP64__
的 case 下有很多寄存器,缓存扫描是递减的,不需要 marker 。
所以这里缓存的初始大小是 1 << 1 , 即 2 。
reallocate
再再回到 insert 中,调用了 reallocate
函数
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();
bucket_t *newBuckets = allocateBuckets(newCapacity);
// Cache's old contents are not propagated.
// This is thought to save cache memory at the cost of extra cache fills.
// fixme re-measure this
ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
setBucketsAndMask(newBuckets, newCapacity - 1);
if (freeOld) {
collect_free(oldBuckets, oldCapacity);
}
}
获取旧的 buckets,调用allocateBuckets
开辟新的空间,调用 setBucketsAndMask
设置新的 buckets 和 mask 到 _bucketsAndMaybeMask
, mask 值为当前 buckets 的最大 index。最后再释放旧的 buckets
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
uintptr_t buckets = (uintptr_t)newBuckets;
uintptr_t mask = (uintptr_t)newMask;
ASSERT(buckets <= bucketsMask);
ASSERT(mask <= maxMask);
_bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_release);
_occupied = 0;
}
这里用到前面在 .h 中看到的 maskShift(48),所以这里可以得出 _bucketsAndMaybeMask
的高 16 位是 mask,低 44 位是 buckets,中间 4 位是 maskZeroBits
为 0
cache_fill_ratio
再再再回到 insert 中,会调用 cache_fill_ratio
判断是否需要扩容
#if __arm__ || __x86_64__ || __i386__
// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
return capacity * 3 / 4;
}
#elif __arm64__ && !__LP64__
// Historical fill ratio of 75% (since the new objc runtime was introduced).
static inline mask_t cache_fill_ratio(mask_t capacity) {
return capacity * 3 / 4;
}
#elif __arm64__ && __LP64__
// Allow 87.5% fill ratio in the fast path for all cache sizes.
// Increasing the cache fill ratio reduces the fragmentation and wasted space
// in imp-caches at the cost of potentially increasing the average lookup of
// a selector in imp-caches by increasing collision chains. Another potential
// change is that cache table resizes / resets happen at different moments.
static inline mask_t cache_fill_ratio(mask_t capacity) {
return capacity * 7 / 8;
}
#else
#error unknown architecture
#endif
Part 3 插入数据
再再再再回到 insert 中,看看第三部分
代码语言:javascript复制void cache_t::insert(SEL sel, IMP imp, id receiver)
{
// Part 1
// Part 2
bucket_t *b = buckets();
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
if (fastpath(b[i].sel() == 0)) {
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));
}
cache_hash
通过 cache_hash
函数计算 sel 在 buckets 中的索引
// Class points to cache. SEL is key. Cache buckets store SEL IMP.
// Caches are never built in the dyld shared cache.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
uintptr_t value = (uintptr_t)sel;
//...
return (mask_t)(value & mask);
}
和哈希表的基本思想一致,把 sel 与 mask 做位与运算,等同于模运算 % (mask 1)。
do - while
再再再再再回到 insert 中,是一个 do - while 循环
- 如果 bucket 的 sel 值为 0,则表示这是一个空槽可以插入数据。
- 如果 sel 的值和当前 sel 相等,则表示其他线程已经缓存过该方法。
- 如果都不满足,则会调用 cache_next 查找下一个位置,是哈希表解决哈希冲突方式的开放寻址-线性探测
#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
return (i 1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
return i ? i-1 : mask;
}
#else
在 __arm64__ && __LP64__
case 下,只要 i 不为 0,就会一直递减,和前面 CACHE_END_MARKER
的注释相对应。
总结
- 方法缓存是基于哈希表的数据结构实现的
- 确定索引的哈希算法是将 sel 与 buckets 大小做位与运算,即取余数
- 哈希表解决哈希冲突的方式是线性探查
以上内容基于 objc4-906.2 纯理论阅读所写,省去了部分逻辑分支,如有错误,欢迎指正。