一、Slice
代码语言:C复制//变长字符串,出现的比较多
class LEVELDB_EXPORT Slice {
private:
const char* data_;
size_t size_;
};
LEVELDB_EXPORT表示只有在namespace leveldb下才能可见,限制作用域。
二、BloomFilter
代码语言:c复制//sst的data block使用了bloom filter,放在meta index block
static uint32_t BloomHash(const Slice& key) {
return Hash(key.data(), key.size(), 0xbc9f1d34);
}
class BloomFilterPolicy : public FilterPolicy {
private:
size_t bits_per_key_;
size_t k_; //hash function num
public:
//记录bits_per_key,计算最优的hash函数个数
explicit BloomFilterPolicy(int bits_per_key)
: bits_per_key_(bits_per_key) {
// We intentionally round down to reduce probing cost a little bit
// bits_per_key = m / n
// k_ = ln(2) * bits_per_key
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
// [1, 30]个hash函数
if (k_ < 1) k_ = 1;
if (k_ > 30) k_ = 30;
}
virtual const char* Name() const {
return "leveldb.BuiltinBloomFilter2";
}
//dst就是bloom_filter
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
// Compute bloom filter size (in both bits and bytes)
// 计算需要的bit数
size_t bits = n * bits_per_key_;
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
//计算需要的bytes数,最少8bytes
size_t bytes = (bits 7) / 8;
bits = bytes * 8;
//dst[init_size : init_size bytes]写入过滤器内容,默认全为0
//dst[init_size bytes]写入hash函数个数
const size_t init_size = dst->size();
dst->resize(init_size bytes, 0);
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
char* array = &(*dst)[init_size];//更新array <-> dst[init_size : init_size bytes]
for (int i = 0; i < n; i ) {
// Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006].
//实际上并没有k个函数, 而是旋转哈希值
uint32_t h = BloomHash(keys[i]);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k_; j ) {
//计算并更新对应的bit为1
const uint32_t bitpos = h % bits;
array[bitpos/8] |= (1 << (bitpos % 8));
h = delta;
}
}
}
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
const size_t len = bloom_filter.size();
if (len < 2) return false;
const char* array = bloom_filter.data();
const size_t bits = (len - 1) * 8;
const size_t k = array[len-1];
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
uint32_t h = BloomHash(key);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k; j ) {
const uint32_t bitpos = h % bits;
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
h = delta;
}
return true;
}
};
}
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
return new BloomFilterPolicy(bits_per_key);
}
sst文件是以key来排序的,L0层的sst文件可能有重叠,而L1--L7层是不会有重叠的。通过bloom filter可以来判断某个sst中是否拥有查找的key,可以减少io次数。sst中每个block都是用了bloom filter加速查找。
三、Arena
代码语言:c复制#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>
class Arena {
public:
Arena();
Arena(const Arena&) = delete;
Arena& operator=(const Arena&) = delete;
~Arena();
// Return a pointer to a newly allocated memory block of "bytes" bytes.
char* Allocate(size_t bytes);
// Allocate memory with the normal alignment guarantees provided by malloc.
char* AllocateAligned(size_t bytes);
// Returns an estimate of the total memory usage of data allocated
// by the arena.
size_t MemoryUsage() const {
return memory_usage_.load(std::memory_order_relaxed);
}
private:
//指定大小分配
char* AllocateFallback(size_t bytes);
//分配新块
char* AllocateNewBlock(size_t block_bytes);
// Allocation state
char* alloc_ptr_;
size_t alloc_bytes_remaining_;
// Array of new[] allocated memory blocks
std::vector<char*> blocks_;
// Total memory usage of the arena.
std::atomic<size_t> memory_usage_;
};
inline char* Arena::Allocate(size_t bytes) {
// The semantics of what to return are a bit messy if we allow
// 0-byte allocations, so we disallow them here (we don't need
// them for our internal use).
assert(bytes > 0);
if (bytes <= alloc_bytes_remaining_) {
char* result = alloc_ptr_;
alloc_ptr_ = bytes;
alloc_bytes_remaining_ -= bytes;
return result;
}
return AllocateFallback(bytes);
}
static const int kBlockSize = 4096;
Arena::Arena()
: alloc_ptr_(nullptr), alloc_bytes_remaining_(0), memory_usage_(0) {}
Arena::~Arena() {
for (size_t i = 0; i < blocks_.size(); i ) {
delete[] blocks_[i];
}
}
char* Arena::AllocateFallback(size_t bytes) {
//超过1/4 block,直接分配新块
if (bytes > kBlockSize / 4) {
// Object is more than a quarter of our block size. Allocate it separately
// to avoid wasting too much space in leftover bytes.
char* result = AllocateNewBlock(bytes);
return result;
}
// We waste the remaining space in the current block.
alloc_ptr_ = AllocateNewBlock(kBlockSize);
alloc_bytes_remaining_ = kBlockSize;
char* result = alloc_ptr_;
alloc_ptr_ = bytes;
alloc_bytes_remaining_ -= bytes;
return result;
}
//字节对齐分配
char* Arena::AllocateAligned(size_t bytes) {
//一般是64位
const int align = (sizeof(void*) > 8) ? sizeof(void*) : 8;
static_assert((align & (align - 1)) == 0,
"Pointer size should be a power of 2");
//alloc_ptr很可能不是8的倍数, 因此需要补齐剩余的字节
size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align - 1);
size_t slop = (current_mod == 0 ? 0 : align - current_mod);
size_t needed = bytes slop;
char* result;
if (needed <= alloc_bytes_remaining_) {
result = alloc_ptr_ slop;
alloc_ptr_ = needed;
alloc_bytes_remaining_ -= needed;
} else {
// AllocateFallback always returned aligned memory
result = AllocateFallback(bytes);
}
assert((reinterpret_cast<uintptr_t>(result) & (align - 1)) == 0);
return result;
}
char* Arena::AllocateNewBlock(size_t block_bytes) {
char* result = new char[block_bytes];
blocks_.push_back(result);
memory_usage_.fetch_add(block_bytes sizeof(char*),
std::memory_order_relaxed);
return result;
}
Arena是简易的内存分配器,只负责分配, 并没有考虑回收利用, 而是直接释放, 总的来说还是可以减少系统调用次数。
四、SkipList
1 数据结构
代码语言:c复制template <typename Key, class Comparator>
class SkipList {
private:
struct Node;
public:
// Insert key into the list.
// REQUIRES: nothing that compares equal to key is currently in the list.
void Insert(const Key& key);
private:
enum { kMaxHeight = 12 };
inline int GetMaxHeight() const {
return max_height_.load(std::memory_order_relaxed);
}
Node* NewNode(const Key& key, int height);
int RandomHeight();
bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
// Return true if key is greater than the data stored in "n"
bool KeyIsAfterNode(const Key& key, Node* n) const;
Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
// Immutable after construction
Comparator const compare_;
Arena* const arena_; // Arena used for allocations of nodes
Node* const head_;
// Modified only by Insert(). Read racily by readers, but stale
// values are ok.
std::atomic<int> max_height_; // Height of the entire list
// Read/written only by Insert().
Random rnd_;
};
// Implementation details follow
template <typename Key, class Comparator>
struct SkipList<Key, Comparator>::Node {
explicit Node(const Key& k) : key(k) {}
Key const key;
// Accessors/mutators for links. Wrapped in methods so we can
// add the appropriate barriers as necessary.
Node* Next(int n) {
assert(n >= 0);
// Use an 'acquire load' so that we observe a fully initialized
// version of the returned Node.
return next_[n].load(std::memory_order_acquire);
}
void SetNext(int n, Node* x) {
assert(n >= 0);
// Use a 'release store' so that anybody who reads through this
// pointer observes a fully initialized version of the inserted node.
next_[n].store(x, std::memory_order_release);
}
// No-barrier variants that can be safely used in a few locations.
Node* NoBarrier_Next(int n) {
assert(n >= 0);
return next_[n].load(std::memory_order_relaxed);
}
void NoBarrier_SetNext(int n, Node* x) {
assert(n >= 0);
next_[n].store(x, std::memory_order_relaxed);
}
private:
// 弹性数组,最少一个元素
std::atomic<Node*> next_[1];
};
2 核心方法
代码语言:c复制//求高度
template <typename Key, class Comparator>
int SkipList<Key, Comparator>::RandomHeight() {
// Increase height with probability 1 in kBranching
static const unsigned int kBranching = 4;
int height = 1;
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
height ;
}
assert(height > 0);
assert(height <= kMaxHeight);
return height;
}
//判断目标key是否在节点n后边
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
// null n is considered infinite
return (n != nullptr) && (compare_(n->key, key) < 0);
}
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
Node** prev) const {
Node* x = head_;
int level = GetMaxHeight() - 1;
while (true) {
Node* next = x->Next(level);
//next!=nullptr && next.key<key继续向右移动,
if (KeyIsAfterNode(key, next)) {
// Keep searching in this list
x = next;
} else {
//当遇到第一个比key大的节点时,构建前向指针数组。
if (prev != nullptr) prev[level] = x;
if (level == 0) {
return next;
} else {
// Switch to next list
level--;
}
}
}
}
//插入
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
//构建新节点前向指针数组
Node* prev[kMaxHeight];
//寻找大于等于key的结点并获取它的前驱指针数组
Node* x = FindGreaterOrEqual(key, prev);
// Our data structure does not allow duplicate insertion
assert(x == nullptr || !Equal(key, x->key));
int height = RandomHeight();
if (height > GetMaxHeight()) {
//如果高度超出历史最大高度,则该结点前驱指针是head_
for (int i = GetMaxHeight(); i < height; i ) {
prev[i] = head_;
}
max_height_.store(height, std::memory_order_relaxed);
}
x = NewNode(key, height);
for (int i = 0; i < height; i ) {
// NoBarrier_SetNext() suffices since we will add a barrier when
// we publish a pointer to "x" in prev[i].
x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
prev[i]->SetNext(i, x);
}
}
跳表最重要的就是FindGreaterOrEqual和Insert方法,而在leveldb中是不需要删除的。