You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
95 lines
2.9 KiB
95 lines
2.9 KiB
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
#include "leveldb/filter_policy.h" |
|
|
|
#include "leveldb/slice.h" |
|
#include "util/hash.h" |
|
|
|
namespace leveldb { |
|
|
|
namespace { |
|
static uint32_t BloomHash(const Slice& key) { |
|
return Hash(key.data(), key.size(), 0xbc9f1d34); |
|
} |
|
|
|
class BloomFilterPolicy : public FilterPolicy { |
|
private: |
|
size_t bits_per_key_; |
|
size_t k_; |
|
|
|
public: |
|
explicit BloomFilterPolicy(int bits_per_key) |
|
: bits_per_key_(bits_per_key) { |
|
// We intentionally round down to reduce probing cost a little bit |
|
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2) |
|
if (k_ < 1) k_ = 1; |
|
if (k_ > 30) k_ = 30; |
|
} |
|
|
|
virtual const char* Name() const { |
|
return "leveldb.BuiltinBloomFilter2"; |
|
} |
|
|
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { |
|
// Compute bloom filter size (in both bits and bytes) |
|
size_t bits = n * bits_per_key_; |
|
|
|
// For small n, we can see a very high false positive rate. Fix it |
|
// by enforcing a minimum bloom filter length. |
|
if (bits < 64) bits = 64; |
|
|
|
size_t bytes = (bits + 7) / 8; |
|
bits = bytes * 8; |
|
|
|
const size_t init_size = dst->size(); |
|
dst->resize(init_size + bytes, 0); |
|
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter |
|
char* array = &(*dst)[init_size]; |
|
for (size_t i = 0; i < n; i++) { |
|
// Use double-hashing to generate a sequence of hash values. |
|
// See analysis in [Kirsch,Mitzenmacher 2006]. |
|
uint32_t h = BloomHash(keys[i]); |
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
|
for (size_t j = 0; j < k_; j++) { |
|
const uint32_t bitpos = h % bits; |
|
array[bitpos/8] |= (1 << (bitpos % 8)); |
|
h += delta; |
|
} |
|
} |
|
} |
|
|
|
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { |
|
const size_t len = bloom_filter.size(); |
|
if (len < 2) return false; |
|
|
|
const char* array = bloom_filter.data(); |
|
const size_t bits = (len - 1) * 8; |
|
|
|
// Use the encoded k so that we can read filters generated by |
|
// bloom filters created using different parameters. |
|
const size_t k = array[len-1]; |
|
if (k > 30) { |
|
// Reserved for potentially new encodings for short bloom filters. |
|
// Consider it a match. |
|
return true; |
|
} |
|
|
|
uint32_t h = BloomHash(key); |
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
|
for (size_t j = 0; j < k; j++) { |
|
const uint32_t bitpos = h % bits; |
|
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; |
|
h += delta; |
|
} |
|
return true; |
|
} |
|
}; |
|
} |
|
|
|
// Builds a BloomFilterPolicy configured with `bits_per_key` and returns it
// through the FilterPolicy interface. The object is allocated with `new`;
// this function does not release it.
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  const FilterPolicy* const policy = new BloomFilterPolicy(bits_per_key);
  return policy;
}
|
|
|
} // namespace leveldb
|
|
|