96 lines
2.8 KiB
C++
Raw Normal View History

// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "leveldb/filter_policy.h"
#include "leveldb/slice.h"
#include "util/hash.h"
namespace leveldb {
namespace {
static uint32_t BloomHash(const Slice& key) {
return Hash(key.data(), key.size(), 0xbc9f1d34);
}
class BloomFilterPolicy : public FilterPolicy {
private:
size_t bits_per_key_;
size_t k_;
public:
explicit BloomFilterPolicy(int bits_per_key)
: bits_per_key_(bits_per_key) {
// We intentionally round down to reduce probing cost a little bit
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
if (k_ < 1) k_ = 1;
if (k_ > 30) k_ = 30;
}
virtual const char* Name() const {
return "leveldb.BuiltinBloomFilter2";
}
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
// Compute bloom filter size (in both bits and bytes)
size_t bits = n * bits_per_key_;
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
size_t bytes = (bits + 7) / 8;
bits = bytes * 8;
const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
char* array = &(*dst)[init_size];
Squashed 'src/leveldb/' changes from 20ca81f..a31c8aa a31c8aa Add NewAppendableFile for win32 environment 1913d71 Merge upstream LevelDB 1.19 3080a45 Increase leveldb version to 1.19. fa6dc01 A zippy change broke test assumptions about the size of compressed output. Fix the tests by allowing more slop in zippy's behavior. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=123432472 06a191b fix problems in LevelDB's caching code a7bff69 Fix LevelDB build when asserts are enabled in release builds. (#367) ea992b4 Change std::uint64_t to uint64_t (#354) e84b5bd This CL fixes a bug encountered when reading records from leveldb files that have been split, as in a [] input task split. 3211343 Deleted redundant null ptr check prior to delete. 7306ef8 Merge pull request #348 from randomascii/master 6b18316 Fix signed/unsigned mismatch on VC++ builds adbe3eb Putting build artifacts in subdirectory. 2d0320a Merge pull request #329 from ralphtheninja/travis-badge dd1c3c3 add travis build badge 43fcf23 Merge pull request #328 from cmumford/master 9fcae61 Added a Travis CI build file. dac40d2 Merge pull request #284 from ideawu/master 8ec241a Merge pull request #317 from falvojr/patch-1 5d36bed Merge pull request #272 from vapier/master 4753c9b Added a contributors section to README.md e2446d0 Merge pull request #275 from paulirish/patch-1 706b7f8 Resolve race when getting approximate-memory-usage property 3c9ff3c Only compiling TrimSpace on linux. f8d205c Including atomic_pointer.h in port_posix 889de31 Let LevelDB use xcrun to determine Xcode.app path instead of using a hardcoded path. 528c2bc Add "approximate-memory-usage" property to leveldb::DB::GetProperty 359b6bc Add leveldb::Cache::Prune 50e77a8 Fix size_t/int comparison/conversion issues in leveldb. 5208e79 Added leveldb::Status::IsInvalidArgument() method. ce45404 Suppress error reporting after seeking but before a valid First or Full record is encountered. b9afa1f include <assert> -> <cassert> edf2939 Update README.md 65190ac Will not reuse manifest if reuse_logs options is false. ac1d69d LevelDB now attempts to reuse the preceding MANIFEST and log file when re-opened. 76bba13 fix indent 8fcceb2 log compaction output file's level along with number 0e0f074 documentation. improved link c85addc readme: improved documentation link ceff6f1 Fix Android/MIPS build. 77948e7 Add benchmark that measures cost of repeatedly opening the database. 34ad72e Move header guard below copyright banner. a75d435 Clean up layering of storage/leveldb/... b234f65 Added a new fault injection test. c4c38f9 Add arm64 support to leveldb. cea9b10 Fixed incorrect comment wording for Iterator::Seek. c00c569 Deleted old README file. git-subtree-dir: src/leveldb git-subtree-split: a31c8aa408d5594830f7cb20ead1ef1dff51b79e
2016-12-01 16:14:45 -08:00
for (int i = 0; i < n; i++) {
// Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006].
uint32_t h = BloomHash(keys[i]);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k_; j++) {
const uint32_t bitpos = h % bits;
array[bitpos/8] |= (1 << (bitpos % 8));
h += delta;
}
}
}
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
const size_t len = bloom_filter.size();
if (len < 2) return false;
const char* array = bloom_filter.data();
const size_t bits = (len - 1) * 8;
// Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters.
const size_t k = array[len-1];
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
uint32_t h = BloomHash(key);
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
for (size_t j = 0; j < k; j++) {
const uint32_t bitpos = h % bits;
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
h += delta;
}
return true;
}
};
}
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
return new BloomFilterPolicy(bits_per_key);
}
} // namespace leveldb