You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
109 lines
3.6 KiB
109 lines
3.6 KiB
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style license that can be |
|
// found in the LICENSE file. See the AUTHORS file for names of contributors. |
|
// |
|
// BlockBuilder generates blocks where keys are prefix-compressed: |
|
// |
|
// When we store a key, we drop the prefix shared with the previous |
|
// string. This helps reduce the space requirement significantly. |
|
// Furthermore, once every K keys, we do not apply the prefix |
|
// compression and store the entire key. We call this a "restart |
|
// point". The tail end of the block stores the offsets of all of the |
|
// restart points, and can be used to do a binary search when looking |
|
// for a particular key. Values are stored as-is (without compression) |
|
// immediately following the corresponding key. |
|
// |
|
// An entry for a particular key-value pair has the form: |
|
// shared_bytes: varint32 |
|
// unshared_bytes: varint32 |
|
// value_length: varint32 |
|
// key_delta: char[unshared_bytes] |
|
// value: char[value_length] |
|
// shared_bytes == 0 for restart points. |
|
// |
|
// The trailer of the block has the form: |
|
// restarts: uint32[num_restarts] |
|
// num_restarts: uint32 |
|
// restarts[i] contains the offset within the block of the ith restart point. |
|
|
|
#include "table/block_builder.h" |
|
|
|
#include <algorithm> |
|
#include <assert.h> |
|
#include "leveldb/comparator.h" |
|
#include "leveldb/table_builder.h" |
|
#include "util/coding.h" |
|
|
|
namespace leveldb { |
|
|
|
BlockBuilder::BlockBuilder(const Options* options) |
|
: options_(options), |
|
restarts_(), |
|
counter_(0), |
|
finished_(false) { |
|
assert(options->block_restart_interval >= 1); |
|
restarts_.push_back(0); // First restart point is at offset 0 |
|
} |
|
|
|
void BlockBuilder::Reset() { |
|
buffer_.clear(); |
|
restarts_.clear(); |
|
restarts_.push_back(0); // First restart point is at offset 0 |
|
counter_ = 0; |
|
finished_ = false; |
|
last_key_.clear(); |
|
} |
|
|
|
size_t BlockBuilder::CurrentSizeEstimate() const { |
|
return (buffer_.size() + // Raw data buffer |
|
restarts_.size() * sizeof(uint32_t) + // Restart array |
|
sizeof(uint32_t)); // Restart array length |
|
} |
|
|
|
Slice BlockBuilder::Finish() { |
|
// Append restart array |
|
for (size_t i = 0; i < restarts_.size(); i++) { |
|
PutFixed32(&buffer_, restarts_[i]); |
|
} |
|
PutFixed32(&buffer_, restarts_.size()); |
|
finished_ = true; |
|
return Slice(buffer_); |
|
} |
|
|
|
void BlockBuilder::Add(const Slice& key, const Slice& value) { |
|
Slice last_key_piece(last_key_); |
|
assert(!finished_); |
|
assert(counter_ <= options_->block_restart_interval); |
|
assert(buffer_.empty() // No values yet? |
|
|| options_->comparator->Compare(key, last_key_piece) > 0); |
|
size_t shared = 0; |
|
if (counter_ < options_->block_restart_interval) { |
|
// See how much sharing to do with previous string |
|
const size_t min_length = std::min(last_key_piece.size(), key.size()); |
|
while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { |
|
shared++; |
|
} |
|
} else { |
|
// Restart compression |
|
restarts_.push_back(buffer_.size()); |
|
counter_ = 0; |
|
} |
|
const size_t non_shared = key.size() - shared; |
|
|
|
// Add "<shared><non_shared><value_size>" to buffer_ |
|
PutVarint32(&buffer_, shared); |
|
PutVarint32(&buffer_, non_shared); |
|
PutVarint32(&buffer_, value.size()); |
|
|
|
// Add string delta to buffer_ followed by value |
|
buffer_.append(key.data() + shared, non_shared); |
|
buffer_.append(value.data(), value.size()); |
|
|
|
// Update state |
|
last_key_.resize(shared); |
|
last_key_.append(key.data() + shared, non_shared); |
|
assert(Slice(last_key_) == key); |
|
counter_++; |
|
} |
|
|
|
} // namespace leveldb
|
|
|