You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
398 lines
14 KiB
398 lines
14 KiB
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// The representation of a DBImpl consists of a set of Versions.  The
// newest version is called "current".  Older versions may be kept
// around to provide a consistent view to live iterators.
//
// Each Version keeps track of a set of Table files per level.  The
// entire set of versions is maintained in a VersionSet.
//
// Version and VersionSet are thread-compatible, but require external
// synchronization on all accesses.
|
|
|
#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ |
|
#define STORAGE_LEVELDB_DB_VERSION_SET_H_ |
|
|
|
#include <map> |
|
#include <set> |
|
#include <vector> |
|
#include "db/dbformat.h" |
|
#include "db/version_edit.h" |
|
#include "port/port.h" |
|
#include "port/thread_annotations.h" |
|
|
|
namespace leveldb { |
|
|
|
namespace log { class Writer; }

// Forward declarations of the classes named by the declarations below.
class Compaction;
class Iterator;
class MemTable;
class TableBuilder;
class TableCache;
class Version;
class VersionSet;
class WritableFile;
|
|
|
// Return the smallest index i such that files[i]->largest >= key. |
|
// Return files.size() if there is no such file. |
|
// REQUIRES: "files" contains a sorted list of non-overlapping files. |
|
extern int FindFile(const InternalKeyComparator& icmp, |
|
const std::vector<FileMetaData*>& files, |
|
const Slice& key); |
|
|
|
// Returns true iff some file in "files" overlaps the user key range |
|
// [*smallest,*largest]. |
|
// smallest==NULL represents a key smaller than all keys in the DB. |
|
// largest==NULL represents a key largest than all keys in the DB. |
|
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges |
|
// in sorted order. |
|
extern bool SomeFileOverlapsRange( |
|
const InternalKeyComparator& icmp, |
|
bool disjoint_sorted_files, |
|
const std::vector<FileMetaData*>& files, |
|
const Slice* smallest_user_key, |
|
const Slice* largest_user_key); |
|
|
|
class Version { |
|
public: |
|
// Append to *iters a sequence of iterators that will |
|
// yield the contents of this Version when merged together. |
|
// REQUIRES: This version has been saved (see VersionSet::SaveTo) |
|
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); |
|
|
|
// Lookup the value for key. If found, store it in *val and |
|
// return OK. Else return a non-OK status. Fills *stats. |
|
// REQUIRES: lock is not held |
|
struct GetStats { |
|
FileMetaData* seek_file; |
|
int seek_file_level; |
|
}; |
|
Status Get(const ReadOptions&, const LookupKey& key, std::string* val, |
|
GetStats* stats); |
|
|
|
// Adds "stats" into the current state. Returns true if a new |
|
// compaction may need to be triggered, false otherwise. |
|
// REQUIRES: lock is held |
|
bool UpdateStats(const GetStats& stats); |
|
|
|
// Record a sample of bytes read at the specified internal key. |
|
// Samples are taken approximately once every config::kReadBytesPeriod |
|
// bytes. Returns true if a new compaction may need to be triggered. |
|
// REQUIRES: lock is held |
|
bool RecordReadSample(Slice key); |
|
|
|
// Reference count management (so Versions do not disappear out from |
|
// under live iterators) |
|
void Ref(); |
|
void Unref(); |
|
|
|
void GetOverlappingInputs( |
|
int level, |
|
const InternalKey* begin, // NULL means before all keys |
|
const InternalKey* end, // NULL means after all keys |
|
std::vector<FileMetaData*>* inputs); |
|
|
|
// Returns true iff some file in the specified level overlaps |
|
// some part of [*smallest_user_key,*largest_user_key]. |
|
// smallest_user_key==NULL represents a key smaller than all keys in the DB. |
|
// largest_user_key==NULL represents a key largest than all keys in the DB. |
|
bool OverlapInLevel(int level, |
|
const Slice* smallest_user_key, |
|
const Slice* largest_user_key); |
|
|
|
// Return the level at which we should place a new memtable compaction |
|
// result that covers the range [smallest_user_key,largest_user_key]. |
|
int PickLevelForMemTableOutput(const Slice& smallest_user_key, |
|
const Slice& largest_user_key); |
|
|
|
int NumFiles(int level) const { return files_[level].size(); } |
|
|
|
// Return a human readable string that describes this version's contents. |
|
std::string DebugString() const; |
|
|
|
private: |
|
friend class Compaction; |
|
friend class VersionSet; |
|
|
|
class LevelFileNumIterator; |
|
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; |
|
|
|
// Call func(arg, level, f) for every file that overlaps user_key in |
|
// order from newest to oldest. If an invocation of func returns |
|
// false, makes no more calls. |
|
// |
|
// REQUIRES: user portion of internal_key == user_key. |
|
void ForEachOverlapping(Slice user_key, Slice internal_key, |
|
void* arg, |
|
bool (*func)(void*, int, FileMetaData*)); |
|
|
|
VersionSet* vset_; // VersionSet to which this Version belongs |
|
Version* next_; // Next version in linked list |
|
Version* prev_; // Previous version in linked list |
|
int refs_; // Number of live refs to this version |
|
|
|
// List of files per level |
|
std::vector<FileMetaData*> files_[config::kNumLevels]; |
|
|
|
// Next file to compact based on seek stats. |
|
FileMetaData* file_to_compact_; |
|
int file_to_compact_level_; |
|
|
|
// Level that should be compacted next and its compaction score. |
|
// Score < 1 means compaction is not strictly needed. These fields |
|
// are initialized by Finalize(). |
|
double compaction_score_; |
|
int compaction_level_; |
|
|
|
explicit Version(VersionSet* vset) |
|
: vset_(vset), next_(this), prev_(this), refs_(0), |
|
file_to_compact_(NULL), |
|
file_to_compact_level_(-1), |
|
compaction_score_(-1), |
|
compaction_level_(-1) { |
|
} |
|
|
|
~Version(); |
|
|
|
// No copying allowed |
|
Version(const Version&); |
|
void operator=(const Version&); |
|
}; |
|
|
|
class VersionSet { |
|
public: |
|
VersionSet(const std::string& dbname, |
|
const Options* options, |
|
TableCache* table_cache, |
|
const InternalKeyComparator*); |
|
~VersionSet(); |
|
|
|
// Apply *edit to the current version to form a new descriptor that |
|
// is both saved to persistent state and installed as the new |
|
// current version. Will release *mu while actually writing to the file. |
|
// REQUIRES: *mu is held on entry. |
|
// REQUIRES: no other thread concurrently calls LogAndApply() |
|
Status LogAndApply(VersionEdit* edit, port::Mutex* mu) |
|
EXCLUSIVE_LOCKS_REQUIRED(mu); |
|
|
|
// Recover the last saved descriptor from persistent storage. |
|
Status Recover(bool *save_manifest); |
|
|
|
// Return the current version. |
|
Version* current() const { return current_; } |
|
|
|
// Return the current manifest file number |
|
uint64_t ManifestFileNumber() const { return manifest_file_number_; } |
|
|
|
// Allocate and return a new file number |
|
uint64_t NewFileNumber() { return next_file_number_++; } |
|
|
|
// Arrange to reuse "file_number" unless a newer file number has |
|
// already been allocated. |
|
// REQUIRES: "file_number" was returned by a call to NewFileNumber(). |
|
void ReuseFileNumber(uint64_t file_number) { |
|
if (next_file_number_ == file_number + 1) { |
|
next_file_number_ = file_number; |
|
} |
|
} |
|
|
|
// Return the number of Table files at the specified level. |
|
int NumLevelFiles(int level) const; |
|
|
|
// Return the combined file size of all files at the specified level. |
|
int64_t NumLevelBytes(int level) const; |
|
|
|
// Return the last sequence number. |
|
uint64_t LastSequence() const { return last_sequence_; } |
|
|
|
// Set the last sequence number to s. |
|
void SetLastSequence(uint64_t s) { |
|
assert(s >= last_sequence_); |
|
last_sequence_ = s; |
|
} |
|
|
|
// Mark the specified file number as used. |
|
void MarkFileNumberUsed(uint64_t number); |
|
|
|
// Return the current log file number. |
|
uint64_t LogNumber() const { return log_number_; } |
|
|
|
// Return the log file number for the log file that is currently |
|
// being compacted, or zero if there is no such log file. |
|
uint64_t PrevLogNumber() const { return prev_log_number_; } |
|
|
|
// Pick level and inputs for a new compaction. |
|
// Returns NULL if there is no compaction to be done. |
|
// Otherwise returns a pointer to a heap-allocated object that |
|
// describes the compaction. Caller should delete the result. |
|
Compaction* PickCompaction(); |
|
|
|
// Return a compaction object for compacting the range [begin,end] in |
|
// the specified level. Returns NULL if there is nothing in that |
|
// level that overlaps the specified range. Caller should delete |
|
// the result. |
|
Compaction* CompactRange( |
|
int level, |
|
const InternalKey* begin, |
|
const InternalKey* end); |
|
|
|
// Return the maximum overlapping data (in bytes) at next level for any |
|
// file at a level >= 1. |
|
int64_t MaxNextLevelOverlappingBytes(); |
|
|
|
// Create an iterator that reads over the compaction inputs for "*c". |
|
// The caller should delete the iterator when no longer needed. |
|
Iterator* MakeInputIterator(Compaction* c); |
|
|
|
// Returns true iff some level needs a compaction. |
|
bool NeedsCompaction() const { |
|
Version* v = current_; |
|
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); |
|
} |
|
|
|
// Add all files listed in any live version to *live. |
|
// May also mutate some internal state. |
|
void AddLiveFiles(std::set<uint64_t>* live); |
|
|
|
// Return the approximate offset in the database of the data for |
|
// "key" as of version "v". |
|
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); |
|
|
|
// Return a human-readable short (single-line) summary of the number |
|
// of files per level. Uses *scratch as backing store. |
|
struct LevelSummaryStorage { |
|
char buffer[100]; |
|
}; |
|
const char* LevelSummary(LevelSummaryStorage* scratch) const; |
|
|
|
private: |
|
class Builder; |
|
|
|
friend class Compaction; |
|
friend class Version; |
|
|
|
bool ReuseManifest(const std::string& dscname, const std::string& dscbase); |
|
|
|
void Finalize(Version* v); |
|
|
|
void GetRange(const std::vector<FileMetaData*>& inputs, |
|
InternalKey* smallest, |
|
InternalKey* largest); |
|
|
|
void GetRange2(const std::vector<FileMetaData*>& inputs1, |
|
const std::vector<FileMetaData*>& inputs2, |
|
InternalKey* smallest, |
|
InternalKey* largest); |
|
|
|
void SetupOtherInputs(Compaction* c); |
|
|
|
// Save current contents to *log |
|
Status WriteSnapshot(log::Writer* log); |
|
|
|
void AppendVersion(Version* v); |
|
|
|
Env* const env_; |
|
const std::string dbname_; |
|
const Options* const options_; |
|
TableCache* const table_cache_; |
|
const InternalKeyComparator icmp_; |
|
uint64_t next_file_number_; |
|
uint64_t manifest_file_number_; |
|
uint64_t last_sequence_; |
|
uint64_t log_number_; |
|
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted |
|
|
|
// Opened lazily |
|
WritableFile* descriptor_file_; |
|
log::Writer* descriptor_log_; |
|
Version dummy_versions_; // Head of circular doubly-linked list of versions. |
|
Version* current_; // == dummy_versions_.prev_ |
|
|
|
// Per-level key at which the next compaction at that level should start. |
|
// Either an empty string, or a valid InternalKey. |
|
std::string compact_pointer_[config::kNumLevels]; |
|
|
|
// No copying allowed |
|
VersionSet(const VersionSet&); |
|
void operator=(const VersionSet&); |
|
}; |
|
|
|
// A Compaction encapsulates information about a compaction. |
|
class Compaction { |
|
public: |
|
~Compaction(); |
|
|
|
// Return the level that is being compacted. Inputs from "level" |
|
// and "level+1" will be merged to produce a set of "level+1" files. |
|
int level() const { return level_; } |
|
|
|
// Return the object that holds the edits to the descriptor done |
|
// by this compaction. |
|
VersionEdit* edit() { return &edit_; } |
|
|
|
// "which" must be either 0 or 1 |
|
int num_input_files(int which) const { return inputs_[which].size(); } |
|
|
|
// Return the ith input file at "level()+which" ("which" must be 0 or 1). |
|
FileMetaData* input(int which, int i) const { return inputs_[which][i]; } |
|
|
|
// Maximum size of files to build during this compaction. |
|
uint64_t MaxOutputFileSize() const { return max_output_file_size_; } |
|
|
|
// Is this a trivial compaction that can be implemented by just |
|
// moving a single input file to the next level (no merging or splitting) |
|
bool IsTrivialMove() const; |
|
|
|
// Add all inputs to this compaction as delete operations to *edit. |
|
void AddInputDeletions(VersionEdit* edit); |
|
|
|
// Returns true if the information we have available guarantees that |
|
// the compaction is producing data in "level+1" for which no data exists |
|
// in levels greater than "level+1". |
|
bool IsBaseLevelForKey(const Slice& user_key); |
|
|
|
// Returns true iff we should stop building the current output |
|
// before processing "internal_key". |
|
bool ShouldStopBefore(const Slice& internal_key); |
|
|
|
// Release the input version for the compaction, once the compaction |
|
// is successful. |
|
void ReleaseInputs(); |
|
|
|
private: |
|
friend class Version; |
|
friend class VersionSet; |
|
|
|
Compaction(const Options* options, int level); |
|
|
|
int level_; |
|
uint64_t max_output_file_size_; |
|
Version* input_version_; |
|
VersionEdit edit_; |
|
|
|
// Each compaction reads inputs from "level_" and "level_+1" |
|
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs |
|
|
|
// State used to check for number of overlapping grandparent files |
|
// (parent == level_ + 1, grandparent == level_ + 2) |
|
std::vector<FileMetaData*> grandparents_; |
|
size_t grandparent_index_; // Index in grandparent_starts_ |
|
bool seen_key_; // Some output key has been seen |
|
int64_t overlapped_bytes_; // Bytes of overlap between current output |
|
// and grandparent files |
|
|
|
// State for implementing IsBaseLevelForKey |
|
|
|
// level_ptrs_ holds indices into input_version_->levels_: our state |
|
// is that we are positioned at one of the file ranges for each |
|
// higher level than the ones involved in this compaction (i.e. for |
|
// all L >= level_ + 2). |
|
size_t level_ptrs_[config::kNumLevels]; |
|
}; |
|
|
|
} // namespace leveldb |
|
|
|
#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_
|
|
|