// Returns the next file that should be probed for user_key_, or nullptr once
// the search has ended.
//
// The outer loop advances level by level (via PrepareNextLevel()); the inner
// loop walks the files of the current level starting at
// curr_index_in_curr_level_. Every call updates hit_file_level_ and
// is_hit_file_last_in_level_ for the file being examined, and
// returned_file_level_ for the file actually returned.
FdWithKeyRange* GetNextFile() {
  while (!search_ended_) {  // Loops over different levels.
    while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
      // Loops over all files in current level.
      FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
      hit_file_level_ = curr_level_;
      is_hit_file_last_in_level_ =
          curr_index_in_curr_level_ == curr_file_level_->num_files - 1;
      // Stays -1 unless the key is >= the file's smallest key, in which case
      // it holds the comparison against the file's largest key.
      int cmp_largest = -1;

      // Key-range filtering is only performed when there is more than one
      // level, or when the current level holds more than 3 files; otherwise
      // the file is returned without comparing against its key range.
      if (num_levels_ > 1 || curr_file_level_->num_files > 3) {
        // Check if key is within a file's range. If search left bound and
        // right bound point to the same file, we are sure key falls in
        // range.
        assert(curr_level_ == 0 ||
               curr_index_in_curr_level_ == start_index_in_curr_level_ ||
               user_comparator_->Compare(
                   user_key_, ExtractUserKey(f->smallest_key)) <= 0);

        int cmp_smallest = user_comparator_->Compare(
            user_key_, ExtractUserKey(f->smallest_key));
        if (cmp_smallest >= 0) {
          cmp_largest = user_comparator_->Compare(
              user_key_, ExtractUserKey(f->largest_key));
        }

        // Setup file search bound for the next level based on the
        // comparison results.
        if (curr_level_ > 0) {
          file_indexer_->GetNextLevelIndex(
              curr_level_, curr_index_in_curr_level_, cmp_smallest,
              cmp_largest, &search_left_bound_, &search_right_bound_);
        }

        // Key falls out of current file's range.
        if (cmp_smallest < 0 || cmp_largest > 0) {
          if (curr_level_ == 0) {
            // On level 0 simply move on to the next file of the same level.
            ++curr_index_in_curr_level_;
            continue;
          } else {
            // Search next level.
            break;
          }
        }
      }

      returned_file_level_ = curr_level_;
      if (curr_level_ > 0 && cmp_largest < 0) {
        // No more files to search in this level.
        search_ended_ = !PrepareNextLevel();
      } else {
        ++curr_index_in_curr_level_;
      }
      return f;
    }
    // Start searching next level.
    search_ended_ = !PrepareNextLevel();
  }
  // Search ended.
  return nullptr;
}
// Points to the leftmost file in a lower level that may contain a key
// which compares greater than the smallest key of a FileMetaData (upper
// level).
int32_t smallest_lb;
// Points to the leftmost file in a lower level that may contain a key
// which compares greater than the largest key of a FileMetaData (upper
// level).
int32_t largest_lb;
// Points to the rightmost file in a lower level that may contain a key
// which compares smaller than the smallest key of a FileMetaData (upper
// level).
int32_t smallest_rb;
// Points to the rightmost file in a lower level that may contain a key
// which compares smaller than the largest key of a FileMetaData (upper
// level).
int32_t largest_rb;
uint64_t fd_number = fd.GetNumber(); auto user_key = ExtractUserKey(k); // We use the user key as cache key instead of the internal key, // otherwise the whole cache would be invalidated every time the // sequence key increases. However, to support caching snapshot // reads, we append the sequence number (incremented by 1 to // distinguish from 0) only in this case. uint64_t seq_no = options.snapshot == nullptr ? 0 : 1 + GetInternalKeySeqno(k);
// Ensures `*handle` refers to a cached TableReader for the file `fd`.
//
// When `*handle` is still null after the (elided) lookup:
//   - if `no_io` is set, returns Status::Incomplete without opening the file;
//   - otherwise opens a reader through GetTableReader() and inserts it into
//     `cache_`, storing the resulting cache handle in `*handle`.
// A failure to open the reader is counted under NO_FILE_ERRORS and is not
// cached. Returns the status of the last operation performed.
Status TableCache::FindTable(const EnvOptions& env_options,
                             const InternalKeyComparator& internal_comparator,
                             const FileDescriptor& fd, Cache::Handle** handle,
                             const bool no_io, bool record_read_stats,
                             HistogramImpl* file_read_hist, bool skip_filters,
                             int level,
                             bool prefetch_index_and_filter_in_cache) {
  // ... (code elided in this excerpt; presumably it declares `s` and `key`
  // and performs the initial cache lookup that populates `*handle`) ...
  if (*handle == nullptr) {
    if (no_io) {
      // Don't do IO and return a not-found status.
      return Status::Incomplete("Table not found in table_cache, no_io is set");
    }
    unique_ptr<TableReader> table_reader;
    s = GetTableReader(env_options, internal_comparator, fd,
                       false /* sequential mode */, 0 /* readahead */,
                       record_read_stats, file_read_hist, &table_reader,
                       skip_filters, level,
                       prefetch_index_and_filter_in_cache);
    if (!s.ok()) {
      assert(table_reader == nullptr);
      RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
      // We do not cache error results so that if the error is transient,
      // or somebody repairs the file, we recover automatically.
    } else {
      s = cache_->Insert(key, table_reader.get(), 1,
                         &DeleteEntry<TableReader>, handle);
      if (s.ok()) {
        // Release ownership of table reader: after a successful insert the
        // cache entry (with its DeleteEntry deleter) is responsible for it.
        table_reader.release();
      }
    }
  }
  return s;
}
当读取完毕TableReader之后,RocksDB就需要从sst文件中get key了,也就是最终的key查找方式是在每个sst format class的Get方法中实现的。
1 2 3 4 5
if (s.ok()) {
  // Attach the replay log only for the duration of the table lookup and
  // detach it again afterwards.
  get_context->SetReplayLog(row_cache_entry);  // nullptr if no cache.
  s = t->Get(options, k, get_context, skip_filters);
  get_context->SetReplayLog(nullptr);
}