]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #ifndef ROCKSDB_LITE | |
7 | #include "table/plain_table_builder.h" | |
8 | ||
9 | #include <assert.h> | |
10 | ||
11 | #include <string> | |
12 | #include <limits> | |
13 | #include <map> | |
14 | ||
15 | #include "rocksdb/comparator.h" | |
16 | #include "rocksdb/env.h" | |
17 | #include "rocksdb/filter_policy.h" | |
18 | #include "rocksdb/options.h" | |
19 | #include "rocksdb/table.h" | |
20 | #include "table/plain_table_factory.h" | |
21 | #include "db/dbformat.h" | |
22 | #include "table/block_builder.h" | |
23 | #include "table/bloom_block.h" | |
24 | #include "table/plain_table_index.h" | |
25 | #include "table/format.h" | |
26 | #include "table/meta_blocks.h" | |
27 | #include "util/coding.h" | |
28 | #include "util/crc32c.h" | |
29 | #include "util/file_reader_writer.h" | |
30 | #include "util/stop_watch.h" | |
31 | ||
32 | namespace rocksdb { | |
33 | ||
34 | namespace { | |
35 | ||
36 | // a utility that helps writing block content to the file | |
37 | // @offset will advance if @block_contents was successfully written. | |
38 | // @block_handle the block handle this particular block. | |
39 | Status WriteBlock(const Slice& block_contents, WritableFileWriter* file, | |
40 | uint64_t* offset, BlockHandle* block_handle) { | |
41 | block_handle->set_offset(*offset); | |
42 | block_handle->set_size(block_contents.size()); | |
43 | Status s = file->Append(block_contents); | |
44 | ||
45 | if (s.ok()) { | |
46 | *offset += block_contents.size(); | |
47 | } | |
48 | return s; | |
49 | } | |
50 | ||
51 | } // namespace | |
52 | ||
// Magic numbers for plain table files.
// kPlainTableMagicNumber was obtained by running
//    echo rocksdb.table.plain | sha1sum
// and keeping the leading 64 bits of the digest.
extern const uint64_t kPlainTableMagicNumber = 0x8242229663BF9564ULL;
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4F3418EB7A8F13B8ULL;
58 | ||
59 | PlainTableBuilder::PlainTableBuilder( | |
11fdf7f2 | 60 | const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions, |
7c673cae FG |
61 | const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* |
62 | int_tbl_prop_collector_factories, | |
63 | uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len, | |
64 | EncodingType encoding_type, size_t index_sparseness, | |
65 | uint32_t bloom_bits_per_key, const std::string& column_family_name, | |
66 | uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, | |
67 | bool store_index_in_file) | |
68 | : ioptions_(ioptions), | |
11fdf7f2 | 69 | moptions_(moptions), |
7c673cae FG |
70 | bloom_block_(num_probes), |
71 | file_(file), | |
72 | bloom_bits_per_key_(bloom_bits_per_key), | |
73 | huge_page_tlb_size_(huge_page_tlb_size), | |
11fdf7f2 | 74 | encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), |
7c673cae FG |
75 | index_sparseness), |
76 | store_index_in_file_(store_index_in_file), | |
11fdf7f2 | 77 | prefix_extractor_(moptions.prefix_extractor.get()) { |
7c673cae FG |
78 | // Build index block and save it in the file if hash_table_ratio > 0 |
79 | if (store_index_in_file_) { | |
80 | assert(hash_table_ratio > 0 || IsTotalOrderMode()); | |
11fdf7f2 TL |
81 | index_builder_.reset(new PlainTableIndexBuilder( |
82 | &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness, | |
83 | hash_table_ratio, huge_page_tlb_size_)); | |
7c673cae FG |
84 | properties_.user_collected_properties |
85 | [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use | |
86 | } | |
87 | ||
88 | properties_.fixed_key_len = user_key_len; | |
89 | ||
90 | // for plain table, we put all the data in a big chuck. | |
91 | properties_.num_data_blocks = 1; | |
92 | // Fill it later if store_index_in_file_ == true | |
93 | properties_.index_size = 0; | |
94 | properties_.filter_size = 0; | |
95 | // To support roll-back to previous version, now still use version 0 for | |
96 | // plain encoding. | |
97 | properties_.format_version = (encoding_type == kPlain) ? 0 : 1; | |
98 | properties_.column_family_id = column_family_id; | |
99 | properties_.column_family_name = column_family_name; | |
11fdf7f2 TL |
100 | properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr |
101 | ? moptions_.prefix_extractor->Name() | |
7c673cae FG |
102 | : "nullptr"; |
103 | ||
104 | std::string val; | |
105 | PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType())); | |
106 | properties_.user_collected_properties | |
107 | [PlainTablePropertyNames::kEncodingType] = val; | |
108 | ||
109 | for (auto& collector_factories : *int_tbl_prop_collector_factories) { | |
110 | table_properties_collectors_.emplace_back( | |
111 | collector_factories->CreateIntTblPropCollector(column_family_id)); | |
112 | } | |
113 | } | |
114 | ||
115 | PlainTableBuilder::~PlainTableBuilder() { | |
116 | } | |
117 | ||
118 | void PlainTableBuilder::Add(const Slice& key, const Slice& value) { | |
119 | // temp buffer for metadata bytes between key and value. | |
120 | char meta_bytes_buf[6]; | |
121 | size_t meta_bytes_buf_size = 0; | |
122 | ||
123 | ParsedInternalKey internal_key; | |
124 | if (!ParseInternalKey(key, &internal_key)) { | |
125 | assert(false); | |
126 | return; | |
127 | } | |
128 | if (internal_key.type == kTypeRangeDeletion) { | |
129 | status_ = Status::NotSupported("Range deletion unsupported"); | |
130 | return; | |
131 | } | |
132 | ||
133 | // Store key hash | |
134 | if (store_index_in_file_) { | |
11fdf7f2 | 135 | if (moptions_.prefix_extractor == nullptr) { |
7c673cae FG |
136 | keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); |
137 | } else { | |
138 | Slice prefix = | |
11fdf7f2 | 139 | moptions_.prefix_extractor->Transform(internal_key.user_key); |
7c673cae FG |
140 | keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); |
141 | } | |
142 | } | |
143 | ||
144 | // Write value | |
145 | assert(offset_ <= std::numeric_limits<uint32_t>::max()); | |
146 | auto prev_offset = static_cast<uint32_t>(offset_); | |
147 | // Write out the key | |
148 | encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, | |
149 | &meta_bytes_buf_size); | |
150 | if (SaveIndexInFile()) { | |
151 | index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); | |
152 | } | |
153 | ||
154 | // Write value length | |
155 | uint32_t value_size = static_cast<uint32_t>(value.size()); | |
156 | char* end_ptr = | |
157 | EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); | |
158 | assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); | |
159 | meta_bytes_buf_size = end_ptr - meta_bytes_buf; | |
160 | file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size)); | |
161 | ||
162 | // Write value | |
163 | file_->Append(value); | |
164 | offset_ += value_size + meta_bytes_buf_size; | |
165 | ||
166 | properties_.num_entries++; | |
167 | properties_.raw_key_size += key.size(); | |
168 | properties_.raw_value_size += value.size(); | |
494da23a TL |
169 | if (internal_key.type == kTypeDeletion || |
170 | internal_key.type == kTypeSingleDeletion) { | |
171 | properties_.num_deletions++; | |
172 | } else if (internal_key.type == kTypeMerge) { | |
173 | properties_.num_merge_operands++; | |
174 | } | |
7c673cae FG |
175 | |
176 | // notify property collectors | |
177 | NotifyCollectTableCollectorsOnAdd( | |
178 | key, value, offset_, table_properties_collectors_, ioptions_.info_log); | |
179 | } | |
180 | ||
181 | Status PlainTableBuilder::status() const { return status_; } | |
182 | ||
183 | Status PlainTableBuilder::Finish() { | |
184 | assert(!closed_); | |
185 | closed_ = true; | |
186 | ||
187 | properties_.data_size = offset_; | |
188 | ||
189 | // Write the following blocks | |
190 | // 1. [meta block: bloom] - optional | |
191 | // 2. [meta block: index] - optional | |
192 | // 3. [meta block: properties] | |
193 | // 4. [metaindex block] | |
194 | // 5. [footer] | |
195 | ||
196 | MetaIndexBuilder meta_index_builer; | |
197 | ||
198 | if (store_index_in_file_ && (properties_.num_entries > 0)) { | |
199 | assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max()); | |
200 | Status s; | |
201 | BlockHandle bloom_block_handle; | |
202 | if (bloom_bits_per_key_ > 0) { | |
203 | bloom_block_.SetTotalBits( | |
204 | &arena_, | |
205 | static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_, | |
206 | ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log); | |
207 | ||
208 | PutVarint32(&properties_.user_collected_properties | |
209 | [PlainTablePropertyNames::kNumBloomBlocks], | |
210 | bloom_block_.GetNumBlocks()); | |
211 | ||
212 | bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); | |
213 | ||
214 | Slice bloom_finish_result = bloom_block_.Finish(); | |
215 | ||
216 | properties_.filter_size = bloom_finish_result.size(); | |
217 | s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle); | |
218 | ||
219 | if (!s.ok()) { | |
220 | return s; | |
221 | } | |
222 | meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); | |
223 | } | |
224 | BlockHandle index_block_handle; | |
225 | Slice index_finish_result = index_builder_->Finish(); | |
226 | ||
227 | properties_.index_size = index_finish_result.size(); | |
228 | s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle); | |
229 | ||
230 | if (!s.ok()) { | |
231 | return s; | |
232 | } | |
233 | ||
234 | meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, | |
235 | index_block_handle); | |
236 | } | |
237 | ||
238 | // Calculate bloom block size and index block size | |
239 | PropertyBlockBuilder property_block_builder; | |
240 | // -- Add basic properties | |
241 | property_block_builder.AddTableProperty(properties_); | |
242 | ||
243 | property_block_builder.Add(properties_.user_collected_properties); | |
244 | ||
245 | // -- Add user collected properties | |
246 | NotifyCollectTableCollectorsOnFinish(table_properties_collectors_, | |
247 | ioptions_.info_log, | |
248 | &property_block_builder); | |
249 | ||
250 | // -- Write property block | |
251 | BlockHandle property_block_handle; | |
252 | auto s = WriteBlock( | |
253 | property_block_builder.Finish(), | |
254 | file_, | |
255 | &offset_, | |
256 | &property_block_handle | |
257 | ); | |
258 | if (!s.ok()) { | |
259 | return s; | |
260 | } | |
261 | meta_index_builer.Add(kPropertiesBlock, property_block_handle); | |
262 | ||
263 | // -- write metaindex block | |
264 | BlockHandle metaindex_block_handle; | |
265 | s = WriteBlock( | |
266 | meta_index_builer.Finish(), | |
267 | file_, | |
268 | &offset_, | |
269 | &metaindex_block_handle | |
270 | ); | |
271 | if (!s.ok()) { | |
272 | return s; | |
273 | } | |
274 | ||
275 | // Write Footer | |
276 | // no need to write out new footer if we're using default checksum | |
277 | Footer footer(kLegacyPlainTableMagicNumber, 0); | |
278 | footer.set_metaindex_handle(metaindex_block_handle); | |
279 | footer.set_index_handle(BlockHandle::NullBlockHandle()); | |
280 | std::string footer_encoding; | |
281 | footer.EncodeTo(&footer_encoding); | |
282 | s = file_->Append(footer_encoding); | |
283 | if (s.ok()) { | |
284 | offset_ += footer_encoding.size(); | |
285 | } | |
286 | ||
287 | return s; | |
288 | } | |
289 | ||
290 | void PlainTableBuilder::Abandon() { | |
291 | closed_ = true; | |
292 | } | |
293 | ||
294 | uint64_t PlainTableBuilder::NumEntries() const { | |
295 | return properties_.num_entries; | |
296 | } | |
297 | ||
298 | uint64_t PlainTableBuilder::FileSize() const { | |
299 | return offset_; | |
300 | } | |
301 | ||
302 | } // namespace rocksdb | |
303 | #endif // ROCKSDB_LITE |