]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/exec/key_encode.h
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / exec / key_encode.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #pragma once
19
20 #include <cstdint>
21 #include <memory>
22 #include <vector>
23
24 #include "arrow/compute/exec/util.h"
25 #include "arrow/memory_pool.h"
26 #include "arrow/result.h"
27 #include "arrow/status.h"
28 #include "arrow/util/bit_util.h"
29
30 namespace arrow {
31 namespace compute {
32
33 class KeyColumnMetadata;
34
35 /// Converts between key representation as a collection of arrays for
36 /// individual columns and another representation as a single array of rows
37 /// combining data from all columns into one value.
38 /// This conversion is reversible.
39 /// Row-oriented storage is beneficial when there is a need for random access
40 /// of individual rows and at the same time all included columns are likely to
41 /// be accessed together, as in the case of hash table key.
42 class KeyEncoder {
43 public:
44 struct KeyEncoderContext {
45 bool has_avx2() const {
46 return (hardware_flags & arrow::internal::CpuInfo::AVX2) > 0;
47 }
48 int64_t hardware_flags;
49 util::TempVectorStack* stack;
50 };
51
/// Description of a storage format of a single key column as needed
/// for the purpose of row encoding.
struct KeyColumnMetadata {
  /// Default construction yields a well-defined state (both fields zeroed)
  /// instead of leaving the members indeterminate.
  KeyColumnMetadata() = default;
  /// \param is_fixed_length_in value stored in is_fixed_length
  /// \param fixed_length_in value stored in fixed_length
  KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in)
      : is_fixed_length(is_fixed_length_in), fixed_length(fixed_length_in) {}
  /// Is column storing a varying-length binary, using offsets array
  /// to find a beginning of a value, or is it a fixed-length binary.
  bool is_fixed_length = false;
  /// For a fixed-length binary column: number of bytes per value.
  /// Zero has a special meaning, indicating a bit vector with one bit per value.
  /// For a varying-length binary column: number of bytes per offset.
  uint32_t fixed_length = 0;
};
66
67 /// Description of a storage format for rows produced by encoder.
68 struct KeyRowMetadata {
69 /// Is row a varying-length binary, using offsets array to find a beginning of a row,
70 /// or is it a fixed-length binary.
71 bool is_fixed_length;
72
73 /// For a fixed-length binary row, common size of rows in bytes,
74 /// rounded up to the multiple of alignment.
75 ///
76 /// For a varying-length binary, size of all encoded fixed-length key columns,
77 /// including lengths of varying-length columns, rounded up to the multiple of string
78 /// alignment.
79 uint32_t fixed_length;
80
81 /// Offset within a row to the array of 32-bit offsets within a row of
82 /// ends of varbinary fields.
83 /// Used only when the row is not fixed-length, zero for fixed-length row.
84 /// There are N elements for N varbinary fields.
85 /// Each element is the offset within a row of the first byte after
86 /// the corresponding varbinary field bytes in that row.
87 /// If varbinary fields begin at aligned addresses, than the end of the previous
88 /// varbinary field needs to be rounded up according to the specified alignment
89 /// to obtain the beginning of the next varbinary field.
90 /// The first varbinary field starts at offset specified by fixed_length,
91 /// which should already be aligned.
92 uint32_t varbinary_end_array_offset;
93
94 /// Fixed number of bytes per row that are used to encode null masks.
95 /// Null masks indicate for a single row which of its key columns are null.
96 /// Nth bit in the sequence of bytes assigned to a row represents null
97 /// information for Nth field according to the order in which they are encoded.
98 int null_masks_bytes_per_row;
99
100 /// Power of 2. Every row will start at the offset aligned to that number of bytes.
101 int row_alignment;
102
103 /// Power of 2. Must be no greater than row alignment.
104 /// Every non-power-of-2 binary field and every varbinary field bytes
105 /// will start aligned to that number of bytes.
106 int string_alignment;
107
108 /// Metadata of encoded columns in their original order.
109 std::vector<KeyColumnMetadata> column_metadatas;
110
111 /// Order in which fields are encoded.
112 std::vector<uint32_t> column_order;
113
114 /// Offsets within a row to fields in their encoding order.
115 std::vector<uint32_t> column_offsets;
116
117 /// Rounding up offset to the nearest multiple of alignment value.
118 /// Alignment must be a power of 2.
119 static inline uint32_t padding_for_alignment(uint32_t offset,
120 int required_alignment) {
121 ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
122 return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
123 (required_alignment - 1));
124 }
125
126 /// Rounding up offset to the beginning of next column,
127 /// chosing required alignment based on the data type of that column.
128 static inline uint32_t padding_for_alignment(uint32_t offset, int string_alignment,
129 const KeyColumnMetadata& col_metadata) {
130 if (!col_metadata.is_fixed_length ||
131 ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
132 return 0;
133 } else {
134 return padding_for_alignment(offset, string_alignment);
135 }
136 }
137
138 /// Returns an array of offsets within a row of ends of varbinary fields.
139 inline const uint32_t* varbinary_end_array(const uint8_t* row) const {
140 ARROW_DCHECK(!is_fixed_length);
141 return reinterpret_cast<const uint32_t*>(row + varbinary_end_array_offset);
142 }
143 inline uint32_t* varbinary_end_array(uint8_t* row) const {
144 ARROW_DCHECK(!is_fixed_length);
145 return reinterpret_cast<uint32_t*>(row + varbinary_end_array_offset);
146 }
147
148 /// Returns the offset within the row and length of the first varbinary field.
149 inline void first_varbinary_offset_and_length(const uint8_t* row, uint32_t* offset,
150 uint32_t* length) const {
151 ARROW_DCHECK(!is_fixed_length);
152 *offset = fixed_length;
153 *length = varbinary_end_array(row)[0] - fixed_length;
154 }
155
156 /// Returns the offset within the row and length of the second and further varbinary
157 /// fields.
158 inline void nth_varbinary_offset_and_length(const uint8_t* row, int varbinary_id,
159 uint32_t* out_offset,
160 uint32_t* out_length) const {
161 ARROW_DCHECK(!is_fixed_length);
162 ARROW_DCHECK(varbinary_id > 0);
163 const uint32_t* varbinary_end = varbinary_end_array(row);
164 uint32_t offset = varbinary_end[varbinary_id - 1];
165 offset += padding_for_alignment(offset, string_alignment);
166 *out_offset = offset;
167 *out_length = varbinary_end[varbinary_id] - offset;
168 }
169
170 uint32_t encoded_field_order(uint32_t icol) const { return column_order[icol]; }
171
172 uint32_t encoded_field_offset(uint32_t icol) const { return column_offsets[icol]; }
173
174 uint32_t num_cols() const { return static_cast<uint32_t>(column_metadatas.size()); }
175
176 uint32_t num_varbinary_cols() const;
177
178 void FromColumnMetadataVector(const std::vector<KeyColumnMetadata>& cols,
179 int in_row_alignment, int in_string_alignment);
180
181 bool is_compatible(const KeyRowMetadata& other) const;
182 };
183
184 class KeyRowArray {
185 public:
186 KeyRowArray();
187 Status Init(MemoryPool* pool, const KeyRowMetadata& metadata);
188 void Clean();
189 Status AppendEmpty(uint32_t num_rows_to_append, uint32_t num_extra_bytes_to_append);
190 Status AppendSelectionFrom(const KeyRowArray& from, uint32_t num_rows_to_append,
191 const uint16_t* source_row_ids);
192 const KeyRowMetadata& metadata() const { return metadata_; }
193 int64_t length() const { return num_rows_; }
194 const uint8_t* data(int i) const {
195 ARROW_DCHECK(i >= 0 && i <= max_buffers_);
196 return buffers_[i];
197 }
198 uint8_t* mutable_data(int i) {
199 ARROW_DCHECK(i >= 0 && i <= max_buffers_);
200 return mutable_buffers_[i];
201 }
202 const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
203 uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
204 const uint8_t* null_masks() const { return null_masks_->data(); }
205 uint8_t* null_masks() { return null_masks_->mutable_data(); }
206
207 bool has_any_nulls(const KeyEncoderContext* ctx) const;
208
209 private:
210 Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
211 Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
212
213 int64_t size_null_masks(int64_t num_rows);
214 int64_t size_offsets(int64_t num_rows);
215 int64_t size_rows_fixed_length(int64_t num_rows);
216 int64_t size_rows_varying_length(int64_t num_bytes);
217 void update_buffer_pointers();
218
219 static constexpr int64_t padding_for_vectors = 64;
220 MemoryPool* pool_;
221 KeyRowMetadata metadata_;
222 /// Buffers can only expand during lifetime and never shrink.
223 std::unique_ptr<ResizableBuffer> null_masks_;
224 std::unique_ptr<ResizableBuffer> offsets_;
225 std::unique_ptr<ResizableBuffer> rows_;
226 static constexpr int max_buffers_ = 3;
227 const uint8_t* buffers_[max_buffers_];
228 uint8_t* mutable_buffers_[max_buffers_];
229 int64_t num_rows_;
230 int64_t rows_capacity_;
231 int64_t bytes_capacity_;
232
233 // Mutable to allow lazy evaluation
234 mutable int64_t num_rows_for_has_any_nulls_;
235 mutable bool has_any_nulls_;
236 };
237
238 /// A lightweight description of an array representing one of key columns.
239 class KeyColumnArray {
240 public:
241 KeyColumnArray() = default;
242 /// Create as a mix of buffers according to the mask from two descriptions
243 /// (Nth bit is set to 0 if Nth buffer from the first input
244 /// should be used and is set to 1 otherwise).
245 /// Metadata is inherited from the first input.
246 KeyColumnArray(const KeyColumnMetadata& metadata, const KeyColumnArray& left,
247 const KeyColumnArray& right, int buffer_id_to_replace);
248 /// Create for reading
249 KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length,
250 const uint8_t* buffer0, const uint8_t* buffer1, const uint8_t* buffer2,
251 int bit_offset0 = 0, int bit_offset1 = 0);
252 /// Create for writing
253 KeyColumnArray(const KeyColumnMetadata& metadata, int64_t length, uint8_t* buffer0,
254 uint8_t* buffer1, uint8_t* buffer2, int bit_offset0 = 0,
255 int bit_offset1 = 0);
256 /// Create as a window view of original description that is offset
257 /// by a given number of rows.
258 /// The number of rows used in offset must be divisible by 8
259 /// in order to not split bit vectors within a single byte.
260 KeyColumnArray(const KeyColumnArray& from, int64_t start, int64_t length);
261 uint8_t* mutable_data(int i) {
262 ARROW_DCHECK(i >= 0 && i <= max_buffers_);
263 return mutable_buffers_[i];
264 }
265 const uint8_t* data(int i) const {
266 ARROW_DCHECK(i >= 0 && i <= max_buffers_);
267 return buffers_[i];
268 }
269 uint32_t* mutable_offsets() { return reinterpret_cast<uint32_t*>(mutable_data(1)); }
270 const uint32_t* offsets() const { return reinterpret_cast<const uint32_t*>(data(1)); }
271 const KeyColumnMetadata& metadata() const { return metadata_; }
272 int64_t length() const { return length_; }
273 int bit_offset(int i) const {
274 ARROW_DCHECK(i >= 0 && i < max_buffers_);
275 return bit_offset_[i];
276 }
277
278 private:
279 static constexpr int max_buffers_ = 3;
280 const uint8_t* buffers_[max_buffers_];
281 uint8_t* mutable_buffers_[max_buffers_];
282 KeyColumnMetadata metadata_;
283 int64_t length_;
284 // Starting bit offset within the first byte (between 0 and 7)
285 // to be used when accessing buffers that store bit vectors.
286 int bit_offset_[max_buffers_ - 1];
287 };
288
289 void Init(const std::vector<KeyColumnMetadata>& cols, KeyEncoderContext* ctx,
290 int row_alignment, int string_alignment);
291
292 const KeyRowMetadata& row_metadata() { return row_metadata_; }
293
294 void PrepareEncodeSelected(int64_t start_row, int64_t num_rows,
295 const std::vector<KeyColumnArray>& cols);
296 Status EncodeSelected(KeyRowArray* rows, uint32_t num_selected,
297 const uint16_t* selection);
298
299 /// Decode a window of row oriented data into a corresponding
300 /// window of column oriented storage.
301 /// The output buffers need to be correctly allocated and sized before
302 /// calling each method.
303 /// For that reason decoding is split into two functions.
304 /// The output of the first one, that processes everything except for
305 /// varying length buffers, can be used to find out required varying
306 /// length buffers sizes.
307 void DecodeFixedLengthBuffers(int64_t start_row_input, int64_t start_row_output,
308 int64_t num_rows, const KeyRowArray& rows,
309 std::vector<KeyColumnArray>* cols);
310
311 void DecodeVaryingLengthBuffers(int64_t start_row_input, int64_t start_row_output,
312 int64_t num_rows, const KeyRowArray& rows,
313 std::vector<KeyColumnArray>* cols);
314
315 const std::vector<KeyColumnArray>& GetBatchColumns() const { return batch_all_cols_; }
316
317 private:
318 /// Prepare column array vectors.
319 /// Output column arrays represent a range of input column arrays
320 /// specified by starting row and number of rows.
321 /// Three vectors are generated:
322 /// - all columns
323 /// - fixed-length columns only
324 /// - varying-length columns only
325 void PrepareKeyColumnArrays(int64_t start_row, int64_t num_rows,
326 const std::vector<KeyColumnArray>& cols_in);
327
328 class TransformBoolean {
329 public:
330 static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
331 const KeyColumnArray& temp);
332 static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
333 KeyEncoderContext* ctx);
334 };
335
336 class EncoderInteger {
337 public:
338 static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
339 const KeyRowArray& rows, KeyColumnArray* col,
340 KeyEncoderContext* ctx, KeyColumnArray* temp);
341 static bool UsesTransform(const KeyColumnArray& column);
342 static KeyColumnArray ArrayReplace(const KeyColumnArray& column,
343 const KeyColumnArray& temp);
344 static void PostDecode(const KeyColumnArray& input, KeyColumnArray* output,
345 KeyEncoderContext* ctx);
346
347 private:
348 static bool IsBoolean(const KeyColumnMetadata& metadata);
349 };
350
351 class EncoderBinary {
352 public:
353 static void EncodeSelected(uint32_t offset_within_row, KeyRowArray* rows,
354 const KeyColumnArray& col, uint32_t num_selected,
355 const uint16_t* selection);
356 static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
357 const KeyRowArray& rows, KeyColumnArray* col,
358 KeyEncoderContext* ctx, KeyColumnArray* temp);
359 static bool IsInteger(const KeyColumnMetadata& metadata);
360
361 private:
362 template <class COPY_FN, class SET_NULL_FN>
363 static void EncodeSelectedImp(uint32_t offset_within_row, KeyRowArray* rows,
364 const KeyColumnArray& col, uint32_t num_selected,
365 const uint16_t* selection, COPY_FN copy_fn,
366 SET_NULL_FN set_null_fn);
367
368 template <bool is_row_fixed_length, class COPY_FN>
369 static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
370 uint32_t offset_within_row,
371 const KeyRowArray* rows_const,
372 KeyRowArray* rows_mutable_maybe_null,
373 const KeyColumnArray* col_const,
374 KeyColumnArray* col_mutable_maybe_null,
375 COPY_FN copy_fn);
376 template <bool is_row_fixed_length>
377 static void DecodeImp(uint32_t start_row, uint32_t num_rows,
378 uint32_t offset_within_row, const KeyRowArray& rows,
379 KeyColumnArray* col);
380 #if defined(ARROW_HAVE_AVX2)
381 static void DecodeHelper_avx2(bool is_row_fixed_length, uint32_t start_row,
382 uint32_t num_rows, uint32_t offset_within_row,
383 const KeyRowArray& rows, KeyColumnArray* col);
384 template <bool is_row_fixed_length>
385 static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
386 uint32_t offset_within_row, const KeyRowArray& rows,
387 KeyColumnArray* col);
388 #endif
389 };
390
391 class EncoderBinaryPair {
392 public:
393 static bool CanProcessPair(const KeyColumnMetadata& col1,
394 const KeyColumnMetadata& col2) {
395 return EncoderBinary::IsInteger(col1) && EncoderBinary::IsInteger(col2);
396 }
397 static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
398 const KeyRowArray& rows, KeyColumnArray* col1,
399 KeyColumnArray* col2, KeyEncoderContext* ctx,
400 KeyColumnArray* temp1, KeyColumnArray* temp2);
401
402 private:
403 template <bool is_row_fixed_length, typename col1_type, typename col2_type>
404 static void DecodeImp(uint32_t num_rows_to_skip, uint32_t start_row,
405 uint32_t num_rows, uint32_t offset_within_row,
406 const KeyRowArray& rows, KeyColumnArray* col1,
407 KeyColumnArray* col2);
408 #if defined(ARROW_HAVE_AVX2)
409 static uint32_t DecodeHelper_avx2(bool is_row_fixed_length, uint32_t col_width,
410 uint32_t start_row, uint32_t num_rows,
411 uint32_t offset_within_row, const KeyRowArray& rows,
412 KeyColumnArray* col1, KeyColumnArray* col2);
413 template <bool is_row_fixed_length, uint32_t col_width>
414 static uint32_t DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
415 uint32_t offset_within_row, const KeyRowArray& rows,
416 KeyColumnArray* col1, KeyColumnArray* col2);
417 #endif
418 };
419
420 class EncoderOffsets {
421 public:
422 static void GetRowOffsetsSelected(KeyRowArray* rows,
423 const std::vector<KeyColumnArray>& cols,
424 uint32_t num_selected, const uint16_t* selection);
425 static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
426 uint32_t num_selected, const uint16_t* selection);
427
428 static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
429 std::vector<KeyColumnArray>* varbinary_cols,
430 const std::vector<uint32_t>& varbinary_cols_base_offset,
431 KeyEncoderContext* ctx);
432
433 private:
434 template <bool has_nulls, bool is_first_varbinary>
435 static void EncodeSelectedImp(uint32_t ivarbinary, KeyRowArray* rows,
436 const std::vector<KeyColumnArray>& cols,
437 uint32_t num_selected, const uint16_t* selection);
438 };
439
440 class EncoderVarBinary {
441 public:
442 static void EncodeSelected(uint32_t ivarbinary, KeyRowArray* rows,
443 const KeyColumnArray& cols, uint32_t num_selected,
444 const uint16_t* selection);
445
446 static void Decode(uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
447 const KeyRowArray& rows, KeyColumnArray* col,
448 KeyEncoderContext* ctx);
449
450 private:
451 template <bool first_varbinary_col, class COPY_FN>
452 static inline void DecodeHelper(uint32_t start_row, uint32_t num_rows,
453 uint32_t varbinary_col_id,
454 const KeyRowArray* rows_const,
455 KeyRowArray* rows_mutable_maybe_null,
456 const KeyColumnArray* col_const,
457 KeyColumnArray* col_mutable_maybe_null,
458 COPY_FN copy_fn);
459 template <bool first_varbinary_col>
460 static void DecodeImp(uint32_t start_row, uint32_t num_rows,
461 uint32_t varbinary_col_id, const KeyRowArray& rows,
462 KeyColumnArray* col);
463 #if defined(ARROW_HAVE_AVX2)
464 static void DecodeHelper_avx2(uint32_t start_row, uint32_t num_rows,
465 uint32_t varbinary_col_id, const KeyRowArray& rows,
466 KeyColumnArray* col);
467 template <bool first_varbinary_col>
468 static void DecodeImp_avx2(uint32_t start_row, uint32_t num_rows,
469 uint32_t varbinary_col_id, const KeyRowArray& rows,
470 KeyColumnArray* col);
471 #endif
472 };
473
474 class EncoderNulls {
475 public:
476 static void EncodeSelected(KeyRowArray* rows, const std::vector<KeyColumnArray>& cols,
477 uint32_t num_selected, const uint16_t* selection);
478
479 static void Decode(uint32_t start_row, uint32_t num_rows, const KeyRowArray& rows,
480 std::vector<KeyColumnArray>* cols);
481 };
482
483 KeyEncoderContext* ctx_;
484
485 // Data initialized once, based on data types of key columns
486 KeyRowMetadata row_metadata_;
487
488 // Data initialized for each input batch.
489 // All elements are ordered according to the order of encoded fields in a row.
490 std::vector<KeyColumnArray> batch_all_cols_;
491 std::vector<KeyColumnArray> batch_varbinary_cols_;
492 std::vector<uint32_t> batch_varbinary_cols_base_offsets_;
493 };
494
495 template <bool is_row_fixed_length, class COPY_FN>
496 inline void KeyEncoder::EncoderBinary::DecodeHelper(
497 uint32_t start_row, uint32_t num_rows, uint32_t offset_within_row,
498 const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
499 const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
500 COPY_FN copy_fn) {
501 ARROW_DCHECK(col_const && col_const->metadata().is_fixed_length);
502 uint32_t col_width = col_const->metadata().fixed_length;
503
504 if (is_row_fixed_length) {
505 uint32_t row_width = rows_const->metadata().fixed_length;
506 for (uint32_t i = 0; i < num_rows; ++i) {
507 const uint8_t* src;
508 uint8_t* dst;
509 src = rows_const->data(1) + row_width * (start_row + i) + offset_within_row;
510 dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
511 copy_fn(dst, src, col_width);
512 }
513 } else {
514 const uint32_t* row_offsets = rows_const->offsets();
515 for (uint32_t i = 0; i < num_rows; ++i) {
516 const uint8_t* src;
517 uint8_t* dst;
518 src = rows_const->data(2) + row_offsets[start_row + i] + offset_within_row;
519 dst = col_mutable_maybe_null->mutable_data(1) + col_width * i;
520 copy_fn(dst, src, col_width);
521 }
522 }
523 }
524
525 template <bool first_varbinary_col, class COPY_FN>
526 inline void KeyEncoder::EncoderVarBinary::DecodeHelper(
527 uint32_t start_row, uint32_t num_rows, uint32_t varbinary_col_id,
528 const KeyRowArray* rows_const, KeyRowArray* rows_mutable_maybe_null,
529 const KeyColumnArray* col_const, KeyColumnArray* col_mutable_maybe_null,
530 COPY_FN copy_fn) {
531 // Column and rows need to be varying length
532 ARROW_DCHECK(!rows_const->metadata().is_fixed_length &&
533 !col_const->metadata().is_fixed_length);
534
535 const uint32_t* row_offsets_for_batch = rows_const->offsets() + start_row;
536 const uint32_t* col_offsets = col_const->offsets();
537
538 uint32_t col_offset_next = col_offsets[0];
539 for (uint32_t i = 0; i < num_rows; ++i) {
540 uint32_t col_offset = col_offset_next;
541 col_offset_next = col_offsets[i + 1];
542
543 uint32_t row_offset = row_offsets_for_batch[i];
544 const uint8_t* row = rows_const->data(2) + row_offset;
545
546 uint32_t offset_within_row;
547 uint32_t length;
548 if (first_varbinary_col) {
549 rows_const->metadata().first_varbinary_offset_and_length(row, &offset_within_row,
550 &length);
551 } else {
552 rows_const->metadata().nth_varbinary_offset_and_length(row, varbinary_col_id,
553 &offset_within_row, &length);
554 }
555
556 row_offset += offset_within_row;
557
558 const uint8_t* src;
559 uint8_t* dst;
560 src = rows_const->data(2) + row_offset;
561 dst = col_mutable_maybe_null->mutable_data(2) + col_offset;
562 copy_fn(dst, src, length);
563 }
564 }
565
566 } // namespace compute
567 } // namespace arrow