--- /dev/null
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter_internal.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/macros.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
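+// Advance a row-major (C-order) coordinate by one step, odometer style:
+// the last dimension varies fastest, and a dimension that reaches its
+// extent wraps to zero and carries into the next-outer dimension.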
+template <typename c_index_type>
+inline void IncrementRowMajorIndex(std::vector<c_index_type>& coord,
+ const std::vector<int64_t>& shape) {
+ const int64_t ndim = shape.size();
+ ++coord[ndim - 1];
+ if (coord[ndim - 1] == shape[ndim - 1]) {
+ int64_t d = ndim - 1;
+ while (d > 0 && coord[d] == shape[d]) {
+ coord[d] = 0;
+ ++coord[d - 1];
+ --d;
+ }
+ }
+}
+
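+// Convert a contiguous row-major tensor by scanning its data linearly and
+// emitting a (coordinate tuple, value) pair for every non-zero element.
+// Because the scan follows row-major order, the emitted indices are
+// already in canonical (lexicographically sorted) COO order.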
+template <typename c_index_type, typename c_value_type>
+void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices,
+ c_value_type* values, const int64_t size) {
+ const auto ndim = tensor.ndim();
+ const auto& shape = tensor.shape();
+ const c_value_type* tensor_data =
+ reinterpret_cast<const c_value_type*>(tensor.raw_data());
+
+ constexpr c_value_type zero = 0;
+ std::vector<c_index_type> coord(ndim, 0);
+ for (int64_t n = tensor.size(); n > 0; --n) {
+ const c_value_type x = *tensor_data;
+ if (ARROW_PREDICT_FALSE(x != zero)) {
+ std::copy(coord.begin(), coord.end(), indices);
+ *values++ = x;
+ indices += ndim;
+ }
+
+ IncrementRowMajorIndex(coord, shape);
+ ++tensor_data;
+ }
+}
+
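+// Convert a contiguous column-major tensor in three steps: reuse the
+// row-major conversion on the transposed view of the data, reverse each
+// emitted coordinate tuple back to the original axis order, then sort the
+// entries so the resulting COO index is canonical.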
+template <typename c_index_type, typename c_value_type>
+void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices,
+ c_value_type* out_values, const int64_t size) {
+ const auto ndim = tensor.ndim();
+  std::vector<c_index_type> indices(ndim * size);
+  std::vector<c_value_type> values(size);
+
+  // A column-major buffer read linearly is a row-major traversal of the
+  // transposed tensor, so scan under the reversed shape; the per-tuple
+  // reversal below restores the original axis order.
+  const auto& shape = tensor.shape();
+  const std::vector<int64_t> reversed_shape(shape.rbegin(), shape.rend());
+  const Tensor transposed(tensor.type(), tensor.data(), reversed_shape);
+  ConvertRowMajorTensor(transposed, indices.data(), values.data(), size);
+
+ // transpose indices
+ for (int64_t i = 0; i < size; ++i) {
+ for (int j = 0; j < ndim / 2; ++j) {
+ std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]);
+ }
+ }
+
+ // sort indices
+ std::vector<int64_t> order(size);
+ std::iota(order.begin(), order.end(), 0);
+ std::sort(order.begin(), order.end(), [&](const int64_t xi, const int64_t yi) {
+ const int64_t x_offset = xi * ndim;
+ const int64_t y_offset = yi * ndim;
+ for (int j = 0; j < ndim; ++j) {
+ const auto x = indices[x_offset + j];
+ const auto y = indices[y_offset + j];
+ if (x < y) return true;
+ if (x > y) return false;
+ }
+ return false;
+ });
+
+  // transfer result, applying the sorted order
+  const auto* indices_data = indices.data();
+  for (int64_t i = 0; i < size; ++i) {
+    out_values[i] = values[order[i]];
+
+    std::copy_n(indices_data + order[i] * ndim, ndim, out_indices);
+    out_indices += ndim;
+  }
+}
+
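+// Fallback for arbitrary (non-contiguous) strides: walk the logical
+// row-major coordinate space and fetch each element through
+// Tensor::Value, which resolves the strided offset per element. Slower
+// than the linear scans above, but layout-agnostic.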
+template <typename c_index_type, typename c_value_type>
+void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices,
+ c_value_type* values, const int64_t size) {
+ using ValueType = typename CTypeTraits<c_value_type>::ArrowType;
+ const auto& shape = tensor.shape();
+ const auto ndim = tensor.ndim();
+ std::vector<int64_t> coord(ndim, 0);
+
+  constexpr c_value_type zero = 0;
+  for (int64_t n = tensor.size(); n > 0; --n) {
+    const c_value_type x = tensor.Value<ValueType>(coord);
+    if (ARROW_PREDICT_FALSE(x != zero)) {
+      *values++ = x;
+      for (int i = 0; i < ndim; ++i) {
+        *indices++ = static_cast<c_index_type>(coord[i]);
+      }
+    }
+
+    IncrementRowMajorIndex(coord, shape);
+  }
+}
+
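+// Helpers for the DISPATCH macro: cast the raw output buffers to the
+// dispatched index/value C types and instantiate the matching typed
+// converter.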
+#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \
+ func<index_type, value_type>(tensor_, reinterpret_cast<index_type*>(indices), \
+ reinterpret_cast<value_type*>(values), size)
+
+// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++.
+#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__))
+
+#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND( \
+ CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__))
+
+#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \
+ ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__))
+
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCOOIndex
+
+class SparseCOOTensorConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+ SparseCOOTensorConverter(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool)
+ : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
+
+ Status Convert() {
+ RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
+ tensor_.shape()));
+
+ const int index_elsize = GetByteWidth(*index_value_type_);
+ const int value_elsize = GetByteWidth(*tensor_.type());
+
+ const int64_t ndim = tensor_.ndim();
+ ARROW_ASSIGN_OR_RAISE(int64_t nonzero_count, tensor_.CountNonZero());
+
+ ARROW_ASSIGN_OR_RAISE(auto indices_buffer,
+ AllocateBuffer(index_elsize * ndim * nonzero_count, pool_));
+ uint8_t* indices = indices_buffer->mutable_data();
+
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * nonzero_count, pool_));
+ uint8_t* values = values_buffer->mutable_data();
+
+ const uint8_t* tensor_data = tensor_.raw_data();
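+    // Fast path for scalars and vectors: no coordinate bookkeeping is
+    // needed, so compare each element's bytes against zero directly
+    // (this assumes the data is contiguous).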
+ if (ndim <= 1) {
+ const int64_t count = ndim == 0 ? 1 : tensor_.shape()[0];
+ for (int64_t i = 0; i < count; ++i) {
+ if (std::any_of(tensor_data, tensor_data + value_elsize, IsNonZero)) {
+ AssignIndex(indices, i, index_elsize);
+ std::copy_n(tensor_data, value_elsize, values);
+
+ indices += index_elsize;
+ values += value_elsize;
+ }
+ tensor_data += value_elsize;
+ }
+ } else if (tensor_.is_row_major()) {
+ DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ } else if (tensor_.is_column_major()) {
+ DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ } else {
+ DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values,
+ nonzero_count);
+ }
+
+ // make results
+ const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
+ std::vector<int64_t> indices_strides;
+ RETURN_NOT_OK(internal::ComputeRowMajorStrides(
+ checked_cast<const FixedWidthType&>(*index_value_type_), indices_shape,
+ &indices_strides));
+ auto coords = std::make_shared<Tensor>(index_value_type_, std::move(indices_buffer),
+ indices_shape, indices_strides);
+ ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
+ data = std::move(values_buffer);
+
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCOOIndex> sparse_index;
+ std::shared_ptr<Buffer> data;
+
+ private:
+ const Tensor& tensor_;
+ const std::shared_ptr<DataType>& index_value_type_;
+ MemoryPool* pool_;
+};
+
+} // namespace
+
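+// Store a single index value into a buffer of the given element width.
+// Only the widths of the supported index types (1, 2, 4, and 8 bytes) are
+// handled; other widths are silently ignored (integer index types only
+// ever have these widths).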
+void SparseTensorConverterMixin::AssignIndex(uint8_t* indices, int64_t val,
+ const int elsize) {
+ switch (elsize) {
+ case 1:
+ *indices = static_cast<uint8_t>(val);
+ break;
+ case 2:
+ *reinterpret_cast<uint16_t*>(indices) = static_cast<uint16_t>(val);
+ break;
+ case 4:
+ *reinterpret_cast<uint32_t*>(indices) = static_cast<uint32_t>(val);
+ break;
+ case 8:
+ *reinterpret_cast<int64_t*>(indices) = val;
+ break;
+ default:
+ break;
+ }
+}
+
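+// Load a single index value from a buffer of the given element width,
+// mirroring AssignIndex above; unsupported widths read as zero.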
+int64_t SparseTensorConverterMixin::GetIndexValue(const uint8_t* value_ptr,
+ const int elsize) {
+ switch (elsize) {
+ case 1:
+ return *value_ptr;
+
+ case 2:
+ return *reinterpret_cast<const uint16_t*>(value_ptr);
+
+ case 4:
+ return *reinterpret_cast<const uint32_t*>(value_ptr);
+
+ case 8:
+ return *reinterpret_cast<const int64_t*>(value_ptr);
+
+ default:
+ return 0;
+ }
+}
+
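+// Convert a dense tensor into the pieces of a SparseCOOTensor: a
+// SparseCOOIndex holding the coordinates and a buffer of the non-zero
+// values.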
+Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
+ const std::shared_ptr<DataType>& index_value_type,
+ MemoryPool* pool,
+ std::shared_ptr<SparseIndex>* out_sparse_index,
+ std::shared_ptr<Buffer>* out_data) {
+ SparseCOOTensorConverter converter(tensor, index_value_type, pool);
+ RETURN_NOT_OK(converter.Convert());
+
+ *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
+ *out_data = converter.data;
+ return Status::OK();
+}
+
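+// Reconstruct a dense row-major tensor from a SparseCOOTensor by
+// zero-filling an output buffer and scattering each non-zero value to the
+// offset computed from its coordinates.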
+Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCOOTensor(
+ MemoryPool* pool, const SparseCOOTensor* sparse_tensor) {
+ const auto& sparse_index =
+ checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
+ const auto& coords = sparse_index.indices();
+ const auto* coords_data = coords->raw_data();
+
+ const int index_elsize = GetByteWidth(*coords->type());
+
+ const auto& value_type = checked_cast<const FixedWidthType&>(*sparse_tensor->type());
+ const int value_elsize = GetByteWidth(value_type);
+ ARROW_ASSIGN_OR_RAISE(auto values_buffer,
+ AllocateBuffer(value_elsize * sparse_tensor->size(), pool));
+ auto values = values_buffer->mutable_data();
+ std::fill_n(values, value_elsize * sparse_tensor->size(), 0);
+
+ std::vector<int64_t> strides;
+ RETURN_NOT_OK(ComputeRowMajorStrides(value_type, sparse_tensor->shape(), &strides));
+
+ const auto* raw_data = sparse_tensor->raw_data();
+ const int ndim = sparse_tensor->ndim();
+
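+  // Strides are expressed in bytes, so the accumulated offset indexes the
+  // output byte buffer directly.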
+ for (int64_t i = 0; i < sparse_tensor->non_zero_length(); ++i) {
+ int64_t offset = 0;
+
+ for (int j = 0; j < ndim; ++j) {
+      const int64_t index =
+          SparseTensorConverterMixin::GetIndexValue(coords_data, index_elsize);
+ offset += index * strides[j];
+ coords_data += index_elsize;
+ }
+
+ std::copy_n(raw_data, value_elsize, values + offset);
+ raw_data += value_elsize;
+ }
+
+ return std::make_shared<Tensor>(sparse_tensor->type(), std::move(values_buffer),
+ sparse_tensor->shape(), strides,
+ sparse_tensor->dim_names());
+}
+
+} // namespace internal
+} // namespace arrow