1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "arrow/python/python_to_arrow.h"
19 #include "arrow/python/numpy_interop.h"
30 #include "arrow/array.h"
31 #include "arrow/array/builder_binary.h"
32 #include "arrow/array/builder_decimal.h"
33 #include "arrow/array/builder_dict.h"
34 #include "arrow/array/builder_nested.h"
35 #include "arrow/array/builder_primitive.h"
36 #include "arrow/array/builder_time.h"
37 #include "arrow/chunked_array.h"
38 #include "arrow/status.h"
39 #include "arrow/type.h"
40 #include "arrow/type_traits.h"
41 #include "arrow/util/checked_cast.h"
42 #include "arrow/util/converter.h"
43 #include "arrow/util/decimal.h"
44 #include "arrow/util/int_util_internal.h"
45 #include "arrow/util/logging.h"
47 #include "arrow/python/datetime.h"
48 #include "arrow/python/decimal.h"
49 #include "arrow/python/helpers.h"
50 #include "arrow/python/inference.h"
51 #include "arrow/python/iterators.h"
52 #include "arrow/python/numpy_convert.h"
53 #include "arrow/python/type_traits.h"
54 #include "arrow/visitor_inline.h"
58 using internal::checked_cast
;
59 using internal::checked_pointer_cast
;
61 using internal::Converter
;
62 using internal::DictionaryConverter
;
63 using internal::ListConverter
;
64 using internal::PrimitiveConverter
;
65 using internal::StructConverter
;
67 using internal::MakeChunker
;
68 using internal::MakeConverter
;
// The logical sub-fields of a month/day/nanosecond interval value, used to
// select which Python attributes are probed when populating each component.
enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds };

// Primary template; specialized per field below to supply the C storage type
// and the attribute/multiplier table.
template <MonthDayNanoField field>
struct MonthDayNanoTraits;
// One (attribute-name, unit-multiplier) entry of a conversion table.  Tables
// are terminated by a {nullptr, 0} sentinel.  The visible fragment was missing
// the `name` member, which the rest of this file dereferences (`attr->name`).
struct MonthDayNanoAttrData {
  // Python attribute name to probe on the input object (nullptr terminates).
  const char* name;
  // Multiplier converting the previously accumulated value into this unit.
  const int64_t multiplier;
};
84 struct MonthDayNanoTraits
<MonthDayNanoField::kMonths
> {
85 using c_type
= int32_t;
86 static const MonthDayNanoAttrData attrs
[];
89 const MonthDayNanoAttrData MonthDayNanoTraits
<MonthDayNanoField::kMonths
>::attrs
[] = {
90 {"years", 1}, {"months", /*months_in_year=*/12}, {nullptr, 0}};
93 struct MonthDayNanoTraits
<MonthDayNanoField::kWeeksAndDays
> {
94 using c_type
= int32_t;
95 static const MonthDayNanoAttrData attrs
[];
98 const MonthDayNanoAttrData MonthDayNanoTraits
<MonthDayNanoField::kWeeksAndDays
>::attrs
[] =
99 {{"weeks", 1}, {"days", /*days_in_week=*/7}, {nullptr, 0}};
102 struct MonthDayNanoTraits
<MonthDayNanoField::kDaysOnly
> {
103 using c_type
= int32_t;
104 static const MonthDayNanoAttrData attrs
[];
107 const MonthDayNanoAttrData MonthDayNanoTraits
<MonthDayNanoField::kDaysOnly
>::attrs
[] = {
108 {"days", 1}, {nullptr, 0}};
111 struct MonthDayNanoTraits
<MonthDayNanoField::kNanoseconds
> {
112 using c_type
= int64_t;
113 static const MonthDayNanoAttrData attrs
[];
116 const MonthDayNanoAttrData MonthDayNanoTraits
<MonthDayNanoField::kNanoseconds
>::attrs
[] =
118 {"minutes", /*minutes_in_hours=*/60},
119 {"seconds", /*seconds_in_minute=*/60},
120 {"milliseconds", /*milliseconds_in_seconds*/ 1000},
121 {"microseconds", /*microseconds_in_millseconds=*/1000},
122 {"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
125 template <MonthDayNanoField field
>
126 struct PopulateMonthDayNano
{
127 using Traits
= MonthDayNanoTraits
<field
>;
128 using field_c_type
= typename
Traits::c_type
;
130 static Status
Field(PyObject
* obj
, field_c_type
* out
, bool* found_attrs
) {
132 for (const MonthDayNanoAttrData
* attr
= &Traits::attrs
[0]; attr
->multiplier
!= 0;
134 if (attr
->multiplier
!= 1 &&
135 ::arrow::internal::MultiplyWithOverflow(
136 static_cast<field_c_type
>(attr
->multiplier
), *out
, out
)) {
137 return Status::Invalid("Overflow on: ", (attr
- 1)->name
,
138 " for: ", internal::PyObject_StdStringRepr(obj
));
141 OwnedRef
field_value(PyObject_GetAttrString(obj
, attr
->name
));
142 if (field_value
.obj() == nullptr) {
143 // No attribute present, skip to the next one.
150 RETURN_NOT_OK(internal::CIntFromPython(field_value
.obj(), &value
, attr
->name
));
151 if (::arrow::internal::AddWithOverflow(*out
, value
, out
)) {
152 return Status::Invalid("Overflow on: ", attr
->name
,
153 " for: ", internal::PyObject_StdStringRepr(obj
));
161 // Utility for converting single python objects to their intermediate C representations
162 // which can be fed to the typed builders
165 // Type aliases for shorter signature definitions
167 using O
= PyConversionOptions
;
169 // Used for null checking before actually converting the values
170 static bool IsNull(const O
& options
, I obj
) {
171 if (options
.from_pandas
) {
172 return internal::PandasObjectIsNull(obj
);
174 return obj
== Py_None
;
178 // Used for post-conversion numpy NaT sentinel checking
179 static bool IsNaT(const TimestampType
*, int64_t value
) {
180 return internal::npy_traits
<NPY_DATETIME
>::isnull(value
);
183 // Used for post-conversion numpy NaT sentinel checking
184 static bool IsNaT(const DurationType
*, int64_t value
) {
185 return internal::npy_traits
<NPY_TIMEDELTA
>::isnull(value
);
188 static Result
<std::nullptr_t
> Convert(const NullType
*, const O
&, I obj
) {
189 if (obj
== Py_None
) {
192 return Status::Invalid("Invalid null value");
196 static Result
<bool> Convert(const BooleanType
*, const O
&, I obj
) {
197 if (obj
== Py_True
) {
199 } else if (obj
== Py_False
) {
201 } else if (PyArray_IsScalar(obj
, Bool
)) {
202 return reinterpret_cast<PyBoolScalarObject
*>(obj
)->obval
== NPY_TRUE
;
204 return internal::InvalidValue(obj
, "tried to convert to boolean");
208 template <typename T
>
209 static enable_if_integer
<T
, Result
<typename
T::c_type
>> Convert(const T
* type
, const O
&,
211 typename
T::c_type value
;
212 auto status
= internal::CIntFromPython(obj
, &value
);
213 if (ARROW_PREDICT_TRUE(status
.ok())) {
215 } else if (!internal::PyIntScalar_Check(obj
)) {
216 std::stringstream ss
;
217 ss
<< "tried to convert to " << type
->ToString();
218 return internal::InvalidValue(obj
, ss
.str());
224 static Result
<uint16_t> Convert(const HalfFloatType
*, const O
&, I obj
) {
226 RETURN_NOT_OK(PyFloat_AsHalf(obj
, &value
));
230 static Result
<float> Convert(const FloatType
*, const O
&, I obj
) {
232 if (internal::PyFloatScalar_Check(obj
)) {
233 value
= static_cast<float>(PyFloat_AsDouble(obj
));
235 } else if (internal::PyIntScalar_Check(obj
)) {
236 RETURN_NOT_OK(internal::IntegerScalarToFloat32Safe(obj
, &value
));
238 return internal::InvalidValue(obj
, "tried to convert to float32");
243 static Result
<double> Convert(const DoubleType
*, const O
&, I obj
) {
245 if (PyFloat_Check(obj
)) {
246 value
= PyFloat_AS_DOUBLE(obj
);
247 } else if (internal::PyFloatScalar_Check(obj
)) {
248 // Other kinds of float-y things
249 value
= PyFloat_AsDouble(obj
);
251 } else if (internal::PyIntScalar_Check(obj
)) {
252 RETURN_NOT_OK(internal::IntegerScalarToDoubleSafe(obj
, &value
));
254 return internal::InvalidValue(obj
, "tried to convert to double");
259 static Result
<Decimal128
> Convert(const Decimal128Type
* type
, const O
&, I obj
) {
261 RETURN_NOT_OK(internal::DecimalFromPyObject(obj
, *type
, &value
));
265 static Result
<Decimal256
> Convert(const Decimal256Type
* type
, const O
&, I obj
) {
267 RETURN_NOT_OK(internal::DecimalFromPyObject(obj
, *type
, &value
));
271 static Result
<int32_t> Convert(const Date32Type
*, const O
&, I obj
) {
273 if (PyDate_Check(obj
)) {
274 auto pydate
= reinterpret_cast<PyDateTime_Date
*>(obj
);
275 value
= static_cast<int32_t>(internal::PyDate_to_days(pydate
));
278 internal::CIntFromPython(obj
, &value
, "Integer too large for date32"));
283 static Result
<int64_t> Convert(const Date64Type
*, const O
&, I obj
) {
285 if (PyDateTime_Check(obj
)) {
286 auto pydate
= reinterpret_cast<PyDateTime_DateTime
*>(obj
);
287 value
= internal::PyDateTime_to_ms(pydate
);
288 // Truncate any intraday milliseconds
289 // TODO: introduce an option for this
290 value
-= value
% 86400000LL;
291 } else if (PyDate_Check(obj
)) {
292 auto pydate
= reinterpret_cast<PyDateTime_Date
*>(obj
);
293 value
= internal::PyDate_to_ms(pydate
);
296 internal::CIntFromPython(obj
, &value
, "Integer too large for date64"));
301 static Result
<int32_t> Convert(const Time32Type
* type
, const O
&, I obj
) {
303 if (PyTime_Check(obj
)) {
304 switch (type
->unit()) {
305 case TimeUnit::SECOND
:
306 value
= static_cast<int32_t>(internal::PyTime_to_s(obj
));
308 case TimeUnit::MILLI
:
309 value
= static_cast<int32_t>(internal::PyTime_to_ms(obj
));
312 return Status::UnknownError("Invalid time unit");
315 RETURN_NOT_OK(internal::CIntFromPython(obj
, &value
, "Integer too large for int32"));
320 static Result
<int64_t> Convert(const Time64Type
* type
, const O
&, I obj
) {
322 if (PyTime_Check(obj
)) {
323 switch (type
->unit()) {
324 case TimeUnit::MICRO
:
325 value
= internal::PyTime_to_us(obj
);
328 value
= internal::PyTime_to_ns(obj
);
331 return Status::UnknownError("Invalid time unit");
334 RETURN_NOT_OK(internal::CIntFromPython(obj
, &value
, "Integer too large for int64"));
339 static Result
<int64_t> Convert(const TimestampType
* type
, const O
& options
, I obj
) {
340 int64_t value
, offset
;
341 if (PyDateTime_Check(obj
)) {
342 if (ARROW_PREDICT_FALSE(options
.ignore_timezone
)) {
345 ARROW_ASSIGN_OR_RAISE(offset
, internal::PyDateTime_utcoffset_s(obj
));
347 auto dt
= reinterpret_cast<PyDateTime_DateTime
*>(obj
);
348 switch (type
->unit()) {
349 case TimeUnit::SECOND
:
350 value
= internal::PyDateTime_to_s(dt
) - offset
;
352 case TimeUnit::MILLI
:
353 value
= internal::PyDateTime_to_ms(dt
) - offset
* 1000LL;
355 case TimeUnit::MICRO
:
356 value
= internal::PyDateTime_to_us(dt
) - offset
* 1000000LL;
359 if (internal::IsPandasTimestamp(obj
)) {
360 // pd.Timestamp value attribute contains the offset from unix epoch
361 // so no adjustment for timezone is need.
362 OwnedRef
nanos(PyObject_GetAttrString(obj
, "value"));
364 RETURN_NOT_OK(internal::CIntFromPython(nanos
.obj(), &value
));
366 // Conversion to nanoseconds can overflow -> check multiply of microseconds
367 value
= internal::PyDateTime_to_us(dt
);
368 if (arrow::internal::MultiplyWithOverflow(value
, 1000LL, &value
)) {
369 return internal::InvalidValue(obj
,
370 "out of bounds for nanosecond resolution");
373 // Adjust with offset and check for overflow
374 if (arrow::internal::SubtractWithOverflow(value
, offset
* 1000000000LL,
376 return internal::InvalidValue(obj
,
377 "out of bounds for nanosecond resolution");
382 return Status::UnknownError("Invalid time unit");
384 } else if (PyArray_CheckAnyScalarExact(obj
)) {
385 // validate that the numpy scalar has np.datetime64 dtype
386 std::shared_ptr
<DataType
> numpy_type
;
387 RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj
), &numpy_type
));
388 if (!numpy_type
->Equals(*type
)) {
389 return Status::NotImplemented("Expected np.datetime64 but got: ",
390 numpy_type
->ToString());
392 return reinterpret_cast<PyDatetimeScalarObject
*>(obj
)->obval
;
394 RETURN_NOT_OK(internal::CIntFromPython(obj
, &value
));
399 static Result
<MonthDayNanoIntervalType::MonthDayNanos
> Convert(
400 const MonthDayNanoIntervalType
* /*type*/, const O
& /*options*/, I obj
) {
401 MonthDayNanoIntervalType::MonthDayNanos output
;
402 bool found_attrs
= false;
403 RETURN_NOT_OK(PopulateMonthDayNano
<MonthDayNanoField::kMonths
>::Field(
404 obj
, &output
.months
, &found_attrs
));
405 // on relativeoffset weeks is a property calculated from days. On
406 // DateOffset is is a field on its own. timedelta doesn't have a weeks
408 PyObject
* pandas_date_offset_type
= internal::BorrowPandasDataOffsetType();
409 bool is_date_offset
= pandas_date_offset_type
== (PyObject
*)Py_TYPE(obj
);
410 if (!is_date_offset
) {
411 RETURN_NOT_OK(PopulateMonthDayNano
<MonthDayNanoField::kDaysOnly
>::Field(
412 obj
, &output
.days
, &found_attrs
));
414 RETURN_NOT_OK(PopulateMonthDayNano
<MonthDayNanoField::kWeeksAndDays
>::Field(
415 obj
, &output
.days
, &found_attrs
));
417 RETURN_NOT_OK(PopulateMonthDayNano
<MonthDayNanoField::kNanoseconds
>::Field(
418 obj
, &output
.nanoseconds
, &found_attrs
));
420 if (ARROW_PREDICT_FALSE(!found_attrs
) && !is_date_offset
) {
421 // date_offset can have zero fields.
422 return Status::TypeError("No temporal attributes found on object.");
427 static Result
<int64_t> Convert(const DurationType
* type
, const O
&, I obj
) {
429 if (PyDelta_Check(obj
)) {
430 auto dt
= reinterpret_cast<PyDateTime_Delta
*>(obj
);
431 switch (type
->unit()) {
432 case TimeUnit::SECOND
:
433 value
= internal::PyDelta_to_s(dt
);
435 case TimeUnit::MILLI
:
436 value
= internal::PyDelta_to_ms(dt
);
438 case TimeUnit::MICRO
:
439 value
= internal::PyDelta_to_us(dt
);
442 if (internal::IsPandasTimedelta(obj
)) {
443 OwnedRef
nanos(PyObject_GetAttrString(obj
, "value"));
445 RETURN_NOT_OK(internal::CIntFromPython(nanos
.obj(), &value
));
447 value
= internal::PyDelta_to_ns(dt
);
451 return Status::UnknownError("Invalid time unit");
453 } else if (PyArray_CheckAnyScalarExact(obj
)) {
454 // validate that the numpy scalar has np.datetime64 dtype
455 std::shared_ptr
<DataType
> numpy_type
;
456 RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj
), &numpy_type
));
457 if (!numpy_type
->Equals(*type
)) {
458 return Status::NotImplemented("Expected np.timedelta64 but got: ",
459 numpy_type
->ToString());
461 return reinterpret_cast<PyTimedeltaScalarObject
*>(obj
)->obval
;
463 RETURN_NOT_OK(internal::CIntFromPython(obj
, &value
));
468 // The binary-like intermediate representation is PyBytesView because it keeps temporary
469 // python objects alive (non-contiguous memoryview) and stores whether the original
470 // object was unicode encoded or not, which is used for unicode -> bytes coersion if
471 // there is a non-unicode object observed.
473 static Status
Convert(const BaseBinaryType
*, const O
&, I obj
, PyBytesView
& view
) {
474 return view
.ParseString(obj
);
477 static Status
Convert(const FixedSizeBinaryType
* type
, const O
&, I obj
,
479 ARROW_RETURN_NOT_OK(view
.ParseString(obj
));
480 if (view
.size
!= type
->byte_width()) {
481 std::stringstream ss
;
482 ss
<< "expected to be length " << type
->byte_width() << " was " << view
.size
;
483 return internal::InvalidValue(obj
, ss
.str());
489 template <typename T
>
490 static enable_if_string
<T
, Status
> Convert(const T
*, const O
& options
, I obj
,
492 if (options
.strict
) {
493 // Strict conversion, force output to be unicode / utf8 and validate that
494 // any binary values are utf8
495 ARROW_RETURN_NOT_OK(view
.ParseString(obj
, true));
497 return internal::InvalidValue(obj
, "was not a utf8 string");
501 // Non-strict conversion; keep track of whether values are unicode or bytes
502 return view
.ParseString(obj
);
506 static Result
<bool> Convert(const DataType
* type
, const O
&, I obj
) {
507 return Status::NotImplemented("PyValue::Convert is not implemented for type ", type
);
511 // The base Converter class is a mixin with predefined behavior and constructors.
512 class PyConverter
: public Converter
<PyObject
*, PyConversionOptions
> {
514 // Iterate over the input values and defer the conversion to the Append method
515 Status
Extend(PyObject
* values
, int64_t size
, int64_t offset
= 0) override
{
516 DCHECK_GE(size
, offset
);
517 /// Ensure we've allocated enough space
518 RETURN_NOT_OK(this->Reserve(size
- offset
));
519 // Iterate over the items adding each one
520 return internal::VisitSequence(
522 [this](PyObject
* item
, bool* /* unused */) { return this->Append(item
); });
525 // Convert and append a sequence of values masked with a numpy array
526 Status
ExtendMasked(PyObject
* values
, PyObject
* mask
, int64_t size
,
527 int64_t offset
= 0) override
{
528 DCHECK_GE(size
, offset
);
529 /// Ensure we've allocated enough space
530 RETURN_NOT_OK(this->Reserve(size
- offset
));
531 // Iterate over the items adding each one
532 return internal::VisitSequenceMasked(
533 values
, mask
, offset
, [this](PyObject
* item
, bool is_masked
, bool* /* unused */) {
535 return this->AppendNull();
537 // This will also apply the null-checking convention in the event
538 // that the value is not masked
539 return this->Append(item
); // perhaps use AppendValue instead?
// Forward declarations for the concrete converter kinds; the PyConverterTrait
// specializations below select among them per Arrow type.
template <typename T, typename Enable = void>
class PyPrimitiveConverter;

template <typename T>
class PyListConverter;

template <typename U, typename Enable = void>
class PyDictionaryConverter;

class PyStructConverter;

template <typename T, typename Enable = void>
struct PyConverterTrait;
559 template <typename T
>
560 struct PyConverterTrait
<
561 T
, enable_if_t
<(!is_nested_type
<T
>::value
&& !is_interval_type
<T
>::value
&&
562 !is_extension_type
<T
>::value
) ||
563 std::is_same
<T
, MonthDayNanoIntervalType
>::value
>> {
564 using type
= PyPrimitiveConverter
<T
>;
567 template <typename T
>
568 struct PyConverterTrait
<T
, enable_if_list_like
<T
>> {
569 using type
= PyListConverter
<T
>;
573 struct PyConverterTrait
<StructType
> {
574 using type
= PyStructConverter
;
578 struct PyConverterTrait
<DictionaryType
> {
579 template <typename T
>
580 using dictionary_type
= PyDictionaryConverter
<T
>;
583 template <typename T
>
584 class PyPrimitiveConverter
<T
, enable_if_null
<T
>>
585 : public PrimitiveConverter
<T
, PyConverter
> {
587 Status
Append(PyObject
* value
) override
{
588 if (PyValue::IsNull(this->options_
, value
)) {
589 return this->primitive_builder_
->AppendNull();
591 ARROW_ASSIGN_OR_RAISE(
592 auto converted
, PyValue::Convert(this->primitive_type_
, this->options_
, value
));
593 return this->primitive_builder_
->Append(converted
);
598 template <typename T
>
599 class PyPrimitiveConverter
<
600 T
, enable_if_t
<is_boolean_type
<T
>::value
|| is_number_type
<T
>::value
||
601 is_decimal_type
<T
>::value
|| is_date_type
<T
>::value
||
602 is_time_type
<T
>::value
||
603 std::is_same
<MonthDayNanoIntervalType
, T
>::value
>>
604 : public PrimitiveConverter
<T
, PyConverter
> {
606 Status
Append(PyObject
* value
) override
{
607 // Since the required space has been already allocated in the Extend functions we can
608 // rely on the Unsafe builder API which improves the performance.
609 if (PyValue::IsNull(this->options_
, value
)) {
610 this->primitive_builder_
->UnsafeAppendNull();
612 ARROW_ASSIGN_OR_RAISE(
613 auto converted
, PyValue::Convert(this->primitive_type_
, this->options_
, value
));
614 this->primitive_builder_
->UnsafeAppend(converted
);
620 template <typename T
>
621 class PyPrimitiveConverter
<
622 T
, enable_if_t
<is_timestamp_type
<T
>::value
|| is_duration_type
<T
>::value
>>
623 : public PrimitiveConverter
<T
, PyConverter
> {
625 Status
Append(PyObject
* value
) override
{
626 if (PyValue::IsNull(this->options_
, value
)) {
627 this->primitive_builder_
->UnsafeAppendNull();
629 ARROW_ASSIGN_OR_RAISE(
630 auto converted
, PyValue::Convert(this->primitive_type_
, this->options_
, value
));
631 // Numpy NaT sentinels can be checked after the conversion
632 if (PyArray_CheckAnyScalarExact(value
) &&
633 PyValue::IsNaT(this->primitive_type_
, converted
)) {
634 this->primitive_builder_
->UnsafeAppendNull();
636 this->primitive_builder_
->UnsafeAppend(converted
);
643 template <typename T
>
644 class PyPrimitiveConverter
<T
, enable_if_t
<std::is_same
<T
, FixedSizeBinaryType
>::value
>>
645 : public PrimitiveConverter
<T
, PyConverter
> {
647 Status
Append(PyObject
* value
) override
{
648 if (PyValue::IsNull(this->options_
, value
)) {
649 this->primitive_builder_
->UnsafeAppendNull();
652 PyValue::Convert(this->primitive_type_
, this->options_
, value
, view_
));
653 ARROW_RETURN_NOT_OK(this->primitive_builder_
->ReserveData(view_
.size
));
654 this->primitive_builder_
->UnsafeAppend(view_
.bytes
);
663 template <typename T
>
664 class PyPrimitiveConverter
<T
, enable_if_base_binary
<T
>>
665 : public PrimitiveConverter
<T
, PyConverter
> {
667 using OffsetType
= typename
T::offset_type
;
669 Status
Append(PyObject
* value
) override
{
670 if (PyValue::IsNull(this->options_
, value
)) {
671 this->primitive_builder_
->UnsafeAppendNull();
674 PyValue::Convert(this->primitive_type_
, this->options_
, value
, view_
));
675 if (!view_
.is_utf8
) {
676 // observed binary value
677 observed_binary_
= true;
679 // Since we don't know the varying length input size in advance, we need to
680 // reserve space in the value builder one by one. ReserveData raises CapacityError
681 // if the value would not fit into the array.
682 ARROW_RETURN_NOT_OK(this->primitive_builder_
->ReserveData(view_
.size
));
683 this->primitive_builder_
->UnsafeAppend(view_
.bytes
,
684 static_cast<OffsetType
>(view_
.size
));
689 Result
<std::shared_ptr
<Array
>> ToArray() override
{
690 ARROW_ASSIGN_OR_RAISE(auto array
, (PrimitiveConverter
<T
, PyConverter
>::ToArray()));
691 if (observed_binary_
) {
692 // if we saw any non-unicode, cast results to BinaryArray
693 auto binary_type
= TypeTraits
<typename
T::PhysicalType
>::type_singleton();
694 return array
->View(binary_type
);
702 bool observed_binary_
= false;
705 template <typename U
>
706 class PyDictionaryConverter
<U
, enable_if_has_c_type
<U
>>
707 : public DictionaryConverter
<U
, PyConverter
> {
709 Status
Append(PyObject
* value
) override
{
710 if (PyValue::IsNull(this->options_
, value
)) {
711 return this->value_builder_
->AppendNull();
713 ARROW_ASSIGN_OR_RAISE(auto converted
,
714 PyValue::Convert(this->value_type_
, this->options_
, value
));
715 return this->value_builder_
->Append(converted
);
720 template <typename U
>
721 class PyDictionaryConverter
<U
, enable_if_has_string_view
<U
>>
722 : public DictionaryConverter
<U
, PyConverter
> {
724 Status
Append(PyObject
* value
) override
{
725 if (PyValue::IsNull(this->options_
, value
)) {
726 return this->value_builder_
->AppendNull();
729 PyValue::Convert(this->value_type_
, this->options_
, value
, view_
));
730 return this->value_builder_
->Append(view_
.bytes
, static_cast<int32_t>(view_
.size
));
738 template <typename T
>
739 class PyListConverter
: public ListConverter
<T
, PyConverter
, PyConverterTrait
> {
741 Status
Append(PyObject
* value
) override
{
742 if (PyValue::IsNull(this->options_
, value
)) {
743 return this->list_builder_
->AppendNull();
746 RETURN_NOT_OK(this->list_builder_
->Append());
747 if (PyArray_Check(value
)) {
748 RETURN_NOT_OK(AppendNdarray(value
));
749 } else if (PySequence_Check(value
)) {
750 RETURN_NOT_OK(AppendSequence(value
));
751 } else if (PySet_Check(value
) || (Py_TYPE(value
) == &PyDictValues_Type
)) {
752 RETURN_NOT_OK(AppendIterable(value
));
754 return internal::InvalidType(
755 value
, "was not a sequence or recognized null for conversion to list type");
758 return ValidateBuilder(this->list_type_
);
762 Status
ValidateBuilder(const MapType
*) {
763 if (this->list_builder_
->key_builder()->null_count() > 0) {
764 return Status::Invalid("Invalid Map: key field can not contain null values");
770 Status
ValidateBuilder(const BaseListType
*) { return Status::OK(); }
772 Status
AppendSequence(PyObject
* value
) {
773 int64_t size
= static_cast<int64_t>(PySequence_Size(value
));
774 RETURN_NOT_OK(this->list_builder_
->ValidateOverflow(size
));
775 return this->value_converter_
->Extend(value
, size
);
778 Status
AppendIterable(PyObject
* value
) {
779 PyObject
* iterator
= PyObject_GetIter(value
);
780 OwnedRef
iter_ref(iterator
);
781 while (PyObject
* item
= PyIter_Next(iterator
)) {
782 OwnedRef
item_ref(item
);
783 RETURN_NOT_OK(this->value_converter_
->Reserve(1));
784 RETURN_NOT_OK(this->value_converter_
->Append(item
));
789 Status
AppendNdarray(PyObject
* value
) {
790 PyArrayObject
* ndarray
= reinterpret_cast<PyArrayObject
*>(value
);
791 if (PyArray_NDIM(ndarray
) != 1) {
792 return Status::Invalid("Can only convert 1-dimensional array values");
794 const int64_t size
= PyArray_SIZE(ndarray
);
795 RETURN_NOT_OK(this->list_builder_
->ValidateOverflow(size
));
797 const auto value_type
= this->value_converter_
->builder()->type();
798 switch (value_type
->id()) {
799 // If the value type does not match the expected NumPy dtype, then fall through
800 // to a slower PySequence-based path
801 #define LIST_FAST_CASE(TYPE_ID, TYPE, NUMPY_TYPE) \
802 case Type::TYPE_ID: { \
803 if (PyArray_DESCR(ndarray)->type_num != NUMPY_TYPE) { \
804 return this->value_converter_->Extend(value, size); \
806 return AppendNdarrayTyped<TYPE, NUMPY_TYPE>(ndarray); \
808 LIST_FAST_CASE(BOOL
, BooleanType
, NPY_BOOL
)
809 LIST_FAST_CASE(UINT8
, UInt8Type
, NPY_UINT8
)
810 LIST_FAST_CASE(INT8
, Int8Type
, NPY_INT8
)
811 LIST_FAST_CASE(UINT16
, UInt16Type
, NPY_UINT16
)
812 LIST_FAST_CASE(INT16
, Int16Type
, NPY_INT16
)
813 LIST_FAST_CASE(UINT32
, UInt32Type
, NPY_UINT32
)
814 LIST_FAST_CASE(INT32
, Int32Type
, NPY_INT32
)
815 LIST_FAST_CASE(UINT64
, UInt64Type
, NPY_UINT64
)
816 LIST_FAST_CASE(INT64
, Int64Type
, NPY_INT64
)
817 LIST_FAST_CASE(HALF_FLOAT
, HalfFloatType
, NPY_FLOAT16
)
818 LIST_FAST_CASE(FLOAT
, FloatType
, NPY_FLOAT
)
819 LIST_FAST_CASE(DOUBLE
, DoubleType
, NPY_DOUBLE
)
820 LIST_FAST_CASE(TIMESTAMP
, TimestampType
, NPY_DATETIME
)
821 LIST_FAST_CASE(DURATION
, DurationType
, NPY_TIMEDELTA
)
822 #undef LIST_FAST_CASE
824 return this->value_converter_
->Extend(value
, size
);
829 template <typename ArrowType
, int NUMPY_TYPE
>
830 Status
AppendNdarrayTyped(PyArrayObject
* ndarray
) {
831 // no need to go through the conversion
832 using NumpyTrait
= internal::npy_traits
<NUMPY_TYPE
>;
833 using NumpyType
= typename
NumpyTrait::value_type
;
834 using ValueBuilderType
= typename TypeTraits
<ArrowType
>::BuilderType
;
836 const bool null_sentinels_possible
=
837 // Always treat Numpy's NaT as null
838 NUMPY_TYPE
== NPY_DATETIME
|| NUMPY_TYPE
== NPY_TIMEDELTA
||
839 // Observing pandas's null sentinels
840 (this->options_
.from_pandas
&& NumpyTrait::supports_nulls
);
843 checked_cast
<ValueBuilderType
*>(this->value_converter_
->builder().get());
845 Ndarray1DIndexer
<NumpyType
> values(ndarray
);
846 if (null_sentinels_possible
) {
847 for (int64_t i
= 0; i
< values
.size(); ++i
) {
848 if (NumpyTrait::isnull(values
[i
])) {
849 RETURN_NOT_OK(value_builder
->AppendNull());
851 RETURN_NOT_OK(value_builder
->Append(values
[i
]));
854 } else if (!values
.is_strided()) {
855 RETURN_NOT_OK(value_builder
->AppendValues(values
.data(), values
.size()));
857 for (int64_t i
= 0; i
< values
.size(); ++i
) {
858 RETURN_NOT_OK(value_builder
->Append(values
[i
]));
865 class PyStructConverter
: public StructConverter
<PyConverter
, PyConverterTrait
> {
867 Status
Append(PyObject
* value
) override
{
868 if (PyValue::IsNull(this->options_
, value
)) {
869 return this->struct_builder_
->AppendNull();
871 switch (input_kind_
) {
872 case InputKind::DICT
:
873 RETURN_NOT_OK(this->struct_builder_
->Append());
874 return AppendDict(value
);
875 case InputKind::TUPLE
:
876 RETURN_NOT_OK(this->struct_builder_
->Append());
877 return AppendTuple(value
);
878 case InputKind::ITEMS
:
879 RETURN_NOT_OK(this->struct_builder_
->Append());
880 return AppendItems(value
);
882 RETURN_NOT_OK(InferInputKind(value
));
883 return Append(value
);
888 Status
Init(MemoryPool
* pool
) override
{
889 RETURN_NOT_OK((StructConverter
<PyConverter
, PyConverterTrait
>::Init(pool
)));
891 // Store the field names as a PyObjects for dict matching
892 num_fields_
= this->struct_type_
->num_fields();
893 bytes_field_names_
.reset(PyList_New(num_fields_
));
894 unicode_field_names_
.reset(PyList_New(num_fields_
));
897 for (int i
= 0; i
< num_fields_
; i
++) {
898 const auto& field_name
= this->struct_type_
->field(i
)->name();
899 PyObject
* bytes
= PyBytes_FromStringAndSize(field_name
.c_str(), field_name
.size());
901 PyUnicode_FromStringAndSize(field_name
.c_str(), field_name
.size());
903 PyList_SET_ITEM(bytes_field_names_
.obj(), i
, bytes
);
904 PyList_SET_ITEM(unicode_field_names_
.obj(), i
, unicode
);
909 Status
InferInputKind(PyObject
* value
) {
910 // Infer input object's type, note that heterogeneous sequences are not allowed
911 if (PyDict_Check(value
)) {
912 input_kind_
= InputKind::DICT
;
913 } else if (PyTuple_Check(value
)) {
914 input_kind_
= InputKind::TUPLE
;
915 } else if (PySequence_Check(value
)) {
916 input_kind_
= InputKind::ITEMS
;
918 return internal::InvalidType(value
,
919 "was not a dict, tuple, or recognized null value "
920 "for conversion to struct type");
925 Status
InferKeyKind(PyObject
* items
) {
926 for (int i
= 0; i
< PySequence_Length(items
); i
++) {
927 // retrieve the key from the passed key-value pairs
928 ARROW_ASSIGN_OR_RAISE(auto pair
, GetKeyValuePair(items
, i
));
930 // check key exists between the unicode field names
931 bool do_contain
= PySequence_Contains(unicode_field_names_
.obj(), pair
.first
);
934 key_kind_
= KeyKind::UNICODE
;
938 // check key exists between the bytes field names
939 do_contain
= PySequence_Contains(bytes_field_names_
.obj(), pair
.first
);
942 key_kind_
= KeyKind::BYTES
;
949 Status
AppendEmpty() {
950 for (int i
= 0; i
< num_fields_
; i
++) {
951 RETURN_NOT_OK(this->children_
[i
]->Append(Py_None
));
956 Status
AppendTuple(PyObject
* tuple
) {
957 if (!PyTuple_Check(tuple
)) {
958 return internal::InvalidType(tuple
, "was expecting a tuple");
960 if (PyTuple_GET_SIZE(tuple
) != num_fields_
) {
961 return Status::Invalid("Tuple size must be equal to number of struct fields");
963 for (int i
= 0; i
< num_fields_
; i
++) {
964 PyObject
* value
= PyTuple_GET_ITEM(tuple
, i
);
965 RETURN_NOT_OK(this->children_
[i
]->Append(value
));
970 Status
AppendDict(PyObject
* dict
) {
971 if (!PyDict_Check(dict
)) {
972 return internal::InvalidType(dict
, "was expecting a dict");
975 case KeyKind::UNICODE
:
976 return AppendDict(dict
, unicode_field_names_
.obj());
978 return AppendDict(dict
, bytes_field_names_
.obj());
980 RETURN_NOT_OK(InferKeyKind(PyDict_Items(dict
)));
981 if (key_kind_
== KeyKind::UNKNOWN
) {
982 // was unable to infer the type which means that all keys are absent
983 return AppendEmpty();
985 return AppendDict(dict
);
990 Status
AppendItems(PyObject
* items
) {
991 if (!PySequence_Check(items
)) {
992 return internal::InvalidType(items
, "was expecting a sequence of key-value items");
995 case KeyKind::UNICODE
:
996 return AppendItems(items
, unicode_field_names_
.obj());
998 return AppendItems(items
, bytes_field_names_
.obj());
1000 RETURN_NOT_OK(InferKeyKind(items
));
1001 if (key_kind_
== KeyKind::UNKNOWN
) {
1002 // was unable to infer the type which means that all keys are absent
1003 return AppendEmpty();
1005 return AppendItems(items
);
1010 Status
AppendDict(PyObject
* dict
, PyObject
* field_names
) {
1011 // NOTE we're ignoring any extraneous dict items
1012 for (int i
= 0; i
< num_fields_
; i
++) {
1013 PyObject
* name
= PyList_GET_ITEM(field_names
, i
); // borrowed
1014 PyObject
* value
= PyDict_GetItem(dict
, name
); // borrowed
1015 if (value
== NULL
) {
1016 RETURN_IF_PYERROR();
1018 RETURN_NOT_OK(this->children_
[i
]->Append(value
? value
: Py_None
));
1020 return Status::OK();
1023 Result
<std::pair
<PyObject
*, PyObject
*>> GetKeyValuePair(PyObject
* seq
, int index
) {
1024 PyObject
* pair
= PySequence_GetItem(seq
, index
);
1025 RETURN_IF_PYERROR();
1026 if (!PyTuple_Check(pair
) || PyTuple_Size(pair
) != 2) {
1027 return internal::InvalidType(pair
, "was expecting tuple of (key, value) pair");
1029 PyObject
* key
= PyTuple_GetItem(pair
, 0);
1030 RETURN_IF_PYERROR();
1031 PyObject
* value
= PyTuple_GetItem(pair
, 1);
1032 RETURN_IF_PYERROR();
1033 return std::make_pair(key
, value
);
1036 Status
AppendItems(PyObject
* items
, PyObject
* field_names
) {
1037 auto length
= static_cast<int>(PySequence_Size(items
));
1038 RETURN_IF_PYERROR();
1040 // append the values for the defined fields
1041 for (int i
= 0; i
< std::min(num_fields_
, length
); i
++) {
1042 // retrieve the key-value pair
1043 ARROW_ASSIGN_OR_RAISE(auto pair
, GetKeyValuePair(items
, i
));
1045 // validate that the key and the field name are equal
1046 PyObject
* name
= PyList_GET_ITEM(field_names
, i
);
1047 bool are_equal
= PyObject_RichCompareBool(pair
.first
, name
, Py_EQ
);
1048 RETURN_IF_PYERROR();
1050 // finally append to the respective child builder
1052 RETURN_NOT_OK(this->children_
[i
]->Append(pair
.second
));
1054 ARROW_ASSIGN_OR_RAISE(auto key_view
, PyBytesView::FromString(pair
.first
));
1055 ARROW_ASSIGN_OR_RAISE(auto name_view
, PyBytesView::FromString(name
));
1056 return Status::Invalid("The expected field name is `", name_view
.bytes
, "` but `",
1057 key_view
.bytes
, "` was given");
1060 // insert null values for missing fields
1061 for (int i
= length
; i
< num_fields_
; i
++) {
1062 RETURN_NOT_OK(this->children_
[i
]->AppendNull());
1064 return Status::OK();
  // Whether we're converting from a sequence of dicts or tuples or list of pairs
  enum class InputKind { UNKNOWN, DICT, TUPLE, ITEMS } input_kind_ = InputKind::UNKNOWN;
  // Whether the input dictionary keys' type is python bytes or unicode
  enum class KeyKind { UNKNOWN, BYTES, UNICODE } key_kind_ = KeyKind::UNKNOWN;
  // Store the field names as a PyObjects for dict matching
  OwnedRef bytes_field_names_;
  OwnedRef unicode_field_names_;
  // Store the number of fields for later reuse
1078 // Convert *obj* to a sequence if necessary
1079 // Fill *size* to its length. If >= 0 on entry, *size* is an upper size
1080 // bound that may lead to truncation.
1081 Status
ConvertToSequenceAndInferSize(PyObject
* obj
, PyObject
** seq
, int64_t* size
) {
1082 if (PySequence_Check(obj
)) {
1083 // obj is already a sequence
1084 int64_t real_size
= static_cast<int64_t>(PySequence_Size(obj
));
1088 *size
= std::min(real_size
, *size
);
1092 } else if (*size
< 0) {
1093 // unknown size, exhaust iterator
1094 *seq
= PySequence_List(obj
);
1095 RETURN_IF_PYERROR();
1096 *size
= static_cast<int64_t>(PyList_GET_SIZE(*seq
));
1098 // size is known but iterator could be infinite
1099 Py_ssize_t i
, n
= *size
;
1100 PyObject
* iter
= PyObject_GetIter(obj
);
1101 RETURN_IF_PYERROR();
1102 OwnedRef
iter_ref(iter
);
1103 PyObject
* lst
= PyList_New(n
);
1104 RETURN_IF_PYERROR();
1105 for (i
= 0; i
< n
; i
++) {
1106 PyObject
* item
= PyIter_Next(iter
);
1108 PyList_SET_ITEM(lst
, i
, item
);
1110 // Shrink list if len(iterator) < size
1111 if (i
< n
&& PyList_SetSlice(lst
, i
, n
, NULL
)) {
1113 return Status::UnknownError("failed to resize list");
1116 *size
= std::min
<int64_t>(i
, *size
);
1118 return Status::OK();
1123 Result
<std::shared_ptr
<ChunkedArray
>> ConvertPySequence(PyObject
* obj
, PyObject
* mask
,
1124 PyConversionOptions options
,
1129 OwnedRef tmp_seq_nanny
;
1131 ARROW_ASSIGN_OR_RAISE(auto is_pandas_imported
, internal::IsModuleImported("pandas"));
1132 if (is_pandas_imported
) {
1133 // If pandas has been already imported initialize the static pandas objects to
1134 // support converting from pd.Timedelta and pd.Timestamp objects
1135 internal::InitPandasStaticData();
1138 int64_t size
= options
.size
;
1139 RETURN_NOT_OK(ConvertToSequenceAndInferSize(obj
, &seq
, &size
));
1140 tmp_seq_nanny
.reset(seq
);
1142 // In some cases, type inference may be "loose", like strings. If the user
1143 // passed pa.string(), then we will error if we encounter any non-UTF8
1144 // value. If not, then we will allow the result to be a BinaryArray
1145 if (options
.type
== nullptr) {
1146 ARROW_ASSIGN_OR_RAISE(options
.type
, InferArrowType(seq
, mask
, options
.from_pandas
));
1147 options
.strict
= false;
1149 options
.strict
= true;
1153 ARROW_ASSIGN_OR_RAISE(auto converter
, (MakeConverter
<PyConverter
, PyConverterTrait
>(
1154 options
.type
, options
, pool
)));
1155 if (converter
->may_overflow()) {
1156 // The converter hierarchy contains binary- or list-like builders which can overflow
1157 // depending on the input values. Wrap the converter with a chunker which detects
1158 // the overflow and automatically creates new chunks.
1159 ARROW_ASSIGN_OR_RAISE(auto chunked_converter
, MakeChunker(std::move(converter
)));
1160 if (mask
!= nullptr && mask
!= Py_None
) {
1161 RETURN_NOT_OK(chunked_converter
->ExtendMasked(seq
, mask
, size
));
1163 RETURN_NOT_OK(chunked_converter
->Extend(seq
, size
));
1165 return chunked_converter
->ToChunkedArray();
1167 // If the converter can't overflow spare the capacity error checking on the hot-path,
1168 // this improves the performance roughly by ~10% for primitive types.
1169 if (mask
!= nullptr && mask
!= Py_None
) {
1170 RETURN_NOT_OK(converter
->ExtendMasked(seq
, mask
, size
));
1172 RETURN_NOT_OK(converter
->Extend(seq
, size
));
1174 return converter
->ToChunkedArray();
1179 } // namespace arrow