]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/python/pyarrow/scalar.pxi
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / python / pyarrow / scalar.pxi
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
16 # under the License.
17
18 import collections
19
20
cdef class Scalar(_Weakrefable):
    """
    Common base class of all scalar wrappers.

    An instance holds a ``shared_ptr[CScalar]`` in ``self.wrapped``.
    Instances are produced by ``Scalar.wrap`` (or ``pa.scalar()``); calling
    the constructor directly raises TypeError.
    """

    def __init__(self):
        message = ("Do not call {}'s constructor directly, use "
                   "pa.scalar() instead.")
        raise TypeError(message.format(self.__class__.__name__))

    cdef void init(self, const shared_ptr[CScalar]& wrapped):
        # Remember the C++ scalar this Python object exposes.
        self.wrapped = wrapped

    @staticmethod
    cdef wrap(const shared_ptr[CScalar]& wrapped):
        # Build the Python wrapper whose class matches the scalar's type id.
        cdef:
            Scalar instance
            Type type_id = wrapped.get().type.get().id()

        # Null-typed scalars are all represented by the module-level
        # singleton.
        if type_id == _Type_NA:
            return _NULL

        try:
            klass = _scalar_classes[type_id]
        except KeyError:
            raise NotImplementedError(
                "Wrapping scalar of type " +
                frombytes(wrapped.get().type.get().ToString()))

        # __new__ is used because Scalar.__init__ always raises.
        instance = klass.__new__(klass)
        instance.init(wrapped)
        return instance

    cdef inline shared_ptr[CScalar] unwrap(self) nogil:
        return self.wrapped

    @property
    def type(self):
        """
        The data type of this scalar.
        """
        return pyarrow_wrap_data_type(self.wrapped.get().type)

    @property
    def is_valid(self):
        """
        Whether the scalar holds a valid (non-null) value.
        """
        return self.wrapped.get().is_valid

    def cast(self, object target_type):
        """
        Attempt a safe cast of this scalar to the target data type.

        Parameters
        ----------
        target_type : object
            Anything accepted by ``ensure_type``.

        Returns
        -------
        Scalar
            The wrapped result of the C++ ``CastTo`` call.
        """
        cdef:
            DataType target = ensure_type(target_type)
            shared_ptr[CScalar] casted

        with nogil:
            casted = GetResultValue(
                self.wrapped.get().CastTo(target.sp_type))

        return Scalar.wrap(casted)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<pyarrow.{}: {!r}>'.format(class_name, self.as_py())

    def __str__(self):
        py_value = self.as_py()
        return str(py_value)

    def equals(self, Scalar other not None):
        """
        Compare with another Scalar using the C++ ``Equals`` method.
        """
        return self.wrapped.get().Equals(other.unwrap().get()[0])

    def __eq__(self, other):
        # equals() raises TypeError for anything that is not a Scalar
        # (including None); defer to the other operand in that case.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def __hash__(self):
        cdef CScalarHash hash_fn
        return hash_fn(self.wrapped)

    def __reduce__(self):
        # Pickle as a call to scalar(python_value, type).
        return scalar, (self.as_py(), self.type)

    def as_py(self):
        """
        Return the value as a Python object; subclasses must override.
        """
        raise NotImplementedError()
109
110
# Placeholders: NullScalar.__cinit__ checks NA to refuse a second instance,
# so the names must exist before the singleton is created below.
_NULL = NA = None


cdef class NullScalar(Scalar):
    """
    Scalar class for the null type; only one instance may exist.
    """

    def __cinit__(self):
        global NA
        # Once the module-level singleton has been assigned, any further
        # construction attempt is an error.
        if NA is not None:
            raise RuntimeError('Cannot create multiple NullScalar instances')
        self.init(shared_ptr[CScalar](new CNullScalar()))

    def __init__(self):
        # Overrides Scalar.__init__, which always raises.
        pass

    def as_py(self):
        """
        Return None, the Python representation of this scalar.
        """
        return None


# The single NullScalar instance, also returned by Scalar.wrap for null-typed
# scalars.
_NULL = NA = NullScalar()
136
137
cdef class BooleanScalar(Scalar):
    """
    Scalar class for boolean values.
    """

    def as_py(self):
        """
        Return the value as a Python bool, or None if the scalar is null.
        """
        cdef CBooleanScalar* raw = <CBooleanScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
149
150
cdef class UInt8Scalar(Scalar):
    """
    Scalar class for uint8 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CUInt8Scalar* raw = <CUInt8Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
162
163
cdef class Int8Scalar(Scalar):
    """
    Scalar class for int8 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CInt8Scalar* raw = <CInt8Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
175
176
cdef class UInt16Scalar(Scalar):
    """
    Scalar class for uint16 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CUInt16Scalar* raw = <CUInt16Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
188
189
cdef class Int16Scalar(Scalar):
    """
    Scalar class for int16 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CInt16Scalar* raw = <CInt16Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
201
202
cdef class UInt32Scalar(Scalar):
    """
    Scalar class for uint32 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CUInt32Scalar* raw = <CUInt32Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
214
215
cdef class Int32Scalar(Scalar):
    """
    Scalar class for int32 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CInt32Scalar* raw = <CInt32Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
227
228
cdef class UInt64Scalar(Scalar):
    """
    Scalar class for uint64 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CUInt64Scalar* raw = <CUInt64Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
240
241
cdef class Int64Scalar(Scalar):
    """
    Scalar class for int64 values.
    """

    def as_py(self):
        """
        Return the value as a Python int, or None if the scalar is null.
        """
        cdef CInt64Scalar* raw = <CInt64Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
253
254
cdef class HalfFloatScalar(Scalar):
    """
    Scalar class for half-float values.
    """

    def as_py(self):
        """
        Return the value converted through ``PyHalf_FromHalf``, or None if
        the scalar is null.
        """
        cdef CHalfFloatScalar* raw = <CHalfFloatScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return PyHalf_FromHalf(raw.value)
266
267
cdef class FloatScalar(Scalar):
    """
    Scalar class for float values.
    """

    def as_py(self):
        """
        Return the value as a Python float, or None if the scalar is null.
        """
        cdef CFloatScalar* raw = <CFloatScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
279
280
cdef class DoubleScalar(Scalar):
    """
    Scalar class for double values.
    """

    def as_py(self):
        """
        Return the value as a Python float, or None if the scalar is null.
        """
        cdef CDoubleScalar* raw = <CDoubleScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
292
293
cdef class Decimal128Scalar(Scalar):
    """
    Scalar class for decimal128 values.
    """

    def as_py(self):
        """
        Return the value as a Python Decimal, or None if the scalar is null.
        """
        cdef:
            CDecimal128Scalar* raw = <CDecimal128Scalar*> self.wrapped.get()
            CDecimal128Type* dec_type = <CDecimal128Type*> raw.type.get()

        if not raw.is_valid:
            return None
        # Render the value as text using the type's scale and let Decimal
        # parse it.
        text = frombytes(raw.value.ToString(dec_type.scale()))
        return _pydecimal.Decimal(text)
312
313
cdef class Decimal256Scalar(Scalar):
    """
    Scalar class for decimal256 values.
    """

    def as_py(self):
        """
        Return the value as a Python Decimal, or None if the scalar is null.
        """
        cdef:
            CDecimal256Scalar* raw = <CDecimal256Scalar*> self.wrapped.get()
            CDecimal256Type* dec_type = <CDecimal256Type*> raw.type.get()

        if not raw.is_valid:
            return None
        # Render the value as text using the type's scale and let Decimal
        # parse it.
        text = frombytes(raw.value.ToString(dec_type.scale()))
        return _pydecimal.Decimal(text)
332
333
cdef class Date32Scalar(Scalar):
    """
    Concrete class for date32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.date instance.

        Returns None if the scalar is null.
        """
        cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get()

        if not sp.is_valid:
            return None

        # The stored value is used as a number of days relative to
        # 1970-01-01.
        return datetime.date(1970, 1, 1) + datetime.timedelta(days=sp.value)
352
353
cdef class Date64Scalar(Scalar):
    """
    Concrete class for date64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.date instance.

        Returns None if the scalar is null.
        """
        cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get()

        if not sp.is_valid:
            return None

        # The stored value is divided by 86400000 (the number of
        # milliseconds in a day) to obtain days relative to 1970-01-01.
        return (
            datetime.date(1970, 1, 1) +
            datetime.timedelta(days=sp.value / 86400000)
        )
372
373
def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None):
    """
    Convert an integer offset from 1970-01-01 into a datetime object.

    Parameters
    ----------
    value : int64_t
        Offset from the epoch, expressed in ``unit``.
    unit : TimeUnit
        Resolution of ``value``.
    tzinfo : tzinfo, optional
        When given, the result is converted with ``tzinfo.fromutc``.

    Returns
    -------
    datetime.datetime, or a pandas Timestamp for the nanosecond unit when
    pandas is available.

    Raises
    ------
    ValueError
        For the nanosecond unit without pandas, when ``value`` is not a
        whole number of microseconds.
    """
    if unit == TimeUnit_SECOND:
        offset = datetime.timedelta(seconds=value)
    elif unit == TimeUnit_MILLI:
        offset = datetime.timedelta(milliseconds=value)
    elif unit == TimeUnit_MICRO:
        offset = datetime.timedelta(microseconds=value)
    else:
        # TimeUnit_NANO: pandas keeps the full resolution, so use it when
        # it is installed.
        if _pandas_api.have_pandas:
            return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns')
        # Without pandas only values that are exact microseconds can be
        # represented by datetime.datetime.
        if value % 1000 != 0:
            raise ValueError(
                "Nanosecond resolution temporal type {} is not safely "
                "convertible to microseconds to convert to datetime.datetime. "
                "Install pandas to return as Timestamp with nanosecond "
                "support or access the .value attribute.".format(value)
            )
        offset = datetime.timedelta(microseconds=value // 1000)

    result = datetime.datetime(1970, 1, 1) + offset
    if tzinfo is None:
        return result
    # Apply the timezone attached to the data type.
    return tzinfo.fromutc(result)
401
402
cdef class Time32Scalar(Scalar):
    """
    Concrete class for time32 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.time instance.

        Returns None if the scalar is null.
        """
        cdef:
            CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get()
            CTime32Type* dtype = <CTime32Type*> sp.type.get()

        if not sp.is_valid:
            return None

        # Interpret the value as an offset from the epoch in the type's
        # unit and keep only the time-of-day part.
        return _datetime_from_int(sp.value, unit=dtype.unit()).time()
420
421
cdef class Time64Scalar(Scalar):
    """
    Concrete class for time64 scalars.
    """

    def as_py(self):
        """
        Return this value as a Python datetime.time instance.

        Returns None if the scalar is null.
        """
        cdef:
            CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get()
            CTime64Type* dtype = <CTime64Type*> sp.type.get()

        if not sp.is_valid:
            return None

        # Interpret the value as an offset from the epoch in the type's
        # unit and keep only the time-of-day part.
        return _datetime_from_int(sp.value, unit=dtype.unit()).time()
439
440
cdef class TimestampScalar(Scalar):
    """
    Scalar class for timestamp values.
    """

    @property
    def value(self):
        """
        The raw integer value, or None if the scalar is null.
        """
        cdef CTimestampScalar* raw = <CTimestampScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value

    def as_py(self):
        """
        Return the value as a pandas Timestamp (nanosecond unit with pandas
        available) or otherwise as a Python datetime.datetime instance.

        Returns None if the scalar is null.
        """
        cdef:
            CTimestampScalar* raw = <CTimestampScalar*> self.wrapped.get()
            CTimestampType* ts_type = <CTimestampType*> raw.type.get()

        if not raw.is_valid:
            return None

        # An empty timezone string on the type means "no timezone".
        if ts_type.timezone().empty():
            tzinfo = None
        else:
            tzinfo = string_to_tzinfo(frombytes(ts_type.timezone()))

        return _datetime_from_int(
            raw.value, unit=ts_type.unit(), tzinfo=tzinfo)
470
471
cdef class DurationScalar(Scalar):
    """
    Scalar class for duration values.
    """

    @property
    def value(self):
        """
        The raw integer value, or None if the scalar is null.
        """
        cdef CDurationScalar* raw = <CDurationScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value

    def as_py(self):
        """
        Return the value as a pandas Timedelta (nanosecond unit with pandas
        available) or otherwise as a Python datetime.timedelta instance.

        Returns None if the scalar is null. Raises ValueError for a
        nanosecond value that is not a whole number of microseconds when
        pandas is not available.
        """
        cdef:
            CDurationScalar* raw = <CDurationScalar*> self.wrapped.get()
            CDurationType* dur_type = <CDurationType*> raw.type.get()
            TimeUnit unit = dur_type.unit()

        if not raw.is_valid:
            return None

        if unit == TimeUnit_SECOND:
            return datetime.timedelta(seconds=raw.value)
        if unit == TimeUnit_MILLI:
            return datetime.timedelta(milliseconds=raw.value)
        if unit == TimeUnit_MICRO:
            return datetime.timedelta(microseconds=raw.value)

        # TimeUnit_NANO: pandas keeps the full resolution, so use it when
        # it is installed.
        if _pandas_api.have_pandas:
            return _pandas_api.pd.Timedelta(raw.value, unit='ns')
        # Without pandas only exact microsecond values are representable.
        if raw.value % 1000 != 0:
            raise ValueError(
                "Nanosecond duration {} is not safely convertible to "
                "microseconds to convert to datetime.timedelta. Install "
                "pandas to return as Timedelta with nanosecond support or "
                "access the .value attribute.".format(raw.value)
            )
        return datetime.timedelta(microseconds=raw.value // 1000)
515
516
cdef class MonthDayNanoIntervalScalar(Scalar):
    """
    Scalar class for month, day, nanosecond interval values.
    """

    @property
    def value(self):
        """
        Alias for ``as_py()``.
        """
        return self.as_py()

    def as_py(self):
        """
        Return the value as a pyarrow.MonthDayNano.
        """
        cdef:
            PyObject* py_result
            CMonthDayNanoIntervalScalar* raw

        raw = <CMonthDayNanoIntervalScalar*>self.wrapped.get()
        # The conversion to a Python object is done by the C++ helper.
        py_result = GetResultValue(
            MonthDayNanoIntervalScalarToPyObject(deref(raw)))
        return PyObject_to_object(py_result)
540
541
cdef class BinaryScalar(Scalar):
    """
    Scalar class for binary-like values.
    """

    def as_buffer(self):
        """
        Return the value as a Buffer object, or None if the scalar is null.
        """
        cdef CBaseBinaryScalar* raw = <CBaseBinaryScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return pyarrow_wrap_buffer(raw.value)

    def as_py(self):
        """
        Return the value as Python bytes, or None if the scalar is null.
        """
        buf = self.as_buffer()
        if buf is None:
            return None
        return buf.to_pybytes()
560
561
cdef class LargeBinaryScalar(BinaryScalar):
    # Registered for the large binary type id; all behaviour is inherited
    # from BinaryScalar.
    pass
564
565
cdef class FixedSizeBinaryScalar(BinaryScalar):
    # Registered for the fixed-size binary type id; all behaviour is
    # inherited from BinaryScalar.
    pass
568
569
cdef class StringScalar(BinaryScalar):
    """
    Scalar class for string-like (utf8) values.
    """

    def as_py(self):
        """
        Return the value as a Python str, or None if the scalar is null.
        """
        buf = self.as_buffer()
        if buf is None:
            return None
        # Decode the underlying bytes as UTF-8.
        return str(buf, 'utf8')
581
582
cdef class LargeStringScalar(StringScalar):
    # Registered for the large string type id; all behaviour is inherited
    # from StringScalar.
    pass
585
586
cdef class ListScalar(Scalar):
    """
    Scalar class for list-like values.
    """

    @property
    def values(self):
        """
        The list's elements as a wrapped array, or None if the scalar is
        null.
        """
        cdef CBaseListScalar* raw = <CBaseListScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return pyarrow_wrap_array(raw.value)

    def __len__(self):
        """
        Return the number of values.
        """
        elements = self.values
        return len(elements)

    def __getitem__(self, i):
        """
        Return the value at the given index.
        """
        elements = self.values
        return elements[_normalize_index(i, len(self))]

    def __iter__(self):
        """
        Iterate over the values of this list.
        """
        elements = self.values
        return iter(elements)

    def as_py(self):
        """
        Return the value as a Python list, or None if the scalar is null.
        """
        elements = self.values
        if elements is None:
            return None
        return elements.to_pylist()
624
625
cdef class FixedSizeListScalar(ListScalar):
    # Registered for the fixed-size list type id; all behaviour is inherited
    # from ListScalar.
    pass
628
629
cdef class LargeListScalar(ListScalar):
    # Registered for the large list type id; all behaviour is inherited
    # from ListScalar.
    pass
632
633
cdef class StructScalar(Scalar, collections.abc.Mapping):
    """
    Scalar class for struct values, exposed as a mapping from field name
    to child scalar.
    """

    def __len__(self):
        cdef CStructScalar* raw = <CStructScalar*> self.wrapped.get()
        return raw.value.size()

    def __iter__(self):
        # Yields the field names in the order declared by the struct type.
        cdef:
            CStructScalar* raw = <CStructScalar*> self.wrapped.get()
            CStructType* struct_type = <CStructType*> raw.type.get()
            vector[shared_ptr[CField]] fields = struct_type.fields()

        for pos in range(struct_type.num_fields()):
            yield frombytes(fields[pos].get().name())

    def items(self):
        # Children are looked up by position rather than by name.
        return ((name, self[pos]) for pos, name in enumerate(self))

    def __contains__(self, key):
        names = list(self)
        return key in names

    def __getitem__(self, key):
        """
        Return the child value for the given field.

        Parameters
        ----------
        key : Union[int, str]
            Index / position or name of the field.

        Returns
        -------
        result : Scalar

        Raises
        ------
        TypeError
            If ``key`` is neither an integer nor a string/bytes.
        IndexError
            If an integer ``key`` cannot be resolved.
        KeyError
            If a name ``key`` cannot be resolved.
        """
        cdef:
            CFieldRef ref
            CStructScalar* raw = <CStructScalar*> self.wrapped.get()

        if isinstance(key, (bytes, str)):
            ref = CFieldRef(<c_string> tobytes(key))
        elif isinstance(key, int):
            ref = CFieldRef(<int> key)
        else:
            raise TypeError('Expected integer or string index')

        try:
            return Scalar.wrap(GetResultValue(raw.field(ref)))
        except ArrowInvalid as exc:
            # Report the failure with the exception type matching the kind
            # of key that was used.
            if isinstance(key, int):
                raise IndexError(key) from exc
            else:
                raise KeyError(key) from exc

    def as_py(self):
        """
        Return the value as a Python dict, or None if the scalar is null.

        Raises ValueError when a lookup by field name fails with KeyError.
        """
        if not self.is_valid:
            return None
        try:
            return {name: self[name].as_py() for name in self.keys()}
        except KeyError:
            raise ValueError(
                "Converting to Python dictionary is not supported when "
                "duplicate field names are present")

    def _as_py_tuple(self):
        # Variant of as_py() used by repr/str: returns a list of
        # (name, value) pairs so that duplicate field names can be shown.
        if not self.is_valid:
            return None
        return [(name, self[pos].as_py()) for pos, name in enumerate(self)]

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<pyarrow.{}: {!r}>'.format(class_name, self._as_py_tuple())

    def __str__(self):
        pairs = self._as_py_tuple()
        return str(pairs)
719
720
cdef class MapScalar(ListScalar):
    """
    Concrete class for map scalars.
    """

    def __getitem__(self, i):
        """
        Return the (key, value) pair at the given index.

        Raises IndexError if the scalar is null.
        """
        arr = self.values
        if arr is None:
            raise IndexError(i)
        dct = arr[_normalize_index(i, len(arr))]
        return (dct['key'], dct['value'])

    def __iter__(self):
        """
        Iterate over this element's (key, value) pairs as Python objects.

        A null scalar yields nothing.
        """
        arr = self.values
        # Test the local `arr`, not the module-level `array` function, and
        # end the generator with a plain return: raising StopIteration
        # inside a generator is converted to RuntimeError (PEP 479).
        if arr is None:
            return
        for k, v in zip(arr.field('key'), arr.field('value')):
            yield (k.as_py(), v.as_py())

    def as_py(self):
        """
        Return this value as a Python list of (key, value) tuples, or None
        if the scalar is null.
        """
        # Validity is read through the base-class property instead of
        # casting the wrapped pointer to an unrelated scalar type.
        return list(self) if self.is_valid else None
752
753
cdef class DictionaryScalar(Scalar):
    """
    Concrete class for dictionary-encoded scalars.
    """

    @classmethod
    def _reconstruct(cls, type, is_valid, index, dictionary):
        """
        Rebuild a DictionaryScalar from its parts (used by ``__reduce__``).

        Parameters
        ----------
        type : object
            Anything ``ensure_type`` resolves to a DictionaryType.
        is_valid : bool
            Validity flag of the resulting scalar.
        index : Scalar or object
            The index; a non-Scalar is converted with ``scalar()`` using
            the dictionary type's index_type.
        dictionary : Array or object
            The dictionary; a non-Array is converted with ``array()`` using
            the dictionary type's value_type.

        Raises
        ------
        TypeError
            If ``type`` is not a dictionary type, or a passed Scalar/Array
            does not match the dictionary type's index/value type.
        """
        cdef:
            CDictionaryScalarIndexAndDictionary value
            shared_ptr[CDictionaryScalar] wrapped
            DataType type_
            Scalar index_
            Array dictionary_

        type_ = ensure_type(type, allow_none=False)
        if not isinstance(type_, DictionaryType):
            raise TypeError('Must pass a DictionaryType instance')

        # All checks below use the validated `type_`, not the raw `type`
        # argument.
        if isinstance(index, Scalar):
            if not index.type.equals(type_.index_type):
                raise TypeError("The Scalar value passed as index must have "
                                "identical type to the dictionary type's "
                                "index_type")
            index_ = index
        else:
            index_ = scalar(index, type=type_.index_type)

        if isinstance(dictionary, Array):
            if not dictionary.type.equals(type_.value_type):
                raise TypeError("The Array passed as dictionary must have "
                                "identical type to the dictionary type's "
                                "value_type")
            dictionary_ = dictionary
        else:
            dictionary_ = array(dictionary, type=type_.value_type)

        value.index = pyarrow_unwrap_scalar(index_)
        value.dictionary = pyarrow_unwrap_array(dictionary_)

        wrapped = make_shared[CDictionaryScalar](
            value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
        )
        return Scalar.wrap(<shared_ptr[CScalar]> wrapped)

    def __reduce__(self):
        # Pickle index and dictionary separately so that the encoding is
        # preserved.
        return DictionaryScalar._reconstruct, (
            self.type, self.is_valid, self.index, self.dictionary
        )

    @property
    def index(self):
        """
        Return this value's underlying index as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(sp.value.index)

    @property
    def value(self):
        """
        Return the encoded value as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(GetResultValue(sp.GetEncodedValue()))

    @property
    def dictionary(self):
        """
        Return the dictionary of this scalar as an array.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return pyarrow_wrap_array(sp.value.dictionary)

    def as_py(self):
        """
        Return this encoded value as a Python object, or None if the scalar
        is null.
        """
        return self.value.as_py() if self.is_valid else None

    @property
    def index_value(self):
        """
        Deprecated alias of ``index``; emits a FutureWarning.
        """
        # The ", " separator was missing, which produced "1.0.0please".
        warnings.warn("`index_value` property is deprecated as of 1.0.0, "
                      "please use the `index` property instead",
                      FutureWarning)
        return self.index

    @property
    def dictionary_value(self):
        """
        Deprecated alias of ``value``; emits a FutureWarning.
        """
        warnings.warn("`dictionary_value` property is deprecated as of 1.0.0, "
                      "please use the `value` property instead", FutureWarning)
        return self.value
842
843
cdef class UnionScalar(Scalar):
    """
    Scalar class for union values.
    """

    @property
    def value(self):
        """
        The underlying value as a scalar, or None if this scalar is null.
        """
        cdef CUnionScalar* raw = <CUnionScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return Scalar.wrap(raw.value)

    def as_py(self):
        """
        Return the underlying value as a Python object, or None if this
        scalar is null.
        """
        inner = self.value
        if inner is None:
            return None
        return inner.as_py()

    @property
    def type_code(self):
        """
        The union type code of this scalar.
        """
        cdef CUnionScalar* raw = <CUnionScalar*> self.wrapped.get()
        return raw.type_code
871
872
cdef class ExtensionScalar(Scalar):
    """
    Scalar class for extension-type values.
    """

    @property
    def value(self):
        """
        The storage value as a scalar, or None if this scalar is null.
        """
        cdef CExtensionScalar* raw = <CExtensionScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return Scalar.wrap(raw.value)

    def as_py(self):
        """
        Return the storage value as a Python object, or None if this scalar
        is null.
        """
        # XXX should there be a hook to wrap the result in a custom class?
        storage = self.value
        if storage is None:
            return None
        return storage.as_py()

    @staticmethod
    def from_storage(BaseExtensionType typ, value):
        """
        Build an ExtensionScalar from an extension type and a storage value.

        Parameters
        ----------
        typ : DataType
            The extension type for the result scalar.
        value : object
            The storage value for the result scalar. None gives a null
            scalar; a Scalar must already have the extension's storage
            type; anything else is converted with ``scalar()``.

        Returns
        -------
        ext_scalar : ExtensionScalar

        Raises
        ------
        TypeError
            If ``value`` is a Scalar whose type differs from
            ``typ.storage_type``.
        """
        cdef:
            shared_ptr[CExtensionScalar] sp_ext
            CExtensionScalar* ext_ptr

        if value is None:
            storage = None
        elif isinstance(value, Scalar):
            if value.type != typ.storage_type:
                raise TypeError("Incompatible storage type {0} "
                                "for extension type {1}"
                                .format(value.type, typ))
            storage = value
        else:
            storage = scalar(value, typ.storage_type)

        sp_ext = make_shared[CExtensionScalar](typ.sp_type)
        ext_ptr = sp_ext.get()
        # The result is valid only when a valid storage scalar was obtained.
        ext_ptr.is_valid = storage is not None and storage.is_valid
        if ext_ptr.is_valid:
            ext_ptr.value = pyarrow_unwrap_scalar(storage)
        check_status(ext_ptr.Validate())
        return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_ext)
932
933
# Lookup table used by Scalar.wrap: type id -> Python scalar class.
cdef dict _scalar_classes = {
    # boolean and integers
    _Type_BOOL: BooleanScalar,
    _Type_UINT8: UInt8Scalar,
    _Type_UINT16: UInt16Scalar,
    _Type_UINT32: UInt32Scalar,
    _Type_UINT64: UInt64Scalar,
    _Type_INT8: Int8Scalar,
    _Type_INT16: Int16Scalar,
    _Type_INT32: Int32Scalar,
    _Type_INT64: Int64Scalar,
    # floating point and decimals
    _Type_HALF_FLOAT: HalfFloatScalar,
    _Type_FLOAT: FloatScalar,
    _Type_DOUBLE: DoubleScalar,
    _Type_DECIMAL128: Decimal128Scalar,
    _Type_DECIMAL256: Decimal256Scalar,
    # temporal
    _Type_DATE32: Date32Scalar,
    _Type_DATE64: Date64Scalar,
    _Type_TIME32: Time32Scalar,
    _Type_TIME64: Time64Scalar,
    _Type_TIMESTAMP: TimestampScalar,
    _Type_DURATION: DurationScalar,
    # binary and string
    _Type_BINARY: BinaryScalar,
    _Type_LARGE_BINARY: LargeBinaryScalar,
    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
    _Type_STRING: StringScalar,
    _Type_LARGE_STRING: LargeStringScalar,
    # nested
    _Type_LIST: ListScalar,
    _Type_LARGE_LIST: LargeListScalar,
    _Type_FIXED_SIZE_LIST: FixedSizeListScalar,
    _Type_STRUCT: StructScalar,
    _Type_MAP: MapScalar,
    _Type_DICTIONARY: DictionaryScalar,
    # both union type ids share one wrapper class
    _Type_SPARSE_UNION: UnionScalar,
    _Type_DENSE_UNION: UnionScalar,
    _Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar,
    _Type_EXTENSION: ExtensionScalar,
}
971
972
def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
    """
    Create a pyarrow.Scalar instance from a Python object.

    Parameters
    ----------
    value : Any
        Python object coercible to arrow's type system.
    type : pyarrow.DataType
        Explicit type to attempt to coerce to, otherwise will be inferred from
        the value.
    from_pandas : bool, default None
        Use pandas's semantics for inferring nulls from values in
        ndarray-like data. Defaults to False if not passed explicitly by user,
        or True if a pandas object is passed in.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the currently-set default
        memory pool.

    Returns
    -------
    scalar : pyarrow.Scalar

    Examples
    --------
    >>> import pyarrow as pa

    >>> pa.scalar(42)
    <pyarrow.Int64Scalar: 42>

    >>> pa.scalar("string")
    <pyarrow.StringScalar: 'string'>

    >>> pa.scalar([1, 2])
    <pyarrow.ListScalar: [1, 2]>

    >>> pa.scalar([1, 2], type=pa.list_(pa.int16()))
    <pyarrow.ListScalar: [1, 2]>
    """
    cdef:
        DataType ty
        PyConversionOptions options
        shared_ptr[CScalar] sp_scalar
        shared_ptr[CArray] sp_array
        shared_ptr[CChunkedArray] sp_chunked
        bint is_pandas_object = False
        CMemoryPool* pool

    type = ensure_type(type, allow_none=True)
    pool = maybe_unbox_memory_pool(memory_pool)

    if _is_array_like(value):
        # get_values also reports whether the input was a pandas object.
        value = get_values(value, &is_pandas_object)

    # Exactly one element is converted.
    options.size = 1

    if type is not None:
        ty = ensure_type(type)
        options.type = ty.sp_type

    # An explicit from_pandas wins; otherwise follow what was detected.
    if from_pandas is not None:
        options.from_pandas = from_pandas
    else:
        options.from_pandas = is_pandas_object

    # The value is converted as a one-element sequence and the scalar is
    # then read back from position 0.
    value = [value]
    with nogil:
        sp_chunked = GetResultValue(
            ConvertPySequence(value, None, options, pool))

    # A single-element conversion must produce a single chunk.
    assert sp_chunked.get().num_chunks() == 1
    sp_array = sp_chunked.get().chunk(0)

    sp_scalar = GetResultValue(sp_array.get().GetScalar(0))
    return Scalar.wrap(sp_scalar)