]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, | |
12 | # software distributed under the License is distributed on an | |
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | # KIND, either express or implied. See the License for the | |
15 | # specific language governing permissions and limitations | |
16 | # under the License. | |
17 | ||
18 | import collections | |
19 | ||
20 | ||
cdef class Scalar(_Weakrefable):
    """
    The base class for scalars.

    An instance holds a ``shared_ptr[CScalar]`` in ``self.wrapped``.
    Instances are produced by ``Scalar.wrap`` (or ``pa.scalar()``), not by
    calling the constructor.
    """

    def __init__(self):
        # Direct construction is rejected; ``wrap`` sidesteps this by
        # instantiating through ``typ.__new__(typ)``.
        raise TypeError("Do not call {}'s constructor directly, use "
                        "pa.scalar() instead.".format(self.__class__.__name__))

    cdef void init(self, const shared_ptr[CScalar]& wrapped):
        # Attach the C++ scalar that this Python object exposes.
        self.wrapped = wrapped

    @staticmethod
    cdef wrap(const shared_ptr[CScalar]& wrapped):
        # Build the Python wrapper whose class matches the scalar's type id.
        # Null-typed scalars map to the module-level _NULL object; a type id
        # without an entry in _scalar_classes raises NotImplementedError.
        cdef:
            Scalar self
            Type type_id = wrapped.get().type.get().id()

        if type_id == _Type_NA:
            return _NULL

        try:
            typ = _scalar_classes[type_id]
        except KeyError:
            raise NotImplementedError(
                "Wrapping scalar of type " +
                frombytes(wrapped.get().type.get().ToString()))
        # __new__ is used so that the TypeError-raising __init__ is skipped.
        self = typ.__new__(typ)
        self.init(wrapped)

        return self

    cdef inline shared_ptr[CScalar] unwrap(self) nogil:
        # Hand back the wrapped C++ scalar.
        return self.wrapped

    @property
    def type(self):
        """
        Data type of the Scalar object.
        """
        return pyarrow_wrap_data_type(self.wrapped.get().type)

    @property
    def is_valid(self):
        """
        Holds a valid (non-null) value.
        """
        return self.wrapped.get().is_valid

    def cast(self, object target_type):
        """
        Attempt a safe cast to target data type.

        Parameters
        ----------
        target_type : object
            Anything accepted by ``ensure_type``.

        Returns
        -------
        scalar : Scalar
            A new wrapped scalar holding the result of ``CastTo``.
        """
        cdef:
            DataType type = ensure_type(target_type)
            shared_ptr[CScalar] result

        # The C++ cast runs with the GIL released.
        with nogil:
            result = GetResultValue(self.wrapped.get().CastTo(type.sp_type))

        return Scalar.wrap(result)

    def __repr__(self):
        return '<pyarrow.{}: {!r}>'.format(
            self.__class__.__name__, self.as_py()
        )

    def __str__(self):
        return str(self.as_py())

    def equals(self, Scalar other not None):
        """
        Compare with another Scalar using the C++ ``Equals`` method.

        Parameters
        ----------
        other : Scalar
            Must be a Scalar instance and not None.
        """
        return self.wrapped.get().Equals(other.unwrap().get()[0])

    def __eq__(self, other):
        # A TypeError from equals() (e.g. ``other`` is not a Scalar) is
        # turned into NotImplemented so Python can try the reflected
        # comparison.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def __hash__(self):
        # Hash is computed from the wrapped C++ scalar via CScalarHash.
        cdef CScalarHash hasher
        return hasher(self.wrapped)

    def __reduce__(self):
        # Pickle as a call to scalar(python_value, type).
        return scalar, (self.as_py(), self.type)

    def as_py(self):
        """
        Return this value as a Python object.

        The base implementation raises NotImplementedError; the concrete
        scalar classes override it.
        """
        raise NotImplementedError()
109 | ||
110 | ||
# Placeholder bindings: NullScalar.__cinit__ reads the global NA, so the name
# has to exist (as None) before the single instance is created further down.
_NULL = NA = None


cdef class NullScalar(Scalar):
    """
    Concrete class for null scalars.

    Creating an instance fails with RuntimeError once the module-level
    ``NA`` is no longer None.
    """

    def __cinit__(self):
        global NA
        # NA stays None only until the instance below has been assigned.
        if NA is not None:
            raise RuntimeError('Cannot create multiple NullScalar instances')
        self.init(shared_ptr[CScalar](new CNullScalar()))

    def __init__(self):
        # Overrides Scalar.__init__ (which raises TypeError) so that the
        # plain NullScalar() call below succeeds.
        pass

    def as_py(self):
        """
        Return this value as a Python None.
        """
        return None


# The NullScalar instance; Scalar.wrap returns _NULL for null-typed scalars.
_NULL = NA = NullScalar()
136 | ||
137 | ||
cdef class BooleanScalar(Scalar):
    """
    Scalar wrapping a single boolean value.
    """

    def as_py(self):
        """
        Convert to a Python bool; a null scalar yields None.
        """
        cdef CBooleanScalar* raw = <CBooleanScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
149 | ||
150 | ||
cdef class UInt8Scalar(Scalar):
    """
    Scalar wrapping a single uint8 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CUInt8Scalar* raw = <CUInt8Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
162 | ||
163 | ||
cdef class Int8Scalar(Scalar):
    """
    Scalar wrapping a single int8 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CInt8Scalar* raw = <CInt8Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
175 | ||
176 | ||
cdef class UInt16Scalar(Scalar):
    """
    Scalar wrapping a single uint16 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CUInt16Scalar* raw = <CUInt16Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
188 | ||
189 | ||
cdef class Int16Scalar(Scalar):
    """
    Scalar wrapping a single int16 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CInt16Scalar* raw = <CInt16Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
201 | ||
202 | ||
cdef class UInt32Scalar(Scalar):
    """
    Scalar wrapping a single uint32 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CUInt32Scalar* raw = <CUInt32Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
214 | ||
215 | ||
cdef class Int32Scalar(Scalar):
    """
    Scalar wrapping a single int32 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CInt32Scalar* raw = <CInt32Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
227 | ||
228 | ||
cdef class UInt64Scalar(Scalar):
    """
    Scalar wrapping a single uint64 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CUInt64Scalar* raw = <CUInt64Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
240 | ||
241 | ||
cdef class Int64Scalar(Scalar):
    """
    Scalar wrapping a single int64 value.
    """

    def as_py(self):
        """
        Convert to a Python int; a null scalar yields None.
        """
        cdef CInt64Scalar* raw = <CInt64Scalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
253 | ||
254 | ||
cdef class HalfFloatScalar(Scalar):
    """
    Scalar wrapping a single half-precision float value.
    """

    def as_py(self):
        """
        Convert to a Python object via PyHalf_FromHalf; a null scalar
        yields None.
        """
        cdef CHalfFloatScalar* raw = <CHalfFloatScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return PyHalf_FromHalf(raw.value)
266 | ||
267 | ||
cdef class FloatScalar(Scalar):
    """
    Scalar wrapping a single float value.
    """

    def as_py(self):
        """
        Convert to a Python float; a null scalar yields None.
        """
        cdef CFloatScalar* raw = <CFloatScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
279 | ||
280 | ||
cdef class DoubleScalar(Scalar):
    """
    Scalar wrapping a single double value.
    """

    def as_py(self):
        """
        Convert to a Python float; a null scalar yields None.
        """
        cdef CDoubleScalar* raw = <CDoubleScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value
292 | ||
293 | ||
cdef class Decimal128Scalar(Scalar):
    """
    Scalar wrapping a single decimal128 value.
    """

    def as_py(self):
        """
        Convert to a Python Decimal; a null scalar yields None.
        """
        cdef:
            CDecimal128Scalar* raw = <CDecimal128Scalar*> self.wrapped.get()
            CDecimal128Type* dec_type = <CDecimal128Type*> raw.type.get()
        if not raw.is_valid:
            return None
        # Render with the type's scale, then parse the text as a Decimal.
        text = frombytes(raw.value.ToString(dec_type.scale()))
        return _pydecimal.Decimal(text)
312 | ||
313 | ||
cdef class Decimal256Scalar(Scalar):
    """
    Scalar wrapping a single decimal256 value.
    """

    def as_py(self):
        """
        Convert to a Python Decimal; a null scalar yields None.
        """
        cdef:
            CDecimal256Scalar* raw = <CDecimal256Scalar*> self.wrapped.get()
            CDecimal256Type* dec_type = <CDecimal256Type*> raw.type.get()
        if not raw.is_valid:
            return None
        # Render with the type's scale, then parse the text as a Decimal.
        text = frombytes(raw.value.ToString(dec_type.scale()))
        return _pydecimal.Decimal(text)
332 | ||
333 | ||
cdef class Date32Scalar(Scalar):
    """
    Scalar wrapping a single date32 value.
    """

    def as_py(self):
        """
        Convert to a Python datetime.date; a null scalar yields None.
        """
        cdef CDate32Scalar* raw = <CDate32Scalar*> self.wrapped.get()

        if not raw.is_valid:
            return None
        # the stored value is applied as a day offset from 1970-01-01
        epoch = datetime.date(1970, 1, 1)
        return epoch + datetime.timedelta(days=raw.value)
352 | ||
353 | ||
cdef class Date64Scalar(Scalar):
    """
    Scalar wrapping a single date64 value.
    """

    def as_py(self):
        """
        Convert to a Python datetime.date; a null scalar yields None.
        """
        cdef CDate64Scalar* raw = <CDate64Scalar*> self.wrapped.get()

        if not raw.is_valid:
            return None
        # 86400000 is the number of milliseconds in a day, so the stored
        # value is scaled down to a day offset from 1970-01-01
        epoch = datetime.date(1970, 1, 1)
        return epoch + datetime.timedelta(days=raw.value / 86400000)
372 | ||
373 | ||
def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None):
    """
    Turn an integer offset from 1970-01-01 into a datetime-like object.

    Parameters
    ----------
    value : int64_t
        Offset from the epoch, expressed in ``unit``.
    unit : TimeUnit
        Resolution of ``value``.
    tzinfo : datetime.tzinfo, optional
        When given, the result is converted with ``tzinfo.fromutc``.

    Returns
    -------
    datetime.datetime, or a pandas Timestamp for nanosecond values when
    pandas is available.

    Raises
    ------
    ValueError
        For nanosecond values that are not a whole number of microseconds
        when pandas is not available.
    """
    if unit == TimeUnit_SECOND:
        offset = datetime.timedelta(seconds=value)
    elif unit == TimeUnit_MILLI:
        offset = datetime.timedelta(milliseconds=value)
    elif unit == TimeUnit_MICRO:
        offset = datetime.timedelta(microseconds=value)
    elif _pandas_api.have_pandas:
        # TimeUnit_NANO: pandas keeps the full nanosecond resolution
        return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns')
    elif value % 1000 != 0:
        # TimeUnit_NANO without pandas: refuse to drop sub-microsecond digits
        raise ValueError(
            "Nanosecond resolution temporal type {} is not safely "
            "convertible to microseconds to convert to datetime.datetime. "
            "Install pandas to return as Timestamp with nanosecond "
            "support or access the .value attribute.".format(value)
        )
    else:
        offset = datetime.timedelta(microseconds=value // 1000)

    result = datetime.datetime(1970, 1, 1) + offset
    # apply the timezone of the datatype, if one was supplied
    if tzinfo is not None:
        result = tzinfo.fromutc(result)

    return result
401 | ||
402 | ||
cdef class Time32Scalar(Scalar):
    """
    Scalar wrapping a single time32 value.
    """

    def as_py(self):
        """
        Convert to a time-of-day object (the ``.time()`` of the value
        interpreted as an offset from the epoch); a null scalar yields None.
        """
        cdef:
            CTime32Scalar* raw = <CTime32Scalar*> self.wrapped.get()
            CTime32Type* time_type = <CTime32Type*> raw.type.get()

        if not raw.is_valid:
            return None
        moment = _datetime_from_int(raw.value, unit=time_type.unit())
        return moment.time()
420 | ||
421 | ||
cdef class Time64Scalar(Scalar):
    """
    Scalar wrapping a single time64 value.
    """

    def as_py(self):
        """
        Convert to a time-of-day object (the ``.time()`` of the value
        interpreted as an offset from the epoch); a null scalar yields None.
        """
        cdef:
            CTime64Scalar* raw = <CTime64Scalar*> self.wrapped.get()
            CTime64Type* time_type = <CTime64Type*> raw.type.get()

        if not raw.is_valid:
            return None
        moment = _datetime_from_int(raw.value, unit=time_type.unit())
        return moment.time()
439 | ||
440 | ||
cdef class TimestampScalar(Scalar):
    """
    Scalar wrapping a single timestamp value.
    """

    @property
    def value(self):
        """
        The raw integer stored in the scalar, or None when it is null.
        """
        cdef CTimestampScalar* raw = <CTimestampScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value

    def as_py(self):
        """
        Convert to a pandas Timestamp (nanosecond unit with pandas
        available) or otherwise a Python datetime.datetime; a null scalar
        yields None.
        """
        cdef:
            CTimestampScalar* raw = <CTimestampScalar*> self.wrapped.get()
            CTimestampType* ts_type = <CTimestampType*> raw.type.get()

        if not raw.is_valid:
            return None

        # Only resolve a tzinfo when the type carries a timezone string.
        if ts_type.timezone().empty():
            tzinfo = None
        else:
            tzinfo = string_to_tzinfo(frombytes(ts_type.timezone()))

        return _datetime_from_int(
            raw.value, unit=ts_type.unit(), tzinfo=tzinfo)
470 | ||
471 | ||
cdef class DurationScalar(Scalar):
    """
    Scalar wrapping a single duration value.
    """

    @property
    def value(self):
        """
        The raw integer stored in the scalar, or None when it is null.
        """
        cdef CDurationScalar* raw = <CDurationScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return raw.value

    def as_py(self):
        """
        Convert to a pandas Timedelta (nanosecond unit with pandas
        available) or otherwise a Python datetime.timedelta; a null scalar
        yields None.
        """
        cdef:
            CDurationScalar* raw = <CDurationScalar*> self.wrapped.get()
            CDurationType* dur_type = <CDurationType*> raw.type.get()
            TimeUnit resolution = dur_type.unit()

        if not raw.is_valid:
            return None

        if resolution == TimeUnit_SECOND:
            return datetime.timedelta(seconds=raw.value)
        if resolution == TimeUnit_MILLI:
            return datetime.timedelta(milliseconds=raw.value)
        if resolution == TimeUnit_MICRO:
            return datetime.timedelta(microseconds=raw.value)

        # TimeUnit_NANO: pandas keeps the full nanosecond resolution
        if _pandas_api.have_pandas:
            return _pandas_api.pd.Timedelta(raw.value, unit='ns')
        # without pandas, refuse to drop sub-microsecond digits
        if raw.value % 1000 != 0:
            raise ValueError(
                "Nanosecond duration {} is not safely convertible to "
                "microseconds to convert to datetime.timedelta. Install "
                "pandas to return as Timedelta with nanosecond support or "
                "access the .value attribute.".format(raw.value)
            )
        return datetime.timedelta(microseconds=raw.value // 1000)
515 | ||
516 | ||
cdef class MonthDayNanoIntervalScalar(Scalar):
    """
    Concrete class for month, day, nanosecond interval scalars.
    """

    @property
    def value(self):
        """
        Same as self.as_py()
        """
        return self.as_py()

    def as_py(self):
        """
        Return this value as a pyarrow.MonthDayNano.
        """
        cdef:
            PyObject* val
            CMonthDayNanoIntervalScalar* scalar
        scalar = <CMonthDayNanoIntervalScalar*>self.wrapped.get()
        # The C++ helper builds the Python object and hands it back as a raw
        # PyObject*, which PyObject_to_object turns into a Python reference.
        # NOTE(review): validity is not checked here; presumably the helper
        # deals with null scalars - confirm.
        val = GetResultValue(MonthDayNanoIntervalScalarToPyObject(
            deref(scalar)))
        return PyObject_to_object(val)
540 | ||
541 | ||
cdef class BinaryScalar(Scalar):
    """
    Scalar wrapping a single binary-like value.
    """

    def as_buffer(self):
        """
        Expose the value as a Buffer object; a null scalar yields None.
        """
        cdef CBaseBinaryScalar* raw = <CBaseBinaryScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return pyarrow_wrap_buffer(raw.value)

    def as_py(self):
        """
        Convert to Python bytes; a null scalar yields None.
        """
        buf = self.as_buffer()
        if buf is None:
            return None
        return buf.to_pybytes()
560 | ||
561 | ||
cdef class LargeBinaryScalar(BinaryScalar):
    """
    Concrete class for large binary scalars; adds nothing to BinaryScalar.
    """
    pass
564 | ||
565 | ||
cdef class FixedSizeBinaryScalar(BinaryScalar):
    """
    Concrete class for fixed-size binary scalars; adds nothing to
    BinaryScalar.
    """
    pass
568 | ||
569 | ||
cdef class StringScalar(BinaryScalar):
    """
    Scalar wrapping a single string-like (utf8) value.
    """

    def as_py(self):
        """
        Convert to a Python string by decoding the buffer as utf8; a null
        scalar yields None.
        """
        buf = self.as_buffer()
        if buf is None:
            return None
        return str(buf, 'utf8')
581 | ||
582 | ||
cdef class LargeStringScalar(StringScalar):
    """
    Concrete class for large string scalars; adds nothing to StringScalar.
    """
    pass
585 | ||
586 | ||
cdef class ListScalar(Scalar):
    """
    Scalar wrapping a single list-like value.
    """

    @property
    def values(self):
        """
        The list's elements as an Array, or None when the scalar is null.
        """
        cdef CBaseListScalar* raw = <CBaseListScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return pyarrow_wrap_array(raw.value)

    def __len__(self):
        """
        Number of elements in ``values``.
        """
        elements = self.values
        return len(elements)

    def __getitem__(self, i):
        """
        Element of ``values`` at position ``i``.
        """
        elements = self.values
        position = _normalize_index(i, len(self))
        return elements[position]

    def __iter__(self):
        """
        Iterator over the elements of ``values``.
        """
        elements = self.values
        return iter(elements)

    def as_py(self):
        """
        Convert to a Python list; a null scalar yields None.
        """
        elements = self.values
        if elements is None:
            return None
        return elements.to_pylist()
624 | ||
625 | ||
cdef class FixedSizeListScalar(ListScalar):
    """
    Concrete class for fixed-size list scalars; adds nothing to ListScalar.
    """
    pass
628 | ||
629 | ||
cdef class LargeListScalar(ListScalar):
    """
    Concrete class for large list scalars; adds nothing to ListScalar.
    """
    pass
632 | ||
633 | ||
cdef class StructScalar(Scalar, collections.abc.Mapping):
    """
    Scalar wrapping a single struct value, usable as a read-only mapping
    from field name to child scalar.
    """

    def __len__(self):
        cdef CStructScalar* raw = <CStructScalar*> self.wrapped.get()
        return raw.value.size()

    def __iter__(self):
        # Yields the field names in the order given by the struct type.
        cdef:
            CStructScalar* raw = <CStructScalar*> self.wrapped.get()
            CStructType* struct_type = <CStructType*> raw.type.get()
            vector[shared_ptr[CField]] children = struct_type.fields()

        for i in range(struct_type.num_fields()):
            yield frombytes(children[i].get().name())

    def items(self):
        # Children are looked up by position, not by name.
        return (
            (name, self[pos]) for pos, name in enumerate(self)
        )

    def __contains__(self, key):
        names = list(self)
        return key in names

    def __getitem__(self, key):
        """
        Return the child value for the given field.

        Parameters
        ----------
        key : Union[int, str]
            Index / position or name of the field.

        Returns
        -------
        result : Scalar

        Raises
        ------
        TypeError
            If ``key`` is neither an integer nor a string/bytes.
        IndexError
            If an integer ``key`` cannot be resolved.
        KeyError
            If a name ``key`` cannot be resolved.
        """
        cdef:
            CFieldRef field_ref
            CStructScalar* raw = <CStructScalar*> self.wrapped.get()

        if isinstance(key, (bytes, str)):
            field_ref = CFieldRef(<c_string> tobytes(key))
        elif isinstance(key, int):
            field_ref = CFieldRef(<int> key)
        else:
            raise TypeError('Expected integer or string index')

        try:
            return Scalar.wrap(GetResultValue(raw.field(field_ref)))
        except ArrowInvalid as exc:
            # Translate the Arrow error into the usual Python lookup errors.
            if isinstance(key, int):
                raise IndexError(key) from exc
            else:
                raise KeyError(key) from exc

    def as_py(self):
        """
        Convert to a Python dict; a null scalar yields None.

        Raises ValueError when a lookup by name fails with KeyError.
        """
        if not self.is_valid:
            return None
        try:
            return {name: self[name].as_py() for name in self.keys()}
        except KeyError:
            raise ValueError(
                "Converting to Python dictionary is not supported when "
                "duplicate field names are present")

    def _as_py_tuple(self):
        # List of (name, value) pairs instead of a dict, so that repr/str
        # still work when field names are duplicated.
        if not self.is_valid:
            return None
        return [(name, self[pos].as_py()) for pos, name in enumerate(self)]

    def __repr__(self):
        pairs = self._as_py_tuple()
        return '<pyarrow.{}: {!r}>'.format(self.__class__.__name__, pairs)

    def __str__(self):
        pairs = self._as_py_tuple()
        return str(pairs)
719 | ||
720 | ||
cdef class MapScalar(ListScalar):
    """
    Concrete class for map scalars.
    """

    def __getitem__(self, i):
        """
        Return the (key, value) pair at the given index.

        Raises
        ------
        IndexError
            If the scalar is null (``values`` is None).
        """
        arr = self.values
        if arr is None:
            raise IndexError(i)
        dct = arr[_normalize_index(i, len(arr))]
        return (dct['key'], dct['value'])

    def __iter__(self):
        """
        Iterate over this element's (key, value) pairs as Python objects.

        A null scalar yields nothing.
        """
        arr = self.values
        if arr is None:
            # Check the local ``arr``, not the module-level ``array``
            # function, and end the generator with a plain return: raising
            # StopIteration inside a generator becomes RuntimeError under
            # PEP 479.
            return
        for k, v in zip(arr.field('key'), arr.field('value')):
            yield (k.as_py(), v.as_py())

    def as_py(self):
        """
        Return this value as a Python list of (key, value) tuples, or None
        when the scalar is null.
        """
        # is_valid reads the flag from the wrapped scalar directly, so no
        # cast to an unrelated C++ scalar type is needed.
        return list(self) if self.is_valid else None
752 | ||
753 | ||
cdef class DictionaryScalar(Scalar):
    """
    Concrete class for dictionary-encoded scalars.
    """

    @classmethod
    def _reconstruct(cls, type, is_valid, index, dictionary):
        """
        Rebuild a DictionaryScalar from its parts (used by ``__reduce__``).

        Parameters
        ----------
        type : DictionaryType
            The dictionary type of the scalar.
        is_valid : bool
            Validity flag stored on the rebuilt scalar.
        index : Scalar or object
            Index value; a non-Scalar is converted with ``scalar`` using the
            type's index_type.
        dictionary : Array or object
            Dictionary values; a non-Array is converted with ``array`` using
            the type's value_type.

        Raises
        ------
        TypeError
            If ``type`` is not a DictionaryType, or a passed Scalar / Array
            does not match the index_type / value_type.
        """
        cdef:
            CDictionaryScalarIndexAndDictionary value
            shared_ptr[CDictionaryScalar] wrapped
            DataType type_
            Scalar index_
            Array dictionary_

        type_ = ensure_type(type, allow_none=False)
        if not isinstance(type_, DictionaryType):
            raise TypeError('Must pass a DictionaryType instance')

        # All checks below go through the normalised ``type_`` so they agree
        # with the conversions in the else-branches.
        if isinstance(index, Scalar):
            if not index.type.equals(type_.index_type):
                raise TypeError("The Scalar value passed as index must have "
                                "identical type to the dictionary type's "
                                "index_type")
            index_ = index
        else:
            index_ = scalar(index, type=type_.index_type)

        if isinstance(dictionary, Array):
            if not dictionary.type.equals(type_.value_type):
                raise TypeError("The Array passed as dictionary must have "
                                "identical type to the dictionary type's "
                                "value_type")
            dictionary_ = dictionary
        else:
            dictionary_ = array(dictionary, type=type_.value_type)

        value.index = pyarrow_unwrap_scalar(index_)
        value.dictionary = pyarrow_unwrap_array(dictionary_)

        wrapped = make_shared[CDictionaryScalar](
            value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
        )
        return Scalar.wrap(<shared_ptr[CScalar]> wrapped)

    def __reduce__(self):
        # Pickle through _reconstruct so index and dictionary are both kept.
        return DictionaryScalar._reconstruct, (
            self.type, self.is_valid, self.index, self.dictionary
        )

    @property
    def index(self):
        """
        Return this value's underlying index as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(sp.value.index)

    @property
    def value(self):
        """
        Return the encoded value as a scalar.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return Scalar.wrap(GetResultValue(sp.GetEncodedValue()))

    @property
    def dictionary(self):
        """
        Return the dictionary values as an Array.
        """
        cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get()
        return pyarrow_wrap_array(sp.value.dictionary)

    def as_py(self):
        """
        Return this encoded value as a Python object.
        """
        return self.value.as_py() if self.is_valid else None

    @property
    def index_value(self):
        """
        Deprecated alias of ``index``; emits a FutureWarning.
        """
        # The two literals are concatenated, so the separator must be
        # written explicitly.
        warnings.warn("`index_value` property is deprecated as of 1.0.0, "
                      "please use the `index` property instead",
                      FutureWarning)
        return self.index

    @property
    def dictionary_value(self):
        """
        Deprecated alias of ``value``; emits a FutureWarning.
        """
        warnings.warn("`dictionary_value` property is deprecated as of 1.0.0, "
                      "please use the `value` property instead", FutureWarning)
        return self.value
842 | ||
843 | ||
cdef class UnionScalar(Scalar):
    """
    Scalar wrapping a single Union value.
    """

    @property
    def value(self):
        """
        The underlying value as a scalar, or None when this scalar is null.
        """
        cdef CUnionScalar* raw = <CUnionScalar*> self.wrapped.get()
        if not raw.is_valid:
            return None
        return Scalar.wrap(raw.value)

    def as_py(self):
        """
        Convert the underlying value to a Python object; a null scalar
        yields None.
        """
        inner = self.value
        if inner is None:
            return None
        return inner.as_py()

    @property
    def type_code(self):
        """
        The union type code stored on this scalar.
        """
        cdef CUnionScalar* raw = <CUnionScalar*> self.wrapped.get()
        return raw.type_code
871 | ||
872 | ||
cdef class ExtensionScalar(Scalar):
    """
    Concrete class for Extension scalars.
    """

    @property
    def value(self):
        """
        Return storage value as a scalar, or None if this scalar is null.
        """
        cdef CExtensionScalar* sp = <CExtensionScalar*> self.wrapped.get()
        return Scalar.wrap(sp.value) if sp.is_valid else None

    def as_py(self):
        """
        Return this scalar as a Python object.

        The result is the storage scalar's ``as_py()``, or None if this
        scalar is null.
        """
        # XXX should there be a hook to wrap the result in a custom class?
        value = self.value
        return None if value is None else value.as_py()

    @staticmethod
    def from_storage(BaseExtensionType typ, value):
        """
        Construct ExtensionScalar from type and storage value.

        Parameters
        ----------
        typ : BaseExtensionType
            The extension type for the result scalar.
        value : object
            The storage value for the result scalar. May be None, a Scalar
            whose type equals ``typ.storage_type``, or any object that
            ``scalar`` can convert to that storage type.

        Returns
        -------
        ext_scalar : ExtensionScalar

        Raises
        ------
        TypeError
            If ``value`` is a Scalar whose type differs from
            ``typ.storage_type``.
        """
        cdef:
            shared_ptr[CExtensionScalar] sp_scalar
            CExtensionScalar* ext_scalar

        # Normalise ``value`` into a storage Scalar (or None).
        if value is None:
            storage = None
        elif isinstance(value, Scalar):
            if value.type != typ.storage_type:
                raise TypeError("Incompatible storage type {0} "
                                "for extension type {1}"
                                .format(value.type, typ))
            storage = value
        else:
            storage = scalar(value, typ.storage_type)

        sp_scalar = make_shared[CExtensionScalar](typ.sp_type)
        ext_scalar = sp_scalar.get()
        # The result is valid only when a valid storage scalar was supplied;
        # the storage is attached only in that case.
        ext_scalar.is_valid = storage is not None and storage.is_valid
        if ext_scalar.is_valid:
            ext_scalar.value = pyarrow_unwrap_scalar(storage)
        check_status(ext_scalar.Validate())
        return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
932 | ||
933 | ||
# Dispatch table used by Scalar.wrap: type id -> Python wrapper class.
# _Type_NA has no entry because wrap handles it before the lookup, and both
# union type ids share UnionScalar.
cdef dict _scalar_classes = {
    _Type_BOOL: BooleanScalar,
    _Type_UINT8: UInt8Scalar,
    _Type_UINT16: UInt16Scalar,
    _Type_UINT32: UInt32Scalar,
    _Type_UINT64: UInt64Scalar,
    _Type_INT8: Int8Scalar,
    _Type_INT16: Int16Scalar,
    _Type_INT32: Int32Scalar,
    _Type_INT64: Int64Scalar,
    _Type_HALF_FLOAT: HalfFloatScalar,
    _Type_FLOAT: FloatScalar,
    _Type_DOUBLE: DoubleScalar,
    _Type_DECIMAL128: Decimal128Scalar,
    _Type_DECIMAL256: Decimal256Scalar,
    _Type_DATE32: Date32Scalar,
    _Type_DATE64: Date64Scalar,
    _Type_TIME32: Time32Scalar,
    _Type_TIME64: Time64Scalar,
    _Type_TIMESTAMP: TimestampScalar,
    _Type_DURATION: DurationScalar,
    _Type_BINARY: BinaryScalar,
    _Type_LARGE_BINARY: LargeBinaryScalar,
    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar,
    _Type_STRING: StringScalar,
    _Type_LARGE_STRING: LargeStringScalar,
    _Type_LIST: ListScalar,
    _Type_LARGE_LIST: LargeListScalar,
    _Type_FIXED_SIZE_LIST: FixedSizeListScalar,
    _Type_STRUCT: StructScalar,
    _Type_MAP: MapScalar,
    _Type_DICTIONARY: DictionaryScalar,
    _Type_SPARSE_UNION: UnionScalar,
    _Type_DENSE_UNION: UnionScalar,
    _Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar,
    _Type_EXTENSION: ExtensionScalar,
}
971 | ||
972 | ||
def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
    """
    Create a pyarrow.Scalar instance from a Python object.

    The value is converted as a one-element sequence and the scalar is then
    read back from position 0 of the resulting array.

    Parameters
    ----------
    value : Any
        Python object coercible to arrow's type system.
    type : pyarrow.DataType
        Explicit type to attempt to coerce to, otherwise will be inferred from
        the value.
    from_pandas : bool, default None
        Use pandas's semantics for inferring nulls from values in
        ndarray-like data. Defaults to False if not passed explicitly by user,
        or True if a pandas object is passed in.
    memory_pool : pyarrow.MemoryPool, optional
        If not passed, will allocate memory from the currently-set default
        memory pool.

    Returns
    -------
    scalar : pyarrow.Scalar

    Examples
    --------
    >>> import pyarrow as pa

    >>> pa.scalar(42)
    <pyarrow.Int64Scalar: 42>

    >>> pa.scalar("string")
    <pyarrow.StringScalar: 'string'>

    >>> pa.scalar([1, 2])
    <pyarrow.ListScalar: [1, 2]>

    >>> pa.scalar([1, 2], type=pa.list_(pa.int16()))
    <pyarrow.ListScalar: [1, 2]>
    """
    cdef:
        DataType ty
        PyConversionOptions options
        shared_ptr[CScalar] scalar
        shared_ptr[CArray] array
        shared_ptr[CChunkedArray] chunked
        bint is_pandas_object = False
        CMemoryPool* pool

    type = ensure_type(type, allow_none=True)
    pool = maybe_unbox_memory_pool(memory_pool)

    # Array-like inputs are unwrapped first; get_values also reports whether
    # the input was a pandas object, which drives the from_pandas default.
    if _is_array_like(value):
        value = get_values(value, &is_pandas_object)

    options.size = 1

    if type is not None:
        ty = ensure_type(type)
        options.type = ty.sp_type

    if from_pandas is None:
        options.from_pandas = is_pandas_object
    else:
        options.from_pandas = from_pandas

    # Wrap the value in a list so the sequence converter can be reused.
    value = [value]
    with nogil:
        chunked = GetResultValue(ConvertPySequence(value, None, options, pool))

    # get the first chunk
    assert chunked.get().num_chunks() == 1
    array = chunked.get().chunk(0)

    # retrieve the scalar from the first position
    scalar = GetResultValue(array.get().GetScalar(0))
    return Scalar.wrap(scalar)