]>
git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/python/pyarrow/tests/test_types.py
1 # Licensed to the Apache Software Foundation (ASF) under one
2 # or more contributor license agreements. See the NOTICE file
3 # distributed with this work for additional information
4 # regarding copyright ownership. The ASF licenses this file
5 # to you under the Apache License, Version 2.0 (the
6 # "License"); you may not use this file except in compliance
7 # with the License. You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing,
12 # software distributed under the License is distributed on an
13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 # KIND, either express or implied. See the License for the
15 # specific language governing permissions and limitations
18 from collections
import OrderedDict
19 from collections
.abc
import Iterator
20 from functools
import partial
27 import hypothesis
as h
28 import hypothesis
.strategies
as st
29 import hypothesis
.extra
.pytz
as tzst
34 import pyarrow
.types
as types
35 import pyarrow
.tests
.strategies
as past
# Returning them from a function is required because the pa.dictionary
# type holds a pyarrow array, and test_array.py::test_total_bytes_allocated
# checks that the default memory pool has zero allocated bytes.
50 pa
.timestamp('us', tz
='UTC'),
51 pa
.timestamp('us', tz
='Europe/Paris'),
57 pa
.decimal256(76, 38),
64 pa
.list_(pa
.int32(), 2),
65 pa
.large_list(pa
.uint16()),
66 pa
.map_(pa
.string(), pa
.int32()),
67 pa
.map_(pa
.field('key', pa
.int32(), nullable
=False),
68 pa
.field('value', pa
.int32())),
69 pa
.struct([pa
.field('a', pa
.int32()),
70 pa
.field('b', pa
.int8()),
71 pa
.field('c', pa
.string())]),
72 pa
.struct([pa
.field('a', pa
.int32(), nullable
=False),
73 pa
.field('b', pa
.int8(), nullable
=False),
74 pa
.field('c', pa
.string())]),
75 pa
.union([pa
.field('a', pa
.binary(10)),
76 pa
.field('b', pa
.string())], mode
=pa
.lib
.UnionMode_DENSE
),
77 pa
.union([pa
.field('a', pa
.binary(10)),
78 pa
.field('b', pa
.string())], mode
=pa
.lib
.UnionMode_DENSE
,
80 pa
.union([pa
.field('a', pa
.binary(10)),
81 pa
.field('b', pa
.string())], mode
=pa
.lib
.UnionMode_SPARSE
),
82 pa
.union([pa
.field('a', pa
.binary(10), nullable
=False),
83 pa
.field('b', pa
.string())], mode
=pa
.lib
.UnionMode_SPARSE
),
84 pa
.dictionary(pa
.int32(), pa
.string())
def test_is_boolean():
    """``is_boolean`` accepts the boolean type and rejects an integer type."""
    boolean_type = pa.bool_()
    integer_type = pa.int8()

    assert types.is_boolean(boolean_type)
    assert not types.is_boolean(integer_type)
def test_is_integer():
    """Check the integer predicates on signed, unsigned and float types."""
    signed = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
    unsigned = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]

    # Signedness does not matter for the generic predicate.
    for int_type in signed + unsigned:
        assert types.is_integer(int_type)

    for int_type in signed:
        assert types.is_signed_integer(int_type)
        assert not types.is_unsigned_integer(int_type)

    for int_type in unsigned:
        assert types.is_unsigned_integer(int_type)
        assert not types.is_signed_integer(int_type)

    # A floating point type is neither an integer nor a signed integer.
    assert not types.is_integer(pa.float32())
    assert not types.is_signed_integer(pa.float32())
def test_is_floating():
    """Every float width is floating; an integer type is not."""
    float_types = (pa.float16(), pa.float32(), pa.float64())
    for float_type in float_types:
        assert types.is_floating(float_type)

    assert not types.is_floating(pa.int32())
120 assert types
.is_null(pa
.null())
121 assert not types
.is_null(pa
.list_(pa
.int32()))
def test_null_field_may_not_be_non_nullable():
    """Building a non-nullable field of null type raises ``ValueError``."""
    field_kwargs = {'nullable': False}
    with pytest.raises(ValueError):
        pa.field('f0', pa.null(), **field_kwargs)
def test_is_decimal():
    """Check the decimal predicates on both decimal widths and a non-decimal.

    ``is_decimal`` must accept either width, while ``is_decimal128`` and
    ``is_decimal256`` must each accept only their own width. An integer type
    is rejected by all three.
    """
    decimal128 = pa.decimal128(19, 4)
    decimal256 = pa.decimal256(76, 38)
    # Non-decimal control type. It has to be bound here: the assertions below
    # reference ``int32`` and would otherwise fail with NameError.
    int32 = pa.int32()

    assert types.is_decimal(decimal128)
    assert types.is_decimal(decimal256)
    assert not types.is_decimal(int32)

    assert types.is_decimal128(decimal128)
    assert not types.is_decimal128(decimal256)
    assert not types.is_decimal128(int32)

    assert not types.is_decimal256(decimal128)
    assert types.is_decimal256(decimal256)
    assert not types.is_decimal256(int32)
149 a
= pa
.list_(pa
.int32())
150 b
= pa
.large_list(pa
.int32())
151 c
= pa
.list_(pa
.int32(), 3)
153 assert types
.is_list(a
)
154 assert not types
.is_large_list(a
)
155 assert not types
.is_fixed_size_list(a
)
156 assert types
.is_large_list(b
)
157 assert not types
.is_list(b
)
158 assert not types
.is_fixed_size_list(b
)
159 assert types
.is_fixed_size_list(c
)
160 assert not types
.is_list(c
)
161 assert not types
.is_large_list(c
)
163 assert not types
.is_list(pa
.int32())
167 m
= pa
.map_(pa
.utf8(), pa
.int32())
169 assert types
.is_map(m
)
170 assert not types
.is_map(pa
.int32())
172 fields
= pa
.map_(pa
.field('key_name', pa
.utf8(), nullable
=False),
173 pa
.field('value_name', pa
.int32()))
174 assert types
.is_map(fields
)
176 entries_type
= pa
.struct([pa
.field('key', pa
.int8()),
177 pa
.field('value', pa
.int8())])
178 list_type
= pa
.list_(entries_type
)
179 assert not types
.is_map(list_type
)
def test_is_dictionary():
    """``is_dictionary`` accepts a dictionary type and rejects a plain int."""
    dictionary_type = pa.dictionary(pa.int32(), pa.string())

    assert types.is_dictionary(dictionary_type)
    assert not types.is_dictionary(pa.int32())
def test_is_nested_or_struct():
    """Structs and lists are nested; only the struct passes ``is_struct``."""
    struct_type = pa.struct([
        pa.field('a', pa.int32()),
        pa.field('b', pa.int8()),
        pa.field('c', pa.string()),
    ])
    list_type = pa.list_(pa.int32())
    large_list_type = pa.large_list(pa.int32())

    assert types.is_struct(struct_type)
    assert not types.is_struct(list_type)

    for nested_type in (struct_type, list_type, large_list_type):
        assert types.is_nested(nested_type)
    assert not types.is_nested(pa.int32())
202 for mode
in [pa
.lib
.UnionMode_SPARSE
, pa
.lib
.UnionMode_DENSE
]:
203 assert types
.is_union(pa
.union([pa
.field('a', pa
.int32()),
204 pa
.field('b', pa
.int8()),
205 pa
.field('c', pa
.string())],
207 assert not types
.is_union(pa
.list_(pa
.int32()))
210 # TODO(wesm): is_map, once implemented
def test_is_binary_string():
    """Each binary/string predicate accepts only its own type.

    The four variable-width types (binary, string, large_binary,
    large_string) are checked against each other's predicates, then the
    fixed-size binary predicate is checked separately.
    """
    binary = pa.binary()
    string = pa.string()
    large_binary = pa.large_binary()
    large_string = pa.large_string()

    assert types.is_binary(binary)
    for other in (string, large_binary, large_string):
        assert not types.is_binary(other)

    assert types.is_string(string)
    assert types.is_unicode(string)
    for other in (binary, large_string, large_binary):
        assert not types.is_string(other)

    assert types.is_large_binary(large_binary)
    for other in (large_string, binary, string):
        assert not types.is_large_binary(other)

    assert types.is_large_string(large_string)
    for other in (large_binary, string, binary):
        assert not types.is_large_string(other)

    # Only a binary type created with an explicit length is fixed-size.
    assert types.is_fixed_size_binary(pa.binary(5))
    assert not types.is_fixed_size_binary(binary)
def test_is_temporal_date_time_timestamp():
    """Check ``is_temporal`` and the per-family temporal predicates.

    Every type in each temporal family (date, time, timestamp, duration,
    interval) must be temporal, must satisfy its own family predicate and
    must fail every other family's predicate. A plain integer type must not
    be temporal.
    """
    date_types = [pa.date32(), pa.date64()]
    time_types = [pa.time32('s'), pa.time64('ns')]
    timestamp_types = [pa.timestamp('ms')]
    duration_types = [pa.duration('ms')]
    interval_types = [pa.month_day_nano_interval()]

    # Pair each family with the single predicate that must accept it. Driving
    # the checks from this table keeps all five families (interval included)
    # covered by the same set of assertions.
    families = [
        (date_types, types.is_date),
        (time_types, types.is_time),
        (timestamp_types, types.is_timestamp),
        (duration_types, types.is_duration),
        (interval_types, types.is_interval),
    ]
    family_checks = [check for _, check in families]

    for cases, own_check in families:
        for case in cases:
            assert types.is_temporal(case)
            for check in family_checks:
                if check is own_check:
                    assert check(case)
                else:
                    assert not check(case)

    assert not types.is_temporal(pa.int32())
def test_is_primitive():
    """An integer type is primitive; a list of integers is not."""
    int_type = pa.int32()
    list_type = pa.list_(pa.int32())

    assert types.is_primitive(int_type)
    assert not types.is_primitive(list_type)
@pytest.mark.parametrize(('tz', 'expected'), [
    (pytz.timezone('Europe/Paris'), 'Europe/Paris'),
    # StaticTzInfo.tzname returns with '-09' so we need to infer the timezone's
    # name from the tzinfo.zone attribute
    (pytz.timezone('Etc/GMT-9'), 'Etc/GMT-9'),
    (pytz.FixedOffset(180), '+03:00'),
    (datetime.timezone.utc, 'UTC'),
    (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30'),
])
def test_tzinfo_to_string(tz, expected):
    """``tzinfo_to_string`` renders each tzinfo object as the expected string.

    Parameters
    ----------
    tz : datetime.tzinfo
        Timezone object to convert (pytz and stdlib ``datetime.timezone``).
    expected : str
        Zone name or ``+HH:MM`` offset the conversion must produce.
    """
    assert pa.lib.tzinfo_to_string(tz) == expected
def test_tzinfo_to_string_errors():
    """``tzinfo_to_string`` rejects non-tzinfo input and sub-minute offsets."""
    msg = "Not an instance of datetime.tzinfo"
    # Match on the message so that an unrelated TypeError cannot satisfy the
    # check; previously ``msg`` was assigned but never used here.
    with pytest.raises(TypeError, match=msg):
        pa.lib.tzinfo_to_string("Europe/Budapest")

    if sys.version_info >= (3, 8):
        # before 3.8 it was only possible to create timezone objects with whole
        # number of minutes
        tz = datetime.timezone(datetime.timedelta(hours=1, seconds=30))
        msg = "Offset must represent whole number of minutes"
        with pytest.raises(ValueError, match=msg):
            pa.lib.tzinfo_to_string(tz)
@h.given(tzst.timezones())
def test_pytz_timezone_roundtrip(tz):
    """A generated timezone survives tzinfo -> string -> tzinfo unchanged."""
    as_string = pa.lib.tzinfo_to_string(tz)
    restored = pa.lib.string_to_tzinfo(as_string)
    assert restored == tz
327 def test_convert_custom_tzinfo_objects_to_string():
328 class CorrectTimezone1(datetime
.tzinfo
):
330 Conversion is using utcoffset()
333 def tzname(self
, dt
):
336 def utcoffset(self
, dt
):
337 return datetime
.timedelta(hours
=-3, minutes
=30)
339 class CorrectTimezone2(datetime
.tzinfo
):
341 Conversion is using tzname()
344 def tzname(self
, dt
):
347 def utcoffset(self
, dt
):
348 return datetime
.timedelta(hours
=3)
350 class BuggyTimezone1(datetime
.tzinfo
):
352 Unable to infer name or offset
355 def tzname(self
, dt
):
358 def utcoffset(self
, dt
):
361 class BuggyTimezone2(datetime
.tzinfo
):
366 def tzname(self
, dt
):
369 def utcoffset(self
, dt
):
372 class BuggyTimezone3(datetime
.tzinfo
):
374 Wrong timezone name type
377 def tzname(self
, dt
):
380 def utcoffset(self
, dt
):
383 assert pa
.lib
.tzinfo_to_string(CorrectTimezone1()) == "-02:30"
384 assert pa
.lib
.tzinfo_to_string(CorrectTimezone2()) == "+03:00"
386 msg
= (r
"Object returned by tzinfo.utcoffset\(None\) is not an instance "
387 r
"of datetime.timedelta")
388 for wrong
in [BuggyTimezone1(), BuggyTimezone2(), BuggyTimezone3()]:
389 with pytest
.raises(ValueError, match
=msg
):
390 pa
.lib
.tzinfo_to_string(wrong
)
@pytest.mark.parametrize(('string', 'expected'), [
    ('Europe/Paris', pytz.timezone('Europe/Paris')),
    ('+03:00', pytz.FixedOffset(180)),
    ('+01:30', pytz.FixedOffset(90)),
    ('-02:00', pytz.FixedOffset(-120)),
])
def test_string_to_tzinfo(string, expected):
    """``string_to_tzinfo`` parses zone names and ``+HH:MM`` style offsets.

    Parameters
    ----------
    string : str
        Timezone name or fixed-offset string to parse.
    expected : datetime.tzinfo
        pytz object the parsed result must compare equal to.
    """
    result = pa.lib.string_to_tzinfo(string)
    assert result == expected
@pytest.mark.parametrize('tz,name', [
    (pytz.FixedOffset(90), '+01:30'),
    (pytz.FixedOffset(-90), '-01:30'),
    (pytz.timezone('America/New_York'), 'America/New_York'),
])
def test_timezone_string_roundtrip(tz, name):
    """Conversion between tzinfo objects and strings works in both directions.

    Parameters
    ----------
    tz : datetime.tzinfo
        pytz timezone object.
    name : str
        String form that ``tz`` must convert to and be parsed back from.
    """
    assert pa.lib.tzinfo_to_string(tz) == name
    assert pa.lib.string_to_tzinfo(name) == tz
def test_timestamp():
    """``pa.timestamp`` keeps its unit and timezone and rejects bad units."""
    for unit in ('s', 'ms', 'us', 'ns'):
        for tz in (None, 'UTC', 'Europe/Paris'):
            ty = pa.timestamp(unit, tz=tz)
            assert ty.unit == unit
            # The loop varies ``tz``, so check that it is preserved as well;
            # without this the inner loop only re-tests the unit.
            assert ty.tz == tz

    for invalid_unit in ('m', 'arbit', 'rary'):
        with pytest.raises(ValueError, match='Invalid time unit'):
            pa.timestamp(invalid_unit)
def test_time32_units():
    """``pa.time32`` accepts 's' and 'ms' and rejects the other units tried."""
    error_template = 'Invalid time unit for time32: {!r}'

    for unit in ('s', 'ms'):
        time_type = pa.time32(unit)
        assert time_type.unit == unit

    for unit in ('m', 'us', 'ns'):
        expected_error = error_template.format(unit)
        with pytest.raises(ValueError, match=expected_error):
            pa.time32(unit)
def test_time64_units():
    """``pa.time64`` accepts 'us' and 'ns' and rejects the other units tried."""
    error_template = 'Invalid time unit for time64: {!r}'

    for unit in ('us', 'ns'):
        time_type = pa.time64(unit)
        assert time_type.unit == unit

    for unit in ('m', 's', 'ms'):
        expected_error = error_template.format(unit)
        with pytest.raises(ValueError, match=expected_error):
            pa.time64(unit)
451 for unit
in ('s', 'ms', 'us', 'ns'):
452 ty
= pa
.duration(unit
)
453 assert ty
.unit
== unit
455 for invalid_unit
in ('m', 'arbit', 'rary'):
456 with pytest
.raises(ValueError, match
='Invalid time unit'):
457 pa
.duration(invalid_unit
)
460 def test_list_type():
461 ty
= pa
.list_(pa
.int64())
462 assert isinstance(ty
, pa
.ListType
)
463 assert ty
.value_type
== pa
.int64()
464 assert ty
.value_field
== pa
.field("item", pa
.int64(), nullable
=True)
466 with pytest
.raises(TypeError):
470 def test_large_list_type():
471 ty
= pa
.large_list(pa
.utf8())
472 assert isinstance(ty
, pa
.LargeListType
)
473 assert ty
.value_type
== pa
.utf8()
474 assert ty
.value_field
== pa
.field("item", pa
.utf8(), nullable
=True)
476 with pytest
.raises(TypeError):
481 ty
= pa
.map_(pa
.utf8(), pa
.int32())
482 assert isinstance(ty
, pa
.MapType
)
483 assert ty
.key_type
== pa
.utf8()
484 assert ty
.key_field
== pa
.field("key", pa
.utf8(), nullable
=False)
485 assert ty
.item_type
== pa
.int32()
486 assert ty
.item_field
== pa
.field("value", pa
.int32(), nullable
=True)
488 with pytest
.raises(TypeError):
490 with pytest
.raises(TypeError):
491 pa
.map_(pa
.int32(), None)
492 with pytest
.raises(TypeError):
493 pa
.map_(pa
.field("name", pa
.string(), nullable
=True), pa
.int64())
def test_fixed_size_list_type():
    """``pa.list_`` with a size yields a fixed-size list type.

    Checks the resulting class, value type, value field and list size, and
    that a negative size raises ``ValueError``.
    """
    element_type = pa.float64()
    fixed_list = pa.list_(element_type, 2)

    assert isinstance(fixed_list, pa.FixedSizeListType)
    assert fixed_list.value_type == pa.float64()
    expected_field = pa.field("item", pa.float64(), nullable=True)
    assert fixed_list.value_field == expected_field
    assert fixed_list.list_size == 2

    with pytest.raises(ValueError):
        pa.list_(pa.float64(), -2)
507 def test_struct_type():
509 # Duplicate field name on purpose
510 pa
.field('a', pa
.int64()),
511 pa
.field('a', pa
.int32()),
512 pa
.field('b', pa
.int32())
514 ty
= pa
.struct(fields
)
516 assert len(ty
) == ty
.num_fields
== 3
517 assert list(ty
) == fields
518 assert ty
[0].name
== 'a'
519 assert ty
[2].type == pa
.int32()
520 with pytest
.raises(IndexError):
523 assert ty
['b'] == ty
[2]
526 with pytest
.raises(KeyError):
529 # Neither integer nor string
530 with pytest
.raises(TypeError):
533 for a
, b
in zip(ty
, fields
):
536 # Construct from list of tuples
537 ty
= pa
.struct([('a', pa
.int64()),
540 assert list(ty
) == fields
541 for a
, b
in zip(ty
, fields
):
544 # Construct from mapping
545 fields
= [pa
.field('a', pa
.int64()),
546 pa
.field('b', pa
.int32())]
547 ty
= pa
.struct(OrderedDict([('a', pa
.int64()),
549 assert list(ty
) == fields
550 for a
, b
in zip(ty
, fields
):
554 with pytest
.raises(TypeError):
555 pa
.struct([('a', None)])
558 def test_struct_duplicate_field_names():
560 pa
.field('a', pa
.int64()),
561 pa
.field('b', pa
.int32()),
562 pa
.field('a', pa
.int32())
564 ty
= pa
.struct(fields
)
567 with pytest
.warns(UserWarning):
568 with pytest
.raises(KeyError):
571 # StructType::GetFieldIndex
572 assert ty
.get_field_index('a') == -1
574 # StructType::GetAllFieldIndices
575 assert ty
.get_all_field_indices('a') == [0, 2]
578 def test_union_type():
579 def check_fields(ty
, fields
):
580 assert ty
.num_fields
== len(fields
)
581 assert [ty
[i
] for i
in range(ty
.num_fields
)] == fields
583 fields
= [pa
.field('x', pa
.list_(pa
.int32())),
584 pa
.field('y', pa
.binary())]
588 partial(pa
.union
, mode
='sparse'),
589 partial(pa
.union
, mode
=pa
.lib
.UnionMode_SPARSE
),
594 partial(pa
.union
, mode
='dense'),
595 partial(pa
.union
, mode
=pa
.lib
.UnionMode_DENSE
),
599 for factory
in sparse_factories
:
601 assert isinstance(ty
, pa
.SparseUnionType
)
602 assert ty
.mode
== 'sparse'
603 check_fields(ty
, fields
)
604 assert ty
.type_codes
== [0, 1]
605 ty
= factory(fields
, type_codes
=type_codes
)
606 assert ty
.mode
== 'sparse'
607 check_fields(ty
, fields
)
608 assert ty
.type_codes
== type_codes
609 # Invalid number of type codes
610 with pytest
.raises(ValueError):
611 factory(fields
, type_codes
=type_codes
[1:])
613 for factory
in dense_factories
:
615 assert isinstance(ty
, pa
.DenseUnionType
)
616 assert ty
.mode
== 'dense'
617 check_fields(ty
, fields
)
618 assert ty
.type_codes
== [0, 1]
619 ty
= factory(fields
, type_codes
=type_codes
)
620 assert ty
.mode
== 'dense'
621 check_fields(ty
, fields
)
622 assert ty
.type_codes
== type_codes
623 # Invalid number of type codes
624 with pytest
.raises(ValueError):
625 factory(fields
, type_codes
=type_codes
[1:])
627 for mode
in ('unknown', 2):
628 with pytest
.raises(ValueError, match
='Invalid union mode'):
629 pa
.union(fields
, mode
=mode
)
def test_dictionary_type():
    """Exercise ``pa.dictionary`` construction and the resulting attributes."""
    # Each case: (positional args, keyword args,
    #             expected index type, expected value type, expected ordered)
    cases = [
        ((pa.int32(), pa.string()), {},
         pa.int32(), pa.string(), False),
        ((pa.int8(), pa.float64()), {'ordered': True},
         pa.int8(), pa.float64(), True),
        # construct from non-arrow objects
        (('int8', 'string'), {},
         pa.int8(), pa.string(), False),
        # allow unsigned integers for index type
        ((pa.uint32(), pa.string()), {},
         pa.uint32(), pa.string(), False),
    ]
    for args, kwargs, index_type, value_type, ordered in cases:
        dict_type = pa.dictionary(*args, **kwargs)
        assert dict_type.index_type == index_type
        assert dict_type.value_type == value_type
        assert dict_type.ordered is ordered

    # invalid index type raises
    with pytest.raises(TypeError):
        pa.dictionary(pa.string(), pa.int64())
def test_dictionary_ordered_equals():
    """Dictionary type equality takes ``ordered`` and the index type into account."""
    # Python side checking of ARROW-6345
    d1 = pa.dictionary('int32', 'binary', ordered=True)
    d2 = pa.dictionary('int32', 'binary', ordered=False)
    d3 = pa.dictionary('int8', 'binary', ordered=True)
    d4 = pa.dictionary('int32', 'binary', ordered=True)

    assert not d1.equals(d2)  # differs only in ``ordered``
    assert not d1.equals(d3)  # differs only in index type
    # d4 is built with the same arguments as d1. Without this positive case
    # an ``equals`` that always returned False would still pass the test.
    assert d1.equals(d4)
def test_types_hashable():
    """Types hash stably and each one works as a distinct dictionary key."""
    many_types = get_many_types()
    # Maps every type to its position in ``many_types``. It must be created
    # and filled here, since the assertions below read from it.
    in_dict = {}
    for i, type_ in enumerate(many_types):
        assert hash(type_) == hash(type_)
        in_dict[type_] = i
    # Equal lengths mean no two types collapsed onto the same key.
    assert len(in_dict) == len(many_types)
    for i, type_ in enumerate(many_types):
        assert in_dict[type_] == i
def test_types_picklable():
    """Every type from ``get_many_types`` survives a pickle round trip."""
    for original in get_many_types():
        restored = pickle.loads(pickle.dumps(original))
        assert restored == original
def test_types_weakref():
    """Types support weak references."""
    for ty in get_many_types():
        # Create the reference before dereferencing it; ``wr`` is otherwise
        # unbound on the first iteration.
        wr = weakref.ref(ty)
        assert wr() is not None
        # Note that ty may be a singleton and therefore outlive this loop

    wr = weakref.ref(pa.int32())
    assert wr() is not None  # singleton
    wr = weakref.ref(pa.list_(pa.int32()))
    assert wr() is None  # not a singleton
def test_fields_hashable():
    """Fields differing in name, type or nullability are distinct dict keys."""
    # Maps every field to its position in ``fields``. It must be created and
    # filled here, since the assertions below read from it.
    in_dict = {}
    fields = [pa.field('a', pa.int32()),
              pa.field('a', pa.int64()),
              pa.field('a', pa.int64(), nullable=False),
              pa.field('b', pa.int32()),
              pa.field('b', pa.int32(), nullable=False)]
    for i, field in enumerate(fields):
        in_dict[field] = i
    # Equal lengths mean no two fields collapsed onto the same key.
    assert len(in_dict) == len(fields)
    for i, field in enumerate(fields):
        assert in_dict[field] == i
def test_fields_weakrefable():
    """A field can be weakly referenced while a strong reference exists."""
    # ``strong_ref`` keeps the field alive for the duration of the check.
    strong_ref = pa.field('a', pa.int32())
    weak_ref = weakref.ref(strong_ref)
    assert weak_ref() is not None
723 @pytest.mark
.parametrize('t,check_func', [
724 (pa
.date32(), types
.is_date32
),
725 (pa
.date64(), types
.is_date64
),
726 (pa
.time32('s'), types
.is_time32
),
727 (pa
.time64('ns'), types
.is_time64
),
728 (pa
.int8(), types
.is_int8
),
729 (pa
.int16(), types
.is_int16
),
730 (pa
.int32(), types
.is_int32
),
731 (pa
.int64(), types
.is_int64
),
732 (pa
.uint8(), types
.is_uint8
),
733 (pa
.uint16(), types
.is_uint16
),
734 (pa
.uint32(), types
.is_uint32
),
735 (pa
.uint64(), types
.is_uint64
),
736 (pa
.float16(), types
.is_float16
),
737 (pa
.float32(), types
.is_float32
),
738 (pa
.float64(), types
.is_float64
)
740 def test_exact_primitive_types(t
, check_func
):
745 # enum values are not exposed publicly
746 for ty
in get_many_types():
747 assert isinstance(ty
.id, int)
750 def test_bit_width():
751 for ty
, expected
in [(pa
.bool_(), 1),
755 (pa
.decimal128(19, 4), 128),
756 (pa
.decimal256(76, 38), 256),
757 (pa
.binary(42), 42 * 8)]:
758 assert ty
.bit_width
== expected
759 for ty
in [pa
.binary(), pa
.string(), pa
.list_(pa
.int16())]:
760 with pytest
.raises(ValueError, match
="fixed width"):
def test_fixed_size_binary_byte_width():
    """A fixed-size binary type reports its length as ``byte_width``."""
    # ``ty`` has to be bound before the assertion. A 5-byte fixed-size binary
    # matches both the test name and the expected width.
    ty = pa.binary(5)
    assert ty.byte_width == 5
def test_decimal_properties():
    """Decimal types expose their byte width, precision and scale."""
    ty = pa.decimal128(19, 4)
    assert ty.byte_width == 16
    assert ty.precision == 19
    # Check scale here too, so both widths get the same three assertions.
    assert ty.scale == 4

    ty = pa.decimal256(76, 38)
    assert ty.byte_width == 32
    assert ty.precision == 76
    assert ty.scale == 38
780 def test_decimal_overflow():
783 for i
in (0, -1, 39):
784 with pytest
.raises(ValueError):
789 for i
in (0, -1, 77):
790 with pytest
.raises(ValueError):
794 def test_type_equality_operators():
795 many_types
= get_many_types()
796 non_pyarrow
= ('foo', 16, {'s', 'e', 't'})
798 for index
, ty
in enumerate(many_types
):
799 # could use two parametrization levels,
800 # but that'd bloat pytest's output
801 for i
, other
in enumerate(many_types
+ non_pyarrow
):
808 def test_key_value_metadata():
809 m
= pa
.KeyValueMetadata({'a': 'A', 'b': 'B'})
811 assert m
['a'] == b
'A'
812 assert m
[b
'a'] == b
'A'
813 assert m
['b'] == b
'B'
818 m1
= pa
.KeyValueMetadata({'a': 'A', 'b': 'B'})
819 m2
= pa
.KeyValueMetadata(a
='A', b
='B')
820 m3
= pa
.KeyValueMetadata([('a', 'A'), ('b', 'B')])
825 assert m1
== {'a': 'A', 'b': 'B'}
826 assert m1
!= {'a': 'A', 'b': 'C'}
828 with pytest
.raises(TypeError):
829 pa
.KeyValueMetadata({'a': 1})
830 with pytest
.raises(TypeError):
831 pa
.KeyValueMetadata({1: 'a'})
832 with pytest
.raises(TypeError):
833 pa
.KeyValueMetadata(a
=1)
835 expected
= [(b
'a', b
'A'), (b
'b', b
'B')]
836 result
= [(k
, v
) for k
, v
in m3
.items()]
837 assert result
== expected
838 assert list(m3
.items()) == expected
839 assert list(m3
.keys()) == [b
'a', b
'b']
840 assert list(m3
.values()) == [b
'A', b
'B']
843 # test duplicate key support
844 md
= pa
.KeyValueMetadata([
858 assert isinstance(md
.keys(), Iterator
)
859 assert isinstance(md
.values(), Iterator
)
860 assert isinstance(md
.items(), Iterator
)
861 assert list(md
.items()) == expected
862 assert list(md
.keys()) == [k
for k
, _
in expected
]
863 assert list(md
.values()) == [v
for _
, v
in expected
]
866 assert md
['a'] == b
'alpha'
867 assert md
['b'] == b
'beta'
868 assert md
.get_all('a') == [b
'alpha', b
'Alpha', b
'ALPHA']
869 assert md
.get_all('b') == [b
'beta']
870 assert md
.get_all('unkown') == []
872 with pytest
.raises(KeyError):
873 md
= pa
.KeyValueMetadata([
def test_key_value_metadata_duplicates():
    """Passing a keyword that repeats an existing key raises ``KeyError``."""
    base = pa.KeyValueMetadata({'a': '1', 'b': '2'})

    # 'a' is already present in ``base``.
    with pytest.raises(KeyError):
        pa.KeyValueMetadata(base, a='3')
def test_field_basic():
    """Check a field's name, type, nullability, repr and type validation."""
    # ``t`` has to be bound before use. The expected repr below
    # ("foo: string") fixes it as the string type.
    t = pa.string()
    f = pa.field('foo', t)

    assert f.name == 'foo'
    assert f.nullable
    assert f.type == t
    assert repr(f) == "pyarrow.Field<foo: string>"

    # Third positional argument is the nullable flag.
    f = pa.field('foo', t, False)
    assert not f.nullable

    with pytest.raises(TypeError):
        pa.field('foo', None)
def test_field_equals():
    """``Field.equals`` compares name, type, nullability and, on request, metadata."""
    meta1 = {b'foo': b'bar'}
    meta2 = {b'bizz': b'bazz'}

    f1 = pa.field('a', pa.int8(), nullable=True)
    f2 = pa.field('a', pa.int8(), nullable=True)
    f3 = pa.field('a', pa.int8(), nullable=False)
    f4 = pa.field('a', pa.int16(), nullable=False)
    f5 = pa.field('b', pa.int16(), nullable=False)
    f6 = pa.field('a', pa.int8(), nullable=True, metadata=meta1)
    f7 = pa.field('a', pa.int8(), nullable=True, metadata=meta1)
    f8 = pa.field('a', pa.int8(), nullable=True, metadata=meta2)

    # Positive cases: identical definitions compare equal. Without these an
    # ``equals`` that always returned False would pass every check below.
    assert f1.equals(f2)
    assert f6.equals(f7)
    assert f6.equals(f7, check_metadata=True)

    assert not f1.equals(f3)  # nullability differs
    assert not f1.equals(f4)  # type and nullability differ
    assert not f3.equals(f4)  # type differs
    assert not f4.equals(f5)  # name differs

    # No metadata in f1, but metadata in f6
    assert f1.equals(f6)  # metadata is ignored unless requested
    assert not f1.equals(f6, check_metadata=True)

    # Same name/type/nullability, different metadata
    assert not f7.equals(f8, check_metadata=True)
934 def test_field_equality_operators():
935 f1
= pa
.field('a', pa
.int8(), nullable
=True)
936 f2
= pa
.field('a', pa
.int8(), nullable
=True)
937 f3
= pa
.field('b', pa
.int8(), nullable
=True)
938 f4
= pa
.field('b', pa
.int8(), nullable
=False)
def test_field_metadata():
    """``Field.metadata`` is None when unset and otherwise what was passed."""
    without_metadata = pa.field('a', pa.int8())
    empty_metadata = pa.field('a', pa.int8(), metadata={})
    with_metadata = pa.field('a', pa.int8(), metadata={b'bizz': b'bazz'})

    assert without_metadata.metadata is None
    assert empty_metadata.metadata == {}
    assert with_metadata.metadata[b'bizz'] == b'bazz'
956 def test_field_add_remove_metadata():
959 f0
= pa
.field('foo', pa
.int32())
961 assert f0
.metadata
is None
963 metadata
= {b
'foo': b
'bar', b
'pandas': b
'badger'}
964 metadata2
= collections
.OrderedDict([
969 f1
= f0
.with_metadata(metadata
)
970 assert f1
.metadata
== metadata
972 f2
= f0
.with_metadata(metadata2
)
973 assert f2
.metadata
== metadata2
975 with pytest
.raises(TypeError):
976 f0
.with_metadata([1, 2, 3])
978 f3
= f1
.remove_metadata()
979 assert f3
.metadata
is None
982 f4
= f3
.remove_metadata()
983 assert f4
.metadata
is None
985 f5
= pa
.field('foo', pa
.int32(), True, metadata
)
986 f6
= f0
.with_metadata(metadata
)
def test_field_modified_copies():
    """``with_type``, ``with_nullable`` and ``with_name`` return new fields.

    After each call the derived field must equal a freshly built field with
    the changed attribute, and the source field must still equal an
    untouched twin.
    """
    source = pa.field('foo', pa.int32(), True)
    untouched_twin = pa.field('foo', pa.int32(), True)
    assert source.equals(untouched_twin)

    expected_retyped = pa.field('foo', pa.int64(), True)
    retyped = source.with_type(pa.int64())
    assert expected_retyped.equals(retyped)
    # Original instance is unmodified
    assert source.equals(untouched_twin)

    expected_non_nullable = pa.field('foo', pa.int32(), False)
    non_nullable = source.with_nullable(False)
    assert expected_non_nullable.equals(non_nullable)
    # Original instance is unmodified
    assert source.equals(untouched_twin)

    expected_renamed = pa.field('bar', pa.int32(), True)
    renamed = source.with_name('bar')
    assert expected_renamed.equals(renamed)
    # Original instance is unmodified
    assert source.equals(untouched_twin)
def test_is_integer_value():
    """Python and NumPy integers are integer values; a digit string is not."""
    is_integer_value = pa.types.is_integer_value

    for value in (1, np.int64(1)):
        assert is_integer_value(value)
    assert not is_integer_value('1')
def test_is_float_value():
    """Python and NumPy floats are float values; ints and strings are not."""
    is_float_value = pa.types.is_float_value

    assert not is_float_value(1)
    for value in (1., np.float64(1)):
        assert is_float_value(value)
    assert not is_float_value('1.0')
def test_is_boolean_value():
    """Python and NumPy booleans are boolean values; the integer 1 is not."""
    is_boolean_value = pa.types.is_boolean_value

    assert not is_boolean_value(1)
    for value in (True, False, np.bool_(True), np.bool_(False)):
        assert is_boolean_value(value)
1041 pa
.field(name
='', type=pa
.null(), metadata
={'0': '', '': ''})
1043 def test_pickling(field
):
1044 data
= pickle
.dumps(field
)
1045 assert pickle
.loads(data
) == field
1049 st
.lists(past
.all_types
) |
1050 st
.lists(past
.all_fields
) |
1051 st
.lists(past
.all_schemas
)
1053 def test_hashing(items
):
1055 # well, this is still O(n^2), but makes the input unique
1056 all(not a
.equals(b
) for i
, a
in enumerate(items
) for b
in items
[:i
])
1060 for i
, item
in enumerate(items
):
1061 assert hash(item
) == hash(item
)
1064 assert len(container
) == len(items
)
1066 for i
, item
in enumerate(items
):
1067 assert container
[item
] == i