ceph/src/arrow/python/pyarrow/tests/test_types.py

   1 # Licensed to the Apache Software Foundation (ASF) under one
   2 # or more contributor license agreements.  See the NOTICE file
   3 # distributed with this work for additional information
   4 # regarding copyright ownership.  The ASF licenses this file
   5 # to you under the Apache License, Version 2.0 (the
   6 # "License"); you may not use this file except in compliance
   7 # with the License.  You may obtain a copy of the License at
   8 #
   9 #   http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing,
  12 # software distributed under the License is distributed on an
  13 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 # KIND, either express or implied.  See the License for the
  15 # specific language governing permissions and limitations
  16 # under the License.
  17
  18 from collections import OrderedDict
  19 from collections.abc import Iterator
  20 from functools import partial
  21 import datetime
  22 import sys
  23
  24 import pickle
  25 import pytest
  26 import pytz
  27 import hypothesis as h
  28 import hypothesis.strategies as st
  29 import hypothesis.extra.pytz as tzst
  30 import weakref
  31
  32 import numpy as np
  33 import pyarrow as pa
  34 import pyarrow.types as types
  35 import pyarrow.tests.strategies as past
  36
  37
  38 def get_many_types():
  39     # returning them from a function is required because of pa.dictionary
  40     # type holds a pyarrow array and test_array.py::test_toal_bytes_allocated
  41     # checks that the default memory pool has zero allocated bytes
  42     return (
  43         pa.null(),
  44         pa.bool_(),
  45         pa.int32(),
  46         pa.time32('s'),
  47         pa.time64('us'),
  48         pa.date32(),
  49         pa.timestamp('us'),
  50         pa.timestamp('us', tz='UTC'),
  51         pa.timestamp('us', tz='Europe/Paris'),
  52         pa.duration('s'),
  53         pa.float16(),
  54         pa.float32(),
  55         pa.float64(),
  56         pa.decimal128(19, 4),
  57         pa.decimal256(76, 38),
  58         pa.string(),
  59         pa.binary(),
  60         pa.binary(10),
  61         pa.large_string(),
  62         pa.large_binary(),
  63         pa.list_(pa.int32()),
  64         pa.list_(pa.int32(), 2),
  65         pa.large_list(pa.uint16()),
  66         pa.map_(pa.string(), pa.int32()),
  67         pa.map_(pa.field('key', pa.int32(), nullable=False),
  68                 pa.field('value', pa.int32())),
  69         pa.struct([pa.field('a', pa.int32()),
  70                    pa.field('b', pa.int8()),
  71                    pa.field('c', pa.string())]),
  72         pa.struct([pa.field('a', pa.int32(), nullable=False),
  73                    pa.field('b', pa.int8(), nullable=False),
  74                    pa.field('c', pa.string())]),
  75         pa.union([pa.field('a', pa.binary(10)),
  76                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
  77         pa.union([pa.field('a', pa.binary(10)),
  78                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE,
  79                  type_codes=[4, 8]),
  80         pa.union([pa.field('a', pa.binary(10)),
  81                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
  82         pa.union([pa.field('a', pa.binary(10), nullable=False),
  83                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
  84         pa.dictionary(pa.int32(), pa.string())
  85     )
  86
  87
  88 def test_is_boolean():
  89     assert types.is_boolean(pa.bool_())
  90     assert not types.is_boolean(pa.int8())
  91
  92
  93 def test_is_integer():
  94     signed_ints = [pa.int8(), pa.int16(), pa.int32(), pa.int64()]
  95     unsigned_ints = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
  96
  97     for t in signed_ints + unsigned_ints:
  98         assert types.is_integer(t)
  99
 100     for t in signed_ints:
 101         assert types.is_signed_integer(t)
 102         assert not types.is_unsigned_integer(t)
 103
 104     for t in unsigned_ints:
 105         assert types.is_unsigned_integer(t)
 106         assert not types.is_signed_integer(t)
 107
 108     assert not types.is_integer(pa.float32())
 109     assert not types.is_signed_integer(pa.float32())
 110
 111
 112 def test_is_floating():
 113     for t in [pa.float16(), pa.float32(), pa.float64()]:
 114         assert types.is_floating(t)
 115
 116     assert not types.is_floating(pa.int32())
 117
 118
 119 def test_is_null():
 120     assert types.is_null(pa.null())
 121     assert not types.is_null(pa.list_(pa.int32()))
 122
 123
 124 def test_null_field_may_not_be_non_nullable():
 125     # ARROW-7273
 126     with pytest.raises(ValueError):
 127         pa.field('f0', pa.null(), nullable=False)
 128
 129
 130 def test_is_decimal():
 131     decimal128 = pa.decimal128(19, 4)
 132     decimal256 = pa.decimal256(76, 38)
 133     int32 = pa.int32()
 134
 135     assert types.is_decimal(decimal128)
 136     assert types.is_decimal(decimal256)
 137     assert not types.is_decimal(int32)
 138
 139     assert types.is_decimal128(decimal128)
 140     assert not types.is_decimal128(decimal256)
 141     assert not types.is_decimal128(int32)
 142
 143     assert not types.is_decimal256(decimal128)
 144     assert types.is_decimal256(decimal256)
 145     assert not types.is_decimal256(int32)
 146
 147
 148 def test_is_list():
 149     a = pa.list_(pa.int32())
 150     b = pa.large_list(pa.int32())
 151     c = pa.list_(pa.int32(), 3)
 152
 153     assert types.is_list(a)
 154     assert not types.is_large_list(a)
 155     assert not types.is_fixed_size_list(a)
 156     assert types.is_large_list(b)
 157     assert not types.is_list(b)
 158     assert not types.is_fixed_size_list(b)
 159     assert types.is_fixed_size_list(c)
 160     assert not types.is_list(c)
 161     assert not types.is_large_list(c)
 162
 163     assert not types.is_list(pa.int32())
 164
 165
 166 def test_is_map():
 167     m = pa.map_(pa.utf8(), pa.int32())
 168
 169     assert types.is_map(m)
 170     assert not types.is_map(pa.int32())
 171
 172     fields = pa.map_(pa.field('key_name', pa.utf8(), nullable=False),
 173                      pa.field('value_name', pa.int32()))
 174     assert types.is_map(fields)
 175
 176     entries_type = pa.struct([pa.field('key', pa.int8()),
 177                               pa.field('value', pa.int8())])
 178     list_type = pa.list_(entries_type)
 179     assert not types.is_map(list_type)
 180
 181
 182 def test_is_dictionary():
 183     assert types.is_dictionary(pa.dictionary(pa.int32(), pa.string()))
 184     assert not types.is_dictionary(pa.int32())
 185
 186
 187 def test_is_nested_or_struct():
 188     struct_ex = pa.struct([pa.field('a', pa.int32()),
 189                            pa.field('b', pa.int8()),
 190                            pa.field('c', pa.string())])
 191
 192     assert types.is_struct(struct_ex)
 193     assert not types.is_struct(pa.list_(pa.int32()))
 194
 195     assert types.is_nested(struct_ex)
 196     assert types.is_nested(pa.list_(pa.int32()))
 197     assert types.is_nested(pa.large_list(pa.int32()))
 198     assert not types.is_nested(pa.int32())
 199
 200
 201 def test_is_union():
 202     for mode in [pa.lib.UnionMode_SPARSE, pa.lib.UnionMode_DENSE]:
 203         assert types.is_union(pa.union([pa.field('a', pa.int32()),
 204                                         pa.field('b', pa.int8()),
 205                                         pa.field('c', pa.string())],
 206                                        mode=mode))
 207     assert not types.is_union(pa.list_(pa.int32()))
 208
 209
# NOTE: is_map is implemented and covered by test_is_map in this file
 211
 212
 213 def test_is_binary_string():
 214     assert types.is_binary(pa.binary())
 215     assert not types.is_binary(pa.string())
 216     assert not types.is_binary(pa.large_binary())
 217     assert not types.is_binary(pa.large_string())
 218
 219     assert types.is_string(pa.string())
 220     assert types.is_unicode(pa.string())
 221     assert not types.is_string(pa.binary())
 222     assert not types.is_string(pa.large_string())
 223     assert not types.is_string(pa.large_binary())
 224
 225     assert types.is_large_binary(pa.large_binary())
 226     assert not types.is_large_binary(pa.large_string())
 227     assert not types.is_large_binary(pa.binary())
 228     assert not types.is_large_binary(pa.string())
 229
 230     assert types.is_large_string(pa.large_string())
 231     assert not types.is_large_string(pa.large_binary())
 232     assert not types.is_large_string(pa.string())
 233     assert not types.is_large_string(pa.binary())
 234
 235     assert types.is_fixed_size_binary(pa.binary(5))
 236     assert not types.is_fixed_size_binary(pa.binary())
 237
 238
 239 def test_is_temporal_date_time_timestamp():
 240     date_types = [pa.date32(), pa.date64()]
 241     time_types = [pa.time32('s'), pa.time64('ns')]
 242     timestamp_types = [pa.timestamp('ms')]
 243     duration_types = [pa.duration('ms')]
 244     interval_types = [pa.month_day_nano_interval()]
 245
 246     for case in (date_types + time_types + timestamp_types + duration_types +
 247                  interval_types):
 248         assert types.is_temporal(case)
 249
 250     for case in date_types:
 251         assert types.is_date(case)
 252         assert not types.is_time(case)
 253         assert not types.is_timestamp(case)
 254         assert not types.is_duration(case)
 255         assert not types.is_interval(case)
 256
 257     for case in time_types:
 258         assert types.is_time(case)
 259         assert not types.is_date(case)
 260         assert not types.is_timestamp(case)
 261         assert not types.is_duration(case)
 262         assert not types.is_interval(case)
 263
 264     for case in timestamp_types:
 265         assert types.is_timestamp(case)
 266         assert not types.is_date(case)
 267         assert not types.is_time(case)
 268         assert not types.is_duration(case)
 269         assert not types.is_interval(case)
 270
 271     for case in duration_types:
 272         assert types.is_duration(case)
 273         assert not types.is_date(case)
 274         assert not types.is_time(case)
 275         assert not types.is_timestamp(case)
 276         assert not types.is_interval(case)
 277
 278     for case in interval_types:
 279         assert types.is_interval(case)
 280         assert not types.is_date(case)
 281         assert not types.is_time(case)
 282         assert not types.is_timestamp(case)
 283
 284     assert not types.is_temporal(pa.int32())
 285
 286
 287 def test_is_primitive():
 288     assert types.is_primitive(pa.int32())
 289     assert not types.is_primitive(pa.list_(pa.int32()))
 290
 291
 292 @pytest.mark.parametrize(('tz', 'expected'), [
 293     (pytz.utc, 'UTC'),
 294     (pytz.timezone('Europe/Paris'), 'Europe/Paris'),
 295     # StaticTzInfo.tzname returns with '-09' so we need to infer the timezone's
 296     # name from the tzinfo.zone attribute
 297     (pytz.timezone('Etc/GMT-9'), 'Etc/GMT-9'),
 298     (pytz.FixedOffset(180), '+03:00'),
 299     (datetime.timezone.utc, 'UTC'),
 300     (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30')
 301 ])
 302 def test_tzinfo_to_string(tz, expected):
 303     assert pa.lib.tzinfo_to_string(tz) == expected
 304
 305
 306 def test_tzinfo_to_string_errors():
 307     msg = "Not an instance of datetime.tzinfo"
 308     with pytest.raises(TypeError):
 309         pa.lib.tzinfo_to_string("Europe/Budapest")
 310
 311     if sys.version_info >= (3, 8):
 312         # before 3.8 it was only possible to create timezone objects with whole
 313         # number of minutes
 314         tz = datetime.timezone(datetime.timedelta(hours=1, seconds=30))
 315         msg = "Offset must represent whole number of minutes"
 316         with pytest.raises(ValueError, match=msg):
 317             pa.lib.tzinfo_to_string(tz)
 318
 319
 320 @h.given(tzst.timezones())
 321 def test_pytz_timezone_roundtrip(tz):
 322     timezone_string = pa.lib.tzinfo_to_string(tz)
 323     timezone_tzinfo = pa.lib.string_to_tzinfo(timezone_string)
 324     assert timezone_tzinfo == tz
 325
 326
 327 def test_convert_custom_tzinfo_objects_to_string():
 328     class CorrectTimezone1(datetime.tzinfo):
 329         """
 330         Conversion is using utcoffset()
 331         """
 332
 333         def tzname(self, dt):
 334             return None
 335
 336         def utcoffset(self, dt):
 337             return datetime.timedelta(hours=-3, minutes=30)
 338
 339     class CorrectTimezone2(datetime.tzinfo):
 340         """
 341         Conversion is using tzname()
 342         """
 343
 344         def tzname(self, dt):
 345             return "+03:00"
 346
 347         def utcoffset(self, dt):
 348             return datetime.timedelta(hours=3)
 349
 350     class BuggyTimezone1(datetime.tzinfo):
 351         """
 352         Unable to infer name or offset
 353         """
 354
 355         def tzname(self, dt):
 356             return None
 357
 358         def utcoffset(self, dt):
 359             return None
 360
 361     class BuggyTimezone2(datetime.tzinfo):
 362         """
 363         Wrong offset type
 364         """
 365
 366         def tzname(self, dt):
 367             return None
 368
 369         def utcoffset(self, dt):
 370             return "one hour"
 371
 372     class BuggyTimezone3(datetime.tzinfo):
 373         """
 374         Wrong timezone name type
 375         """
 376
 377         def tzname(self, dt):
 378             return 240
 379
 380         def utcoffset(self, dt):
 381             return None
 382
 383     assert pa.lib.tzinfo_to_string(CorrectTimezone1()) == "-02:30"
 384     assert pa.lib.tzinfo_to_string(CorrectTimezone2()) == "+03:00"
 385
 386     msg = (r"Object returned by tzinfo.utcoffset\(None\) is not an instance "
 387            r"of datetime.timedelta")
 388     for wrong in [BuggyTimezone1(), BuggyTimezone2(), BuggyTimezone3()]:
 389         with pytest.raises(ValueError, match=msg):
 390             pa.lib.tzinfo_to_string(wrong)
 391
 392
 393 @pytest.mark.parametrize(('string', 'expected'), [
 394     ('UTC', pytz.utc),
 395     ('Europe/Paris', pytz.timezone('Europe/Paris')),
 396     ('+03:00', pytz.FixedOffset(180)),
 397     ('+01:30', pytz.FixedOffset(90)),
 398     ('-02:00', pytz.FixedOffset(-120))
 399 ])
 400 def test_string_to_tzinfo(string, expected):
 401     result = pa.lib.string_to_tzinfo(string)
 402     assert result == expected
 403
 404
 405 @pytest.mark.parametrize('tz,name', [
 406     (pytz.FixedOffset(90), '+01:30'),
 407     (pytz.FixedOffset(-90), '-01:30'),
 408     (pytz.utc, 'UTC'),
 409     (pytz.timezone('America/New_York'), 'America/New_York')
 410 ])
 411 def test_timezone_string_roundtrip(tz, name):
 412     assert pa.lib.tzinfo_to_string(tz) == name
 413     assert pa.lib.string_to_tzinfo(name) == tz
 414
 415
 416 def test_timestamp():
 417     for unit in ('s', 'ms', 'us', 'ns'):
 418         for tz in (None, 'UTC', 'Europe/Paris'):
 419             ty = pa.timestamp(unit, tz=tz)
 420             assert ty.unit == unit
 421             assert ty.tz == tz
 422
 423     for invalid_unit in ('m', 'arbit', 'rary'):
 424         with pytest.raises(ValueError, match='Invalid time unit'):
 425             pa.timestamp(invalid_unit)
 426
 427
 428 def test_time32_units():
 429     for valid_unit in ('s', 'ms'):
 430         ty = pa.time32(valid_unit)
 431         assert ty.unit == valid_unit
 432
 433     for invalid_unit in ('m', 'us', 'ns'):
 434         error_msg = 'Invalid time unit for time32: {!r}'.format(invalid_unit)
 435         with pytest.raises(ValueError, match=error_msg):
 436             pa.time32(invalid_unit)
 437
 438
 439 def test_time64_units():
 440     for valid_unit in ('us', 'ns'):
 441         ty = pa.time64(valid_unit)
 442         assert ty.unit == valid_unit
 443
 444     for invalid_unit in ('m', 's', 'ms'):
 445         error_msg = 'Invalid time unit for time64: {!r}'.format(invalid_unit)
 446         with pytest.raises(ValueError, match=error_msg):
 447             pa.time64(invalid_unit)
 448
 449
 450 def test_duration():
 451     for unit in ('s', 'ms', 'us', 'ns'):
 452         ty = pa.duration(unit)
 453         assert ty.unit == unit
 454
 455     for invalid_unit in ('m', 'arbit', 'rary'):
 456         with pytest.raises(ValueError, match='Invalid time unit'):
 457             pa.duration(invalid_unit)
 458
 459
 460 def test_list_type():
 461     ty = pa.list_(pa.int64())
 462     assert isinstance(ty, pa.ListType)
 463     assert ty.value_type == pa.int64()
 464     assert ty.value_field == pa.field("item", pa.int64(), nullable=True)
 465
 466     with pytest.raises(TypeError):
 467         pa.list_(None)
 468
 469
 470 def test_large_list_type():
 471     ty = pa.large_list(pa.utf8())
 472     assert isinstance(ty, pa.LargeListType)
 473     assert ty.value_type == pa.utf8()
 474     assert ty.value_field == pa.field("item", pa.utf8(), nullable=True)
 475
 476     with pytest.raises(TypeError):
 477         pa.large_list(None)
 478
 479
 480 def test_map_type():
 481     ty = pa.map_(pa.utf8(), pa.int32())
 482     assert isinstance(ty, pa.MapType)
 483     assert ty.key_type == pa.utf8()
 484     assert ty.key_field == pa.field("key", pa.utf8(), nullable=False)
 485     assert ty.item_type == pa.int32()
 486     assert ty.item_field == pa.field("value", pa.int32(), nullable=True)
 487
 488     with pytest.raises(TypeError):
 489         pa.map_(None)
 490     with pytest.raises(TypeError):
 491         pa.map_(pa.int32(), None)
 492     with pytest.raises(TypeError):
 493         pa.map_(pa.field("name", pa.string(), nullable=True), pa.int64())
 494
 495
 496 def test_fixed_size_list_type():
 497     ty = pa.list_(pa.float64(), 2)
 498     assert isinstance(ty, pa.FixedSizeListType)
 499     assert ty.value_type == pa.float64()
 500     assert ty.value_field == pa.field("item", pa.float64(), nullable=True)
 501     assert ty.list_size == 2
 502
 503     with pytest.raises(ValueError):
 504         pa.list_(pa.float64(), -2)
 505
 506
 507 def test_struct_type():
 508     fields = [
 509         # Duplicate field name on purpose
 510         pa.field('a', pa.int64()),
 511         pa.field('a', pa.int32()),
 512         pa.field('b', pa.int32())
 513     ]
 514     ty = pa.struct(fields)
 515
 516     assert len(ty) == ty.num_fields == 3
 517     assert list(ty) == fields
 518     assert ty[0].name == 'a'
 519     assert ty[2].type == pa.int32()
 520     with pytest.raises(IndexError):
 521         assert ty[3]
 522
 523     assert ty['b'] == ty[2]
 524
 525     # Not found
 526     with pytest.raises(KeyError):
 527         ty['c']
 528
 529     # Neither integer nor string
 530     with pytest.raises(TypeError):
 531         ty[None]
 532
 533     for a, b in zip(ty, fields):
 534         a == b
 535
 536     # Construct from list of tuples
 537     ty = pa.struct([('a', pa.int64()),
 538                     ('a', pa.int32()),
 539                     ('b', pa.int32())])
 540     assert list(ty) == fields
 541     for a, b in zip(ty, fields):
 542         a == b
 543
 544     # Construct from mapping
 545     fields = [pa.field('a', pa.int64()),
 546               pa.field('b', pa.int32())]
 547     ty = pa.struct(OrderedDict([('a', pa.int64()),
 548                                 ('b', pa.int32())]))
 549     assert list(ty) == fields
 550     for a, b in zip(ty, fields):
 551         a == b
 552
 553     # Invalid args
 554     with pytest.raises(TypeError):
 555         pa.struct([('a', None)])
 556
 557
 558 def test_struct_duplicate_field_names():
 559     fields = [
 560         pa.field('a', pa.int64()),
 561         pa.field('b', pa.int32()),
 562         pa.field('a', pa.int32())
 563     ]
 564     ty = pa.struct(fields)
 565
 566     # Duplicate
 567     with pytest.warns(UserWarning):
 568         with pytest.raises(KeyError):
 569             ty['a']
 570
 571     # StructType::GetFieldIndex
 572     assert ty.get_field_index('a') == -1
 573
 574     # StructType::GetAllFieldIndices
 575     assert ty.get_all_field_indices('a') == [0, 2]
 576
 577
 578 def test_union_type():
 579     def check_fields(ty, fields):
 580         assert ty.num_fields == len(fields)
 581         assert [ty[i] for i in range(ty.num_fields)] == fields
 582
 583     fields = [pa.field('x', pa.list_(pa.int32())),
 584               pa.field('y', pa.binary())]
 585     type_codes = [5, 9]
 586
 587     sparse_factories = [
 588         partial(pa.union, mode='sparse'),
 589         partial(pa.union, mode=pa.lib.UnionMode_SPARSE),
 590         pa.sparse_union,
 591     ]
 592
 593     dense_factories = [
 594         partial(pa.union, mode='dense'),
 595         partial(pa.union, mode=pa.lib.UnionMode_DENSE),
 596         pa.dense_union,
 597     ]
 598
 599     for factory in sparse_factories:
 600         ty = factory(fields)
 601         assert isinstance(ty, pa.SparseUnionType)
 602         assert ty.mode == 'sparse'
 603         check_fields(ty, fields)
 604         assert ty.type_codes == [0, 1]
 605         ty = factory(fields, type_codes=type_codes)
 606         assert ty.mode == 'sparse'
 607         check_fields(ty, fields)
 608         assert ty.type_codes == type_codes
 609         # Invalid number of type codes
 610         with pytest.raises(ValueError):
 611             factory(fields, type_codes=type_codes[1:])
 612
 613     for factory in dense_factories:
 614         ty = factory(fields)
 615         assert isinstance(ty, pa.DenseUnionType)
 616         assert ty.mode == 'dense'
 617         check_fields(ty, fields)
 618         assert ty.type_codes == [0, 1]
 619         ty = factory(fields, type_codes=type_codes)
 620         assert ty.mode == 'dense'
 621         check_fields(ty, fields)
 622         assert ty.type_codes == type_codes
 623         # Invalid number of type codes
 624         with pytest.raises(ValueError):
 625             factory(fields, type_codes=type_codes[1:])
 626
 627     for mode in ('unknown', 2):
 628         with pytest.raises(ValueError, match='Invalid union mode'):
 629             pa.union(fields, mode=mode)
 630
 631
 632 def test_dictionary_type():
 633     ty0 = pa.dictionary(pa.int32(), pa.string())
 634     assert ty0.index_type == pa.int32()
 635     assert ty0.value_type == pa.string()
 636     assert ty0.ordered is False
 637
 638     ty1 = pa.dictionary(pa.int8(), pa.float64(), ordered=True)
 639     assert ty1.index_type == pa.int8()
 640     assert ty1.value_type == pa.float64()
 641     assert ty1.ordered is True
 642
 643     # construct from non-arrow objects
 644     ty2 = pa.dictionary('int8', 'string')
 645     assert ty2.index_type == pa.int8()
 646     assert ty2.value_type == pa.string()
 647     assert ty2.ordered is False
 648
 649     # allow unsigned integers for index type
 650     ty3 = pa.dictionary(pa.uint32(), pa.string())
 651     assert ty3.index_type == pa.uint32()
 652     assert ty3.value_type == pa.string()
 653     assert ty3.ordered is False
 654
 655     # invalid index type raises
 656     with pytest.raises(TypeError):
 657         pa.dictionary(pa.string(), pa.int64())
 658
 659
 660 def test_dictionary_ordered_equals():
 661     # Python side checking of ARROW-6345
 662     d1 = pa.dictionary('int32', 'binary', ordered=True)
 663     d2 = pa.dictionary('int32', 'binary', ordered=False)
 664     d3 = pa.dictionary('int8', 'binary', ordered=True)
 665     d4 = pa.dictionary('int32', 'binary', ordered=True)
 666
 667     assert not d1.equals(d2)
 668     assert not d1.equals(d3)
 669     assert d1.equals(d4)
 670
 671
 672 def test_types_hashable():
 673     many_types = get_many_types()
 674     in_dict = {}
 675     for i, type_ in enumerate(many_types):
 676         assert hash(type_) == hash(type_)
 677         in_dict[type_] = i
 678     assert len(in_dict) == len(many_types)
 679     for i, type_ in enumerate(many_types):
 680         assert in_dict[type_] == i
 681
 682
 683 def test_types_picklable():
 684     for ty in get_many_types():
 685         data = pickle.dumps(ty)
 686         assert pickle.loads(data) == ty
 687
 688
 689 def test_types_weakref():
 690     for ty in get_many_types():
 691         wr = weakref.ref(ty)
 692         assert wr() is not None
 693         # Note that ty may be a singleton and therefore outlive this loop
 694
 695     wr = weakref.ref(pa.int32())
 696     assert wr() is not None  # singleton
 697     wr = weakref.ref(pa.list_(pa.int32()))
 698     assert wr() is None  # not a singleton
 699
 700
 701 def test_fields_hashable():
 702     in_dict = {}
 703     fields = [pa.field('a', pa.int32()),
 704               pa.field('a', pa.int64()),
 705               pa.field('a', pa.int64(), nullable=False),
 706               pa.field('b', pa.int32()),
 707               pa.field('b', pa.int32(), nullable=False)]
 708     for i, field in enumerate(fields):
 709         in_dict[field] = i
 710     assert len(in_dict) == len(fields)
 711     for i, field in enumerate(fields):
 712         assert in_dict[field] == i
 713
 714
 715 def test_fields_weakrefable():
 716     field = pa.field('a', pa.int32())
 717     wr = weakref.ref(field)
 718     assert wr() is not None
 719     del field
 720     assert wr() is None
 721
 722
 723 @pytest.mark.parametrize('t,check_func', [
 724     (pa.date32(), types.is_date32),
 725     (pa.date64(), types.is_date64),
 726     (pa.time32('s'), types.is_time32),
 727     (pa.time64('ns'), types.is_time64),
 728     (pa.int8(), types.is_int8),
 729     (pa.int16(), types.is_int16),
 730     (pa.int32(), types.is_int32),
 731     (pa.int64(), types.is_int64),
 732     (pa.uint8(), types.is_uint8),
 733     (pa.uint16(), types.is_uint16),
 734     (pa.uint32(), types.is_uint32),
 735     (pa.uint64(), types.is_uint64),
 736     (pa.float16(), types.is_float16),
 737     (pa.float32(), types.is_float32),
 738     (pa.float64(), types.is_float64)
 739 ])
 740 def test_exact_primitive_types(t, check_func):
 741     assert check_func(t)
 742
 743
 744 def test_type_id():
 745     # enum values are not exposed publicly
 746     for ty in get_many_types():
 747         assert isinstance(ty.id, int)
 748
 749
 750 def test_bit_width():
 751     for ty, expected in [(pa.bool_(), 1),
 752                          (pa.int8(), 8),
 753                          (pa.uint32(), 32),
 754                          (pa.float16(), 16),
 755                          (pa.decimal128(19, 4), 128),
 756                          (pa.decimal256(76, 38), 256),
 757                          (pa.binary(42), 42 * 8)]:
 758         assert ty.bit_width == expected
 759     for ty in [pa.binary(), pa.string(), pa.list_(pa.int16())]:
 760         with pytest.raises(ValueError, match="fixed width"):
 761             ty.bit_width
 762
 763
 764 def test_fixed_size_binary_byte_width():
 765     ty = pa.binary(5)
 766     assert ty.byte_width == 5
 767
 768
 769 def test_decimal_properties():
 770     ty = pa.decimal128(19, 4)
 771     assert ty.byte_width == 16
 772     assert ty.precision == 19
 773     assert ty.scale == 4
 774     ty = pa.decimal256(76, 38)
 775     assert ty.byte_width == 32
 776     assert ty.precision == 76
 777     assert ty.scale == 38
 778
 779
 780 def test_decimal_overflow():
 781     pa.decimal128(1, 0)
 782     pa.decimal128(38, 0)
 783     for i in (0, -1, 39):
 784         with pytest.raises(ValueError):
 785             pa.decimal128(i, 0)
 786
 787     pa.decimal256(1, 0)
 788     pa.decimal256(76, 0)
 789     for i in (0, -1, 77):
 790         with pytest.raises(ValueError):
 791             pa.decimal256(i, 0)
 792
 793
 794 def test_type_equality_operators():
 795     many_types = get_many_types()
 796     non_pyarrow = ('foo', 16, {'s', 'e', 't'})
 797
 798     for index, ty in enumerate(many_types):
 799         # could use two parametrization levels,
 800         # but that'd bloat pytest's output
 801         for i, other in enumerate(many_types + non_pyarrow):
 802             if i == index:
 803                 assert ty == other
 804             else:
 805                 assert ty != other
 806
 807
 808 def test_key_value_metadata():
 809     m = pa.KeyValueMetadata({'a': 'A', 'b': 'B'})
 810     assert len(m) == 2
 811     assert m['a'] == b'A'
 812     assert m[b'a'] == b'A'
 813     assert m['b'] == b'B'
 814     assert 'a' in m
 815     assert b'a' in m
 816     assert 'c' not in m
 817
 818     m1 = pa.KeyValueMetadata({'a': 'A', 'b': 'B'})
 819     m2 = pa.KeyValueMetadata(a='A', b='B')
 820     m3 = pa.KeyValueMetadata([('a', 'A'), ('b', 'B')])
 821
 822     assert m1 != 2
 823     assert m1 == m2
 824     assert m2 == m3
 825     assert m1 == {'a': 'A', 'b': 'B'}
 826     assert m1 != {'a': 'A', 'b': 'C'}
 827
 828     with pytest.raises(TypeError):
 829         pa.KeyValueMetadata({'a': 1})
 830     with pytest.raises(TypeError):
 831         pa.KeyValueMetadata({1: 'a'})
 832     with pytest.raises(TypeError):
 833         pa.KeyValueMetadata(a=1)
 834
 835     expected = [(b'a', b'A'), (b'b', b'B')]
 836     result = [(k, v) for k, v in m3.items()]
 837     assert result == expected
 838     assert list(m3.items()) == expected
 839     assert list(m3.keys()) == [b'a', b'b']
 840     assert list(m3.values()) == [b'A', b'B']
 841     assert len(m3) == 2
 842
 843     # test duplicate key support
 844     md = pa.KeyValueMetadata([
 845         ('a', 'alpha'),
 846         ('b', 'beta'),
 847         ('a', 'Alpha'),
 848         ('a', 'ALPHA'),
 849     ])
 850
 851     expected = [
 852         (b'a', b'alpha'),
 853         (b'b', b'beta'),
 854         (b'a', b'Alpha'),
 855         (b'a', b'ALPHA')
 856     ]
 857     assert len(md) == 4
 858     assert isinstance(md.keys(), Iterator)
 859     assert isinstance(md.values(), Iterator)
 860     assert isinstance(md.items(), Iterator)
 861     assert list(md.items()) == expected
 862     assert list(md.keys()) == [k for k, _ in expected]
 863     assert list(md.values()) == [v for _, v in expected]
 864
 865     # first occurrence
 866     assert md['a'] == b'alpha'
 867     assert md['b'] == b'beta'
 868     assert md.get_all('a') == [b'alpha', b'Alpha', b'ALPHA']
 869     assert md.get_all('b') == [b'beta']
 870     assert md.get_all('unkown') == []
 871
 872     with pytest.raises(KeyError):
 873         md = pa.KeyValueMetadata([
 874             ('a', 'alpha'),
 875             ('b', 'beta'),
 876             ('a', 'Alpha'),
 877             ('a', 'ALPHA'),
 878         ], b='BETA')
 879
 880
 881 def test_key_value_metadata_duplicates():
 882     meta = pa.KeyValueMetadata({'a': '1', 'b': '2'})
 883
 884     with pytest.raises(KeyError):
 885         pa.KeyValueMetadata(meta, a='3')
 886
 887
 888 def test_field_basic():
 889     t = pa.string()
 890     f = pa.field('foo', t)
 891
 892     assert f.name == 'foo'
 893     assert f.nullable
 894     assert f.type is t
 895     assert repr(f) == "pyarrow.Field<foo: string>"
 896
 897     f = pa.field('foo', t, False)
 898     assert not f.nullable
 899
 900     with pytest.raises(TypeError):
 901         pa.field('foo', None)
 902
 903
 904 def test_field_equals():
 905     meta1 = {b'foo': b'bar'}
 906     meta2 = {b'bizz': b'bazz'}
 907
 908     f1 = pa.field('a', pa.int8(), nullable=True)
 909     f2 = pa.field('a', pa.int8(), nullable=True)
 910     f3 = pa.field('a', pa.int8(), nullable=False)
 911     f4 = pa.field('a', pa.int16(), nullable=False)
 912     f5 = pa.field('b', pa.int16(), nullable=False)
 913     f6 = pa.field('a', pa.int8(), nullable=True, metadata=meta1)
 914     f7 = pa.field('a', pa.int8(), nullable=True, metadata=meta1)
 915     f8 = pa.field('a', pa.int8(), nullable=True, metadata=meta2)
 916
 917     assert f1.equals(f2)
 918     assert f6.equals(f7)
 919     assert not f1.equals(f3)
 920     assert not f1.equals(f4)
 921     assert not f3.equals(f4)
 922     assert not f4.equals(f5)
 923
 924     # No metadata in f1, but metadata in f6
 925     assert f1.equals(f6)
 926     assert not f1.equals(f6, check_metadata=True)
 927
 928     # Different metadata
 929     assert f6.equals(f7)
 930     assert f7.equals(f8)
 931     assert not f7.equals(f8, check_metadata=True)
 932
 933
 934 def test_field_equality_operators():
 935     f1 = pa.field('a', pa.int8(), nullable=True)
 936     f2 = pa.field('a', pa.int8(), nullable=True)
 937     f3 = pa.field('b', pa.int8(), nullable=True)
 938     f4 = pa.field('b', pa.int8(), nullable=False)
 939
 940     assert f1 == f2
 941     assert f1 != f3
 942     assert f3 != f4
 943     assert f1 != 'foo'
 944
 945
 946 def test_field_metadata():
 947     f1 = pa.field('a', pa.int8())
 948     f2 = pa.field('a', pa.int8(), metadata={})
 949     f3 = pa.field('a', pa.int8(), metadata={b'bizz': b'bazz'})
 950
 951     assert f1.metadata is None
 952     assert f2.metadata == {}
 953     assert f3.metadata[b'bizz'] == b'bazz'
 954
 955
 956 def test_field_add_remove_metadata():
 957     import collections
 958
 959     f0 = pa.field('foo', pa.int32())
 960
 961     assert f0.metadata is None
 962
 963     metadata = {b'foo': b'bar', b'pandas': b'badger'}
 964     metadata2 = collections.OrderedDict([
 965         (b'a', b'alpha'),
 966         (b'b', b'beta')
 967     ])
 968
 969     f1 = f0.with_metadata(metadata)
 970     assert f1.metadata == metadata
 971
 972     f2 = f0.with_metadata(metadata2)
 973     assert f2.metadata == metadata2
 974
 975     with pytest.raises(TypeError):
 976         f0.with_metadata([1, 2, 3])
 977
 978     f3 = f1.remove_metadata()
 979     assert f3.metadata is None
 980
 981     # idempotent
 982     f4 = f3.remove_metadata()
 983     assert f4.metadata is None
 984
 985     f5 = pa.field('foo', pa.int32(), True, metadata)
 986     f6 = f0.with_metadata(metadata)
 987     assert f5.equals(f6)
 988
 989
 990 def test_field_modified_copies():
 991     f0 = pa.field('foo', pa.int32(), True)
 992     f0_ = pa.field('foo', pa.int32(), True)
 993     assert f0.equals(f0_)
 994
 995     f1 = pa.field('foo', pa.int64(), True)
 996     f1_ = f0.with_type(pa.int64())
 997     assert f1.equals(f1_)
 998     # Original instance is unmodified
 999     assert f0.equals(f0_)
1000
1001     f2 = pa.field('foo', pa.int32(), False)
1002     f2_ = f0.with_nullable(False)
1003     assert f2.equals(f2_)
1004     # Original instance is unmodified
1005     assert f0.equals(f0_)
1006
1007     f3 = pa.field('bar', pa.int32(), True)
1008     f3_ = f0.with_name('bar')
1009     assert f3.equals(f3_)
1010     # Original instance is unmodified
1011     assert f0.equals(f0_)
1012
1013
def test_is_integer_value():
    """Check ``is_integer_value`` on Python/NumPy ints and on a string."""
    is_integer = pa.types.is_integer_value

    for candidate in (1, np.int64(1)):
        assert is_integer(candidate)

    # a numeric-looking string is not an integer value
    assert not is_integer('1')
1018
1019
def test_is_float_value():
    """Check ``is_float_value`` on ints, floats, NumPy floats and strings."""
    is_float = pa.types.is_float_value

    for candidate in (1., np.float64(1)):
        assert is_float(candidate)

    # neither an int nor a numeric-looking string is a float value
    for candidate in (1, '1.0'):
        assert not is_float(candidate)
1025
1026
def test_is_boolean_value():
    """Check ``is_boolean_value`` on an int and on Python/NumPy booleans."""
    is_boolean = pa.types.is_boolean_value

    # the integer 1 is not treated as a boolean value
    assert not is_boolean(1)

    for flag in (True, False, np.bool_(True), np.bool_(False)):
        assert is_boolean(flag)
1033
1034
@h.given(past.all_types | past.all_fields | past.all_schemas)
@h.example(
    pa.field(name='', type=pa.null(), metadata={'0': '', '': ''})
)
def test_pickling(field):
    """A generated type, field or schema survives a pickle round trip."""
    restored = pickle.loads(pickle.dumps(field))
    assert restored == field
1046
1047
@h.given(
    st.lists(past.all_types) |
    st.lists(past.all_fields) |
    st.lists(past.all_schemas)
)
def test_hashing(items):
    """Pairwise-distinct items hash stably and work as dict keys."""
    # Discard inputs containing two equal items; the pairwise scan is
    # still O(n^2), but it makes the input unique.
    has_duplicate = any(
        later.equals(earlier)
        for position, later in enumerate(items)
        for earlier in items[:position]
    )
    h.assume(not has_duplicate)

    index_by_item = {}
    for index, item in enumerate(items):
        # hashing the same object twice gives the same value
        assert hash(item) == hash(item)
        index_by_item[item] = index

    # no two items ended up under the same key
    assert len(index_by_item) == len(items)

    # every item looks up the index it was stored under
    for index, item in enumerate(items):
        assert index_by_item[item] == index