]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/python/pyarrow/tests/test_schema.py
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / python / pyarrow / tests / test_schema.py
CommitLineData
1d09f67e
TL
1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements. See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership. The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License. You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied. See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18from collections import OrderedDict
19import pickle
20import sys
21import weakref
22
23import pytest
24import numpy as np
25import pyarrow as pa
26
27import pyarrow.tests.util as test_util
28from pyarrow.vendored.version import Version
29
30
def test_schema_constructor_errors():
    """Directly instantiating pa.Schema must raise a helpful TypeError."""
    expected = ("Do not call Schema's constructor directly, use "
                "`pyarrow.schema` instead")
    with pytest.raises(TypeError, match=expected):
        pa.Schema()
36
37
def test_type_integers():
    """Each integer type factory yields a type whose str() is its own name."""
    for name in ('int8', 'int16', 'int32', 'int64',
                 'uint8', 'uint16', 'uint32', 'uint64'):
        assert str(getattr(pa, name)()) == name
46
47
def test_type_to_pandas_dtype():
    """Arrow types map to the NumPy dtypes pandas uses to represent them."""
    datetime_ns = np.dtype('datetime64[ns]')
    expectations = [
        (pa.null(), np.object_),
        (pa.bool_(), np.bool_),
        (pa.int8(), np.int8),
        (pa.int16(), np.int16),
        (pa.int32(), np.int32),
        (pa.int64(), np.int64),
        (pa.uint8(), np.uint8),
        (pa.uint16(), np.uint16),
        (pa.uint32(), np.uint32),
        (pa.uint64(), np.uint64),
        (pa.float16(), np.float16),
        (pa.float32(), np.float32),
        (pa.float64(), np.float64),
        # All date/timestamp flavors land on nanosecond datetime64.
        (pa.date32(), datetime_ns),
        (pa.date64(), datetime_ns),
        (pa.timestamp('ms'), datetime_ns),
        (pa.binary(), np.object_),
        (pa.binary(12), np.object_),
        (pa.string(), np.object_),
        (pa.list_(pa.int8()), np.object_),
        # (pa.list_(pa.int8(), 2), np.object_), # TODO needs pandas conversion
        (pa.map_(pa.int64(), pa.float64()), np.object_),
    ]
    for arrow_type, expected_dtype in expectations:
        assert arrow_type.to_pandas_dtype() == expected_dtype
76
77
@pytest.mark.pandas
def test_type_to_pandas_dtype_check_import():
    """Run arrow_7980.py in a fresh interpreter (regression for ARROW-7980)."""
    test_util.invoke_script('arrow_7980.py')
82
83
def test_type_list():
    """pa.list_ accepts either a bare value type or a full Field as item."""
    assert str(pa.list_(pa.int32())) == 'list<item: int32>'

    # Passing a Field lets the item carry a custom name.
    named = pa.list_(pa.field('my_item', pa.string()))
    assert str(named) == 'list<my_item: string>'
92
93
def test_type_comparisons():
    """A DataType equals an equivalent type or its alias string, not ints."""
    t = pa.int32()
    assert t == pa.int32()
    assert t == 'int32'
    assert t != 5
99
100
def test_type_for_alias():
    """pa.type_for_alias resolves every supported alias string."""
    alias_map = {
        'i1': pa.int8(),
        'int8': pa.int8(),
        'i2': pa.int16(),
        'int16': pa.int16(),
        'i4': pa.int32(),
        'int32': pa.int32(),
        'i8': pa.int64(),
        'int64': pa.int64(),
        'u1': pa.uint8(),
        'uint8': pa.uint8(),
        'u2': pa.uint16(),
        'uint16': pa.uint16(),
        'u4': pa.uint32(),
        'uint32': pa.uint32(),
        'u8': pa.uint64(),
        'uint64': pa.uint64(),
        'f4': pa.float32(),
        'float32': pa.float32(),
        'f8': pa.float64(),
        'float64': pa.float64(),
        'date32': pa.date32(),
        'date64': pa.date64(),
        'string': pa.string(),
        'str': pa.string(),
        'binary': pa.binary(),
        'time32[s]': pa.time32('s'),
        'time32[ms]': pa.time32('ms'),
        'time64[us]': pa.time64('us'),
        'time64[ns]': pa.time64('ns'),
        'timestamp[s]': pa.timestamp('s'),
        'timestamp[ms]': pa.timestamp('ms'),
        'timestamp[us]': pa.timestamp('us'),
        'timestamp[ns]': pa.timestamp('ns'),
        'duration[s]': pa.duration('s'),
        'duration[ms]': pa.duration('ms'),
        'duration[us]': pa.duration('us'),
        'duration[ns]': pa.duration('ns'),
        'month_day_nano_interval': pa.month_day_nano_interval(),
    }
    for alias, expected in alias_map.items():
        assert pa.type_for_alias(alias) == expected
145
146
def test_type_string():
    """The utf8 string type renders as 'string'."""
    assert str(pa.string()) == 'string'
150
151
def test_type_timestamp_with_tz():
    """A timestamp type exposes both its unit and its timezone string."""
    zone = 'America/Los_Angeles'
    ts_type = pa.timestamp('ns', tz=zone)
    assert ts_type.unit == 'ns'
    assert ts_type.tz == zone
157
158
def test_time_types():
    """time32/time64 expose their unit; units of the wrong width raise."""
    seconds = pa.time32('s')
    millis = pa.time32('ms')
    micros = pa.time64('us')
    nanos = pa.time64('ns')

    assert seconds.unit == 's'
    assert millis.unit == 'ms'
    assert micros.unit == 'us'
    assert nanos.unit == 'ns'

    assert str(seconds) == 'time32[s]'
    assert str(nanos) == 'time64[ns]'

    # 'us' does not fit time32, and 's' is not valid for time64.
    with pytest.raises(ValueError):
        pa.time32('us')
    with pytest.raises(ValueError):
        pa.time64('s')
178
179
def test_from_numpy_dtype():
    """pa.from_numpy_dtype maps NumPy dtypes (and dtype-likes) to Arrow."""
    mapping = [
        (np.dtype('bool'), pa.bool_()),
        (np.dtype('int8'), pa.int8()),
        (np.dtype('int16'), pa.int16()),
        (np.dtype('int32'), pa.int32()),
        (np.dtype('int64'), pa.int64()),
        (np.dtype('uint8'), pa.uint8()),
        (np.dtype('uint16'), pa.uint16()),
        (np.dtype('uint32'), pa.uint32()),
        (np.dtype('float16'), pa.float16()),
        (np.dtype('float32'), pa.float32()),
        (np.dtype('float64'), pa.float64()),
        (np.dtype('U'), pa.string()),
        (np.dtype('S'), pa.binary()),
        (np.dtype('datetime64[s]'), pa.timestamp('s')),
        (np.dtype('datetime64[ms]'), pa.timestamp('ms')),
        (np.dtype('datetime64[us]'), pa.timestamp('us')),
        (np.dtype('datetime64[ns]'), pa.timestamp('ns')),
        (np.dtype('timedelta64[s]'), pa.duration('s')),
        (np.dtype('timedelta64[ms]'), pa.duration('ms')),
        (np.dtype('timedelta64[us]'), pa.duration('us')),
        (np.dtype('timedelta64[ns]'), pa.duration('ns')),
    ]
    for np_dtype, arrow_type in mapping:
        assert pa.from_numpy_dtype(np_dtype) == arrow_type

    # Anything convertible to a numpy dtype is accepted too.
    assert pa.from_numpy_dtype('U') == pa.string()
    assert pa.from_numpy_dtype(np.str_) == pa.string()
    assert pa.from_numpy_dtype('int32') == pa.int32()
    assert pa.from_numpy_dtype(bool) == pa.bool_()

    # Object dtype has no unique Arrow counterpart.
    with pytest.raises(NotImplementedError):
        pa.from_numpy_dtype(np.dtype('O'))

    # Strings that are not dtype-like raise TypeError.
    with pytest.raises(TypeError):
        pa.from_numpy_dtype('not_convertible_to_dtype')
220
221
def test_schema():
    """Basic schema construction: names, types, indexing, and repr."""
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ]
    sch = pa.schema(fields)

    assert sch.names == ['foo', 'bar', 'baz']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
    assert len(sch) == 3

    # Positional access and name-based access agree.
    first = sch[0]
    assert first.name == 'foo'
    assert first.type == fields[0].type
    assert sch.field('foo').name == 'foo'
    assert sch.field('foo').type == fields[0].type

    expected_repr = ("foo: int32\n"
                     "bar: string\n"
                     "baz: list<item: int8>\n"
                     "  child 0, item: int8")
    assert repr(sch) == expected_repr

    # Entries must be fields or (name, type) pairs, never None.
    with pytest.raises(TypeError):
        pa.schema([None])
247
248
def test_schema_weakref():
    """Schemas support weak references and get collected when unreferenced."""
    schema = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ])
    ref = weakref.ref(schema)
    assert ref() is not None

    # Dropping the only strong reference must clear the weakref.
    del schema
    assert ref() is None
260
261
def test_schema_to_string_with_metadata():
    """Exercise Schema.to_string metadata rendering and its toggles.

    Covers metadata truncation (long values are cut at 65 chars with a
    '+ N' suffix), values that exactly fit, and the truncate_metadata /
    show_field_metadata / show_schema_metadata keyword switches.
    """
    # Long filler value used as schema-level metadata (> truncation limit).
    lorem = """\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla accumsan vel
turpis et mollis. Aliquam tincidunt arcu id tortor blandit blandit. Donec
eget leo quis lectus scelerisque varius. Class aptent taciti sociosqu ad
litora torquent per conubia nostra, per inceptos himenaeos. Praesent
faucibus, diam eu volutpat iaculis, tellus est porta ligula, a efficitur
turpis nulla facilisis quam. Aliquam vitae lorem erat. Proin a dolor ac libero
dignissim mollis vitae eu mauris. Quisque posuere tellus vitae massa
pellentesque sagittis. Aenean feugiat, diam ac dignissim fermentum, lorem
sapien commodo massa, vel volutpat orci nisi eu justo. Nulla non blandit
sapien. Quisque pretium vestibulum urna eu vehicula."""
    # ARROW-7063
    my_schema = pa.schema([pa.field("foo", "int32", False,
                                    metadata={"key1": "value1"}),
                           pa.field("bar", "string", True,
                                    metadata={"key3": "value3"})],
                          metadata={"lorem": lorem})

    # Default rendering truncates the long value to 65 chars + remainder.
    assert my_schema.to_string() == """\
foo: int32 not null
  -- field metadata --
  key1: 'value1'
bar: string
  -- field metadata --
  key3: 'value3'
-- schema metadata --
lorem: '""" + lorem[:65] + "' + " + str(len(lorem) - 65)

    # Metadata that exactly fits
    result = pa.schema([('f0', 'int32')],
                       metadata={'key': 'value' + 'x' * 62}).to_string()
    assert result == """\
f0: int32
-- schema metadata --
key: 'valuexxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'"""

    # truncate_metadata=False prints the full value untruncated.
    assert my_schema.to_string(truncate_metadata=False) == """\
foo: int32 not null
  -- field metadata --
  key1: 'value1'
bar: string
  -- field metadata --
  key3: 'value3'
-- schema metadata --
lorem: '{}'""".format(lorem)

    # show_field_metadata=False drops the per-field metadata sections.
    assert my_schema.to_string(truncate_metadata=False,
                               show_field_metadata=False) == """\
foo: int32 not null
bar: string
-- schema metadata --
lorem: '{}'""".format(lorem)

    # show_schema_metadata=False drops only the schema-level section.
    assert my_schema.to_string(truncate_metadata=False,
                               show_schema_metadata=False) == """\
foo: int32 not null
  -- field metadata --
  key1: 'value1'
bar: string
  -- field metadata --
  key3: 'value3'"""

    # Both toggles off: only the bare field list remains.
    assert my_schema.to_string(truncate_metadata=False,
                               show_field_metadata=False,
                               show_schema_metadata=False) == """\
foo: int32 not null
bar: string"""
331
332
def test_schema_from_tuples():
    """Schemas can be built from (name, type) tuples; None types raise."""
    sch = pa.schema([
        ('foo', pa.int32()),
        ('bar', pa.string()),
        ('baz', pa.list_(pa.int8())),
    ])
    assert sch.names == ['foo', 'bar', 'baz']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
    assert len(sch) == 3
    assert repr(sch) == ("foo: int32\n"
                         "bar: string\n"
                         "baz: list<item: int8>\n"
                         "  child 0, item: int8")

    # A tuple with a None type is rejected.
    with pytest.raises(TypeError):
        pa.schema([('foo', None)])
351
352
def test_schema_from_mapping():
    """A name -> type mapping builds a schema in iteration order."""
    mapping = OrderedDict([
        ('foo', pa.int32()),
        ('bar', pa.string()),
        ('baz', pa.list_(pa.int8())),
    ])
    sch = pa.schema(mapping)
    assert sch.names == ['foo', 'bar', 'baz']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
    assert len(sch) == 3
    assert repr(sch) == ("foo: int32\n"
                         "bar: string\n"
                         "baz: list<item: int8>\n"
                         "  child 0, item: int8")

    # None is not a valid type value, even via a mapping.
    with pytest.raises(TypeError):
        pa.schema(OrderedDict([('foo', None)]))
372
373
def test_schema_duplicate_fields():
    """Duplicate field names are allowed; name lookups become ambiguous."""
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('foo', pa.list_(pa.int8())),
    ]
    sch = pa.schema(fields)
    assert sch.names == ['foo', 'bar', 'foo']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
    assert len(sch) == 3
    assert repr(sch) == ("foo: int32\n"
                         "bar: string\n"
                         "foo: list<item: int8>\n"
                         "  child 0, item: int8")

    assert sch[0].name == 'foo'
    assert sch[0].type == fields[0].type

    # field_by_name is deprecated (FutureWarning); it returns None for
    # unknown names and warns + returns None for ambiguous ones.
    with pytest.warns(FutureWarning):
        assert sch.field_by_name('bar') == fields[1]
    with pytest.warns(FutureWarning):
        assert sch.field_by_name('xxx') is None
    with pytest.warns((UserWarning, FutureWarning)):
        assert sch.field_by_name('foo') is None

    # Schema::GetFieldIndex yields -1 for an ambiguous name
    assert sch.get_field_index('foo') == -1

    # Schema::GetAllFieldIndices lists every occurrence
    assert sch.get_all_field_indices('foo') == [0, 2]
404
405
def test_field_flatten():
    """Field.flatten expands struct fields into dotted child fields."""
    f0 = pa.field('foo', pa.int32()).with_metadata({b'foo': b'bar'})
    # A non-nested field flattens to just itself.
    assert f0.flatten() == [f0]

    f1 = pa.field('bar', pa.float64(), nullable=False)
    ff = pa.field('ff', pa.struct([f0, f1]), nullable=False)
    assert ff.flatten() == [
        pa.field('ff.foo', pa.int32()).with_metadata({b'foo': b'bar'}),
        pa.field('ff.bar', pa.float64(), nullable=False)]  # XXX

    # Nullable parent makes flattened child nullable
    ff = pa.field('ff', pa.struct([f0, f1]))
    assert ff.flatten() == [
        pa.field('ff.foo', pa.int32()).with_metadata({b'foo': b'bar'}),
        pa.field('ff.bar', pa.float64())]

    # Flattening only unwraps a single struct level.
    fff = pa.field('fff', pa.struct([ff]))
    assert fff.flatten() == [pa.field('fff.ff', pa.struct([f0, f1]))]
424
425
def test_schema_add_remove_metadata():
    """with_metadata attaches metadata; remove_metadata clears it."""
    bare = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ])
    assert bare.metadata is None

    metadata = {b'foo': b'bar', b'pandas': b'badger'}
    tagged = bare.with_metadata(metadata)
    assert tagged.metadata == metadata

    stripped = tagged.remove_metadata()
    assert stripped.metadata is None

    # Removing metadata twice is an idempotent no-op.
    assert stripped.remove_metadata().metadata is None
448
449
def test_schema_equals():
    """Schema.equals compares fields, and metadata only when requested.

    Fix: the original asserted ``not sch1.equals(sch3, check_metadata=True)``
    twice in a row; the second occurrence clearly meant to check the other
    schema pair (sch2 vs sch4), mirroring the positive asserts above.
    """
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ]
    metadata = {b'foo': b'bar', b'pandas': b'badger'}

    sch1 = pa.schema(fields)
    sch2 = pa.schema(fields)
    sch3 = pa.schema(fields, metadata=metadata)
    sch4 = pa.schema(fields, metadata=metadata)

    assert sch1.equals(sch2, check_metadata=True)
    assert sch3.equals(sch4, check_metadata=True)

    # Metadata only matters when check_metadata=True.
    assert sch1.equals(sch3)
    assert not sch1.equals(sch3, check_metadata=True)
    assert not sch2.equals(sch4, check_metadata=True)

    # Schemas with different field lists are never equal.
    del fields[-1]
    sch3 = pa.schema(fields)
    assert not sch1.equals(sch3)
472
473
def test_schema_equals_propagates_check_metadata():
    # ARROW-4088: check_metadata must reach field-level metadata too
    schema1 = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string())
    ])
    schema2 = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string(), metadata={'a': 'alpha'}),
    ])
    # The field metadata differs, so the strict comparison fails...
    assert not schema1.equals(schema2, check_metadata=True)
    # ...while the default comparison ignores it.
    assert schema1.equals(schema2)
486
487
def test_schema_equals_invalid_type():
    """ARROW-5873: comparing a schema to a non-schema raises TypeError."""
    schema = pa.schema([pa.field("a", pa.int64())])

    for other in (None, 'string', pa.array([1, 2])):
        with pytest.raises(TypeError):
            schema.equals(other)
495
496
def test_schema_equality_operators():
    """__eq__/__ne__ ignore metadata and never raise on foreign types."""
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ]
    metadata = {b'foo': b'bar', b'pandas': b'badger'}

    plain_a = pa.schema(fields)
    plain_b = pa.schema(fields)
    tagged_a = pa.schema(fields, metadata=metadata)
    tagged_b = pa.schema(fields, metadata=metadata)

    assert plain_a == plain_b
    assert tagged_a == tagged_b

    # __eq__ and __ne__ do not check metadata
    assert plain_a == tagged_a
    assert not plain_a != tagged_a

    assert plain_b == tagged_b

    # comparison with other types doesn't raise
    assert plain_a != []
    assert tagged_a != 'foo'
522
523
def test_schema_get_fields():
    """Schema.field accepts a name or a (possibly negative) integer index."""
    schema = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ])

    assert schema.field('foo').name == 'foo'
    assert schema.field(0).name == 'foo'
    assert schema.field(-1).name == 'baz'

    with pytest.raises(KeyError):
        schema.field('other')    # unknown name
    with pytest.raises(TypeError):
        schema.field(0.0)        # neither str nor int
    with pytest.raises(IndexError):
        schema.field(4)          # index out of range
543
544
def test_schema_negative_indexing():
    """schema[i] supports Python-style negative indices with bounds checks."""
    schema = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ])

    # Each negative index resolves to its positive counterpart.
    for negative, positive in ((-1, 2), (-2, 1), (-3, 0)):
        assert schema[negative].equals(schema[positive])

    with pytest.raises(IndexError):
        schema[-4]
    with pytest.raises(IndexError):
        schema[3]
563
564
def test_schema_repr_with_dictionaries():
    """Dictionary types spell out value/index types and ordering in repr."""
    sch = pa.schema([
        pa.field('one', pa.dictionary(pa.int16(), pa.string())),
        pa.field('two', pa.int32())
    ])

    expected = ("one: dictionary<values=string, indices=int16, ordered=0>\n"
                "two: int32")
    assert repr(sch) == expected
578
579
def test_type_schema_pickling():
    """Types, fields and schemas all survive a pickle round trip."""
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.map_(pa.string(), pa.int8()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_SPARSE),
        pa.union([
            pa.field('a', pa.int8()),
            pa.field('b', pa.int16())
        ], pa.lib.UnionMode_DENSE),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal128(12, 2),
        pa.decimal256(76, 38),
        pa.field('a', 'string', metadata={b'foo': b'bar'}),
        pa.list_(pa.field("element", pa.int64())),
        pa.large_list(pa.field("element", pa.int64())),
        pa.map_(pa.field("key", pa.string(), nullable=False),
                pa.field("value", pa.int8()))
    ]

    for original in cases:
        assert pickle.loads(pickle.dumps(original)) == original

    # Wrap bare types in fields so every case can join a single schema.
    members = [
        case if isinstance(case, pa.Field)
        else pa.field('_f{}'.format(idx), case)
        for idx, case in enumerate(cases)
    ]
    schema = pa.schema(members, metadata={b'foo': b'bar'})
    assert pickle.loads(pickle.dumps(schema)) == schema
629
630
def test_empty_table():
    """schema.empty_table() yields a zero-row Table with the same schema."""
    schema1 = pa.schema([
        pa.field('f0', pa.int64()),
        pa.field('f1', pa.dictionary(pa.int32(), pa.string())),
        pa.field('f2', pa.list_(pa.list_(pa.int64()))),
    ])
    # test it preserves field nullability
    schema2 = pa.schema([
        pa.field('a', pa.int64(), nullable=False),
        pa.field('b', pa.int64())
    ])

    for schema in (schema1, schema2):
        table = schema.empty_table()
        assert isinstance(table, pa.Table)
        assert table.num_rows == 0
        assert table.schema == schema
648
649
@pytest.mark.pandas
def test_schema_from_pandas():
    """Schema.from_pandas matches the schema Table.from_pandas infers."""
    import pandas as pd
    inputs = [
        list(range(10)),
        pd.Categorical(list(range(10))),
        ['foo', 'bar', None, 'baz', 'qux'],
        np.array([
            '2007-07-13T01:23:34.123456789',
            '2006-01-13T12:34:56.432539784',
            '2010-08-13T05:46:57.437699912'
        ], dtype='datetime64[ns]'),
    ]
    # Nullable integer arrays only exist in pandas >= 1.0.
    if Version(pd.__version__) >= Version('1.0.0'):
        inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype()))

    for column in inputs:
        frame = pd.DataFrame({'a': column})
        inferred = pa.Schema.from_pandas(frame)
        assert inferred == pa.Table.from_pandas(frame).schema
670
671
def test_schema_sizeof():
    """sys.getsizeof reflects schema contents, including metadata size."""
    schema = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
    ])

    # More than a bare Python object header is accounted for.
    assert sys.getsizeof(schema) > 30

    with_meta = schema.with_metadata({"key": "some metadata"})
    assert sys.getsizeof(with_meta) > sys.getsizeof(schema)
    with_longer_meta = schema.with_metadata({"key": "some more metadata"})
    assert sys.getsizeof(with_longer_meta) > sys.getsizeof(with_meta)
684
685
def test_schema_merge():
    """pa.unify_schemas merges fields by name and rejects type conflicts."""
    base = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ])
    overlapping = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('qux', pa.bool_())
    ])
    disjoint = pa.schema([
        pa.field('quux', pa.dictionary(pa.int32(), pa.string()))
    ])
    conflicting = pa.schema([
        pa.field('foo', pa.int64()),  # int64 clashes with the int32 'foo'
        pa.field('qux', pa.bool_())
    ])

    expected = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8())),
        pa.field('qux', pa.bool_()),
        pa.field('quux', pa.dictionary(pa.int32(), pa.string()))
    ])
    assert pa.unify_schemas([base, overlapping, disjoint]).equals(expected)

    # A field present with two different types cannot be unified.
    with pytest.raises(pa.ArrowInvalid):
        pa.unify_schemas([overlapping, conflicting])

    # ARROW-14002: Try with tuple instead of list
    assert pa.unify_schemas((base, overlapping, disjoint)).equals(expected)
720
721
def test_undecodable_metadata():
    # ARROW-10214: undecodable metadata shouldn't fail repr()
    blob_field = b'abcdef\xff\x00'
    blob_schema = b'ghijkl\xff\x00'
    schema = pa.schema(
        [pa.field('ints', pa.int16(), metadata={'key': blob_field})],
        metadata={'key': blob_schema})
    # The decodable prefixes must still show up in the rendering.
    assert 'abcdef' in str(schema)
    assert 'ghijkl' in str(schema)