# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
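
# Tests for pyarrow.feather: roundtripping pandas DataFrames and Arrow tables
# through the Feather V1 and V2 formats, with optional compression.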

import io
import os
import sys
import tempfile
import pytest
import hypothesis as h
import hypothesis.strategies as st

import numpy as np

import pyarrow as pa
import pyarrow.tests.strategies as past
from pyarrow.feather import (read_feather, write_feather, read_table,
                             FeatherDataset)


try:
    from pandas.testing import assert_frame_equal
    import pandas as pd
    import pyarrow.pandas_compat
except ImportError:
    pass


@pytest.fixture(scope='module')
def datadir(base_datadir):
    return base_datadir / 'feather'


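# Generate a unique temporary path; callers register it in TEST_FILES so the
# module-level teardown can remove whatever the test actually wrote.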
def random_path(prefix='feather_'):
    return tempfile.mktemp(prefix=prefix)


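# Run each parametrized test against both the Feather V1 and V2 formats.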
@pytest.fixture(scope="module", params=[1, 2])
def version(request):
    yield request.param


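# Parametrize over the supported compression settings, skipping codecs that
# are not built into the local Arrow library.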
@pytest.fixture(scope="module", params=[None, "uncompressed", "lz4", "zstd"])
def compression(request):
    if request.param in ['lz4', 'zstd'] and not pa.Codec.is_available(
            request.param):
        pytest.skip(f'{request.param} is not available')
    yield request.param


TEST_FILES = None


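# setup_module/teardown_module bracket the whole module: helpers that write to
# disk append their paths to TEST_FILES, and teardown removes them
# best-effort.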
def setup_module(module):
    global TEST_FILES
    TEST_FILES = []


def teardown_module(module):
    for path in TEST_FILES:
        try:
            os.remove(path)
        except os.error:
            pass


@pytest.mark.pandas
def test_file_not_exist():
    with pytest.raises(pa.ArrowIOError):
        read_feather('test_invalid_file')


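# Roundtrip helpers: write the input with the requested options, verify the
# file exists, read it back, and compare against the expected result.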
def _check_pandas_roundtrip(df, expected=None, path=None,
                            columns=None, use_threads=False,
                            version=None, compression=None,
                            compression_level=None):
    if path is None:
        path = random_path()

    TEST_FILES.append(path)
    write_feather(df, path, compression=compression,
                  compression_level=compression_level, version=version)
    if not os.path.exists(path):
        raise Exception('file not written')

    result = read_feather(path, columns, use_threads=use_threads)
    if expected is None:
        expected = df

    assert_frame_equal(result, expected)


def _check_arrow_roundtrip(table, path=None, compression=None):
    if path is None:
        path = random_path()

    TEST_FILES.append(path)
    write_feather(table, path, compression=compression)
    if not os.path.exists(path):
        raise Exception('file not written')

    result = read_table(path)
    assert result.equals(table)


def _assert_error_on_write(df, exc, path=None, version=2):
    # check that the expected exception is raised during the write

    if path is None:
        path = random_path()

    TEST_FILES.append(path)

    def f():
        write_feather(df, path, version=version)

    pytest.raises(exc, f)


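# Split one table across several Feather files and check that FeatherDataset
# reads them back as a single concatenated table.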
def test_dataset(version):
    num_values = (100, 100)
    num_files = 5
    paths = [random_path() for i in range(num_files)]
    data = {
        "col_" + str(i): np.random.randn(num_values[0])
        for i in range(num_values[1])
    }
    table = pa.table(data)

    TEST_FILES.extend(paths)
    for index, path in enumerate(paths):
        rows = (
            index * (num_values[0] // num_files),
            (index + 1) * (num_values[0] // num_files),
        )

        write_feather(table[rows[0]: rows[1]], path, version=version)

    data = FeatherDataset(paths).read_table()
    assert data.equals(table)


@pytest.mark.pandas
def test_float_no_nulls(version):
    data = {}
    numpy_dtypes = ['f4', 'f8']
    num_values = 100

    for dtype in numpy_dtypes:
        values = np.random.randn(num_values)
        data[dtype] = values.astype(dtype)

    df = pd.DataFrame(data)
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_read_table(version):
    num_values = (100, 100)
    path = random_path()

    TEST_FILES.append(path)

    values = np.random.randint(0, 100, size=num_values)
    columns = ['col_' + str(i) for i in range(100)]
    table = pa.Table.from_arrays(values, columns)

    write_feather(table, path, version=version)

    result = read_table(path)
    assert result.equals(table)

    # Test without memory mapping
    result = read_table(path, memory_map=False)
    assert result.equals(table)

    result = read_feather(path, memory_map=False)
    assert_frame_equal(table.to_pandas(), result)


@pytest.mark.pandas
def test_float_nulls(version):
    num_values = 100

    path = random_path()
    TEST_FILES.append(path)

    null_mask = np.random.randint(0, 10, size=num_values) < 3
    dtypes = ['f4', 'f8']
    expected_cols = []

    arrays = []
    for name in dtypes:
        values = np.random.randn(num_values).astype(name)
        arrays.append(pa.array(values, mask=null_mask))

        values[null_mask] = np.nan

        expected_cols.append(values)

    table = pa.table(arrays, names=dtypes)
    _check_arrow_roundtrip(table)

    df = table.to_pandas()
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_integer_no_nulls(version):
    data, arr = {}, []

    numpy_dtypes = ['i1', 'i2', 'i4', 'i8',
                    'u1', 'u2', 'u4', 'u8']
    num_values = 100

    for dtype in numpy_dtypes:
        values = np.random.randint(0, 100, size=num_values)
        data[dtype] = values.astype(dtype)
        arr.append(values.astype(dtype))

    df = pd.DataFrame(data)
    _check_pandas_roundtrip(df, version=version)

    table = pa.table(arr, names=numpy_dtypes)
    _check_arrow_roundtrip(table)


@pytest.mark.pandas
def test_platform_numpy_integers(version):
    data = {}

    numpy_dtypes = ['longlong']
    num_values = 100

    for dtype in numpy_dtypes:
        values = np.random.randint(0, 100, size=num_values)
        data[dtype] = values.astype(dtype)

    df = pd.DataFrame(data)
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_integer_with_nulls(version):
    # pandas requires upcast to float dtype
    path = random_path()
    TEST_FILES.append(path)

    int_dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8']
    num_values = 100

    arrays = []
    null_mask = np.random.randint(0, 10, size=num_values) < 3
    expected_cols = []
    for name in int_dtypes:
        values = np.random.randint(0, 100, size=num_values)
        arrays.append(pa.array(values, mask=null_mask))

        expected = values.astype('f8')
        expected[null_mask] = np.nan

        expected_cols.append(expected)

    table = pa.table(arrays, names=int_dtypes)
    _check_arrow_roundtrip(table)

    df = table.to_pandas()
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_boolean_no_nulls(version):
    num_values = 100

    np.random.seed(0)

    df = pd.DataFrame({'bools': np.random.randn(num_values) > 0})
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_boolean_nulls(version):
    # pandas requires upcast to object dtype
    path = random_path()
    TEST_FILES.append(path)

    num_values = 100
    np.random.seed(0)

    mask = np.random.randint(0, 10, size=num_values) < 3
    values = np.random.randint(0, 10, size=num_values) < 5

    table = pa.table([pa.array(values, mask=mask)], names=['bools'])
    _check_arrow_roundtrip(table)

    df = table.to_pandas()
    _check_pandas_roundtrip(df, version=version)


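# Regression coverage for the buffer-bounds issue tracked as ARROW-1676: sweep
# many array lengths, each with a leading null, so the roundtrip exercises a
# variety of buffer sizes and offsets.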
def test_buffer_bounds_error(version):
    # ARROW-1676
    path = random_path()
    TEST_FILES.append(path)

    for i in range(16, 256):
        table = pa.Table.from_arrays(
            [pa.array([None] + list(range(i)), type=pa.float64())],
            names=["arr"]
        )
        _check_arrow_roundtrip(table)


def test_boolean_object_nulls(version):
    repeats = 100
    table = pa.Table.from_arrays(
        [np.array([False, None, True] * repeats, dtype=object)],
        names=["arr"]
    )
    _check_arrow_roundtrip(table)


@pytest.mark.pandas
def test_delete_partial_file_on_error(version):
    if sys.platform == 'win32':
        pytest.skip('Windows hangs on to file handle for some reason')

    class CustomClass:
        pass

    # strings will fail
    df = pd.DataFrame(
        {
            'numbers': range(5),
            'strings': [b'foo', None, 'bar', CustomClass(), np.nan]},
        columns=['numbers', 'strings'])

    path = random_path()
    try:
        write_feather(df, path, version=version)
    except Exception:
        pass

    assert not os.path.exists(path)


@pytest.mark.pandas
def test_strings(version):
    repeats = 1000

    # Mixed bytes, unicode, strings coerced to binary
    values = [b'foo', None, 'bar', 'qux', np.nan]
    df = pd.DataFrame({'strings': values * repeats})

    ex_values = [b'foo', None, b'bar', b'qux', np.nan]
    expected = pd.DataFrame({'strings': ex_values * repeats})
    _check_pandas_roundtrip(df, expected, version=version)

    # embedded nulls are ok
    values = ['foo', None, 'bar', 'qux', None]
    df = pd.DataFrame({'strings': values * repeats})
    expected = pd.DataFrame({'strings': values * repeats})
    _check_pandas_roundtrip(df, expected, version=version)

    values = ['foo', None, 'bar', 'qux', np.nan]
    df = pd.DataFrame({'strings': values * repeats})
    expected = pd.DataFrame({'strings': values * repeats})
    _check_pandas_roundtrip(df, expected, version=version)


@pytest.mark.pandas
def test_empty_strings(version):
    df = pd.DataFrame({'strings': [''] * 10})
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_all_none(version):
    df = pd.DataFrame({'all_none': [None] * 10})
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_all_null_category(version):
    # ARROW-1188
    df = pd.DataFrame({"A": (1, 2, 3), "B": (None, None, None)})
    df = df.assign(B=df.B.astype("category"))
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_multithreaded_read(version):
    data = {'c{}'.format(i): [''] * 10
            for i in range(100)}
    df = pd.DataFrame(data)
    _check_pandas_roundtrip(df, use_threads=True, version=version)


@pytest.mark.pandas
def test_nan_as_null(version):
    # Create a nan that is not numpy.nan
    values = np.array(['foo', np.nan, np.nan * 2, 'bar'] * 10)
    df = pd.DataFrame({'strings': values})
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_category(version):
    repeats = 1000
    values = ['foo', None, 'bar', 'qux', np.nan]
    df = pd.DataFrame({'strings': values * repeats})
    df['strings'] = df['strings'].astype('category')

    values = ['foo', None, 'bar', 'qux', None]
    expected = pd.DataFrame({'strings': pd.Categorical(values * repeats)})
    _check_pandas_roundtrip(df, expected, version=version)


@pytest.mark.pandas
def test_timestamp(version):
    df = pd.DataFrame({'naive': pd.date_range('2016-03-28', periods=10)})
    df['with_tz'] = (df.naive.dt.tz_localize('utc')
                     .dt.tz_convert('America/Los_Angeles'))

    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_timestamp_with_nulls(version):
    df = pd.DataFrame({'test': [pd.Timestamp(2016, 1, 1),
                                None,
                                pd.Timestamp(2016, 1, 3)]})
    df['with_tz'] = df.test.dt.tz_localize('utc')

    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
@pytest.mark.xfail(reason="not supported", raises=TypeError)
def test_timedelta_with_nulls_v1():
    df = pd.DataFrame({'test': [pd.Timedelta('1 day'),
                                None,
                                pd.Timedelta('3 day')]})
    _check_pandas_roundtrip(df, version=1)


@pytest.mark.pandas
def test_timedelta_with_nulls():
    df = pd.DataFrame({'test': [pd.Timedelta('1 day'),
                                None,
                                pd.Timedelta('3 day')]})
    _check_pandas_roundtrip(df, version=2)


@pytest.mark.pandas
def test_out_of_float64_timestamp_with_nulls(version):
    df = pd.DataFrame(
        {'test': pd.DatetimeIndex([1451606400000000001,
                                   None, 14516064000030405])})
    df['with_tz'] = df.test.dt.tz_localize('utc')
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.pandas
def test_non_string_columns(version):
    df = pd.DataFrame({0: [1, 2, 3, 4],
                       1: [True, False, True, False]})

    expected = df.rename(columns=str)
    _check_pandas_roundtrip(df, expected, version=version)


@pytest.mark.pandas
@pytest.mark.skipif(not os.path.supports_unicode_filenames,
                    reason='unicode filenames not supported')
def test_unicode_filename(version):
    # GH #209
    name = (b'Besa_Kavaj\xc3\xab.feather').decode('utf-8')
    df = pd.DataFrame({'foo': [1, 2, 3, 4]})
    _check_pandas_roundtrip(df, path=random_path(prefix=name),
                            version=version)


@pytest.mark.pandas
def test_read_columns(version):
    df = pd.DataFrame({
        'foo': [1, 2, 3, 4],
        'boo': [5, 6, 7, 8],
        'woo': [1, 3, 5, 7]
    })
    expected = df[['boo', 'woo']]

    _check_pandas_roundtrip(df, expected, version=version,
                            columns=['boo', 'woo'])


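# Overwriting an existing Feather file with a smaller table must not leave
# stale trailing bytes from the previous contents.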
def test_overwritten_file(version):
    path = random_path()
    TEST_FILES.append(path)

    num_values = 100
    np.random.seed(0)

    values = np.random.randint(0, 10, size=num_values)

    table = pa.table({'ints': values})
    write_feather(table, path)

    table = pa.table({'more_ints': values[0:num_values//2]})
    _check_arrow_roundtrip(table, path=path)


@pytest.mark.pandas
def test_filelike_objects(version):
    buf = io.BytesIO()

    # the copy makes it non-strided
    df = pd.DataFrame(np.arange(12).reshape(4, 3),
                      columns=['a', 'b', 'c']).copy()
    write_feather(df, buf, version=version)

    buf.seek(0)

    result = read_feather(buf)
    assert_frame_equal(result, df)


@pytest.mark.pandas
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_sparse_dataframe(version):
    if not pa.pandas_compat._pandas_api.has_sparse:
        pytest.skip("version of pandas does not support SparseDataFrame")
    # GH #221
    data = {'A': [0, 1, 2],
            'B': [1, 0, 1]}
    df = pd.DataFrame(data).to_sparse(fill_value=1)
    expected = df.to_dense()
    _check_pandas_roundtrip(df, expected, version=version)


@pytest.mark.pandas
def test_duplicate_columns_pandas():

    # https://github.com/wesm/feather/issues/53
    # not currently able to handle duplicate columns
    df = pd.DataFrame(np.arange(12).reshape(4, 3),
                      columns=list('aaa')).copy()
    _assert_error_on_write(df, ValueError)


def test_duplicate_columns():
    # only works for version 2
    table = pa.table([[1, 2, 3], [4, 5, 6], [7, 8, 9]], names=['a', 'a', 'b'])
    _check_arrow_roundtrip(table)
    _assert_error_on_write(table, ValueError, version=1)


@pytest.mark.pandas
def test_unsupported():
    # https://github.com/wesm/feather/issues/240
    # serializing actual python objects

    # custom python objects
    class A:
        pass

    df = pd.DataFrame({'a': [A(), A()]})
    _assert_error_on_write(df, ValueError)

    # non-strings
    df = pd.DataFrame({'a': ['a', 1, 2.0]})
    _assert_error_on_write(df, TypeError)


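# Feather V2 is the Arrow IPC file format, so chunksize controls the record
# batch size: 1000 rows at a chunksize of 250 should yield four batches.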
@pytest.mark.pandas
def test_v2_set_chunksize():
    df = pd.DataFrame({'A': np.arange(1000)})
    table = pa.table(df)

    buf = io.BytesIO()
    write_feather(table, buf, chunksize=250, version=2)

    result = buf.getvalue()

    ipc_file = pa.ipc.open_file(pa.BufferReader(result))
    assert ipc_file.num_record_batches == 4
    assert len(ipc_file.get_batch(0)) == 250


@pytest.mark.pandas
@pytest.mark.lz4
@pytest.mark.snappy
@pytest.mark.zstd
def test_v2_compression_options():
    df = pd.DataFrame({'A': np.arange(1000)})

    cases = [
        # compression, compression_level
        ('uncompressed', None),
        ('lz4', None),
        ('zstd', 1),
        ('zstd', 10)
    ]

    for compression, compression_level in cases:
        _check_pandas_roundtrip(df, compression=compression,
                                compression_level=compression_level)

    buf = io.BytesIO()

    # LZ4 doesn't support compression_level
    with pytest.raises(pa.ArrowInvalid,
                       match="doesn't support setting a compression level"):
        write_feather(df, buf, compression='lz4', compression_level=10)

    # Trying to compress with V1
    with pytest.raises(
            ValueError,
            match="Feather V1 files do not support compression option"):
        write_feather(df, buf, compression='lz4', version=1)

    # Trying to set chunksize with V1
    with pytest.raises(
            ValueError,
            match="Feather V1 files do not support chunksize option"):
        write_feather(df, buf, chunksize=4096, version=1)

    # Unsupported compressor
    with pytest.raises(ValueError,
                       match='compression="snappy" not supported'):
        write_feather(df, buf, compression='snappy')


def test_v2_lz4_default_compression():
    # ARROW-8750: Make sure that the compression=None option selects lz4 if
    # it's available
    if not pa.Codec.is_available('lz4_frame'):
        pytest.skip("LZ4 compression support is not built in C++")

    # some highly compressible data
    t = pa.table([np.repeat(0, 100000)], names=['f0'])

    buf = io.BytesIO()
    write_feather(t, buf)
    default_result = buf.getvalue()

    buf = io.BytesIO()
    write_feather(t, buf, compression='uncompressed')
    uncompressed_result = buf.getvalue()

    assert len(default_result) < len(uncompressed_result)


def test_v1_unsupported_types():
    table = pa.table([pa.array([[1, 2, 3], [], None])], names=['f0'])

    buf = io.BytesIO()
    with pytest.raises(TypeError,
                       match=("Unsupported Feather V1 type: "
                              "list<item: int64>. "
                              "Use V2 format to serialize all Arrow types.")):
        write_feather(table, buf, version=1)


@pytest.mark.slow
@pytest.mark.pandas
def test_large_dataframe(version):
    df = pd.DataFrame({'A': np.arange(400000000)})
    _check_pandas_roundtrip(df, version=version)


@pytest.mark.large_memory
@pytest.mark.pandas
def test_chunked_binary_error_message():
    # ARROW-3058: As Feather does not yet support chunked columns, we at least
    # make sure it's clear to the user what is going on

    # 2^31 + 1 bytes
    values = [b'x'] + [
        b'x' * (1 << 20)
    ] * 2 * (1 << 10)
    df = pd.DataFrame({'byte_col': values})

    # Works fine with version 2
    buf = io.BytesIO()
    write_feather(df, buf, version=2)
    result = read_feather(pa.BufferReader(buf.getvalue()))
    assert_frame_equal(result, df)

    with pytest.raises(ValueError, match="'byte_col' exceeds 2GB maximum "
                       "capacity of a Feather binary column. This restriction "
                       "may be lifted in the future"):
        write_feather(df, io.BytesIO(), version=1)


def test_feather_without_pandas(tempdir, version):
    # ARROW-8345
    table = pa.table([pa.array([1, 2, 3])], names=['f0'])
    path = str(tempdir / "data.feather")
    _check_arrow_roundtrip(table, path)


@pytest.mark.pandas
def test_read_column_selection(version):
    # ARROW-8641
    df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=['a', 'b', 'c'])

    # select columns as string names or integer indices
    _check_pandas_roundtrip(
        df, columns=['a', 'c'], expected=df[['a', 'c']], version=version)
    _check_pandas_roundtrip(
        df, columns=[0, 2], expected=df[['a', 'c']], version=version)

    # different order is followed
    _check_pandas_roundtrip(
        df, columns=['b', 'a'], expected=df[['b', 'a']], version=version)
    _check_pandas_roundtrip(
        df, columns=[1, 0], expected=df[['b', 'a']], version=version)


def test_read_column_duplicated_selection(tempdir, version):
    # duplicated columns in the column selection
    table = pa.table([[1, 2, 3], [4, 5, 6], [7, 8, 9]], names=['a', 'b', 'c'])
    path = str(tempdir / "data.feather")
    write_feather(table, path, version=version)

    expected = pa.table([[1, 2, 3], [4, 5, 6], [1, 2, 3]],
                        names=['a', 'b', 'a'])
    for col_selection in [['a', 'b', 'a'], [0, 1, 0]]:
        result = read_table(path, columns=col_selection)
        assert result.equals(expected)


def test_read_column_duplicated_in_file(tempdir):
    # duplicated columns in feather file (only works for feather v2)
    table = pa.table([[1, 2, 3], [4, 5, 6], [7, 8, 9]], names=['a', 'b', 'a'])
    path = str(tempdir / "data.feather")
    write_feather(table, path, version=2)

    # no selection works fine
    result = read_table(path)
    assert result.equals(table)

    # selection with indices works
    result = read_table(path, columns=[0, 2])
    assert result.column_names == ['a', 'a']

    # selection with column names errors
    with pytest.raises(ValueError):
        read_table(path, columns=['a', 'b'])


def test_nested_types(compression):
    # https://issues.apache.org/jira/browse/ARROW-8860
    table = pa.table({'col': pa.StructArray.from_arrays(
        [[0, 1, 2], [1, 2, 3]], names=["f1", "f2"])})
    _check_arrow_roundtrip(table, compression=compression)

    table = pa.table({'col': pa.array([[1, 2], [3, 4]])})
    _check_arrow_roundtrip(table, compression=compression)

    table = pa.table({'col': pa.array([[[1, 2], [3, 4]], [[5, 6], None]])})
    _check_arrow_roundtrip(table, compression=compression)


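# Property-based check: hypothesis generates arbitrary Arrow tables (via
# pyarrow.tests.strategies) and roundtrips each through every compression
# codec.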
@h.given(past.all_tables, st.sampled_from(["uncompressed", "lz4", "zstd"]))
def test_roundtrip(table, compression):
    _check_arrow_roundtrip(table, compression=compression)


@pytest.mark.lz4
def test_feather_v017_experimental_compression_backward_compatibility(datadir):
    # ARROW-11163 - ensure newer pyarrow versions can read the old feather
    # files from version 0.17.0 with experimental compression support (before
    # it was officially added to IPC format in 1.0.0)

    # file generated with:
    # table = pa.table({'a': range(5)})
    # from pyarrow import feather
    # feather.write_feather(
    #     table, "v0.17.0.version=2-compression=lz4.feather",
    #     compression="lz4", version=2)
    expected = pa.table({'a': range(5)})
    result = read_table(datadir / "v0.17.0.version=2-compression=lz4.feather")
    assert result.equals(expected)