]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/go/parquet/schema/reflection_test.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / schema / reflection_test.go
CommitLineData
1d09f67e
TL
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package schema_test
18
19import (
20 "log"
21 "os"
22 "reflect"
23 "testing"
24
25 "github.com/apache/arrow/go/v6/parquet"
26 "github.com/apache/arrow/go/v6/parquet/schema"
27 "github.com/stretchr/testify/assert"
28)
29
30func ExampleNewSchemaFromStruct_primitives() {
31 type Schema struct {
32 Bool bool
33 Int8 int8
34 Uint16 uint16
35 Int32 int32
36 Int64 int64
37 Int96 parquet.Int96
38 Float float32
39 Double float64
40 ByteArray string
41 FixedLenByteArray [10]byte
42 }
43
44 sc, err := schema.NewSchemaFromStruct(Schema{})
45 if err != nil {
46 log.Fatal(err)
47 }
48
49 schema.PrintSchema(sc.Root(), os.Stdout, 2)
50
51 // Output:
52 // repeated group field_id=-1 Schema {
53 // required boolean field_id=-1 Bool;
54 // required int32 field_id=-1 Int8 (Int(bitWidth=8, isSigned=true));
55 // required int32 field_id=-1 Uint16 (Int(bitWidth=16, isSigned=false));
56 // required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true));
57 // required int64 field_id=-1 Int64 (Int(bitWidth=64, isSigned=true));
58 // required int96 field_id=-1 Int96;
59 // required float field_id=-1 Float;
60 // required double field_id=-1 Double;
61 // required byte_array field_id=-1 ByteArray;
62 // required fixed_len_byte_array field_id=-1 FixedLenByteArray;
63 // }
64}
65
66func ExampleNewSchemaFromStruct_convertedtypes() {
67 type ConvertedSchema struct {
68 Utf8 string `parquet:"name=utf8, converted=UTF8"`
69 Uint32 uint32 `parquet:"converted=INT_32"`
70 Date int32 `parquet:"name=date, converted=date"`
71 TimeMilli int32 `parquet:"name=timemilli, converted=TIME_MILLIS"`
72 TimeMicro int64 `parquet:"name=timemicro, converted=time_micros"`
73 TimeStampMilli int64 `parquet:"converted=timestamp_millis"`
74 TimeStampMicro int64 `parquet:"converted=timestamp_micros"`
75 Interval parquet.Int96 `parquet:"converted=INTERVAL"`
76 Decimal1 int32 `parquet:"converted=decimal, scale=2, precision=9"`
77 Decimal2 int64 `parquet:"converted=decimal, scale=2, precision=18"`
78 Decimal3 [12]byte `parquet:"converted=decimal, scale=2, precision=10"`
79 Decimal4 string `parquet:"converted=decimal, scale=2, precision=20"`
80 }
81
82 sc, err := schema.NewSchemaFromStruct(&ConvertedSchema{})
83 if err != nil {
84 log.Fatal(err)
85 }
86
87 schema.PrintSchema(sc.Root(), os.Stdout, 2)
88
89 // Output:
90 // repeated group field_id=-1 ConvertedSchema {
91 // required byte_array field_id=-1 utf8 (String);
92 // required int32 field_id=-1 Uint32 (Int(bitWidth=32, isSigned=true));
93 // required int32 field_id=-1 date (Date);
94 // required int32 field_id=-1 timemilli (Time(isAdjustedToUTC=true, timeUnit=milliseconds));
95 // required int64 field_id=-1 timemicro (Time(isAdjustedToUTC=true, timeUnit=microseconds));
96 // required int64 field_id=-1 TimeStampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=true, force_set_converted_type=false));
97 // required int64 field_id=-1 TimeStampMicro (Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, is_from_converted_type=true, force_set_converted_type=false));
98 // required int96 field_id=-1 Interval;
99 // required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2));
100 // required int64 field_id=-1 Decimal2 (Decimal(precision=18, scale=2));
101 // required fixed_len_byte_array field_id=-1 Decimal3 (Decimal(precision=10, scale=2));
102 // required byte_array field_id=-1 Decimal4 (Decimal(precision=20, scale=2));
103 // }
104}
105
106func ExampleNewSchemaFromStruct_repetition() {
107 type RepetitionSchema struct {
108 List []int64 `parquet:"fieldid=1"`
109 Repeated []int64 `parquet:"repetition=repeated, fieldid=2"`
110 Optional *int64 `parquet:"fieldid=3"`
111 Required *int64 `parquet:"repetition=REQUIRED, fieldid=4"`
112 Opt int64 `parquet:"repetition=OPTIONAL, fieldid=5"`
113 }
114
115 sc, err := schema.NewSchemaFromStruct(RepetitionSchema{})
116 if err != nil {
117 log.Fatal(err)
118 }
119
120 schema.PrintSchema(sc.Root(), os.Stdout, 2)
121
122 // Output:
123 // repeated group field_id=-1 RepetitionSchema {
124 // required group field_id=1 List (List) {
125 // repeated group field_id=-1 list {
126 // required int64 field_id=-1 element (Int(bitWidth=64, isSigned=true));
127 // }
128 // }
129 // repeated int64 field_id=2 Repeated (Int(bitWidth=64, isSigned=true));
130 // optional int64 field_id=3 Optional (Int(bitWidth=64, isSigned=true));
131 // required int64 field_id=4 Required (Int(bitWidth=64, isSigned=true));
132 // optional int64 field_id=5 Opt (Int(bitWidth=64, isSigned=true));
133 // }
134}
135
136func ExampleNewSchemaFromStruct_logicaltypes() {
137 type LogicalTypes struct {
138 String []byte `parquet:"logical=String"`
139 Enum string `parquet:"logical=enum"`
140 Date int32 `parquet:"logical=date"`
141 Decimal1 int32 `parquet:"logical=decimal, precision=9, scale=2"`
142 Decimal2 int32 `parquet:"logical=decimal, logical.precision=9, scale=2"`
143 Decimal3 int32 `parquet:"logical=decimal, precision=5, logical.precision=9, scale=1, logical.scale=3"`
144 TimeMilliUTC int32 `parquet:"logical=TIME, logical.unit=millis"`
145 TimeMilli int32 `parquet:"logical=Time, logical.unit=millis, logical.isadjustedutc=false"`
146 TimeMicros int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=false"`
147 TimeMicrosUTC int64 `parquet:"logical=time, logical.unit=micros, logical.isadjustedutc=true"`
148 TimeNanos int64 `parquet:"logical=time, logical.unit=nanos"`
149 TimestampMilli int64 `parquet:"logical=timestamp, logical.unit=millis"`
150 TimestampMicrosNotUTC int64 `parquet:"logical=timestamp, logical.unit=micros, logical.isadjustedutc=false"`
151 TimestampNanos int64 `parquet:"logical=timestamp, logical.unit=nanos"`
152 JSON string `parquet:"logical=json"`
153 BSON []byte `parquet:"logical=BSON"`
154 UUID [16]byte `parquet:"logical=uuid"`
155 }
156
157 sc, err := schema.NewSchemaFromStruct(LogicalTypes{})
158 if err != nil {
159 log.Fatal(err)
160 }
161
162 schema.PrintSchema(sc.Root(), os.Stdout, 2)
163
164 // Output:
165 // repeated group field_id=-1 LogicalTypes {
166 // required byte_array field_id=-1 String (String);
167 // required byte_array field_id=-1 Enum (Enum);
168 // required int32 field_id=-1 Date (Date);
169 // required int32 field_id=-1 Decimal1 (Decimal(precision=9, scale=2));
170 // required int32 field_id=-1 Decimal2 (Decimal(precision=9, scale=2));
171 // required int32 field_id=-1 Decimal3 (Decimal(precision=9, scale=3));
172 // required int32 field_id=-1 TimeMilliUTC (Time(isAdjustedToUTC=true, timeUnit=milliseconds));
173 // required int32 field_id=-1 TimeMilli (Time(isAdjustedToUTC=false, timeUnit=milliseconds));
174 // required int64 field_id=-1 TimeMicros (Time(isAdjustedToUTC=false, timeUnit=microseconds));
175 // required int64 field_id=-1 TimeMicrosUTC (Time(isAdjustedToUTC=true, timeUnit=microseconds));
176 // required int64 field_id=-1 TimeNanos (Time(isAdjustedToUTC=true, timeUnit=nanoseconds));
177 // required int64 field_id=-1 TimestampMilli (Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, is_from_converted_type=false, force_set_converted_type=false));
178 // required int64 field_id=-1 TimestampMicrosNotUTC (Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, is_from_converted_type=false, force_set_converted_type=false));
179 // required int64 field_id=-1 TimestampNanos (Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, is_from_converted_type=false, force_set_converted_type=false));
180 // required byte_array field_id=-1 JSON (JSON);
181 // required byte_array field_id=-1 BSON (BSON);
182 // required fixed_len_byte_array field_id=-1 UUID (UUID);
183 // }
184}
185
186func ExampleNewSchemaFromStruct_physicaltype() {
187 type ChangeTypes struct {
188 Int32 int64 `parquet:"type=int32"`
189 FixedLen string `parquet:"type=fixed_len_byte_array, length=10"`
190 SliceAsFixed []byte `parquet:"type=fixed_len_byte_array, length=12"`
191 Int int `parquet:"type=int32"`
192 }
193
194 sc, err := schema.NewSchemaFromStruct(ChangeTypes{})
195 if err != nil {
196 log.Fatal(err)
197 }
198
199 schema.PrintSchema(sc.Root(), os.Stdout, 2)
200
201 // Output:
202 // repeated group field_id=-1 ChangeTypes {
203 // required int32 field_id=-1 Int32 (Int(bitWidth=32, isSigned=true));
204 // required fixed_len_byte_array field_id=-1 FixedLen;
205 // required fixed_len_byte_array field_id=-1 SliceAsFixed;
206 // required int32 field_id=-1 Int (Int(bitWidth=32, isSigned=true));
207 // }
208}
209
210func ExampleNewSchemaFromStruct_nestedtypes() {
211 type Other struct {
212 OptionalMap *map[string]*string `parquet:"valuerepetition=required, keylogical=String, valueconverted=BSON"`
213 }
214
215 type MyMap map[int32]string
216
217 type Nested struct {
218 SimpleMap map[int32]string
219 FixedLenMap map[string][]byte `parquet:"keytype=fixed_len_byte_array, keyfieldid=10, valuefieldid=11, keylength=10"`
220 DecimalMap map[int32]string `parquet:"logical=map, keyconverted=DECIMAL, keyscale=3, keyprecision=7, valuetype=fixed_len_byte_array, valuelength=4, valuelogical=decimal, valuelogical.precision=9, valuescale=2"`
221 OtherList []*Other
222 OtherRepeated []Other `parquet:"repetition=repeated"`
223 DateArray [5]int32 `parquet:"valuelogical=date, logical=list"`
224 DateMap MyMap `parquet:"keylogical=TIME, keylogical.unit=MILLIS, keylogical.isadjustedutc=false, valuelogical=enum"`
225 }
226
227 sc, err := schema.NewSchemaFromStruct(Nested{})
228 if err != nil {
229 log.Fatal(err)
230 }
231
232 schema.PrintSchema(sc.Root(), os.Stdout, 2)
233
234 // Output:
235 // repeated group field_id=-1 Nested {
236 // required group field_id=-1 SimpleMap (Map) {
237 // repeated group field_id=-1 key_value {
238 // required int32 field_id=-1 key (Int(bitWidth=32, isSigned=true));
239 // required byte_array field_id=-1 value;
240 // }
241 // }
242 // required group field_id=-1 FixedLenMap (Map) {
243 // repeated group field_id=-1 key_value {
244 // required fixed_len_byte_array field_id=10 key;
245 // required byte_array field_id=11 value;
246 // }
247 // }
248 // required group field_id=-1 DecimalMap (Map) {
249 // repeated group field_id=-1 key_value {
250 // required int32 field_id=-1 key (Decimal(precision=7, scale=3));
251 // required fixed_len_byte_array field_id=-1 value (Decimal(precision=9, scale=2));
252 // }
253 // }
254 // required group field_id=-1 OtherList (List) {
255 // repeated group field_id=-1 list {
256 // optional group field_id=-1 element {
257 // optional group field_id=-1 OptionalMap (Map) {
258 // repeated group field_id=-1 key_value {
259 // required byte_array field_id=-1 key (String);
260 // required byte_array field_id=-1 value (BSON);
261 // }
262 // }
263 // }
264 // }
265 // }
266 // repeated group field_id=-1 OtherRepeated {
267 // optional group field_id=-1 OptionalMap (Map) {
268 // repeated group field_id=-1 key_value {
269 // required byte_array field_id=-1 key (String);
270 // required byte_array field_id=-1 value (BSON);
271 // }
272 // }
273 // }
274 // required group field_id=-1 DateArray (List) {
275 // repeated group field_id=-1 list {
276 // required int32 field_id=-1 element (Date);
277 // }
278 // }
279 // required group field_id=-1 DateMap (Map) {
280 // repeated group field_id=-1 key_value {
281 // required int32 field_id=-1 key (Time(isAdjustedToUTC=false, timeUnit=milliseconds));
282 // required byte_array field_id=-1 value (Enum);
283 // }
284 // }
285 // }
286}
287
288func TestStructFromSchema(t *testing.T) {
289 root, err := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{
290 schema.NewBooleanNode("bool", parquet.Repetitions.Required, -1),
291 schema.NewInt32Node("int32", parquet.Repetitions.Optional, -1),
292 schema.NewInt64Node("int64", parquet.Repetitions.Repeated, -1),
293 schema.NewInt96Node("int96", parquet.Repetitions.Required, -1),
294 schema.NewFloat32Node("float", parquet.Repetitions.Required, -1),
295 schema.NewByteArrayNode("bytearray", parquet.Repetitions.Required, -1),
296 schema.NewFixedLenByteArrayNode("fixedLen", parquet.Repetitions.Required, 10, -1),
297 }, -1)
298 assert.NoError(t, err)
299
300 sc := schema.NewSchema(root)
301
302 typ, err := schema.NewStructFromSchema(sc)
303 assert.NoError(t, err)
304
305 assert.Equal(t, reflect.Struct, typ.Kind())
306 assert.Equal(t, "struct { bool bool; int32 *int32; int64 []int64; int96 parquet.Int96; float float32; bytearray parquet.ByteArray; fixedLen parquet.FixedLenByteArray }",
307 typ.String())
308}
309
310func TestStructFromSchemaWithNesting(t *testing.T) {
311 type Other struct {
312 List *[]*float32
313 }
314
315 type Nested struct {
316 Nest []int32
317 OptionalNest []*int64
318 Mapped map[string]float32
319 Other []Other
320 Other2 Other
321 }
322
323 sc, err := schema.NewSchemaFromStruct(Nested{})
324 assert.NoError(t, err)
325
326 typ, err := schema.NewStructFromSchema(sc)
327 assert.NoError(t, err)
328 assert.Equal(t, "struct { Nest []int32; OptionalNest []*int64; Mapped map[string]float32; Other []struct { List *[]*float32 }; Other2 struct { List *[]*float32 } }",
329 typ.String())
330}
331
332func TestStructFromSchemaBackwardsCompatList(t *testing.T) {
333 tests := []struct {
334 name string
335 n schema.Node
336 expected string
337 }{
338 {"proper list", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required,
339 schema.FieldList{
340 schema.MustGroup(schema.NewGroupNode("list", parquet.Repetitions.Repeated, schema.FieldList{schema.NewBooleanNode("element", parquet.Repetitions.Optional, -1)}, -1)),
341 }, schema.NewListLogicalType(), -1)), "struct { my_list []*bool }"},
342 {"backward nullable list nonnull ints", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
343 schema.NewInt32Node("element", parquet.Repetitions.Repeated, -1),
344 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]int32 }"},
345 {"backward nullable list tuple string int", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
346 schema.MustGroup(schema.NewGroupNode("element", parquet.Repetitions.Repeated, schema.FieldList{
347 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
348 schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
349 }, -1)),
350 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string; num int32 } }"},
351 {"list tuple string", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Required, schema.FieldList{
352 schema.MustGroup(schema.NewGroupNode("array", parquet.Repetitions.Repeated, schema.FieldList{
353 schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
354 }, -1)),
355 }, schema.NewListLogicalType(), -1)), "struct { my_list []struct { str parquet.ByteArray } }"},
356 {"list tuple string my_list_tuple", schema.MustGroup(schema.NewGroupNodeLogical("my_list", parquet.Repetitions.Optional, schema.FieldList{
357 schema.MustGroup(schema.NewGroupNode("my_list_tuple", parquet.Repetitions.Repeated, schema.FieldList{
358 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("str", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
359 }, -1)),
360 }, schema.NewListLogicalType(), -1)), "struct { my_list *[]struct { str string } }"},
361 }
362
363 for _, tt := range tests {
364 t.Run(tt.name, func(t *testing.T) {
365 typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
366 assert.NoError(t, err)
367 assert.Equal(t, tt.expected, typ.String())
368 })
369 }
370}
371
372func TestStructFromSchemaMaps(t *testing.T) {
373 tests := []struct {
374 name string
375 n schema.Node
376 expected string
377 }{
378 {"map string int", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Required, schema.FieldList{
379 schema.MustGroup(schema.NewGroupNode("key_value", parquet.Repetitions.Repeated, schema.FieldList{
380 schema.MustPrimitive(schema.NewPrimitiveNodeLogical("key", parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)),
381 schema.NewInt32Node("value", parquet.Repetitions.Optional, -1),
382 }, -1)),
383 }, schema.MapLogicalType{}, -1)), "struct { my_map map[string]*int32 }"},
384 {"nullable map string, int, required values", schema.MustGroup(schema.NewGroupNodeLogical("my_map", parquet.Repetitions.Optional, schema.FieldList{
385 schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
386 schema.NewByteArrayNode("str", parquet.Repetitions.Required, -1),
387 schema.NewInt32Node("num", parquet.Repetitions.Required, -1),
388 }, -1)),
389 }, schema.MapLogicalType{}, -1)), "struct { my_map *map[string]int32 }"},
390 {"map_key_value with missing value", schema.MustGroup(schema.NewGroupNodeConverted("my_map", parquet.Repetitions.Optional, schema.FieldList{
391 schema.MustGroup(schema.NewGroupNode("map", parquet.Repetitions.Repeated, schema.FieldList{
392 schema.NewByteArrayNode("key", parquet.Repetitions.Required, -1),
393 }, -1)),
394 }, schema.ConvertedTypes.MapKeyValue, -1)), "struct { my_map *map[string]bool }"},
395 }
396 for _, tt := range tests {
397 t.Run(tt.name, func(t *testing.T) {
398 typ, err := schema.NewStructFromSchema(schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{tt.n}, -1))))
399 assert.NoError(t, err)
400 assert.Equal(t, tt.expected, typ.String())
401 })
402 }
403}