1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
24 "github.com/apache/arrow/go/v6/parquet"
25 format "github.com/apache/arrow/go/v6/parquet/internal/gen-go/parquet"
26 "golang.org/x/xerrors"
29 type taggedInfo struct {
34 ValueType parquet.Type
52 RepetitionType parquet.Repetition
53 ValueRepetition parquet.Repetition
55 Converted ConvertedType
56 KeyConverted ConvertedType
57 ValueConverted ConvertedType
59 LogicalFields map[string]string
60 KeyLogicalFields map[string]string
61 ValueLogicalFields map[string]string
63 LogicalType LogicalType
64 KeyLogicalType LogicalType
65 ValueLogicalType LogicalType
68 func (t *taggedInfo) CopyForKey() (ret taggedInfo) {
71 ret.Length = t.KeyLength
72 ret.Scale = t.KeyScale
73 ret.Precision = t.KeyPrecision
74 ret.FieldID = t.KeyFieldID
75 ret.RepetitionType = parquet.Repetitions.Required
76 ret.Converted = t.KeyConverted
77 ret.LogicalType = t.KeyLogicalType
81 func (t *taggedInfo) CopyForValue() (ret taggedInfo) {
83 ret.Type = t.ValueType
84 ret.Length = t.ValueLength
85 ret.Scale = t.ValueScale
86 ret.Precision = t.ValuePrecision
87 ret.FieldID = t.ValueFieldID
88 ret.RepetitionType = t.ValueRepetition
89 ret.Converted = t.ValueConverted
90 ret.LogicalType = t.ValueLogicalType
94 func (t *taggedInfo) UpdateLogicalTypes() {
95 processLogicalType := func(fields map[string]string, precision, scale int32) LogicalType {
96 t, ok := fields["type"]
98 return NoLogicalType{}
101 switch strings.ToLower(t) {
103 return StringLogicalType{}
105 return MapLogicalType{}
107 return ListLogicalType{}
109 return EnumLogicalType{}
111 if v, ok := fields["precision"]; ok {
112 precision = int32FromType(v)
114 if v, ok := fields["scale"]; ok {
115 scale = int32FromType(v)
117 return NewDecimalLogicalType(precision, scale)
119 return DateLogicalType{}
121 unit, ok := fields["unit"]
123 panic("must specify unit for time logical type")
125 adjustedToUtc, ok := fields["isadjustedutc"]
127 adjustedToUtc = "true"
129 return NewTimeLogicalType(boolFromStr(adjustedToUtc), timeUnitFromString(strings.ToLower(unit)))
131 unit, ok := fields["unit"]
133 panic("must specify unit for time logical type")
135 adjustedToUtc, ok := fields["isadjustedutc"]
137 adjustedToUtc = "true"
139 return NewTimestampLogicalType(boolFromStr(adjustedToUtc), timeUnitFromString(unit))
141 width, ok := fields["bitwidth"]
143 panic("must specify bitwidth if explicitly setting integer logical type")
145 signed, ok := fields["signed"]
150 return NewIntLogicalType(int8(int32FromType(width)), boolFromStr(signed))
152 return NullLogicalType{}
154 return JSONLogicalType{}
156 return BSONLogicalType{}
158 return UUIDLogicalType{}
160 panic(xerrors.Errorf("invalid logical type specified: %s", t))
164 t.LogicalType = processLogicalType(t.LogicalFields, t.Precision, t.Scale)
165 t.KeyLogicalType = processLogicalType(t.KeyLogicalFields, t.KeyPrecision, t.KeyScale)
166 t.ValueLogicalType = processLogicalType(t.ValueLogicalFields, t.ValuePrecision, t.ValueScale)
169 func newTaggedInfo() taggedInfo {
171 Type: parquet.Types.Undefined,
172 KeyType: parquet.Types.Undefined,
173 ValueType: parquet.Types.Undefined,
174 RepetitionType: parquet.Repetitions.Undefined,
175 ValueRepetition: parquet.Repetitions.Undefined,
176 Converted: ConvertedTypes.NA,
177 KeyConverted: ConvertedTypes.NA,
178 ValueConverted: ConvertedTypes.NA,
182 LogicalFields: make(map[string]string),
183 KeyLogicalFields: make(map[string]string),
184 ValueLogicalFields: make(map[string]string),
185 LogicalType: NoLogicalType{},
186 KeyLogicalType: NoLogicalType{},
187 ValueLogicalType: NoLogicalType{},
191 var int32FromType = func(v string) int32 {
192 val, err := strconv.Atoi(v)
199 var boolFromStr = func(v string) bool {
200 val, err := strconv.ParseBool(v)
207 func infoFromTags(f reflect.StructTag) *taggedInfo {
208 typeFromStr := func(v string) parquet.Type {
209 t, err := format.TypeFromString(strings.ToUpper(v))
211 panic(xerrors.Errorf("invalid type specified: %s", v))
213 return parquet.Type(t)
216 repFromStr := func(v string) parquet.Repetition {
217 r, err := format.FieldRepetitionTypeFromString(strings.ToUpper(v))
221 return parquet.Repetition(r)
224 convertedFromStr := func(v string) ConvertedType {
225 c, err := format.ConvertedTypeFromString(strings.ToUpper(v))
229 return ConvertedType(c)
232 if ptags, ok := f.Lookup("parquet"); ok {
233 info := newTaggedInfo()
234 for _, tag := range strings.Split(strings.Replace(ptags, "\t", "", -1), ",") {
235 tag = strings.TrimSpace(tag)
236 kv := strings.SplitN(tag, "=", 2)
237 key := strings.TrimSpace(strings.ToLower(kv[0]))
238 value := strings.TrimSpace(kv[1])
244 info.Type = typeFromStr(value)
246 info.KeyType = typeFromStr(value)
248 info.ValueType = typeFromStr(value)
250 info.Length = int32FromType(value)
252 info.KeyLength = int32FromType(value)
254 info.ValueLength = int32FromType(value)
256 info.Scale = int32FromType(value)
258 info.KeyScale = int32FromType(value)
260 info.ValueScale = int32FromType(value)
262 info.Precision = int32FromType(value)
264 info.KeyPrecision = int32FromType(value)
265 case "valueprecision":
266 info.ValuePrecision = int32FromType(value)
268 info.FieldID = int32FromType(value)
270 info.KeyFieldID = int32FromType(value)
272 info.ValueFieldID = int32FromType(value)
274 info.RepetitionType = repFromStr(value)
275 case "valuerepetition":
276 info.ValueRepetition = repFromStr(value)
278 info.Converted = convertedFromStr(value)
280 info.KeyConverted = convertedFromStr(value)
281 case "valueconverted":
282 info.ValueConverted = convertedFromStr(value)
284 info.LogicalFields["type"] = value
286 info.KeyLogicalFields["type"] = value
288 info.ValueLogicalFields["type"] = value
291 case strings.HasPrefix(key, "logical."):
292 info.LogicalFields[strings.TrimPrefix(key, "logical.")] = value
293 case strings.HasPrefix(key, "keylogical."):
294 info.KeyLogicalFields[strings.TrimPrefix(key, "keylogical.")] = value
295 case strings.HasPrefix(key, "valuelogical."):
296 info.ValueLogicalFields[strings.TrimPrefix(key, "valuelogical.")] = value
300 info.UpdateLogicalTypes()
306 // typeToNode recursively converts a physical type and the tag info into parquet Nodes
308 // to avoid having to propagate errors up potentially high numbers of recursive calls
309 // we use panics and then recover in the public function NewSchemaFromStruct so that a
310 // failure very far down the stack quickly unwinds.
311 func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info *taggedInfo) Node {
312 // set up our default values for everything
314 converted = ConvertedTypes.None
315 logical LogicalType = NoLogicalType{}
317 physical = parquet.Types.Undefined
322 if info != nil { // we have struct tag info to process
323 fieldID = info.FieldID
324 if info.Converted != ConvertedTypes.NA {
325 converted = info.Converted
327 logical = info.LogicalType
329 typeLen = int(info.Length)
330 precision = int(info.Precision)
331 scale = int(info.Scale)
336 if info.RepetitionType != parquet.Repetitions.Undefined {
337 repType = info.RepetitionType
341 // simplify the logic by switching based on the reflection Kind
344 // a map must have a logical type of MAP or have no tag for logical type in which case
345 // we assume MAP logical type.
346 if !logical.IsNone() && !logical.Equals(MapLogicalType{}) {
347 panic("cannot set logical type to something other than map for a map")
350 infoCopy := newTaggedInfo()
351 if info != nil { // populate any value specific tags to propagate for the value type
352 infoCopy = info.CopyForValue()
355 // create the node for the value type of the map
356 value := typeToNode("value", typ.Elem(), parquet.Repetitions.Required, &infoCopy)
357 if info != nil { // change our copy to now use the key specific tags if they exist
358 infoCopy = info.CopyForKey()
361 // create the node for the key type of the map
362 key := typeToNode("key", typ.Key(), parquet.Repetitions.Required, &infoCopy)
363 if key.RepetitionType() != parquet.Repetitions.Required { // key cannot be optional
364 panic("key type of map must be Required")
366 return Must(MapOf(name, key, value, repType, fieldID))
368 // structs are Group nodes
369 fields := make(FieldList, 0)
370 for i := 0; i < typ.NumField(); i++ {
373 fields = append(fields, typeToNode(f.Name, f.Type, parquet.Repetitions.Required, infoFromTags(f.Tag)))
375 // group nodes don't have a physical type
376 if physical != parquet.Types.Undefined {
377 panic("cannot specify custom type on struct")
379 // group nodes don't have converted or logical types
380 if converted != ConvertedTypes.None {
381 panic("cannot specify converted types for a struct")
383 if !logical.IsNone() {
384 panic("cannot specify logicaltype for a struct")
386 return Must(NewGroupNode(name, repType, fields, fieldID))
387 case reflect.Ptr: // if we encounter a pointer create a node for the type it points to, but mark it as optional
388 return typeToNode(name, typ.Elem(), parquet.Repetitions.Optional, info)
390 // arrays are repeated or fixed size
391 if typ == reflect.TypeOf(parquet.Int96{}) {
392 return NewInt96Node(name, repType, fieldID)
395 if typ.Elem() == reflect.TypeOf(byte(0)) { // something like [12]byte translates to FixedLenByteArray with length 12
396 if physical == parquet.Types.Undefined {
397 physical = parquet.Types.FixedLenByteArray
399 if typeLen == 0 { // if there was no type length specified in the tag, use the length of the type.
402 if !logical.IsNone() {
403 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, physical, typeLen, fieldID))
405 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, physical, converted, typeLen, precision, scale, fieldID))
407 fallthrough // if it's not a fixed len byte array type, then just treat it like a slice
409 // for slices, we default to treating them as lists unless the repetition type is set to REPEATED or they are
410 // a bytearray/fixedlenbytearray
412 case repType == parquet.Repetitions.Repeated:
413 return typeToNode(name, typ.Elem(), parquet.Repetitions.Repeated, info)
414 case physical == parquet.Types.FixedLenByteArray || physical == parquet.Types.ByteArray:
415 if typ.Elem() != reflect.TypeOf(byte(0)) {
416 panic("slice with physical type ByteArray or FixedLenByteArray must be []byte")
419 case typ.Elem() == reflect.TypeOf(byte(0)):
420 if physical == parquet.Types.Undefined {
421 physical = parquet.Types.ByteArray
423 if !logical.IsNone() {
424 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, physical, typeLen, fieldID))
426 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, physical, converted, typeLen, precision, scale, fieldID))
428 var elemInfo *taggedInfo
430 elemInfo = &taggedInfo{}
431 *elemInfo = info.CopyForValue()
434 if !logical.IsNone() && !logical.Equals(ListLogicalType{}) {
435 panic("slice must either be repeated or a List type")
437 if converted != ConvertedTypes.None && converted != ConvertedTypes.List {
438 panic("slice must either be repeated or a List type")
440 return Must(ListOf(typeToNode(name, typ.Elem(), parquet.Repetitions.Required, elemInfo), repType, fieldID))
443 // strings are byte arrays or fixedlen byte array
444 t := parquet.Types.ByteArray
446 case parquet.Types.Undefined, parquet.Types.ByteArray:
447 case parquet.Types.FixedLenByteArray:
448 t = parquet.Types.FixedLenByteArray
450 panic("string fields should be of type bytearray or fixedlenbytearray only")
453 if !logical.IsNone() {
454 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, t, typeLen, fieldID))
457 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, t, converted, typeLen, precision, scale, fieldID))
458 case reflect.Int, reflect.Int32, reflect.Int8, reflect.Int16, reflect.Int64:
459 // handle integer types, default to setting the corresponding logical type
460 ptyp := parquet.Types.Int32
461 if typ.Bits() == 64 {
462 ptyp = parquet.Types.Int64
465 if physical != parquet.Types.Undefined {
469 if !logical.IsNone() {
470 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, typeLen, fieldID))
473 bitwidth := int8(typ.Bits())
474 if physical != parquet.Types.Undefined {
475 if ptyp == parquet.Types.Int32 {
477 } else if ptyp == parquet.Types.Int64 {
482 if converted != ConvertedTypes.None {
483 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, 0, precision, scale, fieldID))
486 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(bitwidth, true), ptyp, 0, fieldID))
487 case reflect.Uint, reflect.Uint32, reflect.Uint8, reflect.Uint16, reflect.Uint64:
488 // handle unsigned integer types and default to the corresponding logical type for it.
489 ptyp := parquet.Types.Int32
490 if typ.Bits() == 64 {
491 ptyp = parquet.Types.Int64
494 if physical != parquet.Types.Undefined {
498 if !logical.IsNone() {
499 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, ptyp, typeLen, fieldID))
502 bitwidth := int8(typ.Bits())
503 if physical != parquet.Types.Undefined {
504 if ptyp == parquet.Types.Int32 {
506 } else if ptyp == parquet.Types.Int64 {
511 if converted != ConvertedTypes.None {
512 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, ptyp, converted, 0, precision, scale, fieldID))
515 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, NewIntLogicalType(bitwidth, false), ptyp, 0, fieldID))
517 if !logical.IsNone() {
518 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Boolean, typeLen, fieldID))
520 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Boolean, converted, typeLen, precision, scale, fieldID))
521 case reflect.Float32:
522 if !logical.IsNone() {
523 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Float, typeLen, fieldID))
525 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Float, converted, typeLen, precision, scale, fieldID))
526 case reflect.Float64:
527 if !logical.IsNone() {
528 return MustPrimitive(NewPrimitiveNodeLogical(name, repType, logical, parquet.Types.Double, typeLen, fieldID))
530 return MustPrimitive(NewPrimitiveNodeConverted(name, repType, parquet.Types.Double, converted, typeLen, precision, scale, fieldID))
535 // NewSchemaFromStruct generates a schema from an object type via reflection of
536 // the type and reading struct tags for "parquet".
540 // Everything defaults to Required repetition, unless otherwise specified.
541 // Pointer types become Optional repetition.
542 // Arrays and Slices become logical List types unless using the tag `repetition=repeated`.
544 // A length specified byte field (like [5]byte) becomes a fixed_len_byte_array of that length
545 // unless otherwise specified by tags.
547 // string and []byte both become ByteArray unless otherwise specified.
549 // Integer types will default to having a logical type of the appropriate bit width
550 // and signedness rather than having no logical type, ie: an int8 will become an int32
551 // node with logical type Int(bitWidth=8, signed=true).
553 // Structs will become group nodes with the fields of the struct as the fields of the group,
554 // recursively creating the nodes.
556 // maps will become appropriate Map structures in the schema of the defined key and values.
560 // name: by default the node will have the same name as the field, this tag lets you specify a name
562 // type: Specify the physical type instead of using the field type
564 // length: specify the type length of the node, only relevant for fixed_len_byte_array
566 // scale: specify the scale for a decimal field
568 // precision: specify the precision for a decimal field
570 // fieldid: specify the field ID for that node, defaults to -1 which means it is not set in the parquet file.
572 // repetition: specify the repetition as something other than what is determined by the type
574 // converted: specify the Converted Type of the field
576 // logical: specify the logical type of the field, if using decimal then the scale and precision
577 // will be determined by the precision and scale fields, or by the logical.precision / logical.scale fields
578 // with the logical. prefixed versions taking precedence. For Time or Timestamp logical types,
579 // use logical.unit=<millis|micros|nanos> and logical.isadjustedutc=<true|false> to set those. Unit is required;
580 // isadjustedutc defaults to true. For Integer logical type, use logical.bitwidth and logical.signed to specify
581 // those values, with bitwidth being required, and signed defaulting to true.
583 // All tags other than name can use a prefix of "key<tagname>=<value>" to refer to the type of the key for a map
584 // and "value<tagname>=<value>" to refer to the value type of a map or the element of a list (such as the type of a slice)
585 func NewSchemaFromStruct(obj interface{}) (sc *Schema, err error) {
586 ot := reflect.TypeOf(obj)
587 if ot.Kind() == reflect.Ptr {
591 // typeToNode uses panics to fail fast / fail early instead of propagating
592 // errors up recursive stacks. so we recover here and return it as an error
594 if r := recover(); r != nil {
596 switch x := r.(type) {
602 err = xerrors.New("unknown panic")
607 root := typeToNode(ot.Name(), ot, parquet.Repetitions.Repeated, nil)
608 return NewSchema(root.(*GroupNode)), nil
611 var parquetTypeToReflect = map[parquet.Type]reflect.Type{
612 parquet.Types.Boolean: reflect.TypeOf(true),
613 parquet.Types.Int32: reflect.TypeOf(int32(0)),
614 parquet.Types.Int64: reflect.TypeOf(int64(0)),
615 parquet.Types.Float: reflect.TypeOf(float32(0)),
616 parquet.Types.Double: reflect.TypeOf(float64(0)),
617 parquet.Types.Int96: reflect.TypeOf(parquet.Int96{}),
618 parquet.Types.ByteArray: reflect.TypeOf(parquet.ByteArray{}),
619 parquet.Types.FixedLenByteArray: reflect.TypeOf(parquet.FixedLenByteArray{}),
622 func typeFromNode(n Node) reflect.Type {
625 typ := parquetTypeToReflect[n.(*PrimitiveNode).PhysicalType()]
626 // if a bytearray field is annotated as a String logical type or a UTF8 converted type
627 // then use a string instead of parquet.ByteArray / parquet.FixedLenByteArray which are []byte
628 if n.LogicalType().Equals(StringLogicalType{}) || n.ConvertedType() == ConvertedTypes.UTF8 {
629 typ = reflect.TypeOf(string(""))
632 if n.RepetitionType() == parquet.Repetitions.Optional {
633 typ = reflect.PtrTo(typ)
634 } else if n.RepetitionType() == parquet.Repetitions.Repeated {
635 typ = reflect.SliceOf(typ)
640 gnode := n.(*GroupNode)
641 switch gnode.ConvertedType() {
642 case ConvertedTypes.List:
643 // According to the Parquet Spec, a list should always be a 3-level structure
645 // <list-repetition> group <name> (LIST) {
646 // repeated group list {
647 // <element-repetition> <element-type> element;
651 // Outer-most level must be a group annotated with LIST containing a single field named "list".
652 // this level must be only optional (if the list is nullable) or required
653 // Middle level, named list, must be repeated group with a single field named "element"
654 // "element" field is the lists element type and repetition, which should be only required or optional
656 if gnode.fields.Len() != 1 {
657 panic("invalid list node, should have exactly 1 child.")
660 if gnode.fields[0].RepetitionType() != parquet.Repetitions.Repeated {
661 panic("invalid list node, child should be repeated")
664 // it is required that the repeated group of elements is named "list" and its element
665 // field is named "element", however existing data may not use this so readers shouldn't
666 // enforce them as errors
668 // Rules for backward compatibility from the parquet spec:
670 // 1) if the repeated field is not a group, then its type is the element type and elements
672 // 2) if the repeated field is a group with multiple fields, then its type is the element type
673 // and elements must be required.
674 // 3) if the repeated field is a group with one field AND is named either "array" or uses the
675 // LIST-annotated group's name with "_tuple" suffix, then the repeated type is the element
676 // type and the elements must be required.
677 // 4) otherwise, the repeated field's type is the element type with the repeated field's repetition
679 elemMustBeRequired := false
681 var elemType reflect.Type
682 elemNode := gnode.fields[0]
684 case elemNode.Type() == Primitive,
685 elemNode.(*GroupNode).fields.Len() > 1,
686 elemNode.(*GroupNode).fields.Len() == 1 && (elemNode.Name() == "array" || elemNode.Name() == gnode.Name()+"_tuple"):
687 elemMustBeRequired = true
688 elemType = typeFromNode(elemNode)
691 elemType = typeFromNode(elemNode.(*GroupNode).fields[0])
694 if elemMustBeRequired && elemType.Kind() == reflect.Ptr {
695 elemType = elemType.Elem()
698 elemType = reflect.SliceOf(elemType)
700 if gnode.RepetitionType() == parquet.Repetitions.Optional {
701 elemType = reflect.PtrTo(elemType)
704 case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
705 // According to the Parquet Spec, the outer-most level should be
706 // a group containing a single field named "key_value" with repetition
707 // either optional or required for whether or not the map is nullable.
709 // The key_value middle level *must* be a repeated group with a "key" field
710 // and *optionally* a "value" field
712 // the "key" field *must* be required and must always exist
714 // the "value" field can be required or optional or omitted.
716 // <map-repetition> group <name> (MAP) {
717 // repeated group key_value {
718 // required <key-type> key;
719 // <value-repetition> <value-type> value;
723 if gnode.fields.Len() != 1 {
724 panic("invalid map node, should have exactly 1 child")
727 if gnode.fields[0].Type() != Group {
728 panic("invalid map node, child should be a group node")
731 // that said, this may not be used in existing data and should not be
732 // enforced as errors when reading.
734 // some data may also incorrectly use MAP_KEY_VALUE instead of MAP
736 // so any group with MAP_KEY_VALUE that is not contained inside of a "MAP"
737 // group, should be considered equivalent to being a MAP group itself.
739 // in addition, the fields may not be called "key" and "value" in existing
740 // data, and as such should not be enforced as errors when reading.
742 keyval := gnode.fields[0].(*GroupNode)
744 keyIndex := keyval.FieldIndexByName("key")
746 keyIndex = 0 // use first child if there is no child named "key"
749 keyType := typeFromNode(keyval.fields[keyIndex])
750 if keyType.Kind() == reflect.Ptr {
751 keyType = keyType.Elem()
753 // can't use a []byte as a key for a map, so use string
754 if keyType == reflect.TypeOf(parquet.ByteArray{}) || keyType == reflect.TypeOf(parquet.FixedLenByteArray{}) {
755 keyType = reflect.TypeOf(string(""))
758 // if the value node is omitted, then consider this a "set" and make it a
760 valType := reflect.TypeOf(true)
761 if keyval.fields.Len() > 1 {
762 valIndex := keyval.FieldIndexByName("value")
764 valIndex = 1 // use second child if there is no child named "value"
767 valType = typeFromNode(keyval.fields[valIndex])
770 mapType := reflect.MapOf(keyType, valType)
771 if gnode.RepetitionType() == parquet.Repetitions.Optional {
772 mapType = reflect.PtrTo(mapType)
776 fields := []reflect.StructField{}
777 for _, f := range gnode.fields {
778 fields = append(fields, reflect.StructField{
780 Type: typeFromNode(f),
785 structType := reflect.StructOf(fields)
786 if gnode.RepetitionType() == parquet.Repetitions.Repeated {
787 return reflect.SliceOf(structType)
789 if gnode.RepetitionType() == parquet.Repetitions.Optional {
790 return reflect.PtrTo(structType)
795 panic("what happened?")
798 // NewStructFromSchema generates a struct type as a reflect.Type from the schema
799 // by using the appropriate physical types and making things either pointers or slices
800 // based on whether they are repeated/optional/required. It does not use the logical
801 // or converted types to change the physical storage so that it is more efficient to use
802 // the resulting type for reading without having to do conversions.
804 // It will use maps for map types and slices for list types, but otherwise ignores the
805 // converted and logical types of the nodes. Group nodes that are not List or Map will
806 // be nested structs.
807 func NewStructFromSchema(sc *Schema) (t reflect.Type, err error) {
809 if r := recover(); r != nil {
811 switch x := r.(type) {
817 err = xerrors.New("unknown panic")
822 t = typeFromNode(sc.root)
823 if t.Kind() == reflect.Slice || t.Kind() == reflect.Ptr {