]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.c; | |
19 | ||
20 | import static org.apache.arrow.c.NativeUtil.NULL; | |
21 | import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; | |
22 | import static org.apache.arrow.util.Preconditions.checkNotNull; | |
23 | import static org.apache.arrow.util.Preconditions.checkState; | |
24 | ||
25 | import java.util.ArrayList; | |
26 | import java.util.List; | |
27 | import java.util.Map; | |
28 | ||
29 | import org.apache.arrow.memory.BufferAllocator; | |
30 | import org.apache.arrow.vector.dictionary.Dictionary; | |
31 | import org.apache.arrow.vector.types.pojo.ArrowType; | |
32 | import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; | |
33 | import org.apache.arrow.vector.types.pojo.DictionaryEncoding; | |
34 | import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; | |
35 | import org.apache.arrow.vector.types.pojo.Field; | |
36 | import org.apache.arrow.vector.types.pojo.FieldType; | |
37 | import org.slf4j.Logger; | |
38 | import org.slf4j.LoggerFactory; | |
39 | ||
40 | /** | |
41 | * Importer for {@link ArrowSchema}. | |
42 | */ | |
43 | final class SchemaImporter { | |
44 | private static final Logger logger = LoggerFactory.getLogger(SchemaImporter.class); | |
45 | ||
46 | private static final int MAX_IMPORT_RECURSION_LEVEL = 64; | |
47 | private long nextDictionaryID = 1L; | |
48 | ||
49 | private final BufferAllocator allocator; | |
50 | ||
51 | public SchemaImporter(BufferAllocator allocator) { | |
52 | this.allocator = allocator; | |
53 | } | |
54 | ||
55 | Field importField(ArrowSchema schema, CDataDictionaryProvider provider) { | |
56 | return importField(schema, provider, 0); | |
57 | } | |
58 | ||
59 | private Field importField(ArrowSchema schema, CDataDictionaryProvider provider, int recursionLevel) { | |
60 | checkState(recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, "Recursion level in ArrowSchema struct exceeded"); | |
61 | ||
62 | ArrowSchema.Snapshot snapshot = schema.snapshot(); | |
63 | checkState(snapshot.release != NULL, "Cannot import released ArrowSchema"); | |
64 | ||
65 | String name = NativeUtil.toJavaString(snapshot.name); | |
66 | String format = NativeUtil.toJavaString(snapshot.format); | |
67 | checkNotNull(format, "format field must not be null"); | |
68 | ArrowType arrowType = Format.asType(format, snapshot.flags); | |
69 | boolean nullable = (snapshot.flags & Flags.ARROW_FLAG_NULLABLE) != 0; | |
70 | Map<String, String> metadata = Metadata.decode(snapshot.metadata); | |
71 | ||
72 | if (metadata != null && metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) { | |
73 | final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME); | |
74 | final String extensionMetadata = metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ""); | |
75 | ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName); | |
76 | if (extensionType != null) { | |
77 | arrowType = extensionType.deserialize(arrowType, extensionMetadata); | |
78 | } else { | |
79 | // Otherwise, we haven't registered the type | |
80 | logger.info("Unrecognized extension type: {}", extensionName); | |
81 | } | |
82 | } | |
83 | ||
84 | // Handle dictionary encoded vectors | |
85 | DictionaryEncoding dictionaryEncoding = null; | |
86 | if (snapshot.dictionary != NULL && provider != null) { | |
87 | boolean ordered = (snapshot.flags & Flags.ARROW_FLAG_DICTIONARY_ORDERED) != 0; | |
88 | ArrowType.Int indexType = (ArrowType.Int) arrowType; | |
89 | dictionaryEncoding = new DictionaryEncoding(nextDictionaryID++, ordered, indexType); | |
90 | ||
91 | ArrowSchema dictionarySchema = ArrowSchema.wrap(snapshot.dictionary); | |
92 | Field dictionaryField = importField(dictionarySchema, provider, recursionLevel + 1); | |
93 | provider.put(new Dictionary(dictionaryField.createVector(allocator), dictionaryEncoding)); | |
94 | } | |
95 | ||
96 | FieldType fieldType = new FieldType(nullable, arrowType, dictionaryEncoding, metadata); | |
97 | ||
98 | List<Field> children = null; | |
99 | long[] childrenIds = NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); | |
100 | if (childrenIds != null && childrenIds.length > 0) { | |
101 | children = new ArrayList<>(childrenIds.length); | |
102 | for (long childAddress : childrenIds) { | |
103 | ArrowSchema childSchema = ArrowSchema.wrap(childAddress); | |
104 | Field field = importField(childSchema, provider, recursionLevel + 1); | |
105 | children.add(field); | |
106 | } | |
107 | } | |
108 | return new Field(name, fieldType, children); | |
109 | } | |
110 | } |