]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow; | |
19 | ||
20 | import java.util.Set; | |
21 | ||
22 | import org.apache.arrow.memory.BufferAllocator; | |
23 | import org.apache.arrow.util.Preconditions; | |
24 | import org.apache.arrow.vector.dictionary.DictionaryProvider; | |
25 | ||
26 | /** | |
27 | * This class configures the Avro-to-Arrow conversion process. | |
28 | */ | |
29 | public class AvroToArrowConfig { | |
30 | ||
31 | private final BufferAllocator allocator; | |
32 | /** | |
33 | * The maximum rowCount to read each time when partially convert data. | |
34 | * Default value is 1024 and -1 means read all data into one vector. | |
35 | */ | |
36 | private final int targetBatchSize; | |
37 | ||
38 | /** | |
39 | * The dictionary provider used for enum type. | |
40 | * If avro schema has enum type, will create dictionary and update this provider. | |
41 | */ | |
42 | private final DictionaryProvider.MapDictionaryProvider provider; | |
43 | ||
44 | /** | |
45 | * The field names which to skip when reading decoder values. | |
46 | */ | |
47 | private final Set<String> skipFieldNames; | |
48 | ||
49 | /** | |
50 | * Instantiate an instance. | |
51 | * @param allocator The memory allocator to construct the Arrow vectors with. | |
52 | * @param targetBatchSize The maximum rowCount to read each time when partially convert data. | |
53 | * @param provider The dictionary provider used for enum type, adapter will update this provider. | |
54 | * @param skipFieldNames Field names which to skip. | |
55 | */ | |
56 | AvroToArrowConfig( | |
57 | BufferAllocator allocator, | |
58 | int targetBatchSize, | |
59 | DictionaryProvider.MapDictionaryProvider provider, | |
60 | Set<String> skipFieldNames) { | |
61 | ||
62 | Preconditions.checkArgument(targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE || | |
63 | targetBatchSize > 0, "invalid targetBatchSize: %s", targetBatchSize); | |
64 | ||
65 | this.allocator = allocator; | |
66 | this.targetBatchSize = targetBatchSize; | |
67 | this.provider = provider; | |
68 | this.skipFieldNames = skipFieldNames; | |
69 | } | |
70 | ||
71 | public BufferAllocator getAllocator() { | |
72 | return allocator; | |
73 | } | |
74 | ||
75 | public int getTargetBatchSize() { | |
76 | return targetBatchSize; | |
77 | } | |
78 | ||
79 | public DictionaryProvider.MapDictionaryProvider getProvider() { | |
80 | return provider; | |
81 | } | |
82 | ||
83 | public Set<String> getSkipFieldNames() { | |
84 | return skipFieldNames; | |
85 | } | |
86 | } |