// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package csv reads CSV files and presents the extracted data as records, also // writes data as record into CSV files package csv import ( "errors" "fmt" "github.com/apache/arrow/go/v6/arrow" "github.com/apache/arrow/go/v6/arrow/memory" ) var ( ErrMismatchFields = errors.New("arrow/csv: number of records mismatch") ) // Option configures a CSV reader/writer. type Option func(config) type config interface{} // WithComma specifies the fields separation character used while parsing CSV files. func WithComma(c rune) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.r.Comma = c case *Writer: cfg.w.Comma = c default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithComment specifies the comment character used while parsing CSV files. func WithComment(c rune) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.r.Comment = c default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithAllocator specifies the Arrow memory allocator used while building records. func WithAllocator(mem memory.Allocator) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.mem = mem default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithChunk specifies the chunk size used while parsing CSV files. // // If n is zero or 1, no chunking will take place and the reader will create // one record per row. // If n is greater than 1, chunks of n rows will be read. // If n is negative, the reader will load the whole CSV file into memory and // create one big record with all the rows. func WithChunk(n int) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.chunk = n default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithCRLF specifies the line terminator used while writing CSV files. // If useCRLF is true, \r\n is used as the line terminator, otherwise \n is used. // The default value is false. func WithCRLF(useCRLF bool) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Writer: cfg.w.UseCRLF = useCRLF default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithHeader enables or disables CSV-header handling. func WithHeader(useHeader bool) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.header = useHeader case *Writer: cfg.header = useHeader default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // DefaultNullValues is the set of values considered as NULL values by default // when Reader is configured to handle NULL values. var DefaultNullValues = []string{"", "NULL", "null"} // WithNullReader sets options for a CSV Reader pertaining to NULL value // handling. If stringsCanBeNull is true, then a string that matches one of the // nullValues set will be interpreted as NULL. Numeric columns will be checked // for nulls in all cases. If no nullValues arguments are passed in, the // defaults set in NewReader() will be kept. // // When no NULL values is given, the default set is taken from DefaultNullValues. func WithNullReader(stringsCanBeNull bool, nullValues ...string) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Reader: cfg.stringsCanBeNull = stringsCanBeNull if len(nullValues) == 0 { nullValues = DefaultNullValues } cfg.nulls = make([]string, len(nullValues)) copy(cfg.nulls, nullValues) default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } // WithNullWriter sets the null string written for NULL values. The default is // set in NewWriter(). func WithNullWriter(null string) Option { return func(cfg config) { switch cfg := cfg.(type) { case *Writer: cfg.nullValue = null default: panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) } } } func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { switch ft := f.Type.(type) { case *arrow.BooleanType: case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: case *arrow.Float32Type, *arrow.Float64Type: case *arrow.StringType: default: panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) } } }