]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/format/Schema.fbs
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / format / Schema.fbs
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18/// Logical types, vector layouts, and schemas
19
20/// Format Version History.
21/// Version 1.0 - Forward and backwards compatibility guaranteed.
22/// Version 1.1 - Add Decimal256 (No format release).
/// Version 1.2 (Pending) - Add Interval MONTH_DAY_NANO
24
25namespace org.apache.arrow.flatbuf;
26
enum MetadataVersion:short {
  /// 0.1.0 (October 2016).
  V1,

  /// 0.2.0 (February 2017). Non-backwards compatible with V1.
  V2,

  /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
  V3,

  /// >= 0.8.0 (December 2017). Non-backwards compatible with V3.
  V4,

  /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
  /// metadata and IPC messages). Implementations are recommended to provide a
  /// V4 compatibility mode with V5 format changes disabled.
  ///
  /// Incompatible changes between V4 and V5:
  /// - Union buffer layout has changed. In V5, Unions don't have a validity
  ///   bitmap buffer.
  V5,
}
49
/// Represents Arrow Features that might not have full support
/// within implementations. This is intended to be used in
/// two scenarios:
/// 1. A mechanism for readers of Arrow Streams
///    and files to understand that the stream or file makes
///    use of a feature that isn't supported or unknown to
///    the implementation (and therefore can meet the Arrow
///    forward compatibility guarantees).
/// 2. A means of negotiating between a client and server
///    what features a stream is allowed to use. The enum
///    values here are intended to represent higher level
///    features, additional details may be negotiated
///    with key-value pairs specific to the protocol.
///
/// Enums added to this list should be assigned power-of-two values
/// to facilitate exchanging and comparing bitmaps for supported
/// features.
enum Feature : long {
  /// Needed to make flatbuffers happy.
  UNUSED = 0,
  /// The stream makes use of multiple full dictionaries with the
  /// same ID and assumes clients implement dictionary replacement
  /// correctly.
  DICTIONARY_REPLACEMENT = 1,
  /// The stream makes use of compressed bodies as described
  /// in Message.fbs.
  COMPRESSED_BODY = 2
}
78
79/// These are stored in the flatbuffer in the Type union below
80
/// The Null logical type; the table is intentionally empty (the type
/// carries no parameters).
table Null {
}
83
/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
/// (according to the physical memory layout). We used Struct_ here as
/// Struct is a reserved word in Flatbuffers.
table Struct_ {
}
89
/// Variable-length list type (32-bit offsets; see LargeList for 64-bit
/// offsets). The table carries no parameters.
table List {
}
92
/// Same as List, but with 64-bit offsets, allowing to represent
/// extremely large data values.
table LargeList {
}
97
/// A list where every value contains the same, fixed number of items.
table FixedSizeList {
  /// Number of list items per value
  listSize: int;
}
102
/// A Map is a logical nested type that is represented as
///
/// List<entries: Struct<key: K, value: V>>
///
/// In this layout, the keys and values are each respectively contiguous. We do
/// not constrain the key and value types, so the application is responsible
/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
/// may be set in the metadata for this field.
///
/// In a field with Map type, the field has a child Struct field, which then
/// has two children: the first the key type and the second the value type. The
/// names of the child fields may be respectively "entries", "key", and "value",
/// but this is not enforced.
///
/// Map
/// ```text
///   - child[0] entries: Struct
///     - child[0] key: K
///     - child[1] value: V
/// ```
/// Neither the "entries" field nor the "key" field may be nullable.
///
/// The metadata is structured so that Arrow systems without special handling
/// for Map can make Map an alias for List. The "layout" attribute for the Map
/// field must have the same contents as a List.
table Map {
  /// Set to true if the keys within each value are sorted
  keysSorted: bool;
}
132
enum UnionMode:short { Sparse, Dense }

/// A union is a complex type with children in Field.
/// By default ids in the type vector refer to the offsets in the children;
/// optionally typeIds provides an indirection between the child offset and
/// the type id: for each child, `typeIds[offset]` is the id used in the type vector.
table Union {
  mode: UnionMode;
  typeIds: [ int ]; // optional, describes typeid of each child.
}
143
/// Integer type of a given bit width, signed or unsigned.
table Int {
  bitWidth: int; // restricted to 8, 16, 32, and 64 in v1
  is_signed: bool;
}
148
enum Precision:short {HALF, SINGLE, DOUBLE}

/// Floating-point numbers of the given precision (half, single or double).
table FloatingPoint {
  precision: Precision;
}
154
/// Unicode string data with UTF-8 encoding (see LargeUtf8 for 64-bit offsets).
table Utf8 {
}
158
/// Opaque binary data (see LargeBinary for 64-bit offsets).
table Binary {
}
162
/// Same as Utf8, but with 64-bit offsets, allowing to represent
/// extremely large data values.
table LargeUtf8 {
}
167
/// Same as Binary, but with 64-bit offsets, allowing to represent
/// extremely large data values.
table LargeBinary {
}
172
/// Opaque binary data where every value occupies the same, fixed number of bytes.
table FixedSizeBinary {
  /// Number of bytes per value
  byteWidth: int;
}
177
/// Boolean type; the table carries no parameters.
table Bool {
}
180
/// Exact decimal value represented as an integer value in two's
/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
/// are used. The representation uses the endianness indicated
/// in the Schema.
table Decimal {
  /// Total number of decimal digits
  precision: int;

  /// Number of digits after the decimal point "."
  scale: int;

  /// Number of bits per value. The only accepted widths are 128 and 256.
  /// We use bitWidth for consistency with Int::bitWidth.
  bitWidth: int = 128;
}
196
enum DateUnit: short {
  DAY,
  MILLISECOND
}

/// Date is either a 32-bit or 64-bit signed integer type representing an
/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
///
/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
///   leap seconds), where the values are evenly divisible by 86400000
/// * Days (32 bits) since the UNIX epoch
table Date {
  unit: DateUnit = MILLISECOND;
}
211
enum TimeUnit: short { SECOND, MILLISECOND, MICROSECOND, NANOSECOND }

/// Time is either a 32-bit or 64-bit signed integer type representing an
/// elapsed time since midnight, stored in either of four units: seconds,
/// milliseconds, microseconds or nanoseconds.
///
/// The integer `bitWidth` depends on the `unit` and must be one of the following:
/// * SECOND and MILLISECOND: 32 bits
/// * MICROSECOND and NANOSECOND: 64 bits
///
/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
/// (exclusive), adjusted for the time unit (for example, up to 86400000
/// exclusive for the MILLISECOND unit).
/// This definition doesn't allow for leap seconds. Time values from
/// measurements with leap seconds will need to be corrected when ingesting
/// into Arrow (for example by replacing the value 86400 with 86399).
table Time {
  unit: TimeUnit = MILLISECOND;
  bitWidth: int = 32;
}
232
233/// Timestamp is a 64-bit signed integer representing an elapsed time since a
234/// fixed epoch, stored in either of four units: seconds, milliseconds,
235/// microseconds or nanoseconds, and is optionally annotated with a timezone.
236///
237/// Timestamp values do not include any leap seconds (in other words, all
238/// days are considered 86400 seconds long).
239///
240/// Timestamps with a non-empty timezone
241/// ------------------------------------
242///
243/// If a Timestamp column has a non-empty timezone value, its epoch is
244/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
245/// (the Unix epoch), regardless of the Timestamp's own timezone.
246///
247/// Therefore, timestamp values with a non-empty timezone correspond to
248/// physical points in time together with some additional information about
249/// how the data was obtained and/or how to display it (the timezone).
250///
251/// For example, the timestamp value 0 with the timezone string "Europe/Paris"
252/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
253/// application may prefer to display it as "January 1st 1970, 01h00" in
254/// the Europe/Paris timezone (which is the same physical point in time).
255///
256/// One consequence is that timestamp values with a non-empty timezone
257/// can be compared and ordered directly, since they all share the same
258/// well-known point of reference (the Unix epoch).
259///
260/// Timestamps with an unset / empty timezone
261/// -----------------------------------------
262///
263/// If a Timestamp column has no timezone value, its epoch is
264/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
265///
266/// Therefore, timestamp values without a timezone cannot be meaningfully
267/// interpreted as physical points in time, but only as calendar / clock
268/// indications ("wall clock time") in an unspecified timezone.
269///
270/// For example, the timestamp value 0 with an empty timezone string
271/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
272/// is not enough information to interpret it as a well-defined physical
273/// point in time.
274///
275/// One consequence is that timestamp values without a timezone cannot
276/// be reliably compared or ordered, since they may have different points of
277/// reference. In particular, it is *not* possible to interpret an unset
278/// or empty timezone as the same as "UTC".
279///
280/// Conversion between timezones
281/// ----------------------------
282///
283/// If a Timestamp column has a non-empty timezone, changing the timezone
284/// to a different non-empty value is a metadata-only operation:
285/// the timestamp values need not change as their point of reference remains
286/// the same (the Unix epoch).
287///
288/// However, if a Timestamp column has no timezone value, changing it to a
289/// non-empty value requires to think about the desired semantics.
290/// One possibility is to assume that the original timestamp values are
291/// relative to the epoch of the timezone being set; timestamp values should
/// then be adjusted to the Unix epoch (for example, changing the timezone from
293/// empty to "Europe/Paris" would require converting the timestamp values
294/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
295/// nevertheless correct).
296///
297/// Guidelines for encoding data from external libraries
298/// ----------------------------------------------------
299///
300/// Date & time libraries often have multiple different data types for temporal
301/// data. In order to ease interoperability between different implementations the
302/// Arrow project has some recommendations for encoding these types into a Timestamp
303/// column.
304///
305/// An "instant" represents a physical point in time that has no relevant timezone
306/// (for example, astronomical data). To encode an instant, use a Timestamp with
307/// the timezone string set to "UTC", and make sure the Timestamp values
308/// are relative to the UTC epoch (January 1st 1970, midnight).
309///
310/// A "zoned date-time" represents a physical point in time annotated with an
311/// informative timezone (for example, the timezone in which the data was
312/// recorded). To encode a zoned date-time, use a Timestamp with the timezone
313/// string set to the name of the timezone, and make sure the Timestamp values
314/// are relative to the UTC epoch (January 1st 1970, midnight).
315///
316/// (There is some ambiguity between an instant and a zoned date-time with the
317/// UTC timezone. Both of these are stored the same in Arrow. Typically,
318/// this distinction does not matter. If it does, then an application should
319/// use custom metadata or an extension type to distinguish between the two cases.)
320///
321/// An "offset date-time" represents a physical point in time combined with an
322/// explicit offset from UTC. To encode an offset date-time, use a Timestamp
323/// with the timezone string set to the numeric timezone offset string
324/// (e.g. "+03:00"), and make sure the Timestamp values are relative to
325/// the UTC epoch (January 1st 1970, midnight).
326///
327/// A "naive date-time" (also called "local date-time" in some libraries)
328/// represents a wall clock time combined with a calendar date, but with
329/// no indication of how to map this information to a physical point in time.
330/// Naive date-times must be handled with care because of this missing
331/// information, and also because daylight saving time (DST) may make
332/// some values ambiguous or non-existent. A naive date-time may be
333/// stored as a struct with Date and Time fields. However, it may also be
334/// encoded into a Timestamp column with an empty timezone. The timestamp
335/// values should be computed "as if" the timezone of the date-time values
336/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
337/// be encoded as timestamp value 0.
table Timestamp {
  unit: TimeUnit;

  /// The timezone is an optional string indicating the name of a timezone,
  /// one of:
  ///
  /// * As used in the Olson timezone database (the "tz database" or
  ///   "tzdata"), such as "America/New_York".
  /// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
  ///   such as "+07:30".
  ///
  /// Whether a timezone string is present indicates different semantics about
  /// the data (see the discussion above this table).
  timezone: string;
}
353
enum IntervalUnit: short { YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO}
// A "calendar" interval which models types that don't necessarily
// have a precise duration without the context of a base timestamp (e.g.
// days can differ in length during daylight savings time transitions).
// All integers in the types below are stored in the endianness indicated
// by the schema.
//
// YEAR_MONTH - Indicates the number of elapsed whole months, stored as
//   4-byte signed integers.
// DAY_TIME - Indicates the number of elapsed days and milliseconds (no leap seconds),
//   stored as 2 contiguous 32-bit signed integers (8-bytes in total). Support
//   of this IntervalUnit is not required for full arrow compatibility.
// MONTH_DAY_NANO - A triple of the number of elapsed months, days, and nanoseconds.
//   The values are stored contiguously in 16-byte blocks. Months and days are
//   encoded as 32-bit signed integers and nanoseconds is encoded as a 64-bit
//   signed integer. Nanoseconds does not allow for leap seconds. Each field is
//   independent (e.g. there is no constraint that nanoseconds have the same
//   sign as days or that the quantity of nanoseconds represents less than a
//   day's worth of time).
table Interval {
  unit: IntervalUnit;
}
376
// An absolute length of time unrelated to any calendar artifacts.
//
// For the purposes of Arrow Implementations, adding this value to a Timestamp
// ("t1") naively (i.e. simply summing the two numbers) is acceptable even
// though in some cases the resulting Timestamp (t2) would not account for
// leap-seconds during the elapsed time between "t1" and "t2". Similarly,
// representing the difference between two Unix timestamps is acceptable, but
// would yield a value that is possibly a few seconds off from the true elapsed
// time.
//
// The resolution defaults to millisecond, but can be any of the other
// supported TimeUnit values as with Timestamp and Time types. This type is
// always represented as an 8-byte integer.
table Duration {
  unit: TimeUnit = MILLISECOND;
}
393
/// ----------------------------------------------------------------------
/// Top-level Type value, enabling extensible type-specific metadata. We can
/// add new logical types to Type without breaking backwards compatibility.
/// NOTE: member order is significant — it determines the union's type ids.

union Type {
  Null,
  Int,
  FloatingPoint,
  Binary,
  Utf8,
  Bool,
  Decimal,
  Date,
  Time,
  Timestamp,
  Interval,
  List,
  Struct_,
  Union,
  FixedSizeBinary,
  FixedSizeList,
  Map,
  Duration,
  LargeBinary,
  LargeUtf8,
  LargeList,
}
421
/// ----------------------------------------------------------------------
/// User-defined key-value pairs to add custom metadata to Arrow;
/// key namespacing is the responsibility of the user.

table KeyValue {
  key: string;
  value: string;
}
430
/// ----------------------------------------------------------------------
/// Dictionary encoding metadata.
/// Maintained for forwards compatibility; in the future
/// Dictionaries might be explicit maps between integers and values
/// allowing for non-contiguous index values.
enum DictionaryKind : short { DenseArray }
table DictionaryEncoding {
  /// The known dictionary id in the application where this data is used. In
  /// the file or streaming formats, the dictionary ids are found in the
  /// DictionaryBatch messages.
  id: long;

  /// The dictionary indices are constrained to be non-negative integers. If
  /// this field is null, the indices must be signed int32. To maximize
  /// cross-language compatibility and performance, implementations are
  /// recommended to prefer signed integer types over unsigned integer types
  /// and to avoid uint64 indices unless they are required by an application.
  indexType: Int;

  /// By default, dictionaries are not ordered, or the order does not have
  /// semantic meaning. In some statistical applications, dictionary-encoding
  /// is used to represent ordered categorical data, and we provide a way to
  /// preserve that metadata here.
  isOrdered: bool;

  dictionaryKind: DictionaryKind;
}
458
/// ----------------------------------------------------------------------
/// A field represents a named column in a record / row batch or child of a
/// nested type.

table Field {
  /// Name is not required (e.g., in a List)
  name: string;

  /// Whether or not this field can contain nulls. Should be true in general.
  nullable: bool;

  /// This is the type of the decoded value if the field is dictionary encoded.
  type: Type;

  /// Present only if the field is dictionary encoded.
  dictionary: DictionaryEncoding;

  /// children apply only to nested data types like Struct, List and Union. For
  /// primitive types children will have length 0.
  children: [ Field ];

  /// User-defined metadata
  custom_metadata: [ KeyValue ];
}
483
/// ----------------------------------------------------------------------
/// Endianness of the platform producing the data

enum Endianness:short { Little, Big }
488
/// ----------------------------------------------------------------------
/// A Buffer represents a single contiguous memory segment
struct Buffer {
  /// The relative offset into the shared memory page where the bytes for this
  /// buffer starts
  offset: long;

  /// The absolute length (in bytes) of the memory buffer. The memory is found
  /// from offset (inclusive) to offset + length (non-inclusive). When building
  /// messages using the encapsulated IPC message, padding bytes may be written
  /// after a buffer, but such padding bytes do not need to be accounted for in
  /// the size here.
  length: long;
}
503
/// ----------------------------------------------------------------------
/// A Schema describes the columns in a row batch

table Schema {

  /// Endianness of the buffers.
  /// It is Little Endian by default;
  /// if endianness doesn't match the underlying system then the vectors need to be converted.
  endianness: Endianness=Little;

  fields: [Field];
  // User-defined metadata
  custom_metadata: [ KeyValue ];

  /// Features used in the stream/file.
  features : [ Feature ];
}

root_type Schema;