[ceph.git] / ceph / src / arrow / go / parquet / internal / utils / typed_rle_dict.gen.go.tmpl

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
  "github.com/apache/arrow/go/v6/parquet"
)

{{range .In}}
{{if ne .Name "Boolean"}}
func (r *RleDecoder) GetBatchWithDictSpaced{{.Name}}(dc DictionaryConverter, vals []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (totalProcessed int, err error) {
  if nullCount == 0 {
    return r.GetBatchWithDict{{.Name}}(dc, vals)
  }

  var (
    blockCounter = NewBitBlockCounter(validBits, validBitsOffset, int64(len(vals)))
    processed = 0
    block BitBlockCount
  )

  for {
    block = blockCounter.NextFourWords()
    if block.Len == 0 {
      break
    }

    switch {
    case block.AllSet():
      processed, err = r.GetBatchWithDict{{.Name}}(dc, vals[:block.Len])
    case block.NoneSet():
      dc.FillZero(vals[:block.Len])
      processed = int(block.Len)
    default:
      processed, err = r.getspaced{{.Name}}(dc, vals, int(block.Len), int(block.Len)-int(block.Popcnt), validBits, validBitsOffset)
    }

    if err != nil {
      break
    }

    totalProcessed += processed
    vals = vals[int(block.Len):]
    validBitsOffset += int64(block.Len)
    if processed != int(block.Len) {
      break
    }
  }
  return
}

func (r *RleDecoder) getspaced{{.Name}}(dc DictionaryConverter, vals []{{.name}}, batchSize, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
  if nullCount == batchSize {
    dc.FillZero(vals[:batchSize])
    return batchSize, nil
  }

  read := 0
  remain := batchSize - nullCount

  const bufferSize = 1024
  var indexbuffer [bufferSize]IndexType

  // assume no bits to start
  bitReader := NewBitRunReader(validBits, validBitsOffset, int64(batchSize))
  validRun := bitReader.NextRun()
  for read < batchSize {
    if validRun.Len == 0 {
      validRun = bitReader.NextRun()
    }

    if !validRun.Set {
      dc.FillZero(vals[:int(validRun.Len)])
      vals = vals[int(validRun.Len):]
      read += int(validRun.Len)
      validRun.Len = 0
      continue
    }

    if r.repCount == 0 && r.litCount == 0 {
      if !r.Next() {
        return read, nil
      }
    }

    var batch int
    switch {
    case r.repCount > 0:
      batch, remain, validRun = r.consumeRepeatCounts(read, batchSize, remain, validRun, bitReader)
      current := IndexType(r.curVal)
      if !dc.IsValid(current) {
        return read, nil
      }
      dc.Fill(vals[:batch], current)
    case r.litCount > 0:
      var (
        litread int
        skipped int
        err error
      )
      litread, skipped, validRun, err = r.consumeLiterals{{.Name}}(dc, vals, remain, indexbuffer[:], validRun, bitReader)
    	if err != nil {
        return read, err
      }
      batch = litread + skipped
      remain -= litread
    }

    vals = vals[batch:]
    read += batch
  }
  return read, nil
}

func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{.name}}, remain int, buf []IndexType, run BitRun, bitRdr BitRunReader) (int, int, BitRun, error) {
  batch := MinInt(MinInt(remain, int(r.litCount)), len(buf))
	buf = buf[:batch]

	n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf)
	if n != batch {
		return 0, 0, run, xerrors.New("was not able to retrieve correct number of indexes")
	}

	if !dc.IsValid(buf...) {
		return 0, 0, run, xerrors.New("invalid index values found for dictionary converter")
	}

	var (
		read    int
		skipped int
	)
	for read < batch {
		if run.Set {
			updateSize := MinInt(batch-read, int(run.Len))
			if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil {
				return 0, 0, run, err
			}
			read += updateSize
			vals = vals[updateSize:]
			run.Len -= int64(updateSize)
		} else {
			dc.FillZero(vals[:int(run.Len)])
			vals = vals[int(run.Len):]
			skipped += int(run.Len)
			run.Len = 0
		}
		if run.Len == 0 {
			run = bitRdr.NextRun()
		}
	}
	r.litCount -= int32(batch)
	return read, skipped, run, nil
}

func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{.name}}) (int, error) {
  var (
    read = 0
    size = len(vals)
    indexbuffer [1024]IndexType
  )

  for read < size {
    remain := size - read

    switch {
    case r.repCount > 0:
      idx := IndexType(r.curVal)
      if !dc.IsValid(idx) {
        return read, nil
      }
      batch := MinInt(remain, int(r.repCount))
      if err := dc.Fill(vals[:batch], idx); err != nil {
        return read, err
      }
      r.repCount -= int32(batch)
      read += batch
      vals = vals[batch:]
    case r.litCount > 0:
      litbatch := MinInt(MinInt(remain, int(r.litCount)), 1024)
      buf := indexbuffer[:litbatch]
      n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf)
      if n != litbatch {
        return read, nil
      }
      if !dc.IsValid(buf...) {
        return read, nil
      }
      if err := dc.Copy(vals, buf); err != nil {
        return read, nil
      }
      r.litCount -= int32(litbatch)
      read += litbatch
      vals = vals[litbatch:]
    default:
      if !r.Next() {
        return read, nil
      }
    }
  }

  return read, nil
}
{{end}}
{{end}}
Commit	Line	Data
1d09f67e TL	1	// Licensed to the Apache Software Foundation (ASF) under one
	2	// or more contributor license agreements. See the NOTICE file
	3	// distributed with this work for additional information
	4	// regarding copyright ownership. The ASF licenses this file
	5	// to you under the Apache License, Version 2.0 (the
	6	// "License"); you may not use this file except in compliance
	7	// with the License. You may obtain a copy of the License at
	8	//
	9	// http://www.apache.org/licenses/LICENSE-2.0
	10	//
	11	// Unless required by applicable law or agreed to in writing, software
	12	// distributed under the License is distributed on an "AS IS" BASIS,
	13	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	14	// See the License for the specific language governing permissions and
	15	// limitations under the License.
	16
	17	package utils
	18
	19	import (
	20	"github.com/apache/arrow/go/v6/parquet"
	21	)
	22
	23	{{range .In}}
	24	{{if ne .Name "Boolean"}}
	25	func (r *RleDecoder) GetBatchWithDictSpaced{{.Name}}(dc DictionaryConverter, vals []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (totalProcessed int, err error) {
	26	if nullCount == 0 {
	27	return r.GetBatchWithDict{{.Name}}(dc, vals)
	28	}
	29
	30	var (
	31	blockCounter = NewBitBlockCounter(validBits, validBitsOffset, int64(len(vals)))
	32	processed = 0
	33	block BitBlockCount
	34	)
	35
	36	for {
	37	block = blockCounter.NextFourWords()
	38	if block.Len == 0 {
	39	break
	40	}
	41
	42	switch {
	43	case block.AllSet():
	44	processed, err = r.GetBatchWithDict{{.Name}}(dc, vals[:block.Len])
	45	case block.NoneSet():
	46	dc.FillZero(vals[:block.Len])
	47	processed = int(block.Len)
	48	default:
	49	processed, err = r.getspaced{{.Name}}(dc, vals, int(block.Len), int(block.Len)-int(block.Popcnt), validBits, validBitsOffset)
	50	}
	51
	52	if err != nil {
	53	break
	54	}
	55
	56	totalProcessed += processed
	57	vals = vals[int(block.Len):]
	58	validBitsOffset += int64(block.Len)
	59	if processed != int(block.Len) {
	60	break
	61	}
	62	}
	63	return
	64	}
65
66	func (r *RleDecoder) getspaced{{.Name}}(dc DictionaryConverter, vals []{{.name}}, batchSize, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
67	if nullCount == batchSize {
68	dc.FillZero(vals[:batchSize])
69	return batchSize, nil
70	}
71
72	read := 0
73	remain := batchSize - nullCount
74
75	const bufferSize = 1024
76	var indexbuffer [bufferSize]IndexType
77
78	// assume no bits to start
79	bitReader := NewBitRunReader(validBits, validBitsOffset, int64(batchSize))
80	validRun := bitReader.NextRun()
81	for read < batchSize {
82	if validRun.Len == 0 {
83	validRun = bitReader.NextRun()
84	}
85
86	if !validRun.Set {
87	dc.FillZero(vals[:int(validRun.Len)])
88	vals = vals[int(validRun.Len):]
89	read += int(validRun.Len)
90	validRun.Len = 0
91	continue
92	}
93
94	if r.repCount == 0 && r.litCount == 0 {
95	if !r.Next() {
96	return read, nil
97	}
98	}
99
100	var batch int
101	switch {
102	case r.repCount > 0:
103	batch, remain, validRun = r.consumeRepeatCounts(read, batchSize, remain, validRun, bitReader)
104	current := IndexType(r.curVal)
105	if !dc.IsValid(current) {
106	return read, nil
107	}
108	dc.Fill(vals[:batch], current)
109	case r.litCount > 0:
110	var (
111	litread int
112	skipped int
113	err error
114	)
115	litread, skipped, validRun, err = r.consumeLiterals{{.Name}}(dc, vals, remain, indexbuffer[:], validRun, bitReader)
116	if err != nil {
117	return read, err
118	}
119	batch = litread + skipped
120	remain -= litread
121	}
122
123	vals = vals[batch:]
124	read += batch
125	}
126	return read, nil
127	}
128
129	func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{.name}}, remain int, buf []IndexType, run BitRun, bitRdr BitRunReader) (int, int, BitRun, error) {
130	batch := MinInt(MinInt(remain, int(r.litCount)), len(buf))
131	buf = buf[:batch]
132
133	n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf)
134	if n != batch {
135	return 0, 0, run, xerrors.New("was not able to retrieve correct number of indexes")
136	}
137
138	if !dc.IsValid(buf...) {
139	return 0, 0, run, xerrors.New("invalid index values found for dictionary converter")
140	}
141
142	var (
143	read int
144	skipped int
145	)
146	for read < batch {
147	if run.Set {
148	updateSize := MinInt(batch-read, int(run.Len))
149	if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil {
150	return 0, 0, run, err
151	}
152	read += updateSize
153	vals = vals[updateSize:]
154	run.Len -= int64(updateSize)
155	} else {
156	dc.FillZero(vals[:int(run.Len)])
157	vals = vals[int(run.Len):]
158	skipped += int(run.Len)
159	run.Len = 0
160	}
161	if run.Len == 0 {
162	run = bitRdr.NextRun()
163	}
164	}
165	r.litCount -= int32(batch)
166	return read, skipped, run, nil
167	}
168
169	func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{.name}}) (int, error) {
170	var (
171	read = 0
172	size = len(vals)
173	indexbuffer [1024]IndexType
174	)
175
176	for read < size {
177	remain := size - read
178
179	switch {
180	case r.repCount > 0:
181	idx := IndexType(r.curVal)
182	if !dc.IsValid(idx) {
183	return read, nil
184	}
185	batch := MinInt(remain, int(r.repCount))
186	if err := dc.Fill(vals[:batch], idx); err != nil {
187	return read, err
188	}
189	r.repCount -= int32(batch)
190	read += batch
191	vals = vals[batch:]
192	case r.litCount > 0:
193	litbatch := MinInt(MinInt(remain, int(r.litCount)), 1024)
194	buf := indexbuffer[:litbatch]
195	n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf)
196	if n != litbatch {
197	return read, nil
198	}
199	if !dc.IsValid(buf...) {
200	return read, nil
201	}
202	if err := dc.Copy(vals, buf); err != nil {
203	return read, nil
204	}
205	r.litCount -= int32(litbatch)
206	read += litbatch
207	vals = vals[litbatch:]
208	default:
209	if !r.Next() {
210	return read, nil
211	}
212	}
213	}
214
215	return read, nil
216	}
217	{{end}}
218	{{end}}