1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
19 #include "arrow/util/value_parsing.h"
32 gdv_int32
octet_length_utf8(const gdv_utf8 input
, gdv_int32 length
) { return length
; }
35 gdv_int32
bit_length_utf8(const gdv_utf8 input
, gdv_int32 length
) { return length
* 8; }
38 gdv_int32
octet_length_binary(const gdv_binary input
, gdv_int32 length
) { return length
; }
41 gdv_int32
bit_length_binary(const gdv_binary input
, gdv_int32 length
) {
46 int match_string(const char* input
, gdv_int32 input_len
, gdv_int32 start_pos
,
47 const char* delim
, gdv_int32 delim_len
) {
48 for (int i
= start_pos
; i
< input_len
; i
++) {
49 int left_chars
= input_len
- i
;
50 if ((left_chars
>= delim_len
) && memcmp(input
+ i
, delim
, delim_len
) == 0) {
59 gdv_int32
mem_compare(const char* left
, gdv_int32 left_len
, const char* right
,
60 gdv_int32 right_len
) {
62 if (right_len
< min
) {
66 int cmp_ret
= memcmp(left
, right
, min
);
70 return left_len
- right_len
;
74 // Expand inner macro for all varlen types: INNER(NAME, TYPE, OP) is emitted once
// with TYPE = utf8 and once with TYPE = binary; NAME and OP are forwarded untouched.
75 #define VAR_LEN_OP_TYPES(INNER, NAME, OP) \
76 INNER(NAME, utf8, OP) \
77 INNER(NAME, binary, OP)
79 // Relational binary functions: left and right parameters have the same type; the result is bool.
80 #define BINARY_RELATIONAL(NAME, TYPE, OP) \
82 bool NAME##_##TYPE##_##TYPE(const gdv_##TYPE left, gdv_int32 left_len, \
83 const gdv_##TYPE right, gdv_int32 right_len) { \
84 return mem_compare(left, left_len, right, right_len) OP 0; \
// Stamp out the six relational comparison functions. Each invocation passes a
// function-name stem and its comparison operator; VAR_LEN_OP_TYPES repeats the
// expansion for utf8 and for binary (e.g. equal_utf8_utf8 and equal_binary_binary).
87 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, equal
, ==)
88 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, not_equal
, !=)
89 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, less_than
, <)
90 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, less_than_or_equal_to
, <=)
91 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, greater_than
, >)
92 VAR_LEN_OP_TYPES(BINARY_RELATIONAL
, greater_than_or_equal_to
, >=)
94 #undef BINARY_RELATIONAL
95 #undef VAR_LEN_OP_TYPES
97 // Expand inner macro for all varlen types.
98 #define VAR_LEN_TYPES(INNER, NAME) \
103 int to_binary_from_hex(char ch
) {
104 if (ch
>= 'A' && ch
<= 'F') {
105 return 10 + (ch
- 'A');
106 } else if (ch
>= 'a' && ch
<= 'f') {
107 return 10 + (ch
- 'a');
113 bool starts_with_utf8_utf8(const char* data
, gdv_int32 data_len
, const char* prefix
,
114 gdv_int32 prefix_len
) {
115 return ((data_len
>= prefix_len
) && (memcmp(data
, prefix
, prefix_len
) == 0));
119 bool ends_with_utf8_utf8(const char* data
, gdv_int32 data_len
, const char* suffix
,
120 gdv_int32 suffix_len
) {
121 return ((data_len
>= suffix_len
) &&
122 (memcmp(data
+ data_len
- suffix_len
, suffix
, suffix_len
) == 0));
// Reports whether the substr_len bytes at substr occur contiguously anywhere within
// the data_len bytes at data. The comparison is on raw bytes; nothing is decoded.
//
// An empty substr (substr_len == 0) matches whenever data_len is non-negative.
// A negative substr_len is rejected up front: converted to memcmp's unsigned size
// argument it would request an enormous read.
bool is_substr_utf8_utf8(const char* data, int32_t data_len, const char* substr,
                         int32_t substr_len) {
  if (substr_len < 0) {
    return false;
  }
  // Last offset at which a candidate of substr_len bytes still fits inside data.
  const int32_t last_start = data_len - substr_len;
  for (int32_t i = 0; i <= last_start; ++i) {
    if (memcmp(data + i, substr, substr_len) == 0) {
      return true;
    }
  }
  return false;
}
137 gdv_int32
utf8_char_length(char c
) {
138 if ((signed char)c
>= 0) { // 1-byte char (0x00 ~ 0x7F)
140 } else if ((c
& 0xE0) == 0xC0) { // 2-byte char
142 } else if ((c
& 0xF0) == 0xE0) { // 3-byte char
144 } else if ((c
& 0xF8) == 0xF0) { // 4-byte char
152 void set_error_for_invalid_utf(int64_t execution_context
, char val
) {
153 char const* fmt
= "unexpected byte \\%02hhx encountered while decoding utf8 string";
154 int size
= static_cast<int>(strlen(fmt
)) + 64;
155 char* error
= reinterpret_cast<char*>(malloc(size
));
156 snprintf(error
, size
, fmt
, (unsigned char)val
);
157 gdv_fn_context_set_error_msg(execution_context
, error
);
162 bool validate_utf8_following_bytes(const char* data
, int32_t data_len
,
163 int32_t char_index
) {
164 for (int j
= 1; j
< data_len
; ++j
) {
165 if ((data
[char_index
+ j
] & 0xC0) != 0x80) { // bytes following head-byte of glyph
172 // Count the number of utf8 characters
173 // return 0 for invalid/incomplete input byte sequences
175 gdv_int32
utf8_length(gdv_int64 context
, const char* data
, gdv_int32 data_len
) {
178 for (int i
= 0; i
< data_len
; i
+= char_len
) {
179 char_len
= utf8_char_length(data
[i
]);
180 if (char_len
== 0 || i
+ char_len
> data_len
) { // invalid byte or incomplete glyph
181 set_error_for_invalid_utf(context
, data
[i
]);
184 for (int j
= 1; j
< char_len
; ++j
) {
185 if ((data
[i
+ j
] & 0xC0) != 0x80) { // bytes following head-byte of glyph
186 set_error_for_invalid_utf(context
, data
[i
+ j
]);
195 // Count the number of utf8 characters; an invalid byte or incomplete glyph is not an error and is counted as a character of size 1
197 gdv_int32
utf8_length_ignore_invalid(const char* data
, gdv_int32 data_len
) {
200 for (int i
= 0; i
< data_len
; i
+= char_len
) {
201 char_len
= utf8_char_length(data
[i
]);
202 if (char_len
== 0 || i
+ char_len
> data_len
) { // invalid byte or incomplete glyph
203 // if invalid byte or incomplete glyph, ignore it
206 for (int j
= 1; j
< char_len
; ++j
) {
207 if ((data
[i
+ j
] & 0xC0) != 0x80) { // bytes following head-byte of glyph
216 // Get the byte position corresponding to a character position for a non-empty utf8 sequence
219 gdv_int32
utf8_byte_pos(gdv_int64 context
, const char* str
, gdv_int32 str_len
,
220 gdv_int32 char_pos
) {
223 for (gdv_int32 char_index
= 0; char_index
< char_pos
&& byte_index
< str_len
;
225 char_len
= utf8_char_length(str
[byte_index
]);
227 byte_index
+ char_len
> str_len
) { // invalid byte or incomplete glyph
228 set_error_for_invalid_utf(context
, str
[byte_index
]);
231 byte_index
+= char_len
;
236 #define UTF8_LENGTH(NAME, TYPE) \
238 gdv_int32 NAME##_##TYPE(gdv_int64 context, gdv_##TYPE in, gdv_int32 in_len) { \
239 return utf8_length(context, in, in_len); \
242 UTF8_LENGTH(char_length
, utf8
)
243 UTF8_LENGTH(length
, utf8
)
244 UTF8_LENGTH(lengthUtf8
, binary
)
246 // Returns a string of 'n' spaces.
247 #define SPACE_STR(IN_TYPE) \
249 const char* space_##IN_TYPE(gdv_int64 ctx, gdv_##IN_TYPE n, int32_t* out_len) { \
250 gdv_int32 n_times = static_cast<gdv_int32>(n); \
251 if (n_times <= 0) { \
255 char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, n_times)); \
256 if (ret == nullptr) { \
257 gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); \
261 for (int i = 0; i < n_times; i++) { \
264 *out_len = n_times; \
271 // Reverse a utf8 sequence
273 const char* reverse_utf8(gdv_int64 context
, const char* data
, gdv_int32 data_len
,
280 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, data_len
));
281 if (ret
== nullptr) {
282 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
288 for (gdv_int32 i
= 0; i
< data_len
; i
+= char_len
) {
289 char_len
= utf8_char_length(data
[i
]);
291 if (char_len
== 0 || i
+ char_len
> data_len
) { // invalid byte or incomplete glyph
292 set_error_for_invalid_utf(context
, data
[i
]);
297 for (gdv_int32 j
= 0; j
< char_len
; ++j
) {
298 if (j
> 0 && (data
[i
+ j
] & 0xC0) != 0x80) { // bytes following head-byte of glyph
299 set_error_for_invalid_utf(context
, data
[i
+ j
]);
303 ret
[data_len
- i
- char_len
+ j
] = data
[i
+ j
];
310 // Trims space characters (' ') from the left end of the input utf8 sequence
312 const char* ltrim_utf8(gdv_int64 context
, const char* data
, gdv_int32 data_len
,
320 // start denotes the first position of non-space characters in the input string
321 while (start
< data_len
&& data
[start
] == ' ') {
325 *out_len
= data_len
- start
;
329 // Trims space characters (' ') from the right end of the input utf8 sequence
331 const char* rtrim_utf8(gdv_int64 context
, const char* data
, gdv_int32 data_len
,
338 gdv_int32 end
= data_len
- 1;
339 // end denotes the last position of non-space characters in the input string
340 while (end
>= 0 && data
[end
] == ' ') {
348 // Trims space characters (' ') from both ends of the input utf8 sequence
350 const char* btrim_utf8(gdv_int64 context
, const char* data
, gdv_int32 data_len
,
357 gdv_int32 start
= 0, end
= data_len
- 1;
358 // start and end denote the first and last positions of non-space
359 // characters in the input string respectively
360 while (start
<= end
&& data
[start
] == ' ') {
363 while (end
>= start
&& data
[end
] == ' ') {
367 // string has some leading/trailing spaces and some non-space characters
368 *out_len
= end
- start
+ 1;
372 // Trims characters present in the trim text from the left end of the base text
374 const char* ltrim_utf8_utf8(gdv_int64 context
, const char* basetext
,
375 gdv_int32 basetext_len
, const char* trimtext
,
376 gdv_int32 trimtext_len
, int32_t* out_len
) {
377 if (basetext_len
== 0) {
380 } else if (trimtext_len
== 0) {
381 *out_len
= basetext_len
;
385 gdv_int32 start_ptr
, char_len
;
386 // scan the base text from left to right and increment the start pointer till
387 // there is a character which is not present in the trim text
388 for (start_ptr
= 0; start_ptr
< basetext_len
; start_ptr
+= char_len
) {
389 char_len
= utf8_char_length(basetext
[start_ptr
]);
390 if (char_len
== 0 || start_ptr
+ char_len
> basetext_len
) {
391 // invalid byte or incomplete glyph
392 set_error_for_invalid_utf(context
, basetext
[start_ptr
]);
396 if (!is_substr_utf8_utf8(trimtext
, trimtext_len
, basetext
+ start_ptr
, char_len
)) {
401 *out_len
= basetext_len
- start_ptr
;
402 return basetext
+ start_ptr
;
405 // Trims characters present in the trim text from the right end of the base text
407 const char* rtrim_utf8_utf8(gdv_int64 context
, const char* basetext
,
408 gdv_int32 basetext_len
, const char* trimtext
,
409 gdv_int32 trimtext_len
, int32_t* out_len
) {
410 if (basetext_len
== 0) {
413 } else if (trimtext_len
== 0) {
414 *out_len
= basetext_len
;
418 gdv_int32 char_len
, end_ptr
, byte_cnt
= 1;
419 // scan the base text from right to left and decrement the end pointer till
420 // there is a character which is not present in the trim text
421 for (end_ptr
= basetext_len
- 1; end_ptr
>= 0; --end_ptr
) {
422 char_len
= utf8_char_length(basetext
[end_ptr
]);
423 if (char_len
== 0) { // trailing bytes of multibyte character
427 // this is the first byte of a character, hence check that char_len == byte_cnt
428 if (byte_cnt
!= char_len
) { // invalid byte or incomplete glyph
429 set_error_for_invalid_utf(context
, basetext
[end_ptr
]);
433 byte_cnt
= 1; // reset the counter
434 if (!is_substr_utf8_utf8(trimtext
, trimtext_len
, basetext
+ end_ptr
, char_len
)) {
439 // when all characters in the basetext are part of the trimtext
445 end_ptr
+= utf8_char_length(basetext
[end_ptr
]); // point to the next character
450 // Trims characters present in the trim text from both ends of the base text
452 const char* btrim_utf8_utf8(gdv_int64 context
, const char* basetext
,
453 gdv_int32 basetext_len
, const char* trimtext
,
454 gdv_int32 trimtext_len
, int32_t* out_len
) {
455 if (basetext_len
== 0) {
458 } else if (trimtext_len
== 0) {
459 *out_len
= basetext_len
;
463 gdv_int32 start_ptr
, end_ptr
, char_len
, byte_cnt
= 1;
464 // scan the base text from left to right and increment the start and decrement the
465 // end pointers till there are characters which are not present in the trim text
466 for (start_ptr
= 0; start_ptr
< basetext_len
; start_ptr
+= char_len
) {
467 char_len
= utf8_char_length(basetext
[start_ptr
]);
468 if (char_len
== 0 || start_ptr
+ char_len
> basetext_len
) {
469 // invalid byte or incomplete glyph
470 set_error_for_invalid_utf(context
, basetext
[start_ptr
]);
474 if (!is_substr_utf8_utf8(trimtext
, trimtext_len
, basetext
+ start_ptr
, char_len
)) {
478 for (end_ptr
= basetext_len
- 1; end_ptr
>= start_ptr
; --end_ptr
) {
479 char_len
= utf8_char_length(basetext
[end_ptr
]);
480 if (char_len
== 0) { // trailing byte in multibyte character
484 // this is the first byte of a character, hence check that char_len == byte_cnt
485 if (byte_cnt
!= char_len
) { // invalid byte or incomplete glyph
486 set_error_for_invalid_utf(context
, basetext
[end_ptr
]);
490 byte_cnt
= 1; // reset the counter
491 if (!is_substr_utf8_utf8(trimtext
, trimtext_len
, basetext
+ end_ptr
, char_len
)) {
496 // when all characters are trimmed, start_ptr has been incremented to basetext_len and
497 // end_ptr still points to basetext_len - 1, hence we need to handle this case
498 if (start_ptr
> end_ptr
) {
503 end_ptr
+= utf8_char_length(basetext
[end_ptr
]); // point to the next character
504 *out_len
= end_ptr
- start_ptr
;
505 return basetext
+ start_ptr
;
509 gdv_boolean
compare_lower_strings(const char* base_str
, gdv_int32 base_str_len
,
510 const char* str
, gdv_int32 str_len
) {
511 if (base_str_len
!= str_len
) {
514 for (int i
= 0; i
< str_len
; i
++) {
515 // convert char to lower
517 // 'A' - 'Z' : 0x41 - 0x5a
518 // 'a' - 'z' : 0x61 - 0x7a
519 if (cur
>= 0x41 && cur
<= 0x5a) {
520 cur
= static_cast<char>(cur
+ 0x20);
522 // if the character does not match, break the flow
523 if (cur
!= base_str
[i
]) break;
524 // if the character matches and it is the last iteration, return true
525 if (i
== str_len
- 1) return true;
530 // Try to cast the received string ('0', '1', 'true', 'false') to a boolean, ignoring
531 // leading and trailing spaces and matching 'true'/'false' case-insensitively.
533 gdv_boolean
castBIT_utf8(gdv_int64 context
, const char* data
, gdv_int32 data_len
) {
535 gdv_fn_context_set_error_msg(context
, "Invalid value for boolean.");
539 // trim leading and trailing spaces
541 int32_t start
= 0, end
= data_len
- 1;
542 while (start
<= end
&& data
[start
] == ' ') {
545 while (end
>= start
&& data
[end
] == ' ') {
548 trimmed_len
= end
- start
+ 1;
549 const char* trimmed_data
= data
+ start
;
551 // compare received string with the valid bool string values '1', '0', 'true', 'false'
552 if (trimmed_len
== 1) {
553 // case for '0' and '1' value
554 if (trimmed_data
[0] == '1') return true;
555 if (trimmed_data
[0] == '0') return false;
556 } else if (trimmed_len
== 4) {
557 // case for matching 'true'
558 if (compare_lower_strings("true", 4, trimmed_data
, trimmed_len
)) return true;
559 } else if (trimmed_len
== 5) {
560 // case for matching 'false'
561 if (compare_lower_strings("false", 5, trimmed_data
, trimmed_len
)) return false;
563 // if no 'true', 'false', '0' or '1' value is found, set an error
564 gdv_fn_context_set_error_msg(context
, "Invalid value for boolean.");
569 const char* castVARCHAR_bool_int64(gdv_int64 context
, gdv_boolean value
,
570 gdv_int64 out_len
, gdv_int32
* out_length
) {
571 gdv_int32 len
= static_cast<gdv_int32
>(out_len
);
573 gdv_fn_context_set_error_msg(context
, "Output buffer length can't be negative");
578 reinterpret_cast<const char*>(gdv_fn_context_arena_malloc(context
, 5));
579 out
= value
? "true" : "false";
580 *out_length
= value
? ((len
> 4) ? 4 : len
) : ((len
> 5) ? 5 : len
);
584 // Truncates the string to the given length; a length of 0, or one that is at least the input length, leaves the length unchanged
585 #define CAST_VARCHAR_FROM_VARLEN_TYPE(TYPE) \
587 const char* castVARCHAR_##TYPE##_int64(gdv_int64 context, const char* data, \
588 gdv_int32 data_len, int64_t out_len, \
589 int32_t* out_length) { \
590 int32_t len = static_cast<int32_t>(out_len); \
593 gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
598 if (len >= data_len || len == 0) { \
599 *out_length = data_len; \
603 int32_t remaining = len; \
605 bool is_multibyte = false; \
607 /* In utf8, MSB of a single byte unicode char is always 0, \
608 * whereas for a multibyte character the MSB of each byte is 1. \
609 * So for a single byte char, a bitwise-and with x80 (10000000) will be 0 \
610 * and it won't be 0 for bytes of a multibyte char. \
612 char* data_ptr = const_cast<char*>(data); \
614 /* advance byte by byte till the 8-byte boundary then advance 8 bytes */ \
615 auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07; \
616 num_bytes = (8 - num_bytes) & 0x07; \
617 while (num_bytes > 0) { \
618 uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \
619 if ((*ptr & 0x80) != 0) { \
620 is_multibyte = true; \
627 if (is_multibyte) break; \
628 while (remaining >= 8) { \
629 uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index); \
630 if ((*ptr & 0x8080808080808080) != 0) { \
631 is_multibyte = true; \
637 if (is_multibyte) break; \
638 if (remaining >= 4) { \
639 uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index); \
640 if ((*ptr & 0x80808080) != 0) break; \
644 while (remaining > 0) { \
645 uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \
646 if ((*ptr & 0x80) != 0) { \
647 is_multibyte = true; \
653 if (is_multibyte) break; \
654 /* reached here; all are single byte characters */ \
659 /* detected multibyte utf8 characters; slow path */ \
661 utf8_byte_pos(context, data + index, data_len - index, len - index); \
662 if (byte_pos < 0) { \
667 *out_length = index + byte_pos; \
671 CAST_VARCHAR_FROM_VARLEN_TYPE(utf8
)
672 CAST_VARCHAR_FROM_VARLEN_TYPE(binary
)
674 #undef CAST_VARCHAR_FROM_VARLEN_TYPE
676 // Add functions for castVARBINARY
677 #define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE) \
679 const char* castVARBINARY_##TYPE##_int64(gdv_int64 context, const char* data, \
680 gdv_int32 data_len, int64_t out_len, \
681 int32_t* out_length) { \
682 int32_t len = static_cast<int32_t>(out_len); \
684 gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
689 if (len >= data_len || len == 0) { \
690 *out_length = data_len; \
697 CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8
)
698 CAST_VARBINARY_FROM_STRING_AND_BINARY(binary
)
700 #undef CAST_VARBINARY_FROM_STRING_AND_BINARY
702 #define IS_NULL(NAME, TYPE) \
704 bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
708 VAR_LEN_TYPES(IS_NULL
, isnull
)
712 #define IS_NOT_NULL(NAME, TYPE) \
714 bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
718 VAR_LEN_TYPES(IS_NOT_NULL
, isnotnull
)
724 We follow Oracle semantics for offset:
725 - If position is positive, then the first glyph in the substring is determined by
726 counting that many glyphs forward from the beginning of the input. (i.e., for position ==
727 1 the first glyph in the substring will be identical to the first glyph in the input)
729 - If position is negative, then the first glyph in the substring is determined by
730 counting that many glyphs backward from the end of the input. (i.e., for position == -1
731 the first glyph in the substring will be identical to the last glyph in the input)
733 - If position is 0 then it is treated as 1.
736 const char* substr_utf8_int64_int64(gdv_int64 context
, const char* input
,
737 gdv_int32 in_data_len
, gdv_int64 position
,
738 gdv_int64 substring_length
, gdv_int32
* out_data_len
) {
739 if (substring_length
<= 0 || input
== nullptr || in_data_len
<= 0) {
744 gdv_int64 in_glyphs_count
=
745 static_cast<gdv_int64
>(utf8_length(context
, input
, in_data_len
));
747 // in_glyphs_count is zero if input has invalid glyphs
748 if (in_glyphs_count
== 0) {
753 gdv_int64 from_glyph
; // from_glyph==0 indicates the first glyph of the input
755 from_glyph
= position
- 1;
756 } else if (position
< 0) {
757 from_glyph
= in_glyphs_count
+ position
;
762 if (from_glyph
< 0 || from_glyph
>= in_glyphs_count
) {
767 gdv_int64 out_glyphs_count
= substring_length
;
768 if (substring_length
> in_glyphs_count
- from_glyph
) {
769 out_glyphs_count
= in_glyphs_count
- from_glyph
;
772 gdv_int64 in_data_len64
= static_cast<gdv_int64
>(in_data_len
);
773 gdv_int64 start_pos
= 0;
774 gdv_int64 end_pos
= in_data_len64
;
776 gdv_int64 current_glyph
= 0;
778 while (pos
< in_data_len64
) {
779 if (current_glyph
== from_glyph
) {
782 pos
+= static_cast<gdv_int64
>(utf8_char_length(input
[pos
]));
783 if (current_glyph
- from_glyph
+ 1 == out_glyphs_count
) {
789 if (end_pos
> in_data_len64
|| end_pos
> INT_MAX
) {
790 end_pos
= in_data_len64
;
793 *out_data_len
= static_cast<gdv_int32
>(end_pos
- start_pos
);
795 reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_data_len
));
796 if (ret
== nullptr) {
797 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
801 memcpy(ret
, input
+ start_pos
, *out_data_len
);
806 const char* substr_utf8_int64(gdv_int64 context
, const char* input
, gdv_int32 in_len
,
807 gdv_int64 offset64
, gdv_int32
* out_len
) {
808 return substr_utf8_int64_int64(context
, input
, in_len
, offset64
, in_len
, out_len
);
812 const char* repeat_utf8_int32(gdv_int64 context
, const char* in
, gdv_int32 in_len
,
813 gdv_int32 repeat_number
, gdv_int32
* out_len
) {
814 // if the repeat number is zero or the input is empty, then return empty string
815 if (repeat_number
== 0 || in_len
<= 0) {
819 // if the repeat number is a negative number, an error is set on context
820 if (repeat_number
< 0) {
821 gdv_fn_context_set_error_msg(context
, "Repeat number can't be negative");
825 *out_len
= repeat_number
* in_len
;
826 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
827 if (ret
== nullptr) {
828 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
832 for (int i
= 0; i
< repeat_number
; ++i
) {
833 memcpy(ret
+ (i
* in_len
), in
, in_len
);
839 const char* concat_utf8_utf8(gdv_int64 context
, const char* left
, gdv_int32 left_len
,
840 bool left_validity
, const char* right
, gdv_int32 right_len
,
841 bool right_validity
, gdv_int32
* out_len
) {
842 if (!left_validity
) {
845 if (!right_validity
) {
848 return concatOperator_utf8_utf8(context
, left
, left_len
, right
, right_len
, out_len
);
852 const char* concatOperator_utf8_utf8(gdv_int64 context
, const char* left
,
853 gdv_int32 left_len
, const char* right
,
854 gdv_int32 right_len
, gdv_int32
* out_len
) {
855 *out_len
= left_len
+ right_len
;
860 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
861 if (ret
== nullptr) {
862 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
866 memcpy(ret
, left
, left_len
);
867 memcpy(ret
+ left_len
, right
, right_len
);
872 const char* concat_utf8_utf8_utf8(gdv_int64 context
, const char* in1
, gdv_int32 in1_len
,
873 bool in1_validity
, const char* in2
, gdv_int32 in2_len
,
874 bool in2_validity
, const char* in3
, gdv_int32 in3_len
,
875 bool in3_validity
, gdv_int32
* out_len
) {
885 return concatOperator_utf8_utf8_utf8(context
, in1
, in1_len
, in2
, in2_len
, in3
, in3_len
,
890 const char* concatOperator_utf8_utf8_utf8(gdv_int64 context
, const char* in1
,
891 gdv_int32 in1_len
, const char* in2
,
892 gdv_int32 in2_len
, const char* in3
,
893 gdv_int32 in3_len
, gdv_int32
* out_len
) {
894 *out_len
= in1_len
+ in2_len
+ in3_len
;
899 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
900 if (ret
== nullptr) {
901 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
905 memcpy(ret
, in1
, in1_len
);
906 memcpy(ret
+ in1_len
, in2
, in2_len
);
907 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
912 const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context
, const char* in1
,
913 gdv_int32 in1_len
, bool in1_validity
,
914 const char* in2
, gdv_int32 in2_len
,
915 bool in2_validity
, const char* in3
,
916 gdv_int32 in3_len
, bool in3_validity
,
917 const char* in4
, gdv_int32 in4_len
,
918 bool in4_validity
, gdv_int32
* out_len
) {
931 return concatOperator_utf8_utf8_utf8_utf8(context
, in1
, in1_len
, in2
, in2_len
, in3
,
932 in3_len
, in4
, in4_len
, out_len
);
936 const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context
, const char* in1
,
937 gdv_int32 in1_len
, const char* in2
,
938 gdv_int32 in2_len
, const char* in3
,
939 gdv_int32 in3_len
, const char* in4
,
940 gdv_int32 in4_len
, gdv_int32
* out_len
) {
941 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
;
946 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
947 if (ret
== nullptr) {
948 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
952 memcpy(ret
, in1
, in1_len
);
953 memcpy(ret
+ in1_len
, in2
, in2_len
);
954 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
955 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
960 const char* concat_utf8_utf8_utf8_utf8_utf8(
961 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
962 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
963 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
964 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
965 gdv_int32
* out_len
) {
981 return concatOperator_utf8_utf8_utf8_utf8_utf8(context
, in1
, in1_len
, in2
, in2_len
, in3
,
982 in3_len
, in4
, in4_len
, in5
, in5_len
,
987 const char* concatOperator_utf8_utf8_utf8_utf8_utf8(
988 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
989 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
990 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, gdv_int32
* out_len
) {
991 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
;
996 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
997 if (ret
== nullptr) {
998 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1002 memcpy(ret
, in1
, in1_len
);
1003 memcpy(ret
+ in1_len
, in2
, in2_len
);
1004 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1005 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1006 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1011 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8(
1012 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
1013 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
1014 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
1015 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
1016 const char* in6
, gdv_int32 in6_len
, bool in6_validity
, gdv_int32
* out_len
) {
1017 if (!in1_validity
) {
1020 if (!in2_validity
) {
1023 if (!in3_validity
) {
1026 if (!in4_validity
) {
1029 if (!in5_validity
) {
1032 if (!in6_validity
) {
1035 return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(context
, in1
, in1_len
, in2
, in2_len
,
1036 in3
, in3_len
, in4
, in4_len
, in5
,
1037 in5_len
, in6
, in6_len
, out_len
);
1041 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
1042 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
1043 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
1044 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, const char* in6
,
1045 gdv_int32 in6_len
, gdv_int32
* out_len
) {
1046 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
;
1047 if (*out_len
<= 0) {
1051 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1052 if (ret
== nullptr) {
1053 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1057 memcpy(ret
, in1
, in1_len
);
1058 memcpy(ret
+ in1_len
, in2
, in2_len
);
1059 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1060 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1061 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1062 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
, in6
, in6_len
);
1067 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1068 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
1069 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
1070 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
1071 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
1072 const char* in6
, gdv_int32 in6_len
, bool in6_validity
, const char* in7
,
1073 gdv_int32 in7_len
, bool in7_validity
, gdv_int32
* out_len
) {
1074 if (!in1_validity
) {
1077 if (!in2_validity
) {
1080 if (!in3_validity
) {
1083 if (!in4_validity
) {
1086 if (!in5_validity
) {
1089 if (!in6_validity
) {
1092 if (!in7_validity
) {
1095 return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1096 context
, in1
, in1_len
, in2
, in2_len
, in3
, in3_len
, in4
, in4_len
, in5
, in5_len
, in6
,
1097 in6_len
, in7
, in7_len
, out_len
);
1101 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1102 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
1103 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
1104 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, const char* in6
,
1105 gdv_int32 in6_len
, const char* in7
, gdv_int32 in7_len
, gdv_int32
* out_len
) {
1106 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
;
1107 if (*out_len
<= 0) {
1111 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1112 if (ret
== nullptr) {
1113 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1117 memcpy(ret
, in1
, in1_len
);
1118 memcpy(ret
+ in1_len
, in2
, in2_len
);
1119 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1120 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1121 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1122 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
, in6
, in6_len
);
1123 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
, in7
, in7_len
);
1128 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1129 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
1130 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
1131 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
1132 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
1133 const char* in6
, gdv_int32 in6_len
, bool in6_validity
, const char* in7
,
1134 gdv_int32 in7_len
, bool in7_validity
, const char* in8
, gdv_int32 in8_len
,
1135 bool in8_validity
, gdv_int32
* out_len
) {
1136 if (!in1_validity
) {
1139 if (!in2_validity
) {
1142 if (!in3_validity
) {
1145 if (!in4_validity
) {
1148 if (!in5_validity
) {
1151 if (!in6_validity
) {
1154 if (!in7_validity
) {
1157 if (!in8_validity
) {
1160 return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1161 context
, in1
, in1_len
, in2
, in2_len
, in3
, in3_len
, in4
, in4_len
, in5
, in5_len
, in6
,
1162 in6_len
, in7
, in7_len
, in8
, in8_len
, out_len
);
1166 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1167 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
1168 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
1169 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, const char* in6
,
1170 gdv_int32 in6_len
, const char* in7
, gdv_int32 in7_len
, const char* in8
,
1171 gdv_int32 in8_len
, gdv_int32
* out_len
) {
1173 in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+ in8_len
;
1174 if (*out_len
<= 0) {
1178 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1179 if (ret
== nullptr) {
1180 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1184 memcpy(ret
, in1
, in1_len
);
1185 memcpy(ret
+ in1_len
, in2
, in2_len
);
1186 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1187 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1188 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1189 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
, in6
, in6_len
);
1190 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
, in7
, in7_len
);
1191 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
, in8
,
1197 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1198 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
1199 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
1200 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
1201 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
1202 const char* in6
, gdv_int32 in6_len
, bool in6_validity
, const char* in7
,
1203 gdv_int32 in7_len
, bool in7_validity
, const char* in8
, gdv_int32 in8_len
,
1204 bool in8_validity
, const char* in9
, gdv_int32 in9_len
, bool in9_validity
,
1205 gdv_int32
* out_len
) {
1206 if (!in1_validity
) {
1209 if (!in2_validity
) {
1212 if (!in3_validity
) {
1215 if (!in4_validity
) {
1218 if (!in5_validity
) {
1221 if (!in6_validity
) {
1224 if (!in7_validity
) {
1227 if (!in8_validity
) {
1230 if (!in9_validity
) {
1233 return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1234 context
, in1
, in1_len
, in2
, in2_len
, in3
, in3_len
, in4
, in4_len
, in5
, in5_len
, in6
,
1235 in6_len
, in7
, in7_len
, in8
, in8_len
, in9
, in9_len
, out_len
);
1239 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1240 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
1241 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
1242 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, const char* in6
,
1243 gdv_int32 in6_len
, const char* in7
, gdv_int32 in7_len
, const char* in8
,
1244 gdv_int32 in8_len
, const char* in9
, gdv_int32 in9_len
, gdv_int32
* out_len
) {
1245 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+
1247 if (*out_len
<= 0) {
1251 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1252 if (ret
== nullptr) {
1253 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1257 memcpy(ret
, in1
, in1_len
);
1258 memcpy(ret
+ in1_len
, in2
, in2_len
);
1259 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1260 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1261 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1262 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
, in6
, in6_len
);
1263 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
, in7
, in7_len
);
1264 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
, in8
,
1267 ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+ in8_len
,
1273 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1274 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, bool in1_validity
,
1275 const char* in2
, gdv_int32 in2_len
, bool in2_validity
, const char* in3
,
1276 gdv_int32 in3_len
, bool in3_validity
, const char* in4
, gdv_int32 in4_len
,
1277 bool in4_validity
, const char* in5
, gdv_int32 in5_len
, bool in5_validity
,
1278 const char* in6
, gdv_int32 in6_len
, bool in6_validity
, const char* in7
,
1279 gdv_int32 in7_len
, bool in7_validity
, const char* in8
, gdv_int32 in8_len
,
1280 bool in8_validity
, const char* in9
, gdv_int32 in9_len
, bool in9_validity
,
1281 const char* in10
, gdv_int32 in10_len
, bool in10_validity
, gdv_int32
* out_len
) {
1282 if (!in1_validity
) {
1285 if (!in2_validity
) {
1288 if (!in3_validity
) {
1291 if (!in4_validity
) {
1294 if (!in5_validity
) {
1297 if (!in6_validity
) {
1300 if (!in7_validity
) {
1303 if (!in8_validity
) {
1306 if (!in9_validity
) {
1309 if (!in10_validity
) {
1312 return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1313 context
, in1
, in1_len
, in2
, in2_len
, in3
, in3_len
, in4
, in4_len
, in5
, in5_len
, in6
,
1314 in6_len
, in7
, in7_len
, in8
, in8_len
, in9
, in9_len
, in10
, in10_len
, out_len
);
1318 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
1319 gdv_int64 context
, const char* in1
, gdv_int32 in1_len
, const char* in2
,
1320 gdv_int32 in2_len
, const char* in3
, gdv_int32 in3_len
, const char* in4
,
1321 gdv_int32 in4_len
, const char* in5
, gdv_int32 in5_len
, const char* in6
,
1322 gdv_int32 in6_len
, const char* in7
, gdv_int32 in7_len
, const char* in8
,
1323 gdv_int32 in8_len
, const char* in9
, gdv_int32 in9_len
, const char* in10
,
1324 gdv_int32 in10_len
, gdv_int32
* out_len
) {
1325 *out_len
= in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+
1326 in8_len
+ in9_len
+ in10_len
;
1327 if (*out_len
<= 0) {
1331 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1332 if (ret
== nullptr) {
1333 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1337 memcpy(ret
, in1
, in1_len
);
1338 memcpy(ret
+ in1_len
, in2
, in2_len
);
1339 memcpy(ret
+ in1_len
+ in2_len
, in3
, in3_len
);
1340 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
, in4
, in4_len
);
1341 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
, in5
, in5_len
);
1342 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
, in6
, in6_len
);
1343 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
, in7
, in7_len
);
1344 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
, in8
,
1347 ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+ in8_len
,
1349 memcpy(ret
+ in1_len
+ in2_len
+ in3_len
+ in4_len
+ in5_len
+ in6_len
+ in7_len
+
1355 // Returns the numeric value of the first character of str.
1357 gdv_int32
ascii_utf8(const char* data
, gdv_int32 data_len
) {
1358 if (data_len
== 0) {
1361 return static_cast<gdv_int32
>(data
[0]);
1365 const char* convert_fromUTF8_binary(gdv_int64 context
, const char* bin_in
, gdv_int32 len
,
1366 gdv_int32
* out_len
) {
1368 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1369 if (ret
== nullptr) {
1370 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1374 memcpy(ret
, bin_in
, *out_len
);
1379 const char* convert_replace_invalid_fromUTF8_binary(int64_t context
, const char* text_in
,
1381 const char* char_to_replace
,
1382 int32_t char_to_replace_len
,
1384 if (char_to_replace_len
> 1) {
1385 gdv_fn_context_set_error_msg(context
, "Replacement of multiple bytes not supported");
1389 // actually the convert_replace function replaces invalid chars with an ASCII
1390 // character so the output length will be the same as the input length
1391 *out_len
= text_len
;
1392 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1393 if (ret
== nullptr) {
1394 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
1398 int32_t valid_bytes_to_cpy
= 0;
1399 int32_t out_byte_counter
= 0;
1400 int32_t in_byte_counter
= 0;
1402 // scan the base text from left to right and increment the start pointer till
1403 // looking for invalid chars to substitute
1404 for (int text_index
= 0; text_index
< text_len
; text_index
+= char_len
) {
1405 char_len
= utf8_char_length(text_in
[text_index
]);
1406 // only memory copy the bytes when detect invalid char
1407 if (char_len
== 0 || text_index
+ char_len
> text_len
||
1408 !validate_utf8_following_bytes(text_in
, char_len
, text_index
)) {
1409 // define char_len = 1 to increase text_index by 1 (as ASCII char fits in 1 byte)
1411 // first copy the valid bytes until now and then replace the invalid character
1412 memcpy(ret
+ out_byte_counter
, text_in
+ in_byte_counter
, valid_bytes_to_cpy
);
1413 // if the replacement char is empty, the invalid char should be ignored
1414 if (char_to_replace_len
== 0) {
1415 out_byte_counter
+= valid_bytes_to_cpy
;
1417 ret
[out_byte_counter
+ valid_bytes_to_cpy
] = char_to_replace
[0];
1418 out_byte_counter
+= valid_bytes_to_cpy
+ char_len
;
1420 in_byte_counter
+= valid_bytes_to_cpy
+ char_len
;
1421 valid_bytes_to_cpy
= 0;
1424 valid_bytes_to_cpy
+= char_len
;
1426 // if invalid chars were not found, return the original string
1427 if (out_byte_counter
== 0 && in_byte_counter
== 0) return text_in
;
1428 // if there are still valid bytes to copy, do it
1429 if (valid_bytes_to_cpy
!= 0) {
1430 memcpy(ret
+ out_byte_counter
, text_in
+ in_byte_counter
, valid_bytes_to_cpy
);
1432 // the out length will be the out bytes copied + the missing end bytes copied
1433 *out_len
= valid_bytes_to_cpy
+ out_byte_counter
;
// Reverses the first `len` bytes of `buf` in place. Buffers shorter than two
// bytes are left untouched.
static inline void reverse_char_buf(char* buf, int32_t len) {
  if (len < 2) {
    return;
  }
  int32_t lo = 0;
  int32_t hi = len - 1;
  // Swap the outermost pair and walk both indices towards the middle.
  while (lo < hi) {
    const char saved = buf[hi];
    buf[hi] = buf[lo];
    buf[lo] = saved;
    ++lo;
    --hi;
  }
}
1449 // Converts a double variable to binary
1451 const char* convert_toDOUBLE(int64_t context
, double value
, int32_t* out_len
) {
1452 *out_len
= sizeof(value
);
1453 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1455 if (ret
== nullptr) {
1456 gdv_fn_context_set_error_msg(context
,
1457 "Could not allocate memory for the output string");
1463 memcpy(ret
, &value
, *out_len
);
1469 const char* convert_toDOUBLE_be(int64_t context
, double value
, int32_t* out_len
) {
1470 // The function behaves like convert_toDOUBLE, but always return the result
1471 // in big endian format
1472 char* ret
= const_cast<char*>(convert_toDOUBLE(context
, value
, out_len
));
1474 #if ARROW_LITTLE_ENDIAN
1475 reverse_char_buf(ret
, *out_len
);
1481 // Converts a float variable to binary
1483 const char* convert_toFLOAT(int64_t context
, float value
, int32_t* out_len
) {
1484 *out_len
= sizeof(value
);
1485 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1487 if (ret
== nullptr) {
1488 gdv_fn_context_set_error_msg(context
,
1489 "Could not allocate memory for the output string");
1495 memcpy(ret
, &value
, *out_len
);
1501 const char* convert_toFLOAT_be(int64_t context
, float value
, int32_t* out_len
) {
1502 // The function behaves like convert_toFLOAT, but always return the result
1503 // in big endian format
1504 char* ret
= const_cast<char*>(convert_toFLOAT(context
, value
, out_len
));
1506 #if ARROW_LITTLE_ENDIAN
1507 reverse_char_buf(ret
, *out_len
);
1513 // Converts a bigint(int with 64 bits) variable to binary
1515 const char* convert_toBIGINT(int64_t context
, int64_t value
, int32_t* out_len
) {
1516 *out_len
= sizeof(value
);
1517 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1519 if (ret
== nullptr) {
1520 gdv_fn_context_set_error_msg(context
,
1521 "Could not allocate memory for the output string");
1527 memcpy(ret
, &value
, *out_len
);
1533 const char* convert_toBIGINT_be(int64_t context
, int64_t value
, int32_t* out_len
) {
1534 // The function behaves like convert_toBIGINT, but always return the result
1535 // in big endian format
1536 char* ret
= const_cast<char*>(convert_toBIGINT(context
, value
, out_len
));
1538 #if ARROW_LITTLE_ENDIAN
1539 reverse_char_buf(ret
, *out_len
);
1545 // Converts an integer(with 32 bits) variable to binary
1547 const char* convert_toINT(int64_t context
, int32_t value
, int32_t* out_len
) {
1548 *out_len
= sizeof(value
);
1549 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1551 if (ret
== nullptr) {
1552 gdv_fn_context_set_error_msg(context
,
1553 "Could not allocate memory for the output string");
1559 memcpy(ret
, &value
, *out_len
);
1565 const char* convert_toINT_be(int64_t context
, int32_t value
, int32_t* out_len
) {
1566 // The function behaves like convert_toINT, but always return the result
1567 // in big endian format
1568 char* ret
= const_cast<char*>(convert_toINT(context
, value
, out_len
));
1570 #if ARROW_LITTLE_ENDIAN
1571 reverse_char_buf(ret
, *out_len
);
1577 // Converts a boolean variable to binary
1579 const char* convert_toBOOLEAN(int64_t context
, bool value
, int32_t* out_len
) {
1580 *out_len
= sizeof(value
);
1581 char* ret
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1583 if (ret
== nullptr) {
1584 gdv_fn_context_set_error_msg(context
,
1585 "Could not allocate memory for the output string");
1591 memcpy(ret
, &value
, *out_len
);
1596 // Converts a time variable to binary
1598 const char* convert_toTIME_EPOCH(int64_t context
, int32_t value
, int32_t* out_len
) {
1599 return convert_toINT(context
, value
, out_len
);
1603 const char* convert_toTIME_EPOCH_be(int64_t context
, int32_t value
, int32_t* out_len
) {
1604 // The function behaves as convert_toTIME_EPOCH, but
1605 // returns the bytes in big endian format
1606 return convert_toINT_be(context
, value
, out_len
);
1609 // Converts a timestamp variable to binary
1611 const char* convert_toTIMESTAMP_EPOCH(int64_t context
, int64_t timestamp
,
1613 return convert_toBIGINT(context
, timestamp
, out_len
);
1617 const char* convert_toTIMESTAMP_EPOCH_be(int64_t context
, int64_t timestamp
,
1619 // The function behaves as convert_toTIMESTAMP_EPOCH, but
1620 // returns the bytes in big endian format
1621 return convert_toBIGINT_be(context
, timestamp
, out_len
);
1624 // Converts a date variable to binary
1626 const char* convert_toDATE_EPOCH(int64_t context
, int64_t date
, int32_t* out_len
) {
1627 return convert_toBIGINT(context
, date
, out_len
);
1631 const char* convert_toDATE_EPOCH_be(int64_t context
, int64_t date
, int32_t* out_len
) {
1632 // The function behaves as convert_toDATE_EPOCH, but
1633 // returns the bytes in big endian format
1634 return convert_toBIGINT_be(context
, date
, out_len
);
// Converts a string variable to binary. No bytes are copied: the input
// pointer is returned as-is and `out_len` is set to `value_len`. `context` is
// not used.
const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
                           int32_t* out_len) {
  out_len[0] = value_len;
  return value;
}
1645 // Search for a string within another string
1646 // Same as "locate(substr, str)", except for the reverse order of the arguments.
1648 gdv_int32
strpos_utf8_utf8(gdv_int64 context
, const char* str
, gdv_int32 str_len
,
1649 const char* sub_str
, gdv_int32 sub_str_len
) {
1650 return locate_utf8_utf8_int32(context
, sub_str
, sub_str_len
, str
, str_len
, 1);
1653 // Search for a string within another string
1655 gdv_int32
locate_utf8_utf8(gdv_int64 context
, const char* sub_str
, gdv_int32 sub_str_len
,
1656 const char* str
, gdv_int32 str_len
) {
1657 return locate_utf8_utf8_int32(context
, sub_str
, sub_str_len
, str
, str_len
, 1);
1660 // Search for a string within another string starting at position start-pos (1-indexed)
1662 gdv_int32
locate_utf8_utf8_int32(gdv_int64 context
, const char* sub_str
,
1663 gdv_int32 sub_str_len
, const char* str
,
1664 gdv_int32 str_len
, gdv_int32 start_pos
) {
1665 if (start_pos
< 1) {
1666 gdv_fn_context_set_error_msg(context
, "Start position must be greater than 0");
1670 if (str_len
== 0 || sub_str_len
== 0) {
1674 gdv_int32 byte_pos
= utf8_byte_pos(context
, str
, str_len
, start_pos
- 1);
1675 if (byte_pos
< 0 || byte_pos
>= str_len
) {
1678 for (gdv_int32 i
= byte_pos
; i
<= str_len
- sub_str_len
; ++i
) {
1679 if (memcmp(str
+ i
, sub_str
, sub_str_len
) == 0) {
1680 return utf8_length(context
, str
, i
) + 1;
1687 const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context
, const char* text
,
1688 gdv_int32 text_len
, const char* from_str
,
1689 gdv_int32 from_str_len
,
1690 const char* to_str
, gdv_int32 to_str_len
,
1691 gdv_int32 max_length
,
1692 gdv_int32
* out_len
) {
1693 // if from_str is empty or its length exceeds that of original string,
1694 // return the original string
1695 if (from_str_len
<= 0 || from_str_len
> text_len
) {
1696 *out_len
= text_len
;
1701 gdv_int32 text_index
= 0;
1703 gdv_int32 out_index
= 0;
1704 gdv_int32 last_match_index
=
1705 0; // defer copying string from last_match_index till next match is found
1707 for (; text_index
<= text_len
- from_str_len
;) {
1708 if (memcmp(text
+ text_index
, from_str
, from_str_len
) == 0) {
1709 if (out_index
+ text_index
- last_match_index
+ to_str_len
> max_length
) {
1710 gdv_fn_context_set_error_msg(context
, "Buffer overflow for output string");
1715 // found match for first time
1716 out
= reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, max_length
));
1717 if (out
== nullptr) {
1718 gdv_fn_context_set_error_msg(context
,
1719 "Could not allocate memory for output string");
1725 // first copy the part deferred till now
1726 memcpy(out
+ out_index
, text
+ last_match_index
, (text_index
- last_match_index
));
1727 out_index
+= text_index
- last_match_index
;
1728 // then copy the target string
1729 memcpy(out
+ out_index
, to_str
, to_str_len
);
1730 out_index
+= to_str_len
;
1732 text_index
+= from_str_len
;
1733 last_match_index
= text_index
;
1740 *out_len
= text_len
;
1744 if (out_index
+ text_len
- last_match_index
> max_length
) {
1745 gdv_fn_context_set_error_msg(context
, "Buffer overflow for output string");
1749 memcpy(out
+ out_index
, text
+ last_match_index
, text_len
- last_match_index
);
1750 out_index
+= text_len
- last_match_index
;
1751 *out_len
= out_index
;
1756 const char* replace_utf8_utf8_utf8(gdv_int64 context
, const char* text
,
1757 gdv_int32 text_len
, const char* from_str
,
1758 gdv_int32 from_str_len
, const char* to_str
,
1759 gdv_int32 to_str_len
, gdv_int32
* out_len
) {
1760 return replace_with_max_len_utf8_utf8_utf8(context
, text
, text_len
, from_str
,
1761 from_str_len
, to_str
, to_str_len
, 65535,
1766 const char* lpad_utf8_int32_utf8(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1767 gdv_int32 return_length
, const char* fill_text
,
1768 gdv_int32 fill_text_len
, gdv_int32
* out_len
) {
1769 // if the text length or the defined return length (number of characters to return)
1770 // is <=0, then return an empty string.
1771 if (text_len
== 0 || return_length
<= 0) {
1776 // count the number of utf8 characters on text, ignoring invalid bytes
1777 int text_char_count
= utf8_length_ignore_invalid(text
, text_len
);
1779 if (return_length
== text_char_count
||
1780 (return_length
> text_char_count
&& fill_text_len
== 0)) {
1781 // case where the return length is same as the text's length, or if it need to
1782 // fill into text but "fill_text" is empty, then return text directly.
1783 *out_len
= text_len
;
1785 } else if (return_length
< text_char_count
) {
1786 // case where it truncates the result on return length.
1787 *out_len
= utf8_byte_pos(context
, text
, text_len
, return_length
);
1790 // case (return_length > text_char_count)
1791 // case where it needs to copy "fill_text" on the string left. The total number
1792 // of chars to copy is given by (return_length - text_char_count)
1794 reinterpret_cast<gdv_binary
>(gdv_fn_context_arena_malloc(context
, return_length
));
1795 if (ret
== nullptr) {
1796 gdv_fn_context_set_error_msg(context
,
1797 "Could not allocate memory for output string");
1801 // try to fulfill the return string with the "fill_text" continuously
1802 int32_t copied_chars_count
= 0;
1803 int32_t copied_chars_position
= 0;
1804 while (copied_chars_count
< return_length
- text_char_count
) {
1807 // for each char, evaluate its length to consider it when mem copying
1808 for (fill_index
= 0; fill_index
< fill_text_len
; fill_index
+= char_len
) {
1809 if (copied_chars_count
>= return_length
- text_char_count
) {
1812 char_len
= utf8_char_length(fill_text
[fill_index
]);
1813 // ignore invalid char on the fill text, considering it as size 1
1814 if (char_len
== 0) char_len
+= 1;
1815 copied_chars_count
++;
1817 memcpy(ret
+ copied_chars_position
, fill_text
, fill_index
);
1818 copied_chars_position
+= fill_index
;
1820 // after fulfilling the text, copy the main string
1821 memcpy(ret
+ copied_chars_position
, text
, text_len
);
1822 *out_len
= copied_chars_position
+ text_len
;
1828 const char* rpad_utf8_int32_utf8(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1829 gdv_int32 return_length
, const char* fill_text
,
1830 gdv_int32 fill_text_len
, gdv_int32
* out_len
) {
1831 // if the text length or the defined return length (number of characters to return)
1832 // is <=0, then return an empty string.
1833 if (text_len
== 0 || return_length
<= 0) {
1838 // count the number of utf8 characters on text, ignoring invalid bytes
1839 int text_char_count
= utf8_length_ignore_invalid(text
, text_len
);
1841 if (return_length
== text_char_count
||
1842 (return_length
> text_char_count
&& fill_text_len
== 0)) {
1843 // case where the return length is same as the text's length, or if it need to
1844 // fill into text but "fill_text" is empty, then return text directly.
1845 *out_len
= text_len
;
1847 } else if (return_length
< text_char_count
) {
1848 // case where it truncates the result on return length.
1849 *out_len
= utf8_byte_pos(context
, text
, text_len
, return_length
);
1852 // case (return_length > text_char_count)
1853 // case where it needs to copy "fill_text" on the string right
1855 reinterpret_cast<gdv_binary
>(gdv_fn_context_arena_malloc(context
, return_length
));
1856 if (ret
== nullptr) {
1857 gdv_fn_context_set_error_msg(context
,
1858 "Could not allocate memory for output string");
1862 // fulfill the initial text copying the main input string
1863 memcpy(ret
, text
, text_len
);
1864 // try to fulfill the return string with the "fill_text" continuously
1865 int32_t copied_chars_count
= 0;
1866 int32_t copied_chars_position
= 0;
1867 while (text_char_count
+ copied_chars_count
< return_length
) {
1869 int32_t fill_length
;
1870 // for each char, evaluate its length to consider it when mem copying
1871 for (fill_length
= 0; fill_length
< fill_text_len
; fill_length
+= char_len
) {
1872 if (text_char_count
+ copied_chars_count
>= return_length
) {
1875 char_len
= utf8_char_length(fill_text
[fill_length
]);
1876 // ignore invalid char on the fill text, considering it as size 1
1877 if (char_len
== 0) char_len
+= 1;
1878 copied_chars_count
++;
1880 memcpy(ret
+ text_len
+ copied_chars_position
, fill_text
, fill_length
);
1881 copied_chars_position
+= fill_length
;
1883 *out_len
= copied_chars_position
+ text_len
;
1889 const char* lpad_utf8_int32(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1890 gdv_int32 return_length
, gdv_int32
* out_len
) {
1891 return lpad_utf8_int32_utf8(context
, text
, text_len
, return_length
, " ", 1, out_len
);
1895 const char* rpad_utf8_int32(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1896 gdv_int32 return_length
, gdv_int32
* out_len
) {
1897 return rpad_utf8_int32_utf8(context
, text
, text_len
, return_length
, " ", 1, out_len
);
1901 const char* split_part(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1902 const char* delimiter
, gdv_int32 delim_len
, gdv_int32 index
,
1903 gdv_int32
* out_len
) {
1906 char error_message
[100];
1907 snprintf(error_message
, sizeof(error_message
),
1908 "Index in split_part must be positive, value provided was %d", index
);
1909 gdv_fn_context_set_error_msg(context
, error_message
);
1913 if (delim_len
== 0 || text_len
== 0) {
1914 // output will just be text if no delimiter is provided
1915 *out_len
= text_len
;
1919 int i
= 0, match_no
= 1;
1921 while (i
< text_len
) {
1922 // find the position where delimiter matched for the first time
1923 int match_pos
= match_string(text
, text_len
, i
, delimiter
, delim_len
);
1924 if (match_pos
== -1 && match_no
!= index
) {
1925 // reached the end without finding a match.
1928 // Found a match. If the match number is index then return this match
1929 if (match_no
== index
) {
1930 int end_pos
= match_pos
- delim_len
;
1932 if (match_pos
== -1) {
1933 // end position should be last position of the string as we have the last
1938 *out_len
= end_pos
- i
;
1940 reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context
, *out_len
));
1941 if (out_str
== nullptr) {
1942 gdv_fn_context_set_error_msg(context
,
1943 "Could not allocate memory for output string");
1947 memcpy(out_str
, text
+ i
, *out_len
);
1959 // Returns the x leftmost characters of a given string. Cases:
1960 // LEFT("TestString", 10) => "TestString"
1961 // LEFT("TestString", 3) => "Tes"
1962 // LEFT("TestString", -3) => "TestStr"
1964 const char* left_utf8_int32(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
1965 gdv_int32 number
, gdv_int32
* out_len
) {
1966 // returns the 'number' left most characters of a given text
1967 if (text_len
== 0 || number
== 0) {
1972 // iterate over the utf8 string validating each character
1976 for (int i
= 0; i
< text_len
; i
+= char_len
) {
1977 char_len
= utf8_char_length(text
[i
]);
1978 if (char_len
== 0 || i
+ char_len
> text_len
) { // invalid byte or incomplete glyph
1979 set_error_for_invalid_utf(context
, text
[i
]);
1983 for (int j
= 1; j
< char_len
; ++j
) {
1984 if ((text
[i
+ j
] & 0xC0) != 0x80) { // bytes following head-byte of glyph
1985 set_error_for_invalid_utf(context
, text
[i
+ j
]);
1990 byte_index
+= char_len
;
1992 // Define the rules to stop the iteration over the string
1993 // case where left('abc', 5) -> 'abc'
1994 if (number
> 0 && char_count
== number
) break;
1995 // case where left('abc', -5) ==> ''
1996 if (number
< 0 && char_count
== number
+ text_len
) break;
1999 *out_len
= byte_index
;
2003 // Returns the x rightmost characters of a given string. Cases:
2004 // RIGHT("TestString", 10) => "TestString"
2005 // RIGHT("TestString", 3) => "ing"
2006 // RIGHT("TestString", -3) => "tString"
2008 const char* right_utf8_int32(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
2009 gdv_int32 number
, gdv_int32
* out_len
) {
2010 // returns the 'number' left most characters of a given text
2011 if (text_len
== 0 || number
== 0) {
2016 // initially counts the number of utf8 characters in the defined text
2017 int32_t char_count
= utf8_length(context
, text
, text_len
);
2018 // char_count is zero if input has invalid utf8 char
2019 if (char_count
== 0) {
2024 int32_t start_char_pos
; // the char result start position (inclusive)
2025 int32_t end_char_len
; // the char result end position (inclusive)
2027 // case where right('abc', 5) ==> 'abc' start_char_pos=1.
2028 start_char_pos
= (char_count
> number
) ? char_count
- number
: 0;
2029 end_char_len
= char_count
- start_char_pos
;
2031 start_char_pos
= number
* -1;
2032 end_char_len
= char_count
- start_char_pos
;
2035 // calculate the start byte position and the output length
2036 int32_t start_byte_pos
= utf8_byte_pos(context
, text
, text_len
, start_char_pos
);
2037 *out_len
= utf8_byte_pos(context
, text
, text_len
, end_char_len
);
2039 // try to allocate memory for the response
2041 reinterpret_cast<gdv_binary
>(gdv_fn_context_arena_malloc(context
, *out_len
));
2042 if (ret
== nullptr) {
2043 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
2047 memcpy(ret
, text
+ start_byte_pos
, *out_len
);
2052 const char* binary_string(gdv_int64 context
, const char* text
, gdv_int32 text_len
,
2053 gdv_int32
* out_len
) {
2055 reinterpret_cast<gdv_binary
>(gdv_fn_context_arena_malloc(context
, text_len
));
2057 if (ret
== nullptr) {
2058 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
2063 if (text_len
== 0) {
2068 // converting hex encoded string to normal string
2070 for (int i
= 0; i
< text_len
; i
++, j
++) {
2071 if (text
[i
] == '\\' && i
+ 3 < text_len
&&
2072 (text
[i
+ 1] == 'x' || text
[i
+ 1] == 'X')) {
2073 char hd1
= text
[i
+ 2];
2074 char hd2
= text
[i
+ 3];
2075 if (isxdigit(hd1
) && isxdigit(hd2
)) {
2077 ret
[j
] = to_binary_from_hex(hd1
) * 16 + to_binary_from_hex(hd2
);
2090 #define CAST_INT_BIGINT_VARBINARY(OUT_TYPE, TYPE_NAME) \
2093 cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in, int32_t in_len) { \
2094 if (in_len == 0) { \
2095 gdv_fn_context_set_error_msg(context, "Can't cast an empty string."); \
2098 char sign = in[0]; \
2100 bool negative = false; \
2101 if (sign == '-') { \
2103 /* Ignores the sign char in the hexadecimal string */ \
2108 if (negative && in_len == 0) { \
2109 gdv_fn_context_set_error_msg(context, \
2110 "Can't cast hexadecimal with only a minus sign."); \
2114 OUT_TYPE result = 0; \
2117 int read_index = 0; \
2118 while (read_index < in_len) { \
2119 char c1 = in[read_index]; \
2120 if (isxdigit(c1)) { \
2121 digit = to_binary_from_hex(c1); \
2123 OUT_TYPE next = result * 16 - digit; \
2125 if (next > result) { \
2126 gdv_fn_context_set_error_msg(context, "Integer overflow."); \
2132 gdv_fn_context_set_error_msg(context, \
2133 "The hexadecimal given has invalid characters."); \
2141 gdv_fn_context_set_error_msg(context, "Integer overflow."); \
2148 CAST_INT_BIGINT_VARBINARY(int32_t, INT
)
2149 CAST_INT_BIGINT_VARBINARY(int64_t, BIGINT
)
2151 #undef CAST_INT_BIGINT_VARBINARY
2153 // Produces the binary representation of a string y characters long derived by starting
2154 // at offset 'x' and considering the defined length 'y'. Notice that the offset index
2155 // may be a negative number (starting from the end of the string), or a positive number
2156 // starting on index 1. Cases:
2157 // BYTE_SUBSTR("TestString", 1, 10) => "TestString"
2158 // BYTE_SUBSTR("TestString", 5, 10) => "String"
2159 // BYTE_SUBSTR("TestString", -6, 10) => "String"
2160 // BYTE_SUBSTR("TestString", -600, 10) => "TestString"
2162 const char* byte_substr_binary_int32_int32(gdv_int64 context
, const char* text
,
2163 gdv_int32 text_len
, gdv_int32 offset
,
2164 gdv_int32 length
, gdv_int32
* out_len
) {
2165 // the first offset position for a string is 1, so not consider offset == 0
2166 // also, the length should be always a positive number
2167 if (text_len
== 0 || offset
== 0 || length
<= 0) {
2173 reinterpret_cast<gdv_binary
>(gdv_fn_context_arena_malloc(context
, text_len
));
2175 if (ret
== nullptr) {
2176 gdv_fn_context_set_error_msg(context
, "Could not allocate memory for output string");
2181 int32_t startPos
= 0;
2183 startPos
= offset
- 1;
2184 } else if (text_len
+ offset
>= 0) {
2185 startPos
= text_len
+ offset
;
2188 // calculate end position from length and truncate to upper value bounds
2189 if (startPos
+ length
> text_len
) {
2190 *out_len
= text_len
- startPos
;
2195 memcpy(ret
, text
+ startPos
, *out_len
);