]>
git.proxmox.com Git - libgit2.git/blob - src/utf8.c
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
/*
 * git_utf8_iterate is taken from the utf8proc project,
 * http://www.public-software-group.org/utf8proc
 *
 * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/*
 * Lookup table indexed by the first byte of a UTF-8 sequence.
 *
 * Each entry is the total length in bytes (1-4) of the sequence that the
 * byte introduces, or 0 when the byte cannot start a sequence:
 *
 *   0x00-0x7F  -> 1  (single-byte / ASCII)
 *   0x80-0xBF  -> 0  (continuation bytes, never valid as a lead byte)
 *   0xC0-0xDF  -> 2
 *   0xE0-0xEF  -> 3
 *   0xF0-0xF7  -> 4
 *   0xF8-0xFF  -> 0  (not a valid lead byte)
 *
 * The classification looks only at the lead byte's bit pattern, so it
 * still reports a length for lead bytes such as 0xC0, 0xC1 and 0xF5-0xF7;
 * the table alone does not validate the decoded code point.
 */
static const uint8_t utf8proc_utf8class[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
};
56 static int utf8_charlen(const uint8_t *str
, size_t str_len
)
61 length
= utf8proc_utf8class
[str
[0]];
65 if (str_len
> 0 && length
> str_len
)
68 for (i
= 1; i
< length
; i
++) {
69 if ((str
[i
] & 0xC0) != 0x80)
76 int git_utf8_iterate(uint32_t *out
, const char *_str
, size_t str_len
)
78 const uint8_t *str
= (const uint8_t *)_str
;
84 if ((length
= utf8_charlen(str
, str_len
)) < 0)
92 uc
= ((str
[0] & 0x1F) << 6) + (str
[1] & 0x3F);
93 if (uc
< 0x80) uc
= -1;
96 uc
= ((str
[0] & 0x0F) << 12) + ((str
[1] & 0x3F) << 6)
98 if (uc
< 0x800 || (uc
>= 0xD800 && uc
< 0xE000) ||
99 (uc
>= 0xFDD0 && uc
< 0xFDF0)) uc
= -1;
102 uc
= ((str
[0] & 0x07) << 18) + ((str
[1] & 0x3F) << 12)
103 + ((str
[2] & 0x3F) << 6) + (str
[3] & 0x3F);
104 if (uc
< 0x10000 || uc
>= 0x110000) uc
= -1;
110 if ((uc
& 0xFFFF) >= 0xFFFE)
117 size_t git_utf8_char_length(const char *_str
, size_t str_len
)
119 const uint8_t *str
= (const uint8_t *)_str
;
120 size_t offset
= 0, count
= 0;
122 while (offset
< str_len
) {
123 int length
= utf8_charlen(str
+ offset
, str_len
- offset
);
135 size_t git_utf8_valid_buf_length(const char *_str
, size_t str_len
)
137 const uint8_t *str
= (const uint8_t *)_str
;
140 while (offset
< str_len
) {
141 int length
= utf8_charlen(str
+ offset
, str_len
- offset
);