]> git.proxmox.com Git - libgit2.git/blob - src/utf8.h
Refresh patches
[libgit2.git] / src / utf8.h
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7 #ifndef INCLUDE_utf8_h__
8 #define INCLUDE_utf8_h__
9
10 #include "common.h"
11
12 /**
13 * Iterate through a UTF-8 string, yielding one codepoint at a time.
14 *
15 * @param out pointer where to store the current codepoint
16 * @param str current position in the string
17 * @param str_len size left in the string
18 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
19 */
20 extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len);
21
22 /**
23 * Returns the number of characters in the given string.
24 *
25 * This function also counts invalid codepoints: each byte that is
26 * not part of a valid UTF-8 codepoint is counted as one character
27 * toward the length in characters.
28 *
29 * In other words:
30 * 0x24 (U+0024 "$") has length 1
31 * 0xc2 0xa2 (U+00A2 "¢") has length 1
32 * 0x24 0xc2 0xa2 (U+0024 U+00A2 "$¢") has length 2
33 * 0xf0 0x90 0x8d 0x88 (U+10348 "𐍈") has length 1
34 * 0x24 0xc0 0xc1 0x34 (U+0024 <invalid> <invalid> "4") has length 4
35 *
36 * @param str string to scan
37 * @param str_len size of the string
38 * @return length in characters of the string
39 */
40 extern size_t git_utf8_char_length(const char *str, size_t str_len);
41
42 /**
43 * Iterates through a UTF-8 string and stops after finding any invalid UTF-8
44 * codepoint.
45 *
46 * @param str string to scan
47 * @param str_len size of the string
48 * @return length in bytes of the string that contains valid data
49 */
50 extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len);
51
52 #endif