/* Retrieved from: git.proxmox.com Git - libgit2.git/blob - src/utf8.h */
/*
 * Copyright (C) the libgit2 contributors. All rights reserved.
 *
 * This file is part of libgit2, distributed under the GNU GPL v2 with
 * a Linking Exception. For full terms see the included COPYING file.
 */
7 #ifndef INCLUDE_utf8_h__
8 #define INCLUDE_utf8_h__
/**
 * Iterate through a UTF-8 string, yielding one codepoint at a time.
 *
 * @param out pointer where to store the current codepoint
 * @param str current position in the string
 * @param str_len size left in the string
 * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
 */
extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len);
/**
 * Returns the number of characters in the given string.
 *
 * This function will count invalid codepoints; if any given byte is
 * not part of a valid UTF-8 codepoint, then it will be counted toward
 * the length in characters.
 *
 * Examples:
 *   0x24 (U+0024 "$") has length 1
 *   0xc2 0xa2 (U+00A2 "¢") has length 1
 *   0x24 0xc2 0xa2 (U+0024 U+00A2 "$¢") has length 2
 *   0xf0 0x90 0x8d 0x88 (U+10348 "𐍈") has length 1
 *   0x24 0xc0 0xc1 0x34 (U+0024 <invalid> <invalid> U+0034 "4") has length 4
 *
 * @param str string to scan
 * @param str_len size of the string
 * @return length in characters of the string
 */
extern size_t git_utf8_char_length(const char *str, size_t str_len);
/**
 * Iterates through a UTF-8 string and stops after finding any invalid
 * UTF-8 data.
 *
 * @param str string to scan
 * @param str_len size of the string
 * @return length in bytes of the string that contains valid data
 */
extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len);