]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/utf8.h
import ceph quincy 17.2.4
[ceph.git] / ceph / src / common / utf8.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_COMMON_UTF8_H
16 #define CEPH_COMMON_UTF8_H
17
18 #define MAX_UTF8_SZ 6 /* size, in bytes, that encode_utf8() assumes for its output buffer */
19 #define INVALID_UTF8_CHAR 0xfffffffful /* value decode_utf8() returns upon error */
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25 /* Checks whether the buffer 'buf' (with length 'len') is valid UTF-8.
26 * Returns 0 if it is; otherwise returns one plus the offset of the first
27 * invalid byte, so any nonzero result means the buffer is not valid UTF-8.
28 */
29 int check_utf8(const char *buf, int len);
30
31 /* Checks whether the null-terminated string 'buf' is valid UTF-8.
32 * Returns 0 if it is; otherwise returns one plus the offset of the first
33 * invalid byte, so any nonzero result means the string is not valid UTF-8.
34 */
35 int check_utf8_cstr(const char *buf);
36
37 /* Returns true if the character code 'ch' is a control character.
38 * Newline counts as a control character; the NUL character ('\0') does not.
39 */
40 int is_control_character(int ch);
41
42 /* Checks whether the buffer 'buf' (with length 'len') contains control characters.
43 * NOTE(review): the return convention is not stated here -- confirm against the implementation. */
44 int check_for_control_characters(const char *buf, int len);
45
46 /* Checks whether the null-terminated string 'buf' contains control characters.
47 * NOTE(review): the return convention is not stated here -- confirm against the implementation. */
48 int check_for_control_characters_cstr(const char *buf);
49
50 /* Encodes the 31-bit UTF-8 code point 'u' into 'buf'.
51 * Assumes 'buf' is of size MAX_UTF8_SZ.
52 * Returns -1 on failure; otherwise returns the number of bytes in the encoded value.
53 */
54 int encode_utf8(unsigned long u, unsigned char *buf);
55
56 /*
57 * Decodes one UTF-8 character from the byte array 'buf' and returns its character code.
58 * Upon error, returns INVALID_UTF8_CHAR.
59 * NOTE(review): 'nbytes' is presumably the number of bytes in 'buf', and 'buf' is presumably only read (it is not const-qualified) -- confirm. */
60 unsigned long decode_utf8(unsigned char *buf, int nbytes);
61
62 #ifdef __cplusplus
63 }
64 #endif
65
66 #endif