]>
git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - ubuntu/vbox/vboxsf/include/iprt/uni.h
2 * IPRT - Unicode Code Points.
6 * Copyright (C) 2006-2017 Oracle Corporation
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
29 /** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
34 /** @def RTUNI_USE_WCTYPE
35 * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the
36 * data which the C runtime library provides. */
37 #ifdef DOXYGEN_RUNNING
38 # define RTUNI_USE_WCTYPE
41 #include <iprt/types.h>
42 #ifdef RTUNI_USE_WCTYPE
49 #ifndef RTUNI_USE_WCTYPE
52 * A unicode flags range.
55 typedef struct RTUNIFLAGSRANGE
57 /** The first code point of the range. */
59 /** The last + 1 code point of the range. */
61 /** Pointer to the array of flags (RTUNI_*), one entry per code point in the range. */
62 const uint8_t *pafFlags
;
64 /** Pointer to a flags range.
66 typedef RTUNIFLAGSRANGE
*PRTUNIFLAGSRANGE
;
67 /** Pointer to a const flags range.
69 typedef const RTUNIFLAGSRANGE
*PCRTUNIFLAGSRANGE
;
72 * A unicode case folded range.
75 typedef struct RTUNICASERANGE
77 /** The first code point of the range. */
79 /** The last + 1 code point of the range. */
81 /** Pointer to the array of case folded code points. */
82 PCRTUNICP paFoldedCPs
;
84 /** Pointer to a case folded range.
86 typedef RTUNICASERANGE
*PRTUNICASERANGE
;
87 /** Pointer to a const case folded range.
89 typedef const RTUNICASERANGE
*PCRTUNICASERANGE
;
91 /** @name Unicode Code Point Flags.
94 #define RTUNI_UPPER RT_BIT(0)
95 #define RTUNI_LOWER RT_BIT(1)
96 #define RTUNI_ALPHA RT_BIT(2)
97 #define RTUNI_XDIGIT RT_BIT(3)
98 #define RTUNI_DDIGIT RT_BIT(4)
99 #define RTUNI_WSPACE RT_BIT(5)
100 /*#define RTUNI_BSPACE RT_BIT(6) - later */
101 /** When set, the code point requires further checking with respect to NFC
102 * and NFD normalization, i.e. it is set when either QC_NFD or QC_NFC is not Y. */
103 #define RTUNI_QC_NFX RT_BIT(7)
108 * Array of flags ranges.
111 extern RTDATADECL(const RTUNIFLAGSRANGE
) g_aRTUniFlagsRanges
[];
114 * Gets the flags for a unicode code point.
116 * @returns The flag mask. (RTUNI_*)
117 * @param CodePoint The unicode code point.
120 DECLINLINE(RTUNICP
) rtUniCpFlags(RTUNICP CodePoint
)
122 PCRTUNIFLAGSRANGE pCur
= &g_aRTUniFlagsRanges
[0];
125 if (pCur
->EndCP
> CodePoint
)
127 if (pCur
->BeginCP
<= CodePoint
)
128 return pCur
->pafFlags
[CodePoint
- pCur
->BeginCP
];
132 } while (pCur
->EndCP
!= RTUNICP_MAX
);
138 * Checks if a unicode code point is upper case.
140 * @returns true if it is.
141 * @returns false if it isn't.
142 * @param CodePoint The code point.
144 DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint
)
146 return (rtUniCpFlags(CodePoint
) & RTUNI_UPPER
) != 0;
151 * Checks if a unicode code point is lower case.
153 * @returns true if it is.
154 * @returns false if it isn't.
155 * @param CodePoint The code point.
157 DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint
)
159 return (rtUniCpFlags(CodePoint
) & RTUNI_LOWER
) != 0;
164 * Checks if a unicode code point is case foldable.
166 * @returns true if it is.
167 * @returns false if it isn't.
168 * @param CodePoint The code point.
170 DECLINLINE(bool) RTUniCpIsFoldable(RTUNICP CodePoint
)
173 return (rtUniCpFlags(CodePoint
) & (RTUNI_LOWER
| RTUNI_UPPER
)) != 0;
178 * Checks if a unicode code point is alphabetic.
180 * @returns true if it is.
181 * @returns false if it isn't.
182 * @param CodePoint The code point.
184 DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint
)
186 return (rtUniCpFlags(CodePoint
) & RTUNI_ALPHA
) != 0;
191 * Checks if a unicode code point is a decimal digit.
193 * @returns true if it is.
194 * @returns false if it isn't.
195 * @param CodePoint The code point.
197 DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint
)
199 return (rtUniCpFlags(CodePoint
) & RTUNI_DDIGIT
) != 0;
204 * Checks if a unicode code point is a hexadecimal digit.
206 * @returns true if it is.
207 * @returns false if it isn't.
208 * @param CodePoint The code point.
210 DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint
)
212 return (rtUniCpFlags(CodePoint
) & RTUNI_XDIGIT
) != 0;
217 * Checks if a unicode code point is white space.
219 * @returns true if it is.
220 * @returns false if it isn't.
221 * @param CodePoint The code point.
223 DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint
)
225 return (rtUniCpFlags(CodePoint
) & RTUNI_WSPACE
) != 0;
231 * Array of uppercase ranges.
234 extern RTDATADECL(const RTUNICASERANGE
) g_aRTUniUpperRanges
[];
237 * Array of lowercase ranges.
240 extern RTDATADECL(const RTUNICASERANGE
) g_aRTUniLowerRanges
[];
244 * Folds a unicode code point using the specified range array.
246 * @returns Folded code point.
247 * @param CodePoint The unicode code point to fold.
248 * @param pCur The case folding range to use.
250 DECLINLINE(RTUNICP
) rtUniCpFold(RTUNICP CodePoint
, PCRTUNICASERANGE pCur
)
254 if (pCur
->EndCP
> CodePoint
)
256 if (pCur
->BeginCP
<= CodePoint
)
257 CodePoint
= pCur
->paFoldedCPs
[CodePoint
- pCur
->BeginCP
];
261 } while (pCur
->EndCP
!= RTUNICP_MAX
);
267 * Folds a unicode code point to upper case.
269 * @returns Folded code point.
270 * @param CodePoint The unicode code point to fold.
272 DECLINLINE(RTUNICP
) RTUniCpToUpper(RTUNICP CodePoint
)
274 return rtUniCpFold(CodePoint
, &g_aRTUniUpperRanges
[0]);
279 * Folds a unicode code point to lower case.
281 * @returns Folded code point.
282 * @param CodePoint The unicode code point to fold.
284 DECLINLINE(RTUNICP
) RTUniCpToLower(RTUNICP CodePoint
)
286 return rtUniCpFold(CodePoint
, &g_aRTUniLowerRanges
[0]);
290 #else /* RTUNI_USE_WCTYPE */
294 * Checks if a unicode code point is upper case.
296 * @returns true if it is.
297 * @returns false if it isn't.
298 * @param CodePoint The code point.
300 DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint
)
302 return !!iswupper(CodePoint
);
307 * Checks if a unicode code point is lower case.
309 * @returns true if it is.
310 * @returns false if it isn't.
311 * @param CodePoint The code point.
313 DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint
)
315 return !!iswlower(CodePoint
);
320 * Checks if a unicode code point is case foldable.
322 * @returns true if it is.
323 * @returns false if it isn't.
324 * @param CodePoint The code point.
326 DECLINLINE(bool) RTUniCpIsFoldable(RTUNICP CodePoint
)
329 return iswupper(CodePoint
) || iswlower(CodePoint
);
334 * Checks if a unicode code point is alphabetic.
336 * @returns true if it is.
337 * @returns false if it isn't.
338 * @param CodePoint The code point.
340 DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint
)
342 return !!iswalpha(CodePoint
);
347 * Checks if a unicode code point is a decimal digit.
349 * @returns true if it is.
350 * @returns false if it isn't.
351 * @param CodePoint The code point.
353 DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint
)
355 return !!iswdigit(CodePoint
);
360 * Checks if a unicode code point is a hexadecimal digit.
362 * @returns true if it is.
363 * @returns false if it isn't.
364 * @param CodePoint The code point.
366 DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint
)
368 return !!iswxdigit(CodePoint
);
373 * Checks if a unicode code point is white space.
375 * @returns true if it is.
376 * @returns false if it isn't.
377 * @param CodePoint The code point.
379 DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint
)
381 return !!iswspace(CodePoint
);
386 * Folds a unicode code point to upper case.
388 * @returns Folded code point.
389 * @param CodePoint The unicode code point to fold.
391 DECLINLINE(RTUNICP
) RTUniCpToUpper(RTUNICP CodePoint
)
393 return towupper(CodePoint
);
398 * Folds a unicode code point to lower case.
400 * @returns Folded code point.
401 * @param CodePoint The unicode code point to fold.
403 DECLINLINE(RTUNICP
) RTUniCpToLower(RTUNICP CodePoint
)
405 return towlower(CodePoint
);
409 #endif /* RTUNI_USE_WCTYPE */
413 * Frees a unicode string.
415 * @param pusz The string to free.
417 RTDECL(void) RTUniFree(PRTUNICP pusz
);
421 * Checks if a code point is valid.
423 * Any code point (defined or not) within the 17 unicode planes (0 thru 16),
424 * except surrogates will be considered valid code points by this function.
426 * @returns true if in range, false if not.
427 * @param CodePoint The unicode code point to validate.
429 DECLINLINE(bool) RTUniCpIsValid(RTUNICP CodePoint
)
431 return CodePoint
<= 0x00d7ff
432 || ( CodePoint
<= 0x10ffff
433 && CodePoint
>= 0x00e000);
438 * Checks if the given code point is in the BMP range.
440 * Surrogates are not considered in the BMP range by this function.
442 * @returns true if in BMP, false if not.
443 * @param CodePoint The unicode code point to consider.
445 DECLINLINE(bool) RTUniCpIsBMP(RTUNICP CodePoint
)
447 return CodePoint
<= 0xd7ff
448 || ( CodePoint
<= 0xffff
449 && CodePoint
>= 0xe000);
454 * Calculates the length of the UTF-8 encoding of a unicode code point.
456 * @returns The length of the UTF-8 encoding of the code point.
457 * @param CodePoint The unicode code point to measure.
459 DECLINLINE(size_t) RTUniCpCalcUtf8Len(RTUNICP CodePoint
)
461 if (CodePoint
< 0x80)
464 + (CodePoint
>= 0x00000800)
465 + (CodePoint
>= 0x00010000)
466 + (CodePoint
>= 0x00200000)
467 + (CodePoint
>= 0x04000000)
468 + (CodePoint
>= 0x80000000) /* illegal */;