]> git.proxmox.com Git - mirror_qemu.git/blobdiff - util/unicode.c
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
[mirror_qemu.git] / util / unicode.c
index a812a351710b51960bbda234c22d25abf967f8d8..8580bc598b3391f80854b0b0971470d62638b26e 100644 (file)
 #include "qemu/osdep.h"
 #include "qemu/unicode.h"
 
+/*
+ * Decide whether @codepoint may be encoded or accepted.
+ *
+ * Rejected are values outside 0..0x10FFFF (negative ones included),
+ * the surrogate range 0xD800..0xDFFF, and the noncharacters:
+ * 0xFDD0..0xFDEF plus every value whose low 16 bits are 0xFFFE or
+ * 0xFFFF.
+ *
+ * Returns: true when @codepoint passes all of these checks.
+ */
+static bool is_valid_codepoint(int codepoint)
+{
+    bool surrogate, noncharacter;
+
+    if (codepoint < 0 || codepoint > 0x10FFFF) {
+        return false;            /* outside the Unicode range */
+    }
+
+    surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
+    /* Masking off bit 0 makes one compare cover both xxFFFE and xxFFFF */
+    noncharacter = (codepoint >= 0xFDD0 && codepoint <= 0xFDEF)
+        || (codepoint & 0xFFFE) == 0xFFFE;
+
+    return !surrogate && !noncharacter;
+}
+
 /**
  * mod_utf8_codepoint:
  * @s: string encoded in modified UTF-8
@@ -83,13 +98,8 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end)
             cp <<= 6;
             cp |= byte & 0x3F;
         }
-        if (cp > 0x10FFFF) {
-            cp = -1;            /* beyond Unicode range */
-        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
-                   || (cp & 0xFFFE) == 0xFFFE) {
-            cp = -1;            /* noncharacter */
-        } else if (cp >= 0xD800 && cp <= 0xDFFF) {
-            cp = -1;            /* surrogate code point */
+        if (!is_valid_codepoint(cp)) {
+            cp = -1;
         } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
             cp = -1;            /* overlong, not \xC0\x80 */
         }
@@ -99,3 +109,48 @@ out:
     *end = (char *)p;
     return cp;
 }
+
+/**
+ * mod_utf8_encode:
+ * @buf: Destination buffer
+ * @bufsz: size of @buf, at least 5 (checked with assert()).
+ * @codepoint: Unicode codepoint to encode
+ *
+ * Encode @codepoint as modified UTF-8 into @buf and store a
+ * terminating NUL byte right after the sequence.  Codepoint 0 is
+ * written as the two-byte sequence \xC0\x80.  Nothing is written to
+ * @buf when @codepoint is rejected.
+ *
+ * Returns: the length of the UTF-8 sequence (1 to 4, terminator not
+ * counted) on success, -1 when @codepoint is invalid.
+ */
+ssize_t mod_utf8_encode(char buf[], size_t bufsz, int codepoint)
+{
+    /* Lead byte tag, indexed by sequence length; slots 0 and 1 unused */
+    static const unsigned char lead_tag[] = { 0, 0, 0xC0, 0xE0, 0xF0 };
+    int len, i;
+
+    assert(bufsz >= 5);
+
+    if (!is_valid_codepoint(codepoint)) {
+        return -1;
+    }
+
+    if (codepoint > 0 && codepoint <= 0x7F) {
+        len = 1;
+    } else if (codepoint <= 0x7FF) {
+        len = 2;                /* also covers 0, giving \xC0\x80 */
+    } else if (codepoint <= 0xFFFF) {
+        len = 3;
+    } else {
+        len = 4;
+    }
+
+    if (len == 1) {
+        buf[0] = codepoint;
+    } else {
+        /* Emit continuation bytes back to front, six bits at a time */
+        for (i = len - 1; i > 0; i--) {
+            buf[i] = 0x80 | (codepoint & 0x3F);
+            codepoint >>= 6;
+        }
+        /* What is left of @codepoint fits the lead byte's payload bits */
+        buf[0] = lead_tag[len] | codepoint;
+    }
+    buf[len] = 0;
+    return len;
+}