]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/Console/TerminalDxe/Vtutf8.c
MdeModulePkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / MdeModulePkg / Universal / Console / TerminalDxe / Vtutf8.c
1 /** @file
2 Implementation of translation upon VT-UTF8.
3
4 Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.<BR>
5 SPDX-License-Identifier: BSD-2-Clause-Patent
6
7 **/
8
9 #include "Terminal.h"
10
11 /**
12 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
13 and insert them into Unicode FIFO.
14
15 @param TerminalDevice The terminal device.
16
17 **/
18 VOID
19 VTUTF8RawDataToUnicode (
20 IN TERMINAL_DEV *TerminalDevice
21 )
22 {
23 UTF8_CHAR Utf8Char;
24 UINT8 ValidBytes;
25 UINT16 UnicodeChar;
26
27 ValidBytes = 0;
28 //
29 // pop the raw data out from the raw fifo,
30 // and translate it into unicode, then push
31 // the unicode into unicode fifo, until the raw fifo is empty.
32 //
33 while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
34
35 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
36
37 if (ValidBytes < 1 || ValidBytes > 3) {
38 continue;
39 }
40
41 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
42
43 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
44 }
45 }
46
47 /**
48 Get one valid VT-UTF8 characters set from Raw Data FIFO.
49
50 @param Utf8Device The terminal device.
51 @param Utf8Char Returned valid VT-UTF8 characters set.
52 @param ValidBytes The count of returned VT-VTF8 characters.
53 If ValidBytes is zero, no valid VT-UTF8 returned.
54
55 **/
56 VOID
57 GetOneValidUtf8Char (
58 IN TERMINAL_DEV *Utf8Device,
59 OUT UTF8_CHAR *Utf8Char,
60 OUT UINT8 *ValidBytes
61 )
62 {
63 UINT8 Temp;
64 UINT8 Index;
65 BOOLEAN FetchFlag;
66
67 Temp = 0;
68 Index = 0;
69 FetchFlag = TRUE;
70
71 //
72 // if no valid Utf8 char is found in the RawFiFo,
73 // then *ValidBytes will be zero.
74 //
75 *ValidBytes = 0;
76
77 while (!IsRawFiFoEmpty (Utf8Device)) {
78
79 RawFiFoRemoveOneKey (Utf8Device, &Temp);
80
81 switch (*ValidBytes) {
82
83 case 0:
84 if ((Temp & 0x80) == 0) {
85 //
86 // one-byte utf8 char
87 //
88 *ValidBytes = 1;
89
90 Utf8Char->Utf8_1 = Temp;
91
92 FetchFlag = FALSE;
93
94 } else if ((Temp & 0xe0) == 0xc0) {
95 //
96 // two-byte utf8 char
97 //
98 *ValidBytes = 2;
99
100 Utf8Char->Utf8_2[1] = Temp;
101
102 } else if ((Temp & 0xf0) == 0xe0) {
103 //
104 // three-byte utf8 char
105 //
106 *ValidBytes = 3;
107
108 Utf8Char->Utf8_3[2] = Temp;
109
110 Index++;
111
112 } else {
113 //
114 // reset *ValidBytes to zero, let valid utf8 char search restart
115 //
116 *ValidBytes = 0;
117 }
118
119 break;
120
121 case 2:
122 //
123 // two-byte utf8 char go on
124 //
125 if ((Temp & 0xc0) == 0x80) {
126
127 Utf8Char->Utf8_2[0] = Temp;
128
129 FetchFlag = FALSE;
130
131 } else {
132
133 *ValidBytes = 0;
134 }
135 break;
136
137 case 3:
138 //
139 // three-byte utf8 char go on
140 //
141 if ((Temp & 0xc0) == 0x80) {
142 if (Index == 1) {
143 Utf8Char->Utf8_3[1] = Temp;
144 Index++;
145 } else {
146 Utf8Char->Utf8_3[0] = Temp;
147 FetchFlag = FALSE;
148 }
149 } else {
150 //
151 // reset *ValidBytes and Index to zero, let valid utf8 char search restart
152 //
153 *ValidBytes = 0;
154 Index = 0;
155 }
156 break;
157
158 default:
159 break;
160 }
161
162 if (!FetchFlag) {
163 break;
164 }
165 }
166
167 return ;
168 }
169
170 /**
171 Translate VT-UTF8 characters into one Unicode character.
172
173 UTF8 Encoding Table
174 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
175 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
176 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
177 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
178
179
180 @param Utf8Char VT-UTF8 character set needs translating.
181 @param ValidBytes The count of valid VT-UTF8 characters.
182 @param UnicodeChar Returned unicode character.
183
184 **/
185 VOID
186 Utf8ToUnicode (
187 IN UTF8_CHAR Utf8Char,
188 IN UINT8 ValidBytes,
189 OUT CHAR16 *UnicodeChar
190 )
191 {
192 UINT8 UnicodeByte0;
193 UINT8 UnicodeByte1;
194 UINT8 Byte0;
195 UINT8 Byte1;
196 UINT8 Byte2;
197
198 *UnicodeChar = 0;
199
200 //
201 // translate utf8 code to unicode, in terminal standard,
202 // up to 3 bytes utf8 code is supported.
203 //
204 switch (ValidBytes) {
205 case 1:
206 //
207 // one-byte utf8 code
208 //
209 *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
210 break;
211
212 case 2:
213 //
214 // two-byte utf8 code
215 //
216 Byte0 = Utf8Char.Utf8_2[0];
217 Byte1 = Utf8Char.Utf8_2[1];
218
219 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
220 UnicodeByte1 = (UINT8) ((Byte1 >> 2) & 0x07);
221 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
222 break;
223
224 case 3:
225 //
226 // three-byte utf8 code
227 //
228 Byte0 = Utf8Char.Utf8_3[0];
229 Byte1 = Utf8Char.Utf8_3[1];
230 Byte2 = Utf8Char.Utf8_3[2];
231
232 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
233 UnicodeByte1 = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
234 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
235
236 default:
237 break;
238 }
239
240 return ;
241 }
242
243 /**
244 Translate one Unicode character into VT-UTF8 characters.
245
246 UTF8 Encoding Table
247 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
248 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
249 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
250 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
251
252
253 @param Unicode Unicode character need translating.
254 @param Utf8Char Return VT-UTF8 character set.
255 @param ValidBytes The count of valid VT-UTF8 characters. If
256 ValidBytes is zero, no valid VT-UTF8 returned.
257
258 **/
259 VOID
260 UnicodeToUtf8 (
261 IN CHAR16 Unicode,
262 OUT UTF8_CHAR *Utf8Char,
263 OUT UINT8 *ValidBytes
264 )
265 {
266 UINT8 UnicodeByte0;
267 UINT8 UnicodeByte1;
268 //
269 // translate unicode to utf8 code
270 //
271 UnicodeByte0 = (UINT8) Unicode;
272 UnicodeByte1 = (UINT8) (Unicode >> 8);
273
274 if (Unicode < 0x0080) {
275
276 Utf8Char->Utf8_1 = (UINT8) (UnicodeByte0 & 0x7f);
277 *ValidBytes = 1;
278
279 } else if (Unicode < 0x0800) {
280 //
281 // byte sequence: high -> low
282 // Utf8_2[0], Utf8_2[1]
283 //
284 Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
285 Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
286
287 *ValidBytes = 2;
288
289 } else {
290 //
291 // byte sequence: high -> low
292 // Utf8_3[0], Utf8_3[1], Utf8_3[2]
293 //
294 Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
295 Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
296 Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
297
298 *ValidBytes = 3;
299 }
300 }
301
302
303 /**
304 Check if input string is valid VT-UTF8 string.
305
306 @param TerminalDevice The terminal device.
307 @param WString The input string.
308
309 @retval EFI_SUCCESS If all input characters are valid.
310
311 **/
312 EFI_STATUS
313 VTUTF8TestString (
314 IN TERMINAL_DEV *TerminalDevice,
315 IN CHAR16 *WString
316 )
317 {
318 //
319 // to utf8, all kind of characters are supported.
320 //
321 return EFI_SUCCESS;
322 }