]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/Console/TerminalDxe/Vtutf8.c
UefiCpuPkg: Move AsmRelocateApLoopStart from Mpfuncs.nasm to AmdSev.nasm
[mirror_edk2.git] / MdeModulePkg / Universal / Console / TerminalDxe / Vtutf8.c
1 /** @file
2 Implementation of translation upon VT-UTF8.
3
4 Copyright (c) 2006 - 2018, Intel Corporation. All rights reserved.<BR>
5 SPDX-License-Identifier: BSD-2-Clause-Patent
6
7 **/
8
9 #include "Terminal.h"
10
11 /**
12 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
13 and insert them into Unicode FIFO.
14
15 @param TerminalDevice The terminal device.
16
17 **/
18 VOID
19 VTUTF8RawDataToUnicode (
20 IN TERMINAL_DEV *TerminalDevice
21 )
22 {
23 UTF8_CHAR Utf8Char;
24 UINT8 ValidBytes;
25 UINT16 UnicodeChar;
26
27 ValidBytes = 0;
28 //
29 // pop the raw data out from the raw fifo,
30 // and translate it into unicode, then push
31 // the unicode into unicode fifo, until the raw fifo is empty.
32 //
33 while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
34 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
35
36 if ((ValidBytes < 1) || (ValidBytes > 3)) {
37 continue;
38 }
39
40 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *)&UnicodeChar);
41
42 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
43 }
44 }
45
46 /**
47 Get one valid VT-UTF8 characters set from Raw Data FIFO.
48
49 @param Utf8Device The terminal device.
50 @param Utf8Char Returned valid VT-UTF8 characters set.
51 @param ValidBytes The count of returned VT-VTF8 characters.
52 If ValidBytes is zero, no valid VT-UTF8 returned.
53
54 **/
55 VOID
56 GetOneValidUtf8Char (
57 IN TERMINAL_DEV *Utf8Device,
58 OUT UTF8_CHAR *Utf8Char,
59 OUT UINT8 *ValidBytes
60 )
61 {
62 UINT8 Temp;
63 UINT8 Index;
64 BOOLEAN FetchFlag;
65
66 Temp = 0;
67 Index = 0;
68 FetchFlag = TRUE;
69
70 //
71 // if no valid Utf8 char is found in the RawFiFo,
72 // then *ValidBytes will be zero.
73 //
74 *ValidBytes = 0;
75
76 while (!IsRawFiFoEmpty (Utf8Device)) {
77 RawFiFoRemoveOneKey (Utf8Device, &Temp);
78
79 switch (*ValidBytes) {
80 case 0:
81 if ((Temp & 0x80) == 0) {
82 //
83 // one-byte utf8 char
84 //
85 *ValidBytes = 1;
86
87 Utf8Char->Utf8_1 = Temp;
88
89 FetchFlag = FALSE;
90 } else if ((Temp & 0xe0) == 0xc0) {
91 //
92 // two-byte utf8 char
93 //
94 *ValidBytes = 2;
95
96 Utf8Char->Utf8_2[1] = Temp;
97 } else if ((Temp & 0xf0) == 0xe0) {
98 //
99 // three-byte utf8 char
100 //
101 *ValidBytes = 3;
102
103 Utf8Char->Utf8_3[2] = Temp;
104
105 Index++;
106 } else {
107 //
108 // reset *ValidBytes to zero, let valid utf8 char search restart
109 //
110 *ValidBytes = 0;
111 }
112
113 break;
114
115 case 2:
116 //
117 // two-byte utf8 char go on
118 //
119 if ((Temp & 0xc0) == 0x80) {
120 Utf8Char->Utf8_2[0] = Temp;
121
122 FetchFlag = FALSE;
123 } else {
124 *ValidBytes = 0;
125 }
126
127 break;
128
129 case 3:
130 //
131 // three-byte utf8 char go on
132 //
133 if ((Temp & 0xc0) == 0x80) {
134 if (Index == 1) {
135 Utf8Char->Utf8_3[1] = Temp;
136 Index++;
137 } else {
138 Utf8Char->Utf8_3[0] = Temp;
139 FetchFlag = FALSE;
140 }
141 } else {
142 //
143 // reset *ValidBytes and Index to zero, let valid utf8 char search restart
144 //
145 *ValidBytes = 0;
146 Index = 0;
147 }
148
149 break;
150
151 default:
152 break;
153 }
154
155 if (!FetchFlag) {
156 break;
157 }
158 }
159
160 return;
161 }
162
163 /**
164 Translate VT-UTF8 characters into one Unicode character.
165
166 UTF8 Encoding Table
167 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
168 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
169 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
170 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
171
172
173 @param Utf8Char VT-UTF8 character set needs translating.
174 @param ValidBytes The count of valid VT-UTF8 characters.
175 @param UnicodeChar Returned unicode character.
176
177 **/
178 VOID
179 Utf8ToUnicode (
180 IN UTF8_CHAR Utf8Char,
181 IN UINT8 ValidBytes,
182 OUT CHAR16 *UnicodeChar
183 )
184 {
185 UINT8 UnicodeByte0;
186 UINT8 UnicodeByte1;
187 UINT8 Byte0;
188 UINT8 Byte1;
189 UINT8 Byte2;
190
191 *UnicodeChar = 0;
192
193 //
194 // translate utf8 code to unicode, in terminal standard,
195 // up to 3 bytes utf8 code is supported.
196 //
197 switch (ValidBytes) {
198 case 1:
199 //
200 // one-byte utf8 code
201 //
202 *UnicodeChar = (UINT16)Utf8Char.Utf8_1;
203 break;
204
205 case 2:
206 //
207 // two-byte utf8 code
208 //
209 Byte0 = Utf8Char.Utf8_2[0];
210 Byte1 = Utf8Char.Utf8_2[1];
211
212 UnicodeByte0 = (UINT8)((Byte1 << 6) | (Byte0 & 0x3f));
213 UnicodeByte1 = (UINT8)((Byte1 >> 2) & 0x07);
214 *UnicodeChar = (UINT16)(UnicodeByte0 | (UnicodeByte1 << 8));
215 break;
216
217 case 3:
218 //
219 // three-byte utf8 code
220 //
221 Byte0 = Utf8Char.Utf8_3[0];
222 Byte1 = Utf8Char.Utf8_3[1];
223 Byte2 = Utf8Char.Utf8_3[2];
224
225 UnicodeByte0 = (UINT8)((Byte1 << 6) | (Byte0 & 0x3f));
226 UnicodeByte1 = (UINT8)((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
227 *UnicodeChar = (UINT16)(UnicodeByte0 | (UnicodeByte1 << 8));
228
229 default:
230 break;
231 }
232
233 return;
234 }
235
236 /**
237 Translate one Unicode character into VT-UTF8 characters.
238
239 UTF8 Encoding Table
240 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
241 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
242 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
243 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
244
245
246 @param Unicode Unicode character need translating.
247 @param Utf8Char Return VT-UTF8 character set.
248 @param ValidBytes The count of valid VT-UTF8 characters. If
249 ValidBytes is zero, no valid VT-UTF8 returned.
250
251 **/
252 VOID
253 UnicodeToUtf8 (
254 IN CHAR16 Unicode,
255 OUT UTF8_CHAR *Utf8Char,
256 OUT UINT8 *ValidBytes
257 )
258 {
259 UINT8 UnicodeByte0;
260 UINT8 UnicodeByte1;
261
262 //
263 // translate unicode to utf8 code
264 //
265 UnicodeByte0 = (UINT8)Unicode;
266 UnicodeByte1 = (UINT8)(Unicode >> 8);
267
268 if (Unicode < 0x0080) {
269 Utf8Char->Utf8_1 = (UINT8)(UnicodeByte0 & 0x7f);
270 *ValidBytes = 1;
271 } else if (Unicode < 0x0800) {
272 //
273 // byte sequence: high -> low
274 // Utf8_2[0], Utf8_2[1]
275 //
276 Utf8Char->Utf8_2[1] = (UINT8)((UnicodeByte0 & 0x3f) + 0x80);
277 Utf8Char->Utf8_2[0] = (UINT8)((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
278
279 *ValidBytes = 2;
280 } else {
281 //
282 // byte sequence: high -> low
283 // Utf8_3[0], Utf8_3[1], Utf8_3[2]
284 //
285 Utf8Char->Utf8_3[2] = (UINT8)((UnicodeByte0 & 0x3f) + 0x80);
286 Utf8Char->Utf8_3[1] = (UINT8)((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
287 Utf8Char->Utf8_3[0] = (UINT8)(((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
288
289 *ValidBytes = 3;
290 }
291 }
292
293 /**
294 Check if input string is valid VT-UTF8 string.
295
296 @param TerminalDevice The terminal device.
297 @param WString The input string.
298
299 @retval EFI_SUCCESS If all input characters are valid.
300
301 **/
302 EFI_STATUS
303 VTUTF8TestString (
304 IN TERMINAL_DEV *TerminalDevice,
305 IN CHAR16 *WString
306 )
307 {
308 //
309 // to utf8, all kind of characters are supported.
310 //
311 return EFI_SUCCESS;
312 }