]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/Console/TerminalDxe/Vtutf8.c
MdeModulePkg: Fix TerminalDxe FIFO Data Loss
[mirror_edk2.git] / MdeModulePkg / Universal / Console / TerminalDxe / Vtutf8.c
CommitLineData
02b7bcf9
RN
1/** @file
2 Implementation of translation upon VT-UTF8.
3
4Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
5This program and the accompanying materials
6are licensed and made available under the terms and conditions of the BSD License
7which accompanies this distribution. The full text of the license may be found at
8http://opensource.org/licenses/bsd-license.php
9
10THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12
13**/
14
15#include "Terminal.h"
16
17/**
18 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
19 and insert them into Unicode FIFO.
20
21 @param TerminalDevice The terminal device.
22
23**/
24VOID
25VTUTF8RawDataToUnicode (
26 IN TERMINAL_DEV *TerminalDevice
27 )
28{
29 UTF8_CHAR Utf8Char;
30 UINT8 ValidBytes;
31 UINT16 UnicodeChar;
32
33 ValidBytes = 0;
34 //
35 // pop the raw data out from the raw fifo,
36 // and translate it into unicode, then push
37 // the unicode into unicode fifo, until the raw fifo is empty.
38 //
39 while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull(TerminalDevice) ) {
40
41 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
42
43 if (ValidBytes < 1 || ValidBytes > 3) {
44 continue;
45 }
46
47 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
48
49 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
50 }
51}
52
53/**
54 Get one valid VT-UTF8 characters set from Raw Data FIFO.
55
56 @param Utf8Device The terminal device.
57 @param Utf8Char Returned valid VT-UTF8 characters set.
58 @param ValidBytes The count of returned VT-VTF8 characters.
59 If ValidBytes is zero, no valid VT-UTF8 returned.
60
61**/
62VOID
63GetOneValidUtf8Char (
64 IN TERMINAL_DEV *Utf8Device,
65 OUT UTF8_CHAR *Utf8Char,
66 OUT UINT8 *ValidBytes
67 )
68{
69 UINT8 Temp;
70 UINT8 Index;
71 BOOLEAN FetchFlag;
72
73 Temp = 0;
74 Index = 0;
75 FetchFlag = TRUE;
76
77 //
78 // if no valid Utf8 char is found in the RawFiFo,
79 // then *ValidBytes will be zero.
80 //
81 *ValidBytes = 0;
82
83 while (!IsRawFiFoEmpty (Utf8Device)) {
84
85 RawFiFoRemoveOneKey (Utf8Device, &Temp);
86
87 switch (*ValidBytes) {
88
89 case 0:
90 if ((Temp & 0x80) == 0) {
91 //
92 // one-byte utf8 char
93 //
94 *ValidBytes = 1;
95
96 Utf8Char->Utf8_1 = Temp;
97
98 FetchFlag = FALSE;
99
100 } else if ((Temp & 0xe0) == 0xc0) {
101 //
102 // two-byte utf8 char
103 //
104 *ValidBytes = 2;
105
106 Utf8Char->Utf8_2[1] = Temp;
107
108 } else if ((Temp & 0xf0) == 0xe0) {
109 //
110 // three-byte utf8 char
111 //
112 *ValidBytes = 3;
113
114 Utf8Char->Utf8_3[2] = Temp;
115
116 Index++;
117
118 } else {
119 //
120 // reset *ValidBytes to zero, let valid utf8 char search restart
121 //
122 *ValidBytes = 0;
123 }
124
125 break;
126
127 case 2:
128 //
129 // two-byte utf8 char go on
130 //
131 if ((Temp & 0xc0) == 0x80) {
132
133 Utf8Char->Utf8_2[0] = Temp;
134
135 FetchFlag = FALSE;
136
137 } else {
138
139 *ValidBytes = 0;
140 }
141 break;
142
143 case 3:
144 //
145 // three-byte utf8 char go on
146 //
147 if ((Temp & 0xc0) == 0x80) {
148 if (Index == 1) {
149 Utf8Char->Utf8_3[1] = Temp;
150 Index++;
151 } else {
152 Utf8Char->Utf8_3[0] = Temp;
153 FetchFlag = FALSE;
154 }
155 } else {
156 //
157 // reset *ValidBytes and Index to zero, let valid utf8 char search restart
158 //
159 *ValidBytes = 0;
160 Index = 0;
161 }
162 break;
163
164 default:
165 break;
166 }
167
168 if (!FetchFlag) {
169 break;
170 }
171 }
172
173 return ;
174}
175
176/**
177 Translate VT-UTF8 characters into one Unicode character.
178
179 UTF8 Encoding Table
180 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
181 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
182 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
183 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
184
185
186 @param Utf8Char VT-UTF8 character set needs translating.
187 @param ValidBytes The count of valid VT-UTF8 characters.
188 @param UnicodeChar Returned unicode character.
189
190**/
191VOID
192Utf8ToUnicode (
193 IN UTF8_CHAR Utf8Char,
194 IN UINT8 ValidBytes,
195 OUT CHAR16 *UnicodeChar
196 )
197{
198 UINT8 UnicodeByte0;
199 UINT8 UnicodeByte1;
200 UINT8 Byte0;
201 UINT8 Byte1;
202 UINT8 Byte2;
203
204 *UnicodeChar = 0;
205
206 //
207 // translate utf8 code to unicode, in terminal standard,
208 // up to 3 bytes utf8 code is supported.
209 //
210 switch (ValidBytes) {
211 case 1:
212 //
213 // one-byte utf8 code
214 //
215 *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
216 break;
217
218 case 2:
219 //
220 // two-byte utf8 code
221 //
222 Byte0 = Utf8Char.Utf8_2[0];
223 Byte1 = Utf8Char.Utf8_2[1];
224
225 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
226 UnicodeByte1 = (UINT8) ((Byte1 >> 2) & 0x07);
227 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
228 break;
229
230 case 3:
231 //
232 // three-byte utf8 code
233 //
234 Byte0 = Utf8Char.Utf8_3[0];
235 Byte1 = Utf8Char.Utf8_3[1];
236 Byte2 = Utf8Char.Utf8_3[2];
237
238 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
239 UnicodeByte1 = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
240 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
241
242 default:
243 break;
244 }
245
246 return ;
247}
248
249/**
250 Translate one Unicode character into VT-UTF8 characters.
251
252 UTF8 Encoding Table
253 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
254 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
255 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
256 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
257
258
259 @param Unicode Unicode character need translating.
260 @param Utf8Char Return VT-UTF8 character set.
261 @param ValidBytes The count of valid VT-UTF8 characters. If
262 ValidBytes is zero, no valid VT-UTF8 returned.
263
264**/
265VOID
266UnicodeToUtf8 (
267 IN CHAR16 Unicode,
268 OUT UTF8_CHAR *Utf8Char,
269 OUT UINT8 *ValidBytes
270 )
271{
272 UINT8 UnicodeByte0;
273 UINT8 UnicodeByte1;
274 //
275 // translate unicode to utf8 code
276 //
277 UnicodeByte0 = (UINT8) Unicode;
278 UnicodeByte1 = (UINT8) (Unicode >> 8);
279
280 if (Unicode < 0x0080) {
281
282 Utf8Char->Utf8_1 = (UINT8) (UnicodeByte0 & 0x7f);
283 *ValidBytes = 1;
284
285 } else if (Unicode < 0x0800) {
286 //
287 // byte sequence: high -> low
288 // Utf8_2[0], Utf8_2[1]
289 //
290 Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
291 Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
292
293 *ValidBytes = 2;
294
295 } else {
296 //
297 // byte sequence: high -> low
298 // Utf8_3[0], Utf8_3[1], Utf8_3[2]
299 //
300 Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
301 Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
302 Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
303
304 *ValidBytes = 3;
305 }
306}
307
308
309/**
310 Check if input string is valid VT-UTF8 string.
311
312 @param TerminalDevice The terminal device.
313 @param WString The input string.
314
315 @retval EFI_SUCCESS If all input characters are valid.
316
317**/
318EFI_STATUS
319VTUTF8TestString (
320 IN TERMINAL_DEV *TerminalDevice,
321 IN CHAR16 *WString
322 )
323{
324 //
325 // to utf8, all kind of characters are supported.
326 //
327 return EFI_SUCCESS;
328}