]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/Console/TerminalDxe/Vtutf8.c
Added some functions header.
[mirror_edk2.git] / MdeModulePkg / Universal / Console / TerminalDxe / Vtutf8.c
1 /** @file
2 Implementation of translation upon VT-UTF8.
3
4 Copyright (c) 2006, Intel Corporation. <BR>
5 All rights reserved. This program and the accompanying materials
6 are licensed and made available under the terms and conditions of the BSD License
7 which accompanies this distribution. The full text of the license may be found at
8 http://opensource.org/licenses/bsd-license.php
9
10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12
13 **/
14
15 #include "Terminal.h"
16
17 /**
18 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
19 and insert them into Unicode FIFO.
20
21 @param TerminalDevice The terminal device.
22
23 @return None.
24
25 **/
26 VOID
27 VTUTF8RawDataToUnicode (
28 IN TERMINAL_DEV *TerminalDevice
29 )
30 {
31 UTF8_CHAR Utf8Char;
32 UINT8 ValidBytes;
33 UINT16 UnicodeChar;
34
35 ValidBytes = 0;
36 //
37 // pop the raw data out from the raw fifo,
38 // and translate it into unicode, then push
39 // the unicode into unicode fifo, until the raw fifo is empty.
40 //
41 while (!IsRawFiFoEmpty (TerminalDevice)) {
42
43 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
44
45 if (ValidBytes < 1 || ValidBytes > 3) {
46 continue;
47 }
48
49 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
50
51 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
52 }
53 }
54
55 /**
56 Get one valid VT-UTF8 characters set from Raw Data FIFO.
57
58 @param Utf8Device The terminal device.
59 @param Utf8Char Returned valid VT-UTF8 characters set.
60 @param ValidBytes The count of returned VT-VTF8 characters.
61 If ValidBytes is zero, no valid VT-UTF8 returned.
62
63 @retval None.
64
65 **/
66 VOID
67 GetOneValidUtf8Char (
68 IN TERMINAL_DEV *Utf8Device,
69 OUT UTF8_CHAR *Utf8Char,
70 OUT UINT8 *ValidBytes
71 )
72 {
73 UINT8 Temp;
74 UINT8 Index;
75 BOOLEAN FetchFlag;
76
77 Temp = 0;
78 Index = 0;
79 FetchFlag = TRUE;
80
81 //
82 // if no valid Utf8 char is found in the RawFiFo,
83 // then *ValidBytes will be zero.
84 //
85 *ValidBytes = 0;
86
87 while (!IsRawFiFoEmpty (Utf8Device)) {
88
89 RawFiFoRemoveOneKey (Utf8Device, &Temp);
90
91 switch (*ValidBytes) {
92
93 case 0:
94 if ((Temp & 0x80) == 0) {
95 //
96 // one-byte utf8 char
97 //
98 *ValidBytes = 1;
99
100 Utf8Char->Utf8_1 = Temp;
101
102 FetchFlag = FALSE;
103
104 } else if ((Temp & 0xe0) == 0xc0) {
105 //
106 // two-byte utf8 char
107 //
108 *ValidBytes = 2;
109
110 Utf8Char->Utf8_2[1] = Temp;
111
112 } else if ((Temp & 0xf0) == 0xe0) {
113 //
114 // three-byte utf8 char
115 //
116 *ValidBytes = 3;
117
118 Utf8Char->Utf8_3[2] = Temp;
119
120 Index++;
121
122 } else {
123 //
124 // reset *ValidBytes to zero, let valid utf8 char search restart
125 //
126 *ValidBytes = 0;
127 }
128
129 break;
130
131 case 2:
132 if ((Temp & 0xc0) == 0x80) {
133
134 Utf8Char->Utf8_2[0] = Temp;
135
136 FetchFlag = FALSE;
137
138 } else {
139
140 *ValidBytes = 0;
141 }
142 break;
143
144 case 3:
145 if ((Temp & 0xc0) == 0x80) {
146
147 Utf8Char->Utf8_3[2 - Index] = Temp;
148 Index++;
149 if (Index == 3) {
150 FetchFlag = FALSE;
151 }
152 } else {
153
154 *ValidBytes = 0;
155 Index = 0;
156 }
157 break;
158
159 default:
160 break;
161 }
162
163 if (!FetchFlag) {
164 break;
165 }
166 }
167
168 return ;
169 }
170
171 /**
172 Translate VT-UTF8 characters into one Unicode character.
173
174 UTF8 Encoding Table
175 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
176 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
177 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
178 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
179
180
181 @param Utf8Char VT-UTF8 character set needs translating.
182 @param ValidBytes The count of valid VT-UTF8 characters.
183 @param UnicodeChar Returned unicode character.
184
185 @return None.
186
187 **/
188 VOID
189 Utf8ToUnicode (
190 IN UTF8_CHAR Utf8Char,
191 IN UINT8 ValidBytes,
192 OUT CHAR16 *UnicodeChar
193 )
194 {
195 UINT8 UnicodeByte0;
196 UINT8 UnicodeByte1;
197 UINT8 Byte0;
198 UINT8 Byte1;
199 UINT8 Byte2;
200
201 *UnicodeChar = 0;
202
203 //
204 // translate utf8 code to unicode, in terminal standard,
205 // up to 3 bytes utf8 code is supported.
206 //
207 switch (ValidBytes) {
208 case 1:
209 //
210 // one-byte utf8 code
211 //
212 *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
213 break;
214
215 case 2:
216 //
217 // two-byte utf8 code
218 //
219 Byte0 = Utf8Char.Utf8_2[0];
220 Byte1 = Utf8Char.Utf8_2[1];
221
222 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
223 UnicodeByte1 = (UINT8) ((Byte1 >> 2) & 0x07);
224 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
225 break;
226
227 case 3:
228 //
229 // three-byte utf8 code
230 //
231 Byte0 = Utf8Char.Utf8_3[0];
232 Byte1 = Utf8Char.Utf8_3[1];
233 Byte2 = Utf8Char.Utf8_3[2];
234
235 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
236 UnicodeByte1 = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
237 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
238
239 default:
240 break;
241 }
242
243 return ;
244 }
245
246 /**
247 Translate one Unicode character into VT-UTF8 characters.
248
249 UTF8 Encoding Table
250 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
251 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
252 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
253 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
254
255
256 @param Unicode Unicode character need translating.
257 @param Utf8Char Return VT-UTF8 character set.
258 @param ValidBytes The count of valid VT-UTF8 characters. If
259 ValidBytes is zero, no valid VT-UTF8 returned.
260
261 @return None.
262
263 **/
264 VOID
265 UnicodeToUtf8 (
266 IN CHAR16 Unicode,
267 OUT UTF8_CHAR *Utf8Char,
268 OUT UINT8 *ValidBytes
269 )
270 {
271 UINT8 UnicodeByte0;
272 UINT8 UnicodeByte1;
273 //
274 // translate unicode to utf8 code
275 //
276 UnicodeByte0 = (UINT8) Unicode;
277 UnicodeByte1 = (UINT8) (Unicode >> 8);
278
279 if (Unicode < 0x0080) {
280
281 Utf8Char->Utf8_1 = (UINT8) (UnicodeByte0 & 0x7f);
282 *ValidBytes = 1;
283
284 } else if (Unicode < 0x0800) {
285 //
286 // byte sequence: high -> low
287 // Utf8_2[0], Utf8_2[1]
288 //
289 Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
290 Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
291
292 *ValidBytes = 2;
293
294 } else {
295 //
296 // byte sequence: high -> low
297 // Utf8_3[0], Utf8_3[1], Utf8_3[2]
298 //
299 Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
300 Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
301 Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
302
303 *ValidBytes = 3;
304 }
305 }
306
307
308 /**
309 Check if input string is valid VT-UTF8 string.
310
311 @param TerminalDevice The terminal device.
312 @param WString The input string.
313
314 @retval EFI_SUCCESS If all input characters are valid.
315
316 **/
317 EFI_STATUS
318 VTUTF8TestString (
319 IN TERMINAL_DEV *TerminalDevice,
320 IN CHAR16 *WString
321 )
322 {
323 //
324 // to utf8, all kind of characters are supported.
325 //
326 return EFI_SUCCESS;
327 }