]> git.proxmox.com Git - mirror_edk2.git/blame - BaseTools/Tests/CheckUnicodeSourceFiles.py
UefiCpuPkg: Move AsmRelocateApLoopStart from Mpfuncs.nasm to AmdSev.nasm
[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py
CommitLineData
df91e0f9
JJ
1## @file\r
2# Unit tests for AutoGen.UniClassObject\r
3#\r
4# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
5#\r
2e351cbe 6# SPDX-License-Identifier: BSD-2-Clause-Patent\r
df91e0f9
JJ
7#\r
8\r
9##\r
10# Import Modules\r
11#\r
12import os\r
13import unittest\r
14\r
15import codecs\r
16\r
17import TestTools\r
18\r
19from Common.Misc import PathClass\r
20import AutoGen.UniClassObject as BtUni\r
21\r
22from Common import EdkLogger\r
23EdkLogger.InitializeForUnitTest()\r
24\r
class Tests(TestTools.BaseToolsTest):
    """Checks which .uni source file encodings AutoGen.UniClassObject accepts.

    Each test writes a small .uni file in a chosen encoding and then
    constructs BtUni.UniFileClassObject from it.  A test passes when the
    constructor either succeeds or raises UnicodeError /
    EdkLogger.FatalError, depending on what the test expects.
    """

    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write the test data to 'input.uni' and return its PathClass.

        encoding -- codec name used to encode `string`, or None when
                    `string` is already the exact data to write.
        string   -- data to write; defaults to self.SampleData.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            # The caller supplied pre-encoded data (for example raw bytes
            # with a hand-crafted BOM), so write it unchanged.
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the test with a message describing the wrong outcome.

        error      -- name of the exception the message is about.
        encoding   -- encoding label inserted into the message.
        shouldPass -- True if the file was expected to be accepted; the
                      message then says the error should *not* be generated.
        """
        negation = 'not ' if shouldPass else ''
        self.fail('%s should %sbe generated for %s data in a .uni file'
                  % (error, negation, encoding))

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the test, reporting a UnicodeError mismatch."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the test, reporting an EdkLogger.FatalError mismatch."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write a .uni file and check whether BaseTools accepts it.

        encoding   -- passed to EncodeToFile (None means `string` is
                      written as-is).
        shouldPass -- True if UniFileClassObject must load the file without
                      raising; False if it must raise UnicodeError or
                      EdkLogger.FatalError.
        string     -- optional file content; defaults to self.SampleData.

        Any other exception type always fails the test, and the failure
        message names the exception that was actually raised.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
            return
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
            return
        except Exception as error:
            # Neither of the two recognised rejection errors: report what
            # really happened instead of silently discarding it.
            self.fail('Unexpected %s (%s) raised for %s data in a .uni file'
                      % (type(error).__name__, error, encoding))

        # No exception was raised: that is only correct when the file was
        # expected to be accepted.
        if not shouldPass:
            self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But a surrogate
        # code point which is not followed by a second surrogate code
        # point might be interpreted as a single code point with the
        # surrogate's value.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # The bytes 01 D8 are the little-endian form of U+D801.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # This method used to share its name with the UTF-8 variant
        # below, so the later definition replaced it and it never ran.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # The bytes ED A0 81 are the 3-byte UTF-8 style encoding of U+D801.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
170\r
df91e0f9
JJ
# Hand this module's namespace to TestTools.MakeTheTestSuite; the result is
# called below to obtain the suite that gets run.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Running the file directly executes the suite with the text runner.
    allTests = TheTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(allTests)