BaseTools: Remove types.TypeType
[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py
CommitLineData
df91e0f9
JJ
1## @file\r
2# Unit tests for AutoGen.UniClassObject\r
3#\r
4# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
5#\r
6# This program and the accompanying materials\r
7# are licensed and made available under the terms and conditions of the BSD License\r
8# which accompanies this distribution. The full text of the license may be found at\r
9# http://opensource.org/licenses/bsd-license.php\r
10#\r
11# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
12# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
13#\r
14\r
15##\r
16# Import Modules\r
17#\r
18import os\r
19import unittest\r
20\r
21import codecs\r
22\r
23import TestTools\r
24\r
25from Common.Misc import PathClass\r
26import AutoGen.UniClassObject as BtUni\r
27\r
28from Common import EdkLogger\r
29EdkLogger.InitializeForUnitTest()\r
30\r
class Tests(TestTools.BaseToolsTest):
    #
    # Tests for how AutoGen.UniClassObject.UniFileClassObject reacts to
    # .uni files in various encodings and with various code points.
    #
    # Each test writes an 'input.uni' file through the base class helpers,
    # loads it with BtUni.UniFileClassObject and checks whether the load
    # succeeded or raised, depending on what the test expects.
    #

    # Default .uni content (one #langdef and one #string) used whenever a
    # test does not supply its own data.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        #
        # Write the test data to 'input.uni' and return a PathClass for it.
        #
        # encoding - codec name passed to codecs.encode(), or None when
        #            'string' must be written exactly as given (the tests
        #            use this for hand-built byte sequences).
        # string   - data to write; self.SampleData when None.
        #
        if string is None:
            string = self.SampleData
        if encoding is None:
            data = string
        else:
            data = codecs.encode(string, encoding)
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        #
        # Fail the current test with a message of the form
        #   "<error> should [not] be generated for <encoding> data in a
        #    .uni file"
        # where 'not' is present when the file was expected to be accepted.
        #
        expectation = 'should not' if shouldPass else 'should'
        self.fail('%s %s be generated for %s data in a .uni file'
                  % (error, expectation, encoding))

    def UnicodeErrorFailure(self, encoding, shouldPass):
        # Report a wrong outcome in terms of UnicodeError.
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        # Report a wrong outcome in terms of EdkLogger.FatalError.
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        #
        # Write the data to a .uni file, load it, and verify the outcome.
        #
        # encoding   - forwarded to EncodeToFile (None means raw data).
        # shouldPass - True when loading must succeed; False when loading
        #              must raise UnicodeError or EdkLogger.FatalError.
        # string     - forwarded to EncodeToFile.
        #
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            # Rejection is the correct result only for invalid input.
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
        except Exception as Error:
            # Any other exception is a failure in both modes.  Name it in
            # the message instead of hiding it behind a FatalError report.
            self.fail('Unexpected exception %r for %s data in a .uni file'
                      % (Error, encoding))
        else:
            # Loaded without error: wrong when rejection was expected.
            if not shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # The payload is a bytes literal so that it can be concatenated
        # with the (bytes) BOM on Python 3 as well as Python 2.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # UTF-16 variant of test32bitUnicodeCharInUtf8File.  It used to
        # share that method's name, so the later definition replaced it
        # and this check never ran.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # Bytes literal: these are the raw UTF-8-style bytes for U+D801,
        # not three text characters.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
df91e0f9
JJ
# Hand this module's namespace to TestTools to obtain the suite factory;
# calling TheTestSuite() yields the object passed to the test runner.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Executed directly: run the suite with the plain text runner.
    runner = unittest.TextTestRunner()
    runner.run(TheTestSuite())