]> git.proxmox.com Git - mirror_edk2.git/blame - BaseTools/Tests/CheckUnicodeSourceFiles.py
BaseTools/Tests: Verify unsupported UTF-8 data is rejected
[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py
CommitLineData
df91e0f9
JJ
1## @file\r
2# Unit tests for AutoGen.UniClassObject\r
3#\r
4# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
5#\r
6# This program and the accompanying materials\r
7# are licensed and made available under the terms and conditions of the BSD License\r
8# which accompanies this distribution. The full text of the license may be found at\r
9# http://opensource.org/licenses/bsd-license.php\r
10#\r
11# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
12# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
13#\r
14\r
15##\r
16# Import Modules\r
17#\r
18import os\r
19import unittest\r
20\r
21import codecs\r
22\r
23import TestTools\r
24\r
25from Common.Misc import PathClass\r
26import AutoGen.UniClassObject as BtUni\r
27\r
28from Common import EdkLogger\r
29EdkLogger.InitializeForUnitTest()\r
30\r
class Tests(TestTools.BaseToolsTest):
    """Unit tests for the encodings accepted by BtUni.UniFileClassObject.

    Each test writes data to a temporary 'input.uni' file and hands that
    file to BtUni.UniFileClassObject.  A file counts as rejected when the
    constructor raises UnicodeError or EdkLogger.FatalError.
    """

    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test data to the temporary file 'input.uni'.

        encoding -- codec name used to encode 'string'.  When None,
                    'string' is written as given (used for data that is
                    already raw, possibly invalid, encoded bytes).
        string   -- data to write; defaults to SampleData.

        Returns a PathClass built from the temporary file's path.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the test, naming the error that was (or was not) expected.

        error      -- name of the exception type used in the message.
        encoding   -- encoding label used in the message.
        shouldPass -- True when the file was expected to be accepted; the
                      message then says the error should *not* be generated.
        """
        msg = error + ' should '
        if shouldPass:
            msg += 'not '
        msg += 'be generated for '
        msg += '%s data in a .uni file' % encoding
        self.fail(msg)

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the test with a message about UnicodeError."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the test with a message about EdkLogger.FatalError."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write a .uni file and check whether BaseTools accepts it.

        encoding   -- passed to EncodeToFile (None means raw data).
        shouldPass -- True if UniFileClassObject is expected to accept the
                      file, False if it is expected to raise UnicodeError
                      or EdkLogger.FatalError.
        string     -- data to write; defaults to SampleData.

        The test fails when the outcome does not match 'shouldPass', or
        when any other exception type is raised.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
            if shouldPass:
                return
        except UnicodeError:
            if not shouldPass:
                return
            else:
                self.UnicodeErrorFailure(encoding, shouldPass)
        except EdkLogger.FatalError:
            if not shouldPass:
                return
            else:
                self.EdkErrorFailure(encoding, shouldPass)
        except Exception as e:
            # Neither of the two expected rejection errors: report what was
            # actually raised instead of hiding it behind a message about
            # EdkLogger.FatalError.
            self.fail('Unexpected exception %r raised for %s data in a '
                      '.uni file' % (e, encoding))

        # Reached only when the file was accepted although it should have
        # been rejected.
        self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        """The sample data encoded as UTF-16 must be accepted."""
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # '\x01\xd8' is the little-endian form of the lone surrogate
        # U+D801.  A bytes literal is used so it can be concatenated with
        # the bytes BOM constant.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # This method used to share its name with the UTF-8 variant
        # below, so the later definition replaced it and this UTF-16
        # check never ran.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        """A code point above 0xFFFF in a UTF-8 file must be rejected."""
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        """A code point above 0xFFFF must be rejected even in a comment."""
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        # The three bytes below are U+D801 written with the UTF-8 bit
        # layout.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
165\r
df91e0f9
JJ
# Hand this module's namespace to the shared helper; the returned object is
# called below to obtain the suite (presumably built from the test classes
# defined above -- confirm against TestTools.MakeTheTestSuite).
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Executed directly: build the suite and run it with the text runner.
    unittest.TextTestRunner().run(TheTestSuite())