]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Tests/CheckUnicodeSourceFiles.py
BaseTools: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py
1 ## @file
2 # Unit tests for AutoGen.UniClassObject
3 #
4 # Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
5 #
6 # SPDX-License-Identifier: BSD-2-Clause-Patent
7 #
8
9 ##
10 # Import Modules
11 #
12 import os
13 import unittest
14
15 import codecs
16
17 import TestTools
18
19 from Common.Misc import PathClass
20 import AutoGen.UniClassObject as BtUni
21
22 from Common import EdkLogger
# Put EdkLogger into its unit-test configuration at import time, before any
# test in this module runs.
# NOTE(review): what this configures is defined in Common.EdkLogger -- confirm
# there if the tests depend on a particular log level or raise behaviour.
EdkLogger.InitializeForUnitTest()
24
class Tests(TestTools.BaseToolsTest):
    """Check which .uni source file encodings BtUni.UniFileClassObject accepts.

    Every test writes a temporary 'input.uni' file and passes it to
    BtUni.UniFileClassObject, expecting the file either to load without
    an exception or to be rejected with UnicodeError or
    EdkLogger.FatalError.
    """

    # Minimal well-formed .uni content used when a test supplies no data.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test data to the temporary 'input.uni' file.

        encoding -- codec name used to encode `string`, or None when
                    `string` is already the exact data to write.
        string   -- data to write; defaults to SampleData.

        Returns a PathClass built from the temporary file's path.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            # Caller supplied pre-encoded data (possibly deliberately invalid).
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the test, describing how `error` contradicted expectations.

        error      -- name of the exception type the message is about.
        encoding   -- encoding label to mention in the message.
        shouldPass -- True if the file was expected to be accepted, in
                      which case the error should *not* have occurred.
        """
        expectation = 'should not' if shouldPass else 'should'
        self.fail('%s %s be generated for %s data in a .uni file'
                  % (error, expectation, encoding))

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the test with a message about UnicodeError."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the test with a message about EdkLogger.FatalError."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write a .uni file and verify that it is accepted or rejected.

        encoding   -- passed to EncodeToFile (None means `string` is
                      already the exact data to write).
        shouldPass -- True if BtUni.UniFileClassObject must load the file
                      without raising; False if it must raise
                      UnicodeError or EdkLogger.FatalError.
        string     -- optional data; defaults to SampleData.

        Any other exception type fails the test with a message naming
        that exception, so the real cause is not hidden.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
            return
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
            return
        except Exception as error:
            # Neither of the two expected rejection errors: report what
            # actually happened instead of a misleading expectation message.
            self.fail('Unexpected %s (%s) raised for %s data in a .uni file'
                      % (type(error).__name__, error, encoding))

        # The file loaded without any exception.
        if not shouldPass:
            self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # A code point above 0xFFFF in a UTF-16 encoded file must be
        # rejected.
        #
        # This method was previously named test32bitUnicodeCharInUtf8File,
        # the same name as the UTF-8 test below, so it was silently
        # replaced by that later definition and never ran.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        #
        # A code point above 0xFFFF in a UTF-8 encoded file must be
        # rejected.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
170
# Hand this module's namespace to TestTools so it can build the suite
# factory; this must come after the test class definition above.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Run directly: build the suite and execute it with the text runner.
    unittest.TextTestRunner().run(TheTestSuite())