git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Tests/CheckUnicodeSourceFiles.py
BaseTools/Tests: Verify unsupported UTF-8 data is rejected
[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py
1 ## @file
2 # Unit tests for AutoGen.UniClassObject
3 #
4 # Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14
15 ##
16 # Import Modules
17 #
18 import os
19 import unittest
20
21 import codecs
22
23 import TestTools
24
25 from Common.Misc import PathClass
26 import AutoGen.UniClassObject as BtUni
27
28 from Common import EdkLogger
29 EdkLogger.InitializeForUnitTest()
30
class Tests(TestTools.BaseToolsTest):
    """Checks which encodings/code points UniFileClassObject accepts.

    Each test writes a temporary ``input.uni`` file and constructs
    ``BtUni.UniFileClassObject`` on it, then verifies that construction
    either succeeds or is rejected with ``UnicodeError`` /
    ``EdkLogger.FatalError``.
    """

    # Minimal .uni content used whenever a test supplies no data of its own.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test data to the temporary 'input.uni' file.

        encoding -- codec name passed to codecs.encode(), or None to write
                    `string` unchanged (for data that is already raw bytes).
        string   -- data to write; defaults to self.SampleData.

        Returns a PathClass wrapping the temporary file's path.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the current test with a message about `error`.

        The message says the error "should not" have been generated when the
        file was expected to pass, and "should" have been generated otherwise.
        """
        msg = error + ' should '
        if shouldPass:
            msg += 'not '
        msg += 'be generated for '
        msg += '%s data in a .uni file' % encoding
        self.fail(msg)

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the current test, naming UnicodeError as the error."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the current test, naming EdkLogger.FatalError as the error."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write a .uni file and check whether BaseTools accepts it.

        encoding   -- forwarded to EncodeToFile().
        shouldPass -- True if UniFileClassObject must accept the file,
                      False if it must raise UnicodeError or
                      EdkLogger.FatalError.
        string     -- forwarded to EncodeToFile().

        Any mismatch between the expectation and the observed outcome is
        reported through self.fail().
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
            return
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
            return
        except Exception as unexpected:
            # Neither of the two accepted rejection errors: this is a failure
            # whatever the expectation was. Report what was actually raised
            # instead of a misleading EdkLogger.FatalError message.
            self.fail('Unexpected %s raised for %s data in a .uni file: %r'
                      % (type(unexpected).__name__, encoding, unexpected))

        # No exception was raised at all.
        if not shouldPass:
            self.EdkErrorFailure(encoding, shouldPass)

    # NOTE: the test methods below are documented with comments rather than
    # docstrings so that unittest keeps reporting them by method name.

    def testUtf16InUniFile(self):
        # The default sample data encoded as UTF-16 must be accepted.
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = codecs.BOM_UTF16_LE + '//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # A code point above 0xFFFF in a UTF-16 encoded file must be
        # rejected.
        #
        # This method used to share its name with the UTF-8 variant below;
        # the later definition replaced it in the class namespace, so this
        # test never ran. It now has its own name.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        # A code point above 0xFFFF in a UTF-8 encoded file must be rejected.
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        # The rejection also applies when the character is only in a comment.
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = '\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + '\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
165
# Suite factory built by TestTools from the names defined in this module.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    # Executed directly: build the suite and run it with the text runner.
    suite = TheTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(suite)