BaseTools/Tests/CheckUnicodeSourceFiles.py

   1 ## @file
   2 #  Unit tests for AutoGen.UniClassObject
   3 #
   4 #  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
   5 #
   6 #  SPDX-License-Identifier: BSD-2-Clause-Patent
   7 #
   8
   9 ##
  10 # Import Modules
  11 #
  12 import os
  13 import unittest
  14
  15 import codecs
  16
  17 import TestTools
  18
  19 from Common.Misc import PathClass
  20 import AutoGen.UniClassObject as BtUni
  21
  22 from Common import EdkLogger
  23 EdkLogger.InitializeForUnitTest()
  24
  25 class Tests(TestTools.BaseToolsTest):
  26
  27     SampleData = u'''
  28         #langdef en-US "English"
  29         #string STR_A #language en-US "STR_A for en-US"
  30     '''
  31
  32     def EncodeToFile(self, encoding, string=None):
  33         if string is None:
  34             string = self.SampleData
  35         if encoding is not None:
  36             data = codecs.encode(string, encoding)
  37         else:
  38             data = string
  39         path = 'input.uni'
  40         self.WriteTmpFile(path, data)
  41         return PathClass(self.GetTmpFilePath(path))
  42
  43     def ErrorFailure(self, error, encoding, shouldPass):
  44         msg = error + ' should '
  45         if shouldPass:
  46             msg += 'not '
  47         msg += 'be generated for '
  48         msg += '%s data in a .uni file' % encoding
  49         self.fail(msg)
  50
  51     def UnicodeErrorFailure(self, encoding, shouldPass):
  52         self.ErrorFailure('UnicodeError', encoding, shouldPass)
  53
  54     def EdkErrorFailure(self, encoding, shouldPass):
  55         self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)
  56
  57     def CheckFile(self, encoding, shouldPass, string=None):
  58         path = self.EncodeToFile(encoding, string)
  59         try:
  60             BtUni.UniFileClassObject([path])
  61             if shouldPass:
  62                 return
  63         except UnicodeError:
  64             if not shouldPass:
  65                 return
  66             else:
  67                 self.UnicodeErrorFailure(encoding, shouldPass)
  68         except EdkLogger.FatalError:
  69             if not shouldPass:
  70                 return
  71             else:
  72                 self.EdkErrorFailure(encoding, shouldPass)
  73         except Exception:
  74             pass
  75
  76         self.EdkErrorFailure(encoding, shouldPass)
  77
  78     def testUtf16InUniFile(self):
  79         self.CheckFile('utf_16', shouldPass=True)
  80
  81     def testSupplementaryPlaneUnicodeCharInUtf16File(self):
  82         #
  83         # Supplementary Plane characters can exist in UTF-16 files,
  84         # but they are not valid UCS-2 characters.
  85         #
  86         # This test makes sure that BaseTools rejects these characters
  87         # if seen in a .uni file.
  88         #
  89         data = u'''
  90             #langdef en-US "English"
  91             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
  92         '''
  93
  94         self.CheckFile('utf_16', shouldPass=False, string=data)
  95
  96     def testSurrogatePairUnicodeCharInUtf16File(self):
  97         #
  98         # Surrogate Pair code points are used in UTF-16 files to
  99         # encode the Supplementary Plane characters. But, a Surrogate
 100         # Pair code point which is not followed by another Surrogate
 101         # Pair code point might be interpreted as a single code point
 102         # with the Surrogate Pair code point.
 103         #
 104         # This test makes sure that BaseTools rejects these characters
 105         # if seen in a .uni file.
 106         #
 107         data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '
 108
 109         self.CheckFile(encoding=None, shouldPass=False, string=data)
 110
 111     def testValidUtf8File(self):
 112         self.CheckFile(encoding='utf_8', shouldPass=True)
 113
 114     def testValidUtf8FileWithBom(self):
 115         #
 116         # Same test as testValidUtf8File, but add the UTF-8 BOM
 117         #
 118         data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')
 119
 120         self.CheckFile(encoding=None, shouldPass=True, string=data)
 121
 122     def test32bitUnicodeCharInUtf8File(self):
 123         data = u'''
 124             #langdef en-US "English"
 125             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
 126         '''
 127
 128         self.CheckFile('utf_16', shouldPass=False, string=data)
 129
 130     def test32bitUnicodeCharInUtf8File(self):
 131         data = u'''
 132             #langdef en-US "English"
 133             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
 134         '''
 135
 136         self.CheckFile('utf_8', shouldPass=False, string=data)
 137
 138     def test32bitUnicodeCharInUtf8Comment(self):
 139         data = u'''
 140             // Even in comments, we reject non-UCS-2 chars: \U00010300
 141             #langdef en-US "English"
 142             #string STR_A #language en-US "A"
 143         '''
 144
 145         self.CheckFile('utf_8', shouldPass=False, string=data)
 146
 147     def testSurrogatePairUnicodeCharInUtf8File(self):
 148         #
 149         # Surrogate Pair code points are used in UTF-16 files to
 150         # encode the Supplementary Plane characters. In UTF-8, it is
 151         # trivial to encode these code points, but they are not valid
 152         # code points for characters, since they are reserved for the
 153         # UTF-16 Surrogate Pairs.
 154         #
 155         # This test makes sure that BaseTools rejects these characters
 156         # if seen in a .uni file.
 157         #
 158         data = b'\xed\xa0\x81'
 159
 160         self.CheckFile(encoding=None, shouldPass=False, string=data)
 161
 162     def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
 163         #
 164         # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
 165         # the UTF-8 BOM
 166         #
 167         data = codecs.BOM_UTF8 + b'\xed\xa0\x81'
 168
 169         self.CheckFile(encoding=None, shouldPass=False, string=data)
 170
 171 TheTestSuite = TestTools.MakeTheTestSuite(locals())
 172
 173 if __name__ == '__main__':
 174     allTests = TheTestSuite()
 175     unittest.TextTestRunner().run(allTests)