BaseTools/Tests/CheckUnicodeSourceFiles.py

   1 ## @file
   2 #  Unit tests for AutoGen.UniClassObject
   3 #
   4 #  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
   5 #
   6 #  This program and the accompanying materials
   7 #  are licensed and made available under the terms and conditions of the BSD License
   8 #  which accompanies this distribution.  The full text of the license may be found at
   9 #  http://opensource.org/licenses/bsd-license.php
  10 #
  11 #  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  12 #  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  13 #
  14
  15 ##
  16 # Import Modules
  17 #
  18 import os
  19 import unittest
  20
  21 import codecs
  22
  23 import TestTools
  24
  25 from Common.Misc import PathClass
  26 import AutoGen.UniClassObject as BtUni
  27
  28 from Common import EdkLogger
# Runs once at import time, before any test class is defined.
# NOTE(review): presumably this configures EdkLogger so that errors surface
# as EdkLogger.FatalError exceptions (which the tests below catch) -- confirm
# against Common.EdkLogger.
EdkLogger.InitializeForUnitTest()
  30
  31 class Tests(TestTools.BaseToolsTest):
  32
  33     SampleData = u'''
  34         #langdef en-US "English"
  35         #string STR_A #language en-US "STR_A for en-US"
  36     '''
  37
  38     def EncodeToFile(self, encoding, string=None):
  39         if string is None:
  40             string = self.SampleData
  41         if encoding is not None:
  42             data = codecs.encode(string, encoding)
  43         else:
  44             data = string
  45         path = 'input.uni'
  46         self.WriteTmpFile(path, data)
  47         return PathClass(self.GetTmpFilePath(path))
  48
  49     def ErrorFailure(self, error, encoding, shouldPass):
  50         msg = error + ' should '
  51         if shouldPass:
  52             msg += 'not '
  53         msg += 'be generated for '
  54         msg += '%s data in a .uni file' % encoding
  55         self.fail(msg)
  56
  57     def UnicodeErrorFailure(self, encoding, shouldPass):
  58         self.ErrorFailure('UnicodeError', encoding, shouldPass)
  59
  60     def EdkErrorFailure(self, encoding, shouldPass):
  61         self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)
  62
  63     def CheckFile(self, encoding, shouldPass, string=None):
  64         path = self.EncodeToFile(encoding, string)
  65         try:
  66             BtUni.UniFileClassObject([path])
  67             if shouldPass:
  68                 return
  69         except UnicodeError:
  70             if not shouldPass:
  71                 return
  72             else:
  73                 self.UnicodeErrorFailure(encoding, shouldPass)
  74         except EdkLogger.FatalError:
  75             if not shouldPass:
  76                 return
  77             else:
  78                 self.EdkErrorFailure(encoding, shouldPass)
  79         except Exception:
  80             pass
  81
  82         self.EdkErrorFailure(encoding, shouldPass)
  83
  84     def testUtf16InUniFile(self):
  85         self.CheckFile('utf_16', shouldPass=True)
  86
  87     def testSupplementaryPlaneUnicodeCharInUtf16File(self):
  88         #
  89         # Supplementary Plane characters can exist in UTF-16 files,
  90         # but they are not valid UCS-2 characters.
  91         #
  92         # This test makes sure that BaseTools rejects these characters
  93         # if seen in a .uni file.
  94         #
  95         data = u'''
  96             #langdef en-US "English"
  97             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
  98         '''
  99
 100         self.CheckFile('utf_16', shouldPass=False, string=data)
 101
 102     def testSurrogatePairUnicodeCharInUtf16File(self):
 103         #
 104         # Surrogate Pair code points are used in UTF-16 files to
 105         # encode the Supplementary Plane characters. But, a Surrogate
 106         # Pair code point which is not followed by another Surrogate
 107         # Pair code point might be interpreted as a single code point
 108         # with the Surrogate Pair code point.
 109         #
 110         # This test makes sure that BaseTools rejects these characters
 111         # if seen in a .uni file.
 112         #
 113         data = codecs.BOM_UTF16_LE + '//\x01\xd8 '
 114
 115         self.CheckFile(encoding=None, shouldPass=False, string=data)
 116
 117     def testValidUtf8File(self):
 118         self.CheckFile(encoding='utf_8', shouldPass=True)
 119
 120     def testValidUtf8FileWithBom(self):
 121         #
 122         # Same test as testValidUtf8File, but add the UTF-8 BOM
 123         #
 124         data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')
 125
 126         self.CheckFile(encoding=None, shouldPass=True, string=data)
 127
 128     def test32bitUnicodeCharInUtf8File(self):
 129         data = u'''
 130             #langdef en-US "English"
 131             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
 132         '''
 133
 134         self.CheckFile('utf_16', shouldPass=False, string=data)
 135
 136     def test32bitUnicodeCharInUtf8File(self):
 137         data = u'''
 138             #langdef en-US "English"
 139             #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
 140         '''
 141
 142         self.CheckFile('utf_8', shouldPass=False, string=data)
 143
 144     def test32bitUnicodeCharInUtf8Comment(self):
 145         data = u'''
 146             // Even in comments, we reject non-UCS-2 chars: \U00010300
 147             #langdef en-US "English"
 148             #string STR_A #language en-US "A"
 149         '''
 150
 151         self.CheckFile('utf_8', shouldPass=False, string=data)
 152
 153     def testSurrogatePairUnicodeCharInUtf8File(self):
 154         #
 155         # Surrogate Pair code points are used in UTF-16 files to
 156         # encode the Supplementary Plane characters. In UTF-8, it is
 157         # trivial to encode these code points, but they are not valid
 158         # code points for characters, since they are reserved for the
 159         # UTF-16 Surrogate Pairs.
 160         #
 161         # This test makes sure that BaseTools rejects these characters
 162         # if seen in a .uni file.
 163         #
 164         data = '\xed\xa0\x81'
 165
 166         self.CheckFile(encoding=None, shouldPass=False, string=data)
 167
 168     def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
 169         #
 170         # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
 171         # the UTF-8 BOM
 172         #
 173         data = codecs.BOM_UTF8 + '\xed\xa0\x81'
 174
 175         self.CheckFile(encoding=None, shouldPass=False, string=data)
 176
 177 TheTestSuite = TestTools.MakeTheTestSuite(locals())
 178
 179 if __name__ == '__main__':
 180     allTests = TheTestSuite()
 181     unittest.TextTestRunner().run(allTests)