[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py

## @file\r
#  Unit tests for AutoGen.UniClassObject\r
#\r
#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
#\r
#  This program and the accompanying materials\r
#  are licensed and made available under the terms and conditions of the BSD License\r
#  which accompanies this distribution.  The full text of the license may be found at\r
#  http://opensource.org/licenses/bsd-license.php\r
#\r
#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
#  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
#\r
\r
##\r
# Import Modules\r
#\r
import os\r
import unittest\r
\r
import codecs\r
\r
import TestTools\r
\r
from Common.Misc import PathClass\r
import AutoGen.UniClassObject as BtUni\r
\r
from Common import EdkLogger\r
EdkLogger.InitializeForUnitTest()\r
\r
class Tests(TestTools.BaseToolsTest):
    """Encoding acceptance tests for BtUni.UniFileClassObject.

    Each test writes a small .uni file in a chosen encoding and checks
    whether constructing BtUni.UniFileClassObject on it succeeds, or is
    rejected with UnicodeError / EdkLogger.FatalError.
    """

    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test content to the temporary file 'input.uni'.

        encoding -- codec name handed to codecs.encode(), or None to
                    write `string` unchanged (the caller has already
                    produced the exact bytes wanted on disk).
        string   -- content to write; defaults to self.SampleData.

        Returns a PathClass wrapping the temporary file's path.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the test, stating whether `error` was or was not expected.

        error      -- name of the exception being reported on.
        encoding   -- encoding label interpolated into the message.
        shouldPass -- True when the file was expected to be accepted, in
                      which case the message says the error should *not*
                      be generated.
        """
        negation = 'not ' if shouldPass else ''
        self.fail('%s should %sbe generated for %s data in a .uni file'
                  % (error, negation, encoding))

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Report an expectation failure involving UnicodeError."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Report an expectation failure involving EdkLogger.FatalError."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Write a .uni file and check whether BaseTools accepts it.

        encoding   -- forwarded to EncodeToFile().
        shouldPass -- True if BtUni.UniFileClassObject must accept the
                      file; False if it must reject it by raising
                      UnicodeError or EdkLogger.FatalError.
        string     -- forwarded to EncodeToFile().

        The test fails (via self.fail) whenever the observed outcome does
        not match `shouldPass`, or when any other exception is raised.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
        except Exception as unexpected:
            # Neither accepted nor rejected in one of the two sanctioned
            # ways: always a failure. Name the real exception instead of
            # blaming EdkLogger.FatalError so the cause is visible.
            self.fail('Unexpected %s raised for %s data in a .uni file: %r'
                      % (type(unexpected).__name__, encoding, unexpected))
        else:
            # No exception at all: only a problem if rejection was expected.
            if not shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate code points are used in pairs in UTF-16 files to
        # encode the Supplementary Plane characters. A surrogate that
        # is not followed by its partner might be misread as a
        # stand-alone code point.
        #
        # The payload is pre-encoded little-endian UTF-16 containing
        # the lone surrogate 0xD801 (bytes 01 D8).
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # This method used to share its name with the UTF-8 variant
        # below; the later definition replaced it in the class
        # namespace, so this UTF-16 check never ran.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate code points are used in pairs in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8 it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 surrogate pairs.
        #
        # The payload is the three-byte UTF-8 style encoding of 0xD801.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
\r
# Factory that builds a suite from the test classes defined in this module.
TheTestSuite = TestTools.MakeTheTestSuite(locals())

# When run as a script, build the suite and run it with the text runner.
if __name__ == '__main__':
    unittest.TextTestRunner().run(TheTestSuite())
Commit	Line	Data
df91e0f9 JJ	1	## @file\r
	2	# Unit tests for AutoGen.UniClassObject\r
	3	#\r
	4	# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
	5	#\r
	6	# This program and the accompanying materials\r
	7	# are licensed and made available under the terms and conditions of the BSD License\r
	8	# which accompanies this distribution. The full text of the license may be found at\r
	9	# http://opensource.org/licenses/bsd-license.php\r
	10	#\r
	11	# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
	12	# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
	13	#\r
	14	\r
	15	##\r
	16	# Import Modules\r
	17	#\r
	18	import os\r
	19	import unittest\r
	20	\r
	21	import codecs\r
	22	\r
	23	import TestTools\r
	24	\r
	25	from Common.Misc import PathClass\r
	26	import AutoGen.UniClassObject as BtUni\r
	27	\r
	28	from Common import EdkLogger\r
	29	EdkLogger.InitializeForUnitTest()\r
	30	\r
	31	class Tests(TestTools.BaseToolsTest):\r
	32	\r
	33	SampleData = u'''\r
	34	#langdef en-US "English"\r
	35	#string STR_A #language en-US "STR_A for en-US"\r
	36	'''\r
	37	\r
	38	def EncodeToFile(self, encoding, string=None):\r
	39	if string is None:\r
	40	string = self.SampleData\r
dadfab5b JJ	41	if encoding is not None:\r
	42	data = codecs.encode(string, encoding)\r
	43	else:\r
	44	data = string\r
df91e0f9 JJ	45	path = 'input.uni'\r
	46	self.WriteTmpFile(path, data)\r
	47	return PathClass(self.GetTmpFilePath(path))\r
	48	\r
	49	def ErrorFailure(self, error, encoding, shouldPass):\r
	50	msg = error + ' should '\r
	51	if shouldPass:\r
	52	msg += 'not '\r
	53	msg += 'be generated for '\r
	54	msg += '%s data in a .uni file' % encoding\r
	55	self.fail(msg)\r
	56	\r
	57	def UnicodeErrorFailure(self, encoding, shouldPass):\r
	58	self.ErrorFailure('UnicodeError', encoding, shouldPass)\r
	59	\r
	60	def EdkErrorFailure(self, encoding, shouldPass):\r
	61	self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)\r
	62	\r
	63	def CheckFile(self, encoding, shouldPass, string=None):\r
	64	path = self.EncodeToFile(encoding, string)\r
	65	try:\r
	66	BtUni.UniFileClassObject([path])\r
	67	if shouldPass:\r
	68	return\r
	69	except UnicodeError:\r
	70	if not shouldPass:\r
	71	return\r
	72	else:\r
	73	self.UnicodeErrorFailure(encoding, shouldPass)\r
	74	except EdkLogger.FatalError:\r
	75	if not shouldPass:\r
	76	return\r
	77	else:\r
	78	self.EdkErrorFailure(encoding, shouldPass)\r
	79	except Exception:\r
	80	pass\r
	81	\r
	82	self.EdkErrorFailure(encoding, shouldPass)\r
	83	\r
	84	def testUtf16InUniFile(self):\r
	85	self.CheckFile('utf_16', shouldPass=True)\r
	86	\r
dadfab5b JJ	87	def testSupplementaryPlaneUnicodeCharInUtf16File(self):\r
	88	#\r
	89	# Supplementary Plane characters can exist in UTF-16 files,\r
	90	# but they are not valid UCS-2 characters.\r
	91	#\r
	92	# This test makes sure that BaseTools rejects these characters\r
	93	# if seen in a .uni file.\r
	94	#\r
	95	data = u'''\r
	96	#langdef en-US "English"\r
	97	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	98	'''\r
	99	\r
	100	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	101	\r
	102	def testSurrogatePairUnicodeCharInUtf16File(self):\r
	103	#\r
	104	# Surrogate Pair code points are used in UTF-16 files to\r
	105	# encode the Supplementary Plane characters. But, a Surrogate\r
	106	# Pair code point which is not followed by another Surrogate\r
	107	# Pair code point might be interpreted as a single code point\r
	108	# with the Surrogate Pair code point.\r
	109	#\r
	110	# This test makes sure that BaseTools rejects these characters\r
	111	# if seen in a .uni file.\r
	112	#\r
	113	data = codecs.BOM_UTF16_LE + '//\x01\xd8 '\r
	114	\r
	115	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	116	\r
156d6d65 JJ	117	def testValidUtf8File(self):\r
	118	self.CheckFile(encoding='utf_8', shouldPass=True)\r
	119	\r
	120	def testValidUtf8FileWithBom(self):\r
	121	#\r
	122	# Same test as testValidUtf8File, but add the UTF-8 BOM\r
	123	#\r
	124	data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')\r
	125	\r
	126	self.CheckFile(encoding=None, shouldPass=True, string=data)\r
	127	\r
15c3a04c JJ	128	def test32bitUnicodeCharInUtf8File(self):\r
	129	data = u'''\r
	130	#langdef en-US "English"\r
	131	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	132	'''\r
	133	\r
	134	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	135	\r
	136	def test32bitUnicodeCharInUtf8File(self):\r
	137	data = u'''\r
	138	#langdef en-US "English"\r
	139	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	140	'''\r
	141	\r
	142	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	143	\r
	144	def test32bitUnicodeCharInUtf8Comment(self):\r
	145	data = u'''\r
	146	// Even in comments, we reject non-UCS-2 chars: \U00010300\r
	147	#langdef en-US "English"\r
	148	#string STR_A #language en-US "A"\r
	149	'''\r
	150	\r
	151	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	152	\r
8fb5a0ca JJ	153	def testSurrogatePairUnicodeCharInUtf8File(self):\r
	154	#\r
	155	# Surrogate Pair code points are used in UTF-16 files to\r
	156	# encode the Supplementary Plane characters. In UTF-8, it is\r
	157	# trivial to encode these code points, but they are not valid\r
	158	# code points for characters, since they are reserved for the\r
	159	# UTF-16 Surrogate Pairs.\r
	160	#\r
	161	# This test makes sure that BaseTools rejects these characters\r
	162	# if seen in a .uni file.\r
	163	#\r
	164	data = '\xed\xa0\x81'\r
	165	\r
	166	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	167	\r
	168	def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):\r
	169	#\r
	170	# Same test as testSurrogatePairUnicodeCharInUtf8File, but add\r
	171	# the UTF-8 BOM\r
	172	#\r
	173	data = codecs.BOM_UTF8 + '\xed\xa0\x81'\r
	174	\r
	175	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	176	\r
df91e0f9 JJ	177	TheTestSuite = TestTools.MakeTheTestSuite(locals())\r
	178	\r
	179	if __name__ == '__main__':\r
	180	allTests = TheTestSuite()\r
	181	unittest.TextTestRunner().run(allTests)\r