[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py

## @file\r
#  Unit tests for AutoGen.UniClassObject\r
#\r
#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
#\r
#  SPDX-License-Identifier: BSD-2-Clause-Patent\r
#\r
\r
##\r
# Import Modules\r
#\r
import os\r
import unittest\r
\r
import codecs\r
\r
import TestTools\r
\r
from Common.Misc import PathClass\r
import AutoGen.UniClassObject as BtUni\r
\r
from Common import EdkLogger\r
EdkLogger.InitializeForUnitTest()\r
\r
class Tests(TestTools.BaseToolsTest):

    #
    # Each test writes a small .uni file in a chosen encoding and checks
    # whether BtUni.UniFileClassObject accepts or rejects it.
    #

    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        #
        # Write 'string' (SampleData when omitted) to the temporary file
        # 'input.uni' and return a PathClass for that file.
        #
        # When 'encoding' is None the data is written exactly as given,
        # which lets a caller supply pre-encoded bytes; otherwise the
        # text is first encoded with codecs.encode().
        #
        if string is None:
            string = self.SampleData
        if encoding is None:
            data = string
        else:
            data = codecs.encode(string, encoding)
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        #
        # Fail the current test with a message saying that 'error'
        # should (or, when shouldPass is true, should not) have been
        # generated for data in the given encoding.
        #
        expectation = 'should not' if shouldPass else 'should'
        self.fail('%s %s be generated for %s data in a .uni file' %
                  (error, expectation, encoding))

    def UnicodeErrorFailure(self, encoding, shouldPass):
        #
        # ErrorFailure specialised for UnicodeError.
        #
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        #
        # ErrorFailure specialised for EdkLogger.FatalError.
        #
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        #
        # Write the data to a .uni file and feed it to
        # BtUni.UniFileClassObject.
        #
        # shouldPass=True:  the file must be processed without raising.
        # shouldPass=False: UnicodeError or EdkLogger.FatalError must be
        #                   raised.
        #
        # Any other outcome fails the test.
        #
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
        except Exception as unexpected:
            #
            # Neither of the two rejection errors: report what was
            # really raised instead of hiding it behind a message about
            # EdkLogger.FatalError.
            #
            self.fail('Unexpected %s (%r) raised for %s data in a .uni file' %
                      (type(unexpected).__name__, unexpected, encoding))
        else:
            if not shouldPass:
                # The file was accepted although it had to be rejected.
                self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate code points are used in pairs in UTF-16 files to
        # encode the Supplementary Plane characters. A surrogate code
        # unit that is not followed by its partner might be interpreted
        # as a single code point with the surrogate's value.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file. The data holds the high surrogate
        # 0xD801 (little endian) without a low surrogate after it.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # This method used to share its name with the UTF-8 variant
        # below; the later definition replaced it in the class, so it
        # never ran. It has a name of its own now.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        #
        # A code point above 0xFFFF inside a string must be rejected in
        # a UTF-8 encoded file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        #
        # Same as test32bitUnicodeCharInUtf8File, but the code point
        # above 0xFFFF sits in a // comment line.
        #
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file. The three bytes below are the UTF-8
        # style encoding of the surrogate code point 0xD801.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
\r
#
# Hand this module's namespace to TestTools, which returns the callable
# used below to obtain the test suite.
#
TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    #
    # Executed as a script: build the suite and run it with the
    # plain-text runner.
    #
    unittest.TextTestRunner().run(TheTestSuite())
Commit	Line	Data
df91e0f9 JJ	1	## @file\r
	2	# Unit tests for AutoGen.UniClassObject\r
	3	#\r
	4	# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
	5	#\r
2e351cbe	6	# SPDX-License-Identifier: BSD-2-Clause-Patent\r
df91e0f9 JJ	7	#\r
	8	\r
	9	##\r
	10	# Import Modules\r
	11	#\r
	12	import os\r
	13	import unittest\r
	14	\r
	15	import codecs\r
	16	\r
	17	import TestTools\r
	18	\r
	19	from Common.Misc import PathClass\r
	20	import AutoGen.UniClassObject as BtUni\r
	21	\r
	22	from Common import EdkLogger\r
	23	EdkLogger.InitializeForUnitTest()\r
	24	\r
	25	class Tests(TestTools.BaseToolsTest):\r
	26	\r
	27	SampleData = u'''\r
	28	#langdef en-US "English"\r
	29	#string STR_A #language en-US "STR_A for en-US"\r
	30	'''\r
	31	\r
	32	def EncodeToFile(self, encoding, string=None):\r
	33	if string is None:\r
	34	string = self.SampleData\r
dadfab5b JJ	35	if encoding is not None:\r
	36	data = codecs.encode(string, encoding)\r
	37	else:\r
	38	data = string\r
df91e0f9 JJ	39	path = 'input.uni'\r
	40	self.WriteTmpFile(path, data)\r
	41	return PathClass(self.GetTmpFilePath(path))\r
	42	\r
	43	def ErrorFailure(self, error, encoding, shouldPass):\r
	44	msg = error + ' should '\r
	45	if shouldPass:\r
	46	msg += 'not '\r
	47	msg += 'be generated for '\r
	48	msg += '%s data in a .uni file' % encoding\r
	49	self.fail(msg)\r
	50	\r
	51	def UnicodeErrorFailure(self, encoding, shouldPass):\r
	52	self.ErrorFailure('UnicodeError', encoding, shouldPass)\r
	53	\r
	54	def EdkErrorFailure(self, encoding, shouldPass):\r
	55	self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)\r
	56	\r
	57	def CheckFile(self, encoding, shouldPass, string=None):\r
	58	path = self.EncodeToFile(encoding, string)\r
	59	try:\r
	60	BtUni.UniFileClassObject([path])\r
	61	if shouldPass:\r
	62	return\r
	63	except UnicodeError:\r
	64	if not shouldPass:\r
	65	return\r
	66	else:\r
	67	self.UnicodeErrorFailure(encoding, shouldPass)\r
	68	except EdkLogger.FatalError:\r
	69	if not shouldPass:\r
	70	return\r
	71	else:\r
	72	self.EdkErrorFailure(encoding, shouldPass)\r
	73	except Exception:\r
	74	pass\r
	75	\r
	76	self.EdkErrorFailure(encoding, shouldPass)\r
	77	\r
	78	def testUtf16InUniFile(self):\r
	79	self.CheckFile('utf_16', shouldPass=True)\r
	80	\r
dadfab5b JJ	81	def testSupplementaryPlaneUnicodeCharInUtf16File(self):\r
	82	#\r
	83	# Supplementary Plane characters can exist in UTF-16 files,\r
	84	# but they are not valid UCS-2 characters.\r
	85	#\r
	86	# This test makes sure that BaseTools rejects these characters\r
	87	# if seen in a .uni file.\r
	88	#\r
	89	data = u'''\r
	90	#langdef en-US "English"\r
	91	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	92	'''\r
	93	\r
	94	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	95	\r
	96	def testSurrogatePairUnicodeCharInUtf16File(self):\r
	97	#\r
	98	# Surrogate Pair code points are used in UTF-16 files to\r
	99	# encode the Supplementary Plane characters. But, a Surrogate\r
	100	# Pair code point which is not followed by another Surrogate\r
	101	# Pair code point might be interpreted as a single code point\r
	102	# with the Surrogate Pair code point.\r
	103	#\r
	104	# This test makes sure that BaseTools rejects these characters\r
	105	# if seen in a .uni file.\r
	106	#\r
fe906312	107	data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '\r
dadfab5b JJ	108	\r
	109	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	110	\r
156d6d65 JJ	111	def testValidUtf8File(self):\r
	112	self.CheckFile(encoding='utf_8', shouldPass=True)\r
	113	\r
	114	def testValidUtf8FileWithBom(self):\r
	115	#\r
	116	# Same test as testValidUtf8File, but add the UTF-8 BOM\r
	117	#\r
	118	data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')\r
	119	\r
	120	self.CheckFile(encoding=None, shouldPass=True, string=data)\r
	121	\r
15c3a04c JJ	122	def test32bitUnicodeCharInUtf8File(self):\r
	123	data = u'''\r
	124	#langdef en-US "English"\r
	125	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	126	'''\r
	127	\r
	128	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	129	\r
	130	def test32bitUnicodeCharInUtf8File(self):\r
	131	data = u'''\r
	132	#langdef en-US "English"\r
	133	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	134	'''\r
	135	\r
	136	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	137	\r
	138	def test32bitUnicodeCharInUtf8Comment(self):\r
	139	data = u'''\r
	140	// Even in comments, we reject non-UCS-2 chars: \U00010300\r
	141	#langdef en-US "English"\r
	142	#string STR_A #language en-US "A"\r
	143	'''\r
	144	\r
	145	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	146	\r
8fb5a0ca JJ	147	def testSurrogatePairUnicodeCharInUtf8File(self):\r
	148	#\r
	149	# Surrogate Pair code points are used in UTF-16 files to\r
	150	# encode the Supplementary Plane characters. In UTF-8, it is\r
	151	# trivial to encode these code points, but they are not valid\r
	152	# code points for characters, since they are reserved for the\r
	153	# UTF-16 Surrogate Pairs.\r
	154	#\r
	155	# This test makes sure that BaseTools rejects these characters\r
	156	# if seen in a .uni file.\r
	157	#\r
fe906312	158	data = b'\xed\xa0\x81'\r
8fb5a0ca JJ	159	\r
	160	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	161	\r
	162	def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):\r
	163	#\r
	164	# Same test as testSurrogatePairUnicodeCharInUtf8File, but add\r
	165	# the UTF-8 BOM\r
	166	#\r
fe906312	167	data = codecs.BOM_UTF8 + b'\xed\xa0\x81'\r
8fb5a0ca JJ	168	\r
	169	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	170	\r
df91e0f9 JJ	171	TheTestSuite = TestTools.MakeTheTestSuite(locals())\r
	172	\r
	173	if __name__ == '__main__':\r
	174	allTests = TheTestSuite()\r
	175	unittest.TextTestRunner().run(allTests)\r