[mirror_edk2.git] / BaseTools / Tests / CheckUnicodeSourceFiles.py

## @file\r
#  Unit tests for AutoGen.UniClassObject\r
#\r
#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
#\r
#  This program and the accompanying materials\r
#  are licensed and made available under the terms and conditions of the BSD License\r
#  which accompanies this distribution.  The full text of the license may be found at\r
#  http://opensource.org/licenses/bsd-license.php\r
#\r
#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
#  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
#\r
\r
##\r
# Import Modules\r
#\r
import os\r
import unittest\r
\r
import codecs\r
\r
import TestTools\r
\r
from Common.Misc import PathClass\r
import AutoGen.UniClassObject as BtUni\r
\r
from Common import EdkLogger\r
EdkLogger.InitializeForUnitTest()\r
\r
class Tests(TestTools.BaseToolsTest):
    #
    # Checks which .uni source file contents are accepted or rejected when
    # a file is loaded through AutoGen.UniClassObject.UniFileClassObject.
    #
    # Each test writes its data to a temporary 'input.uni' file and then
    # expects the load either to succeed or to raise UnicodeError or
    # EdkLogger.FatalError.
    #

    # Minimal .uni content used when a test does not supply its own data.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    ## Write test data to the temporary file 'input.uni'
    #
    # WriteTmpFile and GetTmpFilePath are not defined in this class; they
    # are expected to be inherited from TestTools.BaseToolsTest.
    #
    # @param encoding   Codec name handed to codecs.encode(), or None to
    #                   write string as-is (used for pre-encoded raw bytes)
    # @param string     Data to write; SampleData is used when None
    #
    # @retval PathClass object wrapping the temporary file's path
    #
    def EncodeToFile(self, encoding, string=None):
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            # Caller already supplied the exact bytes to put in the file.
            data = string
        path = 'input.uni'
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    ## Fail the current test with a message about an error expectation
    #
    # The message reads "<error> should be generated ..." when the file was
    # expected to be rejected, and "<error> should not be generated ..."
    # when it was expected to be accepted.
    #
    # @param error      Name of the exception type to mention
    # @param encoding   Encoding label to mention (may be None)
    # @param shouldPass True if the file was expected to load cleanly
    #
    def ErrorFailure(self, error, encoding, shouldPass):
        expectation = ' should not ' if shouldPass else ' should '
        self.fail('%s%sbe generated for %s data in a .uni file'
                  % (error, expectation, encoding))

    ## Fail the current test, naming UnicodeError in the message
    #
    def UnicodeErrorFailure(self, encoding, shouldPass):
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    ## Fail the current test, naming EdkLogger.FatalError in the message
    #
    def EdkErrorFailure(self, encoding, shouldPass):
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    ## Write data to a .uni file, load it, and check the outcome
    #
    # The load counts as "passing" when UniFileClassObject raises nothing,
    # and as "rejected" when it raises UnicodeError or EdkLogger.FatalError.
    # Any other exception always fails the test.
    #
    # @param encoding   Codec used to encode string, or None for raw bytes
    # @param shouldPass True if the file must load without error, False if
    #                   it must be rejected
    # @param string     Data to write; SampleData is used when None
    #
    def CheckFile(self, encoding, shouldPass, string=None):
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
            return
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
            return
        except Exception as unexpected:
            # Neither of the two recognised rejection errors: this is a
            # failure whatever shouldPass is, so report what was actually
            # raised instead of a message about EdkLogger.FatalError.
            self.fail('Unexpected exception %r raised for %s data in a '
                      '.uni file' % (unexpected, encoding))

        if not shouldPass:
            # The file loaded cleanly although it should have been rejected.
            self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate code points are used in pairs in UTF-16 files to
        # encode the Supplementary Plane characters. A surrogate that is
        # not followed by its partner might be interpreted as a single
        # code point whose value is the surrogate itself.
        #
        # The bytes 01 D8 below are the UTF-16LE code unit 0xD801, a lone
        # surrogate. This test makes sure that BaseTools rejects it if
        # seen in a .uni file.
        #
        # Byte literals are used so the data can be concatenated with the
        # codecs BOM constant regardless of the str/bytes model in use.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # A code point above 0xFFFF in a UTF-16 encoded file must be
        # rejected.
        #
        # This method used to share its name with the UTF-8 variant below,
        # which replaced it in the class namespace so it never ran. It
        # currently exercises the same input as
        # testSupplementaryPlaneUnicodeCharInUtf16File.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        #
        # A code point above 0xFFFF in a UTF-8 encoded file must be
        # rejected.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # The bytes ED A0 81 below are the UTF-8 style encoding of the
        # surrogate code point 0xD801. This test makes sure that BaseTools
        # rejects these characters if seen in a .uni file.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)
\r
#
# Hand this module's namespace to TestTools so it can build the suite
# factory. At module level locals() is the module namespace, so this
# statement has to stay outside any function.
# NOTE(review): presumably MakeTheTestSuite collects the test case classes
# found in that namespace - confirm in TestTools.
#
TheTestSuite = TestTools.MakeTheTestSuite(locals())

#
# When executed directly, build the suite and run it with the plain-text
# unittest runner.
#
if __name__ == '__main__':
    suite = TheTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(suite)
Commit	Line	Data
df91e0f9 JJ	1	## @file\r
	2	# Unit tests for AutoGen.UniClassObject\r
	3	#\r
	4	# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r
	5	#\r
	6	# This program and the accompanying materials\r
	7	# are licensed and made available under the terms and conditions of the BSD License\r
	8	# which accompanies this distribution. The full text of the license may be found at\r
	9	# http://opensource.org/licenses/bsd-license.php\r
	10	#\r
	11	# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
	12	# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
	13	#\r
	14	\r
	15	##\r
	16	# Import Modules\r
	17	#\r
	18	import os\r
	19	import unittest\r
	20	\r
	21	import codecs\r
	22	\r
	23	import TestTools\r
	24	\r
	25	from Common.Misc import PathClass\r
	26	import AutoGen.UniClassObject as BtUni\r
	27	\r
	28	from Common import EdkLogger\r
	29	EdkLogger.InitializeForUnitTest()\r
	30	\r
	31	class Tests(TestTools.BaseToolsTest):\r
	32	\r
	33	SampleData = u'''\r
	34	#langdef en-US "English"\r
	35	#string STR_A #language en-US "STR_A for en-US"\r
	36	'''\r
	37	\r
	38	def EncodeToFile(self, encoding, string=None):\r
	39	if string is None:\r
	40	string = self.SampleData\r
dadfab5b JJ	41	if encoding is not None:\r
	42	data = codecs.encode(string, encoding)\r
	43	else:\r
	44	data = string\r
df91e0f9 JJ	45	path = 'input.uni'\r
	46	self.WriteTmpFile(path, data)\r
	47	return PathClass(self.GetTmpFilePath(path))\r
	48	\r
	49	def ErrorFailure(self, error, encoding, shouldPass):\r
	50	msg = error + ' should '\r
	51	if shouldPass:\r
	52	msg += 'not '\r
	53	msg += 'be generated for '\r
	54	msg += '%s data in a .uni file' % encoding\r
	55	self.fail(msg)\r
	56	\r
	57	def UnicodeErrorFailure(self, encoding, shouldPass):\r
	58	self.ErrorFailure('UnicodeError', encoding, shouldPass)\r
	59	\r
	60	def EdkErrorFailure(self, encoding, shouldPass):\r
	61	self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)\r
	62	\r
	63	def CheckFile(self, encoding, shouldPass, string=None):\r
	64	path = self.EncodeToFile(encoding, string)\r
	65	try:\r
	66	BtUni.UniFileClassObject([path])\r
	67	if shouldPass:\r
	68	return\r
	69	except UnicodeError:\r
	70	if not shouldPass:\r
	71	return\r
	72	else:\r
	73	self.UnicodeErrorFailure(encoding, shouldPass)\r
	74	except EdkLogger.FatalError:\r
	75	if not shouldPass:\r
	76	return\r
	77	else:\r
	78	self.EdkErrorFailure(encoding, shouldPass)\r
	79	except Exception:\r
	80	pass\r
	81	\r
	82	self.EdkErrorFailure(encoding, shouldPass)\r
	83	\r
	84	def testUtf16InUniFile(self):\r
	85	self.CheckFile('utf_16', shouldPass=True)\r
	86	\r
dadfab5b JJ	87	def testSupplementaryPlaneUnicodeCharInUtf16File(self):\r
	88	#\r
	89	# Supplementary Plane characters can exist in UTF-16 files,\r
	90	# but they are not valid UCS-2 characters.\r
	91	#\r
	92	# This test makes sure that BaseTools rejects these characters\r
	93	# if seen in a .uni file.\r
	94	#\r
	95	data = u'''\r
	96	#langdef en-US "English"\r
	97	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	98	'''\r
	99	\r
	100	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	101	\r
	102	def testSurrogatePairUnicodeCharInUtf16File(self):\r
	103	#\r
	104	# Surrogate Pair code points are used in UTF-16 files to\r
	105	# encode the Supplementary Plane characters. But, a Surrogate\r
	106	# Pair code point which is not followed by another Surrogate\r
	107	# Pair code point might be interpreted as a single code point\r
	108	# with the Surrogate Pair code point.\r
	109	#\r
	110	# This test makes sure that BaseTools rejects these characters\r
	111	# if seen in a .uni file.\r
	112	#\r
	113	data = codecs.BOM_UTF16_LE + '//\x01\xd8 '\r
	114	\r
	115	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	116	\r
15c3a04c JJ	117	def test32bitUnicodeCharInUtf8File(self):\r
	118	data = u'''\r
	119	#langdef en-US "English"\r
	120	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	121	'''\r
	122	\r
	123	self.CheckFile('utf_16', shouldPass=False, string=data)\r
	124	\r
	125	def test32bitUnicodeCharInUtf8File(self):\r
	126	data = u'''\r
	127	#langdef en-US "English"\r
	128	#string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"\r
	129	'''\r
	130	\r
	131	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	132	\r
	133	def test32bitUnicodeCharInUtf8Comment(self):\r
	134	data = u'''\r
	135	// Even in comments, we reject non-UCS-2 chars: \U00010300\r
	136	#langdef en-US "English"\r
	137	#string STR_A #language en-US "A"\r
	138	'''\r
	139	\r
	140	self.CheckFile('utf_8', shouldPass=False, string=data)\r
	141	\r
8fb5a0ca JJ	142	def testSurrogatePairUnicodeCharInUtf8File(self):\r
	143	#\r
	144	# Surrogate Pair code points are used in UTF-16 files to\r
	145	# encode the Supplementary Plane characters. In UTF-8, it is\r
	146	# trivial to encode these code points, but they are not valid\r
	147	# code points for characters, since they are reserved for the\r
	148	# UTF-16 Surrogate Pairs.\r
	149	#\r
	150	# This test makes sure that BaseTools rejects these characters\r
	151	# if seen in a .uni file.\r
	152	#\r
	153	data = '\xed\xa0\x81'\r
	154	\r
	155	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	156	\r
	157	def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):\r
	158	#\r
	159	# Same test as testSurrogatePairUnicodeCharInUtf8File, but add\r
	160	# the UTF-8 BOM\r
	161	#\r
	162	data = codecs.BOM_UTF8 + '\xed\xa0\x81'\r
	163	\r
	164	self.CheckFile(encoding=None, shouldPass=False, string=data)\r
	165	\r
df91e0f9 JJ	166	TheTestSuite = TestTools.MakeTheTestSuite(locals())\r
	167	\r
	168	if __name__ == '__main__':\r
	169	allTests = TheTestSuite()\r
	170	unittest.TextTestRunner().run(allTests)\r