[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Tools / unicode / gencodec.py

""" Unicode Mapping Parser and Codec Generator.\r
\r
This script parses Unicode mapping files as available from the Unicode\r
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec\r
modules from them. The codecs use the standard character mapping codec\r
to actually apply the mapping.\r
\r
Synopsis: gencodec.py dir codec_prefix\r
\r
All files in dir are scanned and those producing non-empty mappings\r
will be written to <codec_prefix><mapname>.py with <mapname> being the\r
first part of the map's filename ('a' in a.b.c.txt) converted to\r
lowercase with hyphens replaced by underscores.\r
\r
The tool also writes marshalled versions of the mapping tables to the\r
same location (with .mapping extension).\r
\r
Written by Marc-Andre Lemburg (mal@lemburg.com).\r
\r
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.\r
(c) Copyright Guido van Rossum, 2000.\r
\r
Table generation:\r
(c) Copyright Marc-Andre Lemburg, 2005.\r
    Licensed to PSF under a Contributor Agreement.\r
\r
"""#"\r
\r
import re, os, marshal, codecs\r
\r
# Maximum allowed size of charmap tables
MAX_TABLE_SIZE = 8192

# Standard undefined Unicode code point
UNI_UNDEFINED = u'\uFFFE'

# Matches one mapping line:
#   group 1 - encoded code(s): hex literals joined by '+'
#   group 2 - Unicode code(s): hex literals and/or <META> codes joined
#             by '+'; may be empty
#   group 3 - optional trailing comment, including the leading '#'
# Raw strings keep the regex escapes (\+ and \s) away from Python's own
# string escape processing.
# NOTE(review): the 'A-Z' class in group 2 looks like a typo for 'A-F';
# left unchanged because tightening it would alter which lines match.
mapRE = re.compile(r'((?:0x[0-9a-fA-F]+\+?)+)'
                   r'\s+'
                   r'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
                   r'\s*'
                   r'(#.+)?')
\r
def parsecodes(codes, len=len, range=range):

    """ Converts code combinations to either a single code integer
        or a tuple of integers.

        codes is a string of hexadecimal literals joined by '+',
        e.g. '0x41' or '0x0041+0x0300'.

        meta-codes (in angular brackets, e.g. <LR> and <RL>) and any
        other part that does not parse as a hexadecimal number are
        ignored.

        Empty codes or illegal ones are returned as None. This also
        holds when codes consists of a single illegal part or of
        meta-codes only.

        The len and range parameters are kept for interface
        compatibility; range is not used.

    """
    if not codes:
        return None
    values = []
    for part in codes.split('+'):
        try:
            values.append(int(part, 16))
        except ValueError:
            # Meta-code, empty part (e.g. after a trailing '+') or
            # garbage: skip it
            continue
    if not values:
        return None
    if len(values) == 1:
        return values[0]
    return tuple(values)
\r
def readmap(filename):

    """ Reads the mapping file filename and returns the decoding map.

        The result is a dictionary mapping each encoded code (an
        integer, or a tuple of integers for '+'-joined codes) to a
        tuple (unicode code, comment).  The unicode code is whatever
        parsecodes() returns for the second column of the line.

        Code points 0x00 - 0x1F and 0x7F start out as identity
        mappings with the comment 'CONTROL CHARACTER'; lines in the
        file override them.

        If at least as many single-byte codes map to themselves as
        are left unmapped, the unmapped codes below 256 are added as
        (None, "") entries and the marker entry 'IDENTITY': 256 is
        set.

        Blank lines, lines starting with '#' and lines not matching
        mapRE are skipped.

    """
    # The with block closes the file even if reading fails
    with open(filename, 'r') as f:
        lines = f.readlines()

    enc2uni = {}

    # UTC mapping tables per convention don't include the identity
    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
    # explicitly mapped to different characters or undefined
    control_codes = list(range(32)) + [127]
    identity_count = len(control_codes)
    unmapped = set(range(256)).difference(control_codes)
    for code in control_codes:
        enc2uni[code] = (code, 'CONTROL CHARACTER')

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            continue
        enc, uni, comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if comment is None:
            comment = ''
        else:
            # Drop the leading '#'
            comment = comment[1:].strip()
        # Only single integer codes take part in the identity
        # bookkeeping; tuple codes are just stored
        if not isinstance(enc, tuple) and enc < 256:
            unmapped.discard(enc)
            if enc == uni:
                identity_count += 1
        enc2uni[enc] = (uni, comment)

    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if identity_count >= len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
\r
def hexrepr(t, precision=4):

    """ Returns t formatted as text for the generated source code.

        None gives 'None'.  A value without a length (a single
        code) gives '0x' followed by at least precision upper-case
        hex digits.  Anything with a length is formatted item by item
        as a parenthesised, comma separated list of such literals.

        If an item of a sequence cannot be formatted as hex, a
        diagnostic line is printed and the TypeError is re-raised.

    """
    if t is None:
        return 'None'
    try:
        len(t)
    except TypeError:
        # No length, so t is a single code rather than a sequence
        return '0x%0*X' % (precision, t)
    try:
        return '(' + ', '.join(['0x%0*X' % (precision, item)
                                for item in t]) + ')'
    except TypeError as why:
        print('* failed to convert %r: %s' % (t, why))
        raise
\r
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):

    """ Returns the source lines defining the dictionary varname.

        map maps codes to codes; a key or a value may also be a
        tuple (code, comment), in which case the comment is emitted
        after the entry if comments is true.  precisions gives the
        minimum number of hex digits for keys and values.

        If map has an 'IDENTITY' entry, the dictionary is started
        from codecs.make_identity_dict() and entries below 256 that
        map to themselves are left out.  The 'IDENTITY' entry is
        removed from map in place.

        The definition is split into update() calls after every 4096
        entries.

    """
    use_identity = "IDENTITY" in map
    if use_identity:
        lines = ["%s = codecs.make_identity_dict(range(%d))" %
                 (varname, map["IDENTITY"]),
                 "%s.update({" % varname]
        del map["IDENTITY"]
        chunks = 1
    else:
        lines = ["%s = {" % varname]
        chunks = 0

    entries = sorted(map.items())
    key_width, value_width = precisions
    emitted = 0
    for code, target in entries:
        note = ''
        if isinstance(code, tuple):
            code, note = code
        if isinstance(target, tuple):
            target, note = target
        if code is None:
            continue
        if use_identity and code == target and code < 256:
            # Already provided by the identity dictionary
            continue
        entry = '    %s: %s,' % (hexrepr(code, key_width),
                                 hexrepr(target, value_width))
        if note and comments:
            entry = entry + '\t#  %s' % note
        lines.append(entry)
        emitted += 1
        if emitted == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
            lines.append('})' if chunks else '}')
            lines.append('%s.update({' % varname)
            emitted = 0
            chunks += 1

    # Close the dictionary literal or the last update() call
    lines.append('})' if chunks else '}')
    return lines
\r
def python_tabledef_code(varname, map, comments=1, key_precision=2):

    """ Returns the source lines defining the decoding table varname,
        or None if map cannot be written as a table.

        map maps codes to codes; a key or a value may also be a
        tuple (code, comment).  The table has one entry per code from
        0 up to the largest key; missing or None entries become
        UNI_UNDEFINED.  Entries are emitted without separating
        commas, so the adjacent literals form a single string.

        None is returned if the largest key exceeds MAX_TABLE_SIZE or
        if a value is still a tuple after the comment has been split
        off (1-n mapping).

        If map has an 'IDENTITY' entry, the first 256 entries start
        out as identity mappings and the 'IDENTITY' entry is removed
        from map in place.

    """
    l = []
    append = l.append
    append('%s = (' % varname)

    # Analyze map and create table dict
    table = {}
    maxkey = 0
    if 'IDENTITY' in map:
        for key in range(256):
            table[key] = (key, '')
        maxkey = 255
        del map['IDENTITY']
    # Collect the entries only after the 'IDENTITY' marker is gone;
    # otherwise its string key would end up in the table and in maxkey
    mappings = sorted(map.items())
    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey is None:
            continue
        table[mapkey] = (mapvalue, mapcomment)
        if mapkey > maxkey:
            maxkey = mapkey
    if maxkey > MAX_TABLE_SIZE:
        # Table too large
        return None

    # Create table code
    for key in range(maxkey + 1):
        if key not in table:
            mapvalue = None
            mapcomment = 'UNDEFINED'
        else:
            mapvalue, mapcomment = table[key]
        if mapvalue is None:
            mapchar = UNI_UNDEFINED
        else:
            if isinstance(mapvalue, tuple):
                # 1-n mappings not supported
                return None
            else:
                mapchar = unichr(mapvalue)
        if mapcomment and comments:
            append('    %r\t#  %s -> %s' % (mapchar,
                                            hexrepr(key, key_precision),
                                            mapcomment))
        else:
            append('    %r' % mapchar)

    append(')')
    return l
\r
def codegen(name, map, encodingname, comments=1):

    """ Returns Python source for the given map.

        name is inserted into the module docstring as the origin of
        the mapping; encodingname is used in the docstring and, with
        underscores replaced by hyphens, as the registered codec
        name.

        Comments are included in the source, if comments is true (default).

        The generated module uses a decoding table if one can be
        built for map, and decoding/encoding dictionaries otherwise.

    """
    # Generate code.  python_mapdef_code() runs first and removes the
    # 'IDENTITY' marker from map in place, so the later calls only
    # see the code entries.
    decoding_map_code = python_mapdef_code(
        'decoding_map', map, comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table', map, comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map', codecs.make_encoding_map(map),
        comments=comments, precisions=(4, 2))

    use_table = bool(decoding_table_code)
    suffix = 'table' if use_table else 'map'

    codec_section = '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):
        return codecs.charmap_encode(input,errors,encoding_%s)

    def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)

    incremental_section = '''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' % (
        suffix, suffix)

    registry_section = '''
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
''' % encodingname.replace('_', '-')

    parts = [codec_section, incremental_section, registry_section]

    # Add decoding and encoding data (with preference to the table)
    if use_table:
        parts.append('\n### Decoding Table\n')
        parts.extend(decoding_table_code)
        parts.append('\n### Encoding table\n'
                     'encoding_table=codecs.charmap_build(decoding_table)\n')
    else:
        parts.append('\n### Decoding Map\n')
        parts.extend(decoding_map_code)
        parts.append('\n### Encoding Map\n')
        parts.extend(encoding_map_code)

    # Final new-line
    parts.append('')

    return '\n'.join(parts).expandtabs()
\r
def pymap(name,map,pyfile,encodingname,comments=1):

    """ Writes the codec source generated by codegen() for map to the
        file pyfile.

        name, encodingname and comments are passed on to codegen().

    """
    code = codegen(name,map,encodingname,comments)
    # The with block closes the file even if the write fails
    with open(pyfile,'w') as f:
        f.write(code)
\r
def marshalmap(name,map,marshalfile):

    """ Writes a marshalled copy of map to the file marshalfile.

        Every value of map must unpack into two items; it is stored
        as a 2-tuple.  name is not used.

    """
    d = {}
    for e,(u,c) in map.items():
        d[e] = (u,c)
    # The with block closes the file even if marshal.dump() fails
    with open(marshalfile,'wb') as f:
        marshal.dump(d,f)
\r
def convertdir(dir, dirprefix='', nameprefix='', comments=1):

    """ Converts every mapping file in dir to a codec module and a
        marshalled mapping.

        The output base name is nameprefix plus the part of the file
        name before the first '.', lower-cased and with hyphens
        replaced by underscores.  The module is written to
        dirprefix + <name>.py and the marshalled map to
        dirprefix + <name>.mapping.  Entries of dir that are not
        regular files are skipped.

        A ValueError raised during a conversion is reported and then
        re-raised.

    """
    for mapname in os.listdir(dir):
        mappathname = os.path.join(dir, mapname)
        if not os.path.isfile(mappathname):
            continue
        name = os.path.split(mapname)[1]
        name = name.replace('-','_')
        name = name.split('.')[0]
        name = name.lower()
        name = nameprefix + name
        codefile = name + '.py'
        marshalfile = name + '.mapping'
        print('converting %s to %s and %s' % (mapname,
                                              dirprefix + codefile,
                                              dirprefix + marshalfile))
        try:
            map = readmap(mappathname)
            if not map:
                print('* map is empty; skipping')
            else:
                pymap(mappathname, map, dirprefix + codefile,name,comments)
                marshalmap(mappathname, map, dirprefix + marshalfile)
        except ValueError as why:
            print('* conversion failed: %s' % why)
            raise
\r
def rewritepythondir(dir, dirprefix='', comments=1):

    """ Regenerates codec modules from the marshalled maps in dir.

        Every file in dir whose name ends in '.mapping' is loaded
        with marshal and, unless the map is empty, written as a codec
        module to dirprefix + <name>.py, where <name> is the file
        name without the '.mapping' extension.

        A ValueError raised while handling a file is reported and the
        next file is processed.

    """
    for mapname in os.listdir(dir):
        if not mapname.endswith('.mapping'):
            continue
        name = mapname[:-len('.mapping')]
        codefile = name + '.py'
        print('converting %s to %s' % (mapname,
                                       dirprefix + codefile))
        try:
            # The with block closes the file even if loading fails
            with open(os.path.join(dir,mapname), 'rb') as f:
                map = marshal.load(f)
            if not map:
                print('* map is empty; skipping')
            else:
                pymap(mapname, map, dirprefix + codefile,name,comments)
        except ValueError as why:
            print('* conversion failed: %s' % why)
\r
if __name__ == '__main__':

    import sys

    # Manual switch between the two modes: mapping files are
    # converted by default; set this to a true value to rebuild the
    # modules from existing .mapping files instead.
    rebuild_from_marshal = 0
    if not rebuild_from_marshal:
        convertdir(*sys.argv[1:])
    else:
        rewritepythondir(*sys.argv[1:])
Commit	Line	Data
4710c53d	1	""" Unicode Mapping Parser and Codec Generator.\r
	2	\r
	3	This script parses Unicode mapping files as available from the Unicode\r
	4	site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec\r
	5	modules from them. The codecs use the standard character mapping codec\r
	6	to actually apply the mapping.\r
	7	\r
	8	Synopsis: gencodec.py dir codec_prefix\r
	9	\r
	10	All files in dir are scanned and those producing non-empty mappings\r
	11	will be written to <codec_prefix><mapname>.py with <mapname> being the\r
	12	first part of the map's filename ('a' in a.b.c.txt) converted to\r
	13	lowercase with hyphens replaced by underscores.\r
	14	\r
	15	The tool also writes marshalled versions of the mapping tables to the\r
	16	same location (with .mapping extension).\r
	17	\r
	18	Written by Marc-Andre Lemburg (mal@lemburg.com).\r
	19	\r
	20	(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.\r
	21	(c) Copyright Guido van Rossum, 2000.\r
	22	\r
	23	Table generation:\r
	24	(c) Copyright Marc-Andre Lemburg, 2005.\r
	25	Licensed to PSF under a Contributor Agreement.\r
	26	\r
	27	"""#"\r
	28	\r
	29	import re, os, marshal, codecs\r
	30	\r
	31	# Maximum allowed size of charmap tables\r
	32	MAX_TABLE_SIZE = 8192\r
	33	\r
	34	# Standard undefined Unicode code point\r
	35	UNI_UNDEFINED = unichr(0xFFFE)\r
	36	\r
	37	mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'\r
	38	'\s+'\r
	39	'((?:(?:0x[0-9a-fA-Z]+\|<[A-Za-z]+>)\+?)*)'\r
	40	'\s*'\r
	41	'(#.+)?')\r
	42	\r
	43	def parsecodes(codes, len=len, range=range):\r
	44	\r
	45	""" Converts code combinations to either a single code integer\r
	46	or a tuple of integers.\r
	47	\r
	48	meta-codes (in angular brackets, e.g. <LR> and <RL>) are\r
	49	ignored.\r
	50	\r
	51	Empty codes or illegal ones are returned as None.\r
	52	\r
	53	"""\r
	54	if not codes:\r
	55	return None\r
	56	l = codes.split('+')\r
	57	if len(l) == 1:\r
	58	return int(l[0],16)\r
	59	for i in range(len(l)):\r
	60	try:\r
	61	l[i] = int(l[i],16)\r
	62	except ValueError:\r
	63	l[i] = None\r
	64	l = [x for x in l if x is not None]\r
65	if len(l) == 1:\r
66	return l[0]\r
67	else:\r
68	return tuple(l)\r
69	\r
70	def readmap(filename):\r
71	\r
72	f = open(filename,'r')\r
73	lines = f.readlines()\r
74	f.close()\r
75	enc2uni = {}\r
76	identity = []\r
77	unmapped = range(256)\r
78	\r
79	# UTC mapping tables per convention don't include the identity\r
80	# mappings for code points 0x00 - 0x1F and 0x7F, unless these are\r
81	# explicitly mapped to different characters or undefined\r
82	for i in range(32) + [127]:\r
83	identity.append(i)\r
84	unmapped.remove(i)\r
85	enc2uni[i] = (i, 'CONTROL CHARACTER')\r
86	\r
87	for line in lines:\r
88	line = line.strip()\r
89	if not line or line[0] == '#':\r
90	continue\r
91	m = mapRE.match(line)\r
92	if not m:\r
93	#print '* not matched: %s' % repr(line)\r
94	continue\r
95	enc,uni,comment = m.groups()\r
96	enc = parsecodes(enc)\r
97	uni = parsecodes(uni)\r
98	if comment is None:\r
99	comment = ''\r
100	else:\r
101	comment = comment[1:].strip()\r
102	if enc < 256:\r
103	if enc in unmapped:\r
104	unmapped.remove(enc)\r
105	if enc == uni:\r
106	identity.append(enc)\r
107	enc2uni[enc] = (uni,comment)\r
108	else:\r
109	enc2uni[enc] = (uni,comment)\r
110	\r
111	# If there are more identity-mapped entries than unmapped entries,\r
112	# it pays to generate an identity dictionary first, and add explicit\r
113	# mappings to None for the rest\r
114	if len(identity) >= len(unmapped):\r
115	for enc in unmapped:\r
116	enc2uni[enc] = (None, "")\r
117	enc2uni['IDENTITY'] = 256\r
118	\r
119	return enc2uni\r
120	\r
121	def hexrepr(t, precision=4):\r
122	\r
123	if t is None:\r
124	return 'None'\r
125	try:\r
126	len(t)\r
127	except:\r
128	return '0x%0*X' % (precision, t)\r
129	try:\r
130	return '(' + ', '.join(['0x%0*X' % (precision, item)\r
131	for item in t]) + ')'\r
132	except TypeError, why:\r
133	print '* failed to convert %r: %s' % (t, why)\r
134	raise\r
135	\r
136	def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):\r
137	\r
138	l = []\r
139	append = l.append\r
140	if "IDENTITY" in map:\r
141	append("%s = codecs.make_identity_dict(range(%d))" %\r
142	(varname, map["IDENTITY"]))\r
143	append("%s.update({" % varname)\r
144	splits = 1\r
145	del map["IDENTITY"]\r
146	identity = 1\r
147	else:\r
148	append("%s = {" % varname)\r
149	splits = 0\r
150	identity = 0\r
151	\r
152	mappings = sorted(map.items())\r
153	i = 0\r
154	key_precision, value_precision = precisions\r
155	for mapkey, mapvalue in mappings:\r
156	mapcomment = ''\r
157	if isinstance(mapkey, tuple):\r
158	(mapkey, mapcomment) = mapkey\r
159	if isinstance(mapvalue, tuple):\r
160	(mapvalue, mapcomment) = mapvalue\r
161	if mapkey is None:\r
162	continue\r
163	if (identity and\r
164	mapkey == mapvalue and\r
165	mapkey < 256):\r
166	# No need to include identity mappings, since these\r
167	# are already set for the first 256 code points.\r
168	continue\r
169	key = hexrepr(mapkey, key_precision)\r
170	value = hexrepr(mapvalue, value_precision)\r
171	if mapcomment and comments:\r
172	append(' %s: %s,\t# %s' % (key, value, mapcomment))\r
173	else:\r
174	append(' %s: %s,' % (key, value))\r
175	i += 1\r
176	if i == 4096:\r
177	# Split the definition into parts to that the Python\r
178	# parser doesn't dump core\r
179	if splits == 0:\r
180	append('}')\r
181	else:\r
182	append('})')\r
183	append('%s.update({' % varname)\r
184	i = 0\r
185	splits = splits + 1\r
186	if splits == 0:\r
187	append('}')\r
188	else:\r
189	append('})')\r
190	\r
191	return l\r
192	\r
193	def python_tabledef_code(varname, map, comments=1, key_precision=2):\r
194	\r
195	l = []\r
196	append = l.append\r
197	append('%s = (' % varname)\r
198	\r
199	# Analyze map and create table dict\r
200	mappings = sorted(map.items())\r
201	table = {}\r
202	maxkey = 0\r
203	if 'IDENTITY' in map:\r
204	for key in range(256):\r
205	table[key] = (key, '')\r
206	maxkey = 255\r
207	del map['IDENTITY']\r
208	for mapkey, mapvalue in mappings:\r
209	mapcomment = ''\r
210	if isinstance(mapkey, tuple):\r
211	(mapkey, mapcomment) = mapkey\r
212	if isinstance(mapvalue, tuple):\r
213	(mapvalue, mapcomment) = mapvalue\r
214	if mapkey is None:\r
215	continue\r
216	table[mapkey] = (mapvalue, mapcomment)\r
217	if mapkey > maxkey:\r
218	maxkey = mapkey\r
219	if maxkey > MAX_TABLE_SIZE:\r
220	# Table too large\r
221	return None\r
222	\r
223	# Create table code\r
224	for key in range(maxkey + 1):\r
225	if key not in table:\r
226	mapvalue = None\r
227	mapcomment = 'UNDEFINED'\r
228	else:\r
229	mapvalue, mapcomment = table[key]\r
230	if mapvalue is None:\r
231	mapchar = UNI_UNDEFINED\r
232	else:\r
233	if isinstance(mapvalue, tuple):\r
234	# 1-n mappings not supported\r
235	return None\r
236	else:\r
237	mapchar = unichr(mapvalue)\r
238	if mapcomment and comments:\r
239	append(' %r\t# %s -> %s' % (mapchar,\r
240	hexrepr(key, key_precision),\r
241	mapcomment))\r
242	else:\r
243	append(' %r' % mapchar)\r
244	\r
245	append(')')\r
246	return l\r
247	\r
248	def codegen(name, map, encodingname, comments=1):\r
249	\r
250	""" Returns Python source for the given map.\r
251	\r
252	Comments are included in the source, if comments is true (default).\r
253	\r
254	"""\r
255	# Generate code\r
256	decoding_map_code = python_mapdef_code(\r
257	'decoding_map',\r
258	map,\r
259	comments=comments)\r
260	decoding_table_code = python_tabledef_code(\r
261	'decoding_table',\r
262	map,\r
263	comments=comments)\r
264	encoding_map_code = python_mapdef_code(\r
265	'encoding_map',\r
266	codecs.make_encoding_map(map),\r
267	comments=comments,\r
268	precisions=(4, 2))\r
269	\r
270	if decoding_table_code:\r
271	suffix = 'table'\r
272	else:\r
273	suffix = 'map'\r
274	\r
275	l = [\r
276	'''\\r
277	""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.\r
278	\r
279	"""#"\r
280	\r
281	import codecs\r
282	\r
283	### Codec APIs\r
284	\r
285	class Codec(codecs.Codec):\r
286	\r
287	def encode(self,input,errors='strict'):\r
288	return codecs.charmap_encode(input,errors,encoding_%s)\r
289	\r
290	def decode(self,input,errors='strict'):\r
291	return codecs.charmap_decode(input,errors,decoding_%s)\r
292	''' % (encodingname, name, suffix, suffix)]\r
293	l.append('''\\r
294	class IncrementalEncoder(codecs.IncrementalEncoder):\r
295	def encode(self, input, final=False):\r
296	return codecs.charmap_encode(input,self.errors,encoding_%s)[0]\r
297	\r
298	class IncrementalDecoder(codecs.IncrementalDecoder):\r
299	def decode(self, input, final=False):\r
300	return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %\r
301	(suffix, suffix))\r
302	\r
303	l.append('''\r
304	class StreamWriter(Codec,codecs.StreamWriter):\r
305	pass\r
306	\r
307	class StreamReader(Codec,codecs.StreamReader):\r
308	pass\r
309	\r
310	### encodings module API\r
311	\r
312	def getregentry():\r
313	return codecs.CodecInfo(\r
314	name=%r,\r
315	encode=Codec().encode,\r
316	decode=Codec().decode,\r
317	incrementalencoder=IncrementalEncoder,\r
318	incrementaldecoder=IncrementalDecoder,\r
319	streamreader=StreamReader,\r
320	streamwriter=StreamWriter,\r
321	)\r
322	''' % encodingname.replace('_', '-'))\r
323	\r
324	# Add decoding table or map (with preference to the table)\r
325	if not decoding_table_code:\r
326	l.append('''\r
327	### Decoding Map\r
328	''')\r
329	l.extend(decoding_map_code)\r
330	else:\r
331	l.append('''\r
332	### Decoding Table\r
333	''')\r
334	l.extend(decoding_table_code)\r
335	\r
336	# Add encoding map\r
337	if decoding_table_code:\r
338	l.append('''\r
339	### Encoding table\r
340	encoding_table=codecs.charmap_build(decoding_table)\r
341	''')\r
342	else:\r
343	l.append('''\r
344	### Encoding Map\r
345	''')\r
346	l.extend(encoding_map_code)\r
347	\r
348	# Final new-line\r
349	l.append('')\r
350	\r
351	return '\n'.join(l).expandtabs()\r
352	\r
353	def pymap(name,map,pyfile,encodingname,comments=1):\r
354	\r
355	code = codegen(name,map,encodingname,comments)\r
356	f = open(pyfile,'w')\r
357	f.write(code)\r
358	f.close()\r
359	\r
360	def marshalmap(name,map,marshalfile):\r
361	\r
362	d = {}\r
363	for e,(u,c) in map.items():\r
364	d[e] = (u,c)\r
365	f = open(marshalfile,'wb')\r
366	marshal.dump(d,f)\r
367	f.close()\r
368	\r
369	def convertdir(dir, dirprefix='', nameprefix='', comments=1):\r
370	\r
371	mapnames = os.listdir(dir)\r
372	for mapname in mapnames:\r
373	mappathname = os.path.join(dir, mapname)\r
374	if not os.path.isfile(mappathname):\r
375	continue\r
376	name = os.path.split(mapname)[1]\r
377	name = name.replace('-','_')\r
378	name = name.split('.')[0]\r
379	name = name.lower()\r
380	name = nameprefix + name\r
381	codefile = name + '.py'\r
382	marshalfile = name + '.mapping'\r
383	print 'converting %s to %s and %s' % (mapname,\r
384	dirprefix + codefile,\r
385	dirprefix + marshalfile)\r
386	try:\r
387	map = readmap(os.path.join(dir,mapname))\r
388	if not map:\r
389	print '* map is empty; skipping'\r
390	else:\r
391	pymap(mappathname, map, dirprefix + codefile,name,comments)\r
392	marshalmap(mappathname, map, dirprefix + marshalfile)\r
393	except ValueError, why:\r
394	print '* conversion failed: %s' % why\r
395	raise\r
396	\r
397	def rewritepythondir(dir, dirprefix='', comments=1):\r
398	\r
399	mapnames = os.listdir(dir)\r
400	for mapname in mapnames:\r
401	if not mapname.endswith('.mapping'):\r
402	continue\r
403	name = mapname[:-len('.mapping')]\r
404	codefile = name + '.py'\r
405	print 'converting %s to %s' % (mapname,\r
406	dirprefix + codefile)\r
407	try:\r
408	map = marshal.load(open(os.path.join(dir,mapname),\r
409	'rb'))\r
410	if not map:\r
411	print '* map is empty; skipping'\r
412	else:\r
413	pymap(mapname, map, dirprefix + codefile,name,comments)\r
414	except ValueError, why:\r
415	print '* conversion failed: %s' % why\r
416	\r
417	if __name__ == '__main__':\r
418	\r
419	import sys\r
420	if 1:\r
421	convertdir(*sys.argv[1:])\r
422	else:\r
423	rewritepythondir(*sys.argv[1:])\r