1 """ Python 'utf-8-sig' Codec
This works similarly to UTF-8, with the following changes:

* On encoding/writing, a UTF-8 encoded BOM will be prepended/written as the
  first three bytes.

* On decoding/reading, if the first three bytes are a UTF-8 encoded BOM, these
  bytes will be skipped.
"""
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 with a leading UTF-8 byte order mark.

    Args:
        input: The text to encode.
        errors: Error handling scheme forwarded to ``codecs.utf_8_encode``.

    Returns:
        A ``(data, length)`` tuple where ``data`` is ``codecs.BOM_UTF8``
        followed by the UTF-8 encoding of *input* and ``length`` is
        ``len(input)``.
    """
    encoded = codecs.utf_8_encode(input, errors)[0]
    return (codecs.BOM_UTF8 + encoded, len(input))
def decode(input, errors='strict'):
    """Decode UTF-8 bytes, skipping one leading UTF-8 byte order mark.

    Args:
        input: The complete byte sequence to decode.
        errors: Error handling scheme forwarded to ``codecs.utf_8_decode``.

    Returns:
        An ``(output, consumed)`` tuple. ``consumed`` counts the BOM bytes
        too, so it refers to positions in the original *input*.
    """
    # Number of bytes dropped from the front before the real decode.
    prefix = 0
    if input[:3] == codecs.BOM_UTF8:
        input = input[3:]
        prefix = 3
    # final=True: stateless decoding sees the whole input at once.
    (output, consumed) = codecs.utf_8_decode(input, errors, True)
    return (output, consumed + prefix)
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that emits a BOM before the first chunk."""

    def __init__(self, errors='strict'):
        """Create the encoder; *errors* is the error handling scheme."""
        codecs.IncrementalEncoder.__init__(self, errors)
        # 1 while the BOM still has to be written, 0 afterwards.
        self.first = 1

    def encode(self, input, final=False):
        """Return the UTF-8 bytes for *input*.

        The first call after construction or ``reset()`` prefixes the
        result with ``codecs.BOM_UTF8``; later calls do not. *final* is
        accepted for interface compatibility and is not used.
        """
        if self.first:
            self.first = 0
            return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0]
        return codecs.utf_8_encode(input, self.errors)[0]

    def reset(self):
        """Return to the initial state so the next chunk gets a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the pending-BOM flag (1 if a BOM is still due, else 0)."""
        return self.first

    def setstate(self, state):
        """Restore the pending-BOM flag from a ``getstate()`` value."""
        self.first = state
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips one leading BOM."""

    def __init__(self, errors='strict'):
        """Create the decoder; *errors* is the error handling scheme."""
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # 1 until the start of the stream has been checked for a BOM.
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, dropping a BOM at the start of the stream.

        Returns an ``(output, consumed)`` tuple. ``consumed`` includes the
        three BOM bytes when a BOM was skipped.
        """
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return ("", 0)
                # Short input that cannot be a BOM prefix: plain UTF-8.
                self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(
                        input[3:], errors, final)
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Return to the initial state so a BOM is looked for again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1
class StreamWriter(codecs.StreamWriter):
    """Stream writer that puts a UTF-8 BOM in front of the first write."""

    def reset(self):
        """Reset the writer so the next ``encode`` call emits a BOM again."""
        codecs.StreamWriter.reset(self)
        try:
            # Drop the instance-level override installed by encode().
            del self.encode
        except AttributeError:
            # encode() has not run since the last reset; nothing to undo.
            pass

    def encode(self, input, errors='strict'):
        """Encode *input* with a leading BOM (first call only).

        Rebinds ``self.encode`` to ``codecs.utf_8_encode`` so that every
        later call on this instance encodes without a BOM.
        """
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
class StreamReader(codecs.StreamReader):
    """Stream reader that skips a UTF-8 BOM at the start of the stream."""

    def reset(self):
        """Reset the reader so the next ``decode`` call checks for a BOM."""
        codecs.StreamReader.reset(self)
        try:
            # Drop the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # decode() has not settled on plain UTF-8 yet; nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input*, dropping a leading BOM if one is present.

        Returns an ``(output, consumed)`` tuple. Once the BOM question is
        settled, ``self.decode`` is rebound to ``codecs.utf_8_decode`` so
        later calls on this instance decode plain UTF-8.
        """
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return ("", 0)
        elif input[:3] == codecs.BOM_UTF8:
            self.decode = codecs.utf_8_decode
            (output, consumed) = codecs.utf_8_decode(input[3:], errors)
            return (output, consumed + 3)
        # (else) no BOM present
        self.decode = codecs.utf_8_decode
        return codecs.utf_8_decode(input, errors)
106 ### encodings module API
109 return codecs
.CodecInfo(
113 incrementalencoder
=IncrementalEncoder
,
114 incrementaldecoder
=IncrementalDecoder
,
115 streamreader
=StreamReader
,
116 streamwriter
=StreamWriter
,