+++ /dev/null
-""" Python 'utf-8-sig' Codec\r
-This works similarly to UTF-8, with the following changes:\r
-\r
-* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the\r
- first three bytes.\r
-\r
-* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these\r
- bytes will be skipped.\r
-"""\r
-import codecs\r
-\r
-### Codec APIs\r
-\r
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 with a leading UTF-8 BOM.

    Returns a ``(data, length)`` tuple: *data* is ``codecs.BOM_UTF8``
    followed by the UTF-8 encoding of *input*, and *length* is
    ``len(input)``.  *errors* is passed on to ``codecs.utf_8_encode``.
    """
    payload, _ = codecs.utf_8_encode(input, errors)
    return (codecs.BOM_UTF8 + payload, len(input))
-\r
def decode(input, errors='strict'):
    """Decode UTF-8 *input*, skipping one leading UTF-8 BOM if present.

    Returns a ``(text, consumed)`` tuple.  *consumed* includes the three
    BOM bytes when they were skipped.  The underlying decode is always
    performed with ``final=True``.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    payload = input[3:] if has_bom else input
    text, used = codecs.utf_8_decode(payload, errors, True)
    if has_bom:
        # Account for the BOM bytes that were stripped before decoding.
        used += 3
    return (text, used)
-\r
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that emits a BOM with the first chunk.

    ``self.first`` is 1 while the BOM is still pending and 0 once it has
    been emitted; it is also the value exposed by getstate()/setstate().
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.first = 1

    def encode(self, input, final=False):
        """Return *input* encoded as UTF-8, BOM-prefixed on the first call."""
        if not self.first:
            return codecs.utf_8_encode(input, self.errors)[0]
        # The flag is cleared before encoding, so it stays cleared even
        # if the encode call below raises.
        self.first = 0
        payload = codecs.utf_8_encode(input, self.errors)[0]
        return codecs.BOM_UTF8 + payload

    def reset(self):
        """Reset the encoder so the next chunk is BOM-prefixed again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the BOM-pending flag."""
        return self.first

    def setstate(self, state):
        """Restore the BOM-pending flag from *state*."""
        self.first = state
-\r
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips one BOM at the start of input.

    ``self.first`` is true until enough bytes have been seen to decide
    whether the input starts with a UTF-8 BOM; afterwards it is None.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.first = True

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, returning ``(text, consumed)``.

        While the BOM decision is still pending and *input* is a proper
        prefix of the BOM, nothing is consumed so the bytes are offered
        again together with the next chunk.
        """
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    self.first = None
            else:
                self.first = None
                if input[:3] == codecs.BOM_UTF8:
                    (output, consumed) = codecs.utf_8_decode(input[3:], errors, final)
                    return (output, consumed+3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Reset the decoder so the next input is checked for a BOM again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = True

    def getstate(self):
        """Return ``(buffer, flag)``; *flag* is 1 while the BOM check is pending.

        Without the flag, a decoder restored through setstate() would
        forget that the start of the stream had already been handled
        and would strip a later U+FEFF as if it were a BOM.
        """
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # The second item is reported as a plain integer, as the codecs
        # documentation requires for decoder state.
        return (state[0], 1 if self.first else 0)

    def setstate(self, state):
        """Restore the buffered bytes and the BOM-pending flag from *state*."""
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        # Map the integer flag back to the True/None values used internally.
        self.first = True if state[1] else None
-\r
class StreamWriter(codecs.StreamWriter):
    """Stream writer whose first encode call emits a UTF-8 BOM.

    The first call to encode() stores ``codecs.utf_8_encode`` as an
    instance attribute named ``encode``, which shadows this method for
    later calls; reset() removes that attribute again.
    """

    def reset(self):
        codecs.StreamWriter.reset(self)
        # Drop the per-instance override (if any) so the class-level
        # encode(), which prepends the BOM, is used again.
        self.__dict__.pop('encode', None)

    def encode(self, input, errors='strict'):
        # Later calls on this instance go straight to the plain encoder.
        self.encode = codecs.utf_8_encode
        return encode(input, errors)
-\r
-class StreamReader(codecs.StreamReader):\r
- def reset(self):\r
- codecs.StreamReader.reset(self)\r
- try:\r
- del self.decode\r
- except AttributeError:\r
- pass\r
-\r
- def decode(self, input, errors='strict'):\r
- if len(input) < 3:\r
- if codecs.BOM_UTF8.startswith(input):\r
- # not enough data to decide if this is a BOM\r
- # => try again on the next call\r
- return (u"", 0)\r
- elif input[:3] == codecs.BOM_UTF8:\r
- self.decode = codecs.utf_8_decode\r
- (output, consumed) = codecs.utf_8_decode(input[3:],errors)\r
- return (output, consumed+3)\r
- # (else) no BOM present\r
- self.decode = codecs.utf_8_decode\r
- return codecs.utf_8_decode(input, errors)\r
-\r
-### encodings module API\r
-\r
def getregentry():
    """Return the ``codecs.CodecInfo`` record for the 'utf-8-sig' codec."""
    return codecs.CodecInfo(
        encode,
        decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='utf-8-sig',
    )