+++ /dev/null
-# -*- coding: iso-8859-1 -*-\r
-""" Codec for the Punycode encoding, as specified in RFC 3492\r
-\r
-Written by Martin v. Löwis.\r
-"""\r
-\r
-import codecs\r
-\r
-##################### Encoding #####################################\r
-\r
def segregate(str):
    """3.1 Basic code point segregation.

    Split the input into its basic (ordinal < 128) and extended parts.

    Args:
        str: The text to split; every element must be accepted by ord().

    Returns:
        A pair (base, extended). base is the basic characters of the
        input, in their original order, joined and encoded as ASCII.
        extended is a sorted list of the distinct non-basic characters.
    """
    base = []
    extended = set()
    for c in str:
        if ord(c) < 128:
            base.append(c)
        else:
            extended.add(c)
    # sorted() accepts any iterable, so this does not depend on the
    # collection offering an in-place sort() method.
    return "".join(base).encode("ascii"), sorted(extended)
-\r
def selective_len(str, max):
    """Count the characters of str whose ordinal is strictly below max."""
    return sum(1 for item in str if ord(item) < max)
-\r
def selective_find(str, char, index, pos):
    """Locate the next occurrence of char in str after position pos.

    Scanning starts at pos + 1. Every character that compares lower than
    char and is passed on the way increments index; characters that
    compare higher are skipped without being counted.

    Returns:
        (index + 1, position) for the occurrence found, where position is
        its offset in the full string, or (-1, -1) once the end of str is
        reached without a match.
    """
    end = len(str)
    pos += 1
    while pos != end:
        current = str[pos]
        if current == char:
            return index + 1, pos
        if current < char:
            index += 1
        pos += 1
    return (-1, -1)
-\r
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding.

    Args:
        str: The full input text.
        extended: The extended characters to encode, visited in the
            order given.

    Returns:
        A list of integer deltas, one per occurrence of an extended
        character in str.
    """
    deltas = []
    prev_code = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Initial delta for this character: (count of lower characters
        # + 1) times the code point distance from the previous character.
        delta = (selective_len(str, code) + 1) * (code - prev_code)
        index, pos = selective_find(str, ch, -1, -1)
        while index != -1:
            delta += index - prev_index
            deltas.append(delta - 1)
            prev_index = index
            delta = 0
            index, pos = selective_find(str, ch, index, pos)
        prev_code = code
    return deltas
-\r
def T(j, bias):
    """Return the threshold for digit position j, clamped to [1, 26]."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return min(26, max(1, 36 * (j + 1) - bias))
-\r
# Digit alphabet: values 0-25 map to a-z, values 26-35 map to 0-9.
digits = "abcdefghijklmnopqrstuvwxyz0123456789"


def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers.

    Encode N as a list of characters taken from ``digits``, using the
    per-position thresholds given by T(position, bias).

    Returns:
        The list of digit characters, least significant digit first.
    """
    encoded = []
    position = 0
    while True:
        threshold = T(position, bias)
        if N < threshold:
            # A digit below the threshold terminates the number.
            encoded.append(digits[N])
            return encoded
        N, remainder = divmod(N - threshold, 36 - threshold)
        encoded.append(digits[threshold + remainder])
        position += 1
-\r
def adapt(delta, first, numchars):
    """Compute the bias to use for the next delta.

    Args:
        delta: The delta that was just processed.
        first: True for the first delta, which is scaled down by 700
            instead of 2.
        numchars: Divisor for the proportional increase applied to the
            scaled delta.

    Returns:
        The new bias as an integer.
    """
    delta //= 700 if first else 2
    delta += delta // numchars
    steps = 0
    # ((base - tmin) * tmax) // 2 == 455
    while delta > 455:
        delta //= 35  # base - tmin
        steps += 36
    return steps + (36 * delta // (delta + 38))
-\r
-\r
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation.

    Encode every delta as a generalized variable-length integer,
    re-adapting the bias after each one, and return the concatenated
    digits as a single string.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    pieces = []
    bias = 72
    for count, delta in enumerate(deltas):
        pieces.extend(generate_generalized_integer(delta, bias))
        bias = adapt(delta, count == 0, baselen + count + 1)
    return "".join(pieces)
-\r
def punycode_encode(text):
    """Encode text as Punycode.

    Returns the basic characters of text followed by "-" and the encoded
    extended part, or only the encoded extended part when text contains
    no basic characters.
    """
    # segregate() already returns the basic part encoded as ASCII, so it
    # must not be encoded a second time here.
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    extended = generate_integers(len(base), deltas)
    if base:
        return base + "-" + extended
    return extended
-\r
-##################### Decoding #####################################\r
-\r
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers.

    Decode one integer from extended, starting at offset extpos.

    Args:
        extended: The encoded digits; only A-Z and 0-9 are accepted.
        extpos: Offset of the first digit to read.
        bias: Current bias, passed to T() to get per-digit thresholds.
        errors: Error handling scheme; "strict" raises on bad input.

    Returns:
        (next offset, decoded value). When the input is malformed and
        errors is not "strict", the value is None; the offset is then
        extpos + 1 for truncated input, or the offset just past the
        offending character for an invalid digit.

    Raises:
        UnicodeError: In strict mode, if the input ends in the middle of
            a number or contains a character that is not a valid digit.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punicode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A:  # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22  # 0x30-26
        elif errors == "strict":
            # extpos was already advanced past the offending character,
            # so it lives at extpos - 1.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1
-\r
-\r
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding (decoding direction).

    Decode the deltas in extended one at a time and insert the resulting
    characters into base.

    Args:
        base: The already-decoded basic characters.
        extended: The encoded deltas (expected in upper case, as accepted
            by decode_generalized_number()).
        errors: Error handling scheme; "strict" raises on bad input.

    Returns:
        base with all decoded characters inserted. If a delta cannot be
        decoded in a non-strict mode, the text built so far is returned.

    Raises:
        UnicodeError: In strict mode, if a decoded code point exceeds
            U+10FFFF (or if decode_generalized_number() raises).
    """
    char = 0x80
    pos = -1
    bias = 72
    extpos = 0
    while extpos < len(extended):
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        slots = len(base) + 1  # number of possible insertion positions
        pos += delta + 1
        char += pos // slots
        if char > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % char)
            char = ord('?')
        pos = pos % slots
        base = base[:pos] + unichr(char) + base[pos:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base
-\r
def punycode_decode(text, errors):
    """Decode Punycode text.

    The input is split at its last "-": everything before it is the
    basic part and everything after it is the encoded extended part.
    Without a "-", the whole input is treated as the extended part.
    """
    delimiter = text.rfind("-")
    if delimiter != -1:
        base, extended = text[:delimiter], text[delimiter + 1:]
    else:
        base, extended = "", text
    # The digit decoder only accepts upper-case letters.
    return insertion_sort(unicode(base, "ascii", errors),
                          extended.upper(), errors)
-\r
-### Codec APIs\r
-\r
class Codec(codecs.Codec):
    """Punycode codec built on punycode_encode() and punycode_decode()."""

    def encode(self, input, errors='strict'):
        """Encode input and return (result, len(input)).

        The errors argument is accepted but not used by the encoder.
        """
        res = punycode_encode(input)
        return res, len(input)

    def decode(self, input, errors='strict'):
        """Decode input and return (result, len(input)).

        Raises:
            UnicodeError: If errors is not one of 'strict', 'replace' or
                'ignore'.
        """
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling " + errors)
        res = punycode_decode(input, errors)
        return res, len(input)
-\r
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that encodes each chunk independently."""

    def encode(self, input, final=False):
        # The final flag is not consulted; every call encodes its input
        # in full with punycode_encode().
        return punycode_encode(input)
-\r
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that decodes each chunk independently."""

    def decode(self, input, final=False):
        """Decode input with punycode_decode() using self.errors.

        The final flag is not consulted.

        Raises:
            UnicodeError: If self.errors is not one of 'strict',
                'replace' or 'ignore'.
        """
        if self.errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling " + self.errors)
        return punycode_decode(input, self.errors)
-\r
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
    pass
-\r
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
    pass
-\r
-### encodings module API\r
-\r
def getregentry():
    """Return the codecs.CodecInfo entry describing the punycode codec."""
    encode = Codec().encode
    decode = Codec().decode
    return codecs.CodecInfo(
        name='punycode',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )