+++ /dev/null
-#! /usr/bin/env python\r
-\r
class Markov:
    """Markov chain over sliceable sequences with a bounded history.

    Training sequences are supplied through put(); get() then builds a new
    sequence by repeatedly choosing one of the continuations recorded for
    the current history.  Slices of the sequences are used as dictionary
    keys, so they must be hashable (str and tuple both work).
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- maximum number of trailing items that form a state.
        choice -- callable that returns one element of a list; it is
                  called with the list of recorded continuations.
        """
        self.histsize = histsize
        self.choice = choice
        # state -> list of continuations seen after that state.  The key
        # None holds the initial (empty) sequences; a None continuation
        # marks the end of a training sequence.
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was seen immediately after `state`."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # An empty slice of the same type seeds get() with the right type.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i-n):i], seq[i:i+1])
        # Clamp the start index: when len(seq) < n the unclamped value is
        # negative and would select only the tail of the sequence, a state
        # that get() (which clamps) never looks up.
        add(seq[max(0, len(seq)-n):], None)

    def get(self):
        """Return one generated sequence.

        Raises KeyError if put() has not been called yet, because no
        initial state has been recorded.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            history = seq[max(0, len(seq)-n):]
            item = choice(trans[history])
            if not item:
                # The end-of-sequence marker (None) was chosen.
                break
            seq += item
        return seq
-\r
-\r
def test():
    """Command-line driver: train a Markov chain on text and print output.

    Options are read from sys.argv: a single digit sets the history size
    (default 2), -c selects characters (default), -w selects words, -d
    raises and -q silences the debugging output.  The remaining arguments
    are input files; '-' or no argument means standard input.

    Each input is split into paragraphs at blank lines and every paragraph
    is fed to the chain.  Generated paragraphs are then printed forever,
    wrapped at 72 columns, until the process is interrupted.  Exits with
    status 2 on a bad option and returns early when no input produced any
    data.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        # Single-argument print(...) calls behave the same as a print
        # statement on Python 2 and as the print function on Python 3.
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)
    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9': histsize = int(o[1:])
        if o == '-c': do_words = False
        if o == '-d': debug += 1
        if o == '-q': debug = 0
        if o == '-w': do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            try:
                if debug: print('processing %s ...' % filename)
                text = f.read()
            finally:
                # Close only what was opened here, and do it even when the
                # read fails or is interrupted; stdin is left open so a
                # repeated '-' argument does not hit a closed stream.
                if f is not sys.stdin:
                    f.close()
            paralist = text.split('\n\n')
            for para in paralist:
                if debug > 1: print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug: print('done.')

    if debug > 1:
        # Dump only the states shorter than the full history size.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0: print('%r %s' % ('', m.trans['']))
        print('')
    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        line = []
        n = 0
        for w in words:
            if n + len(w) > limit:
                # Flush the current output line before it grows past the
                # limit (this emits an empty line if nothing is pending).
                print(' '.join(line))
                line = []
                n = 0
            line.append(w)
            n += len(w) + 1
        print(' '.join(line))
        # Blank line between generated paragraphs.
        print('')
-\r
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    test()