| 
									
										
										
										
											2010-12-30 22:11:50 +00:00
										 |  |  | #!/usr/bin/env python3 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | Markov chain simulation of words or characters. | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											1993-12-14 10:08:02 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | class Markov: | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     def __init__(self, histsize, choice): | 
					
						
							|  |  |  |         self.histsize = histsize | 
					
						
							|  |  |  |         self.choice = choice | 
					
						
							|  |  |  |         self.trans = {} | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     def add(self, state, next): | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         self.trans.setdefault(state, []).append(next) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     def put(self, seq): | 
					
						
							|  |  |  |         n = self.histsize | 
					
						
							|  |  |  |         add = self.add | 
					
						
							|  |  |  |         add(None, seq[:0]) | 
					
						
							|  |  |  |         for i in range(len(seq)): | 
					
						
							|  |  |  |             add(seq[max(0, i-n):i], seq[i:i+1]) | 
					
						
							|  |  |  |         add(seq[len(seq)-n:], None) | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     def get(self): | 
					
						
							|  |  |  |         choice = self.choice | 
					
						
							|  |  |  |         trans = self.trans | 
					
						
							|  |  |  |         n = self.histsize | 
					
						
							|  |  |  |         seq = choice(trans[None]) | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         while True: | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |             subseq = seq[max(0, len(seq)-n):] | 
					
						
							|  |  |  |             options = trans[subseq] | 
					
						
							|  |  |  |             next = choice(options) | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |             if not next: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             seq += next | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |         return seq | 
					
						
							| 
									
										
										
										
											1993-12-14 10:08:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1993-12-14 10:08:02 +00:00
										 |  |  | def test(): | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |     import sys, random, getopt | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     args = sys.argv[1:] | 
					
						
							|  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         opts, args = getopt.getopt(args, '0123456789cdwq') | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     except getopt.error: | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         print('Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0]) | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print('Options:') | 
					
						
							|  |  |  |         print('-#: 1-digit history size (default 2)') | 
					
						
							|  |  |  |         print('-c: characters (default)') | 
					
						
							|  |  |  |         print('-w: words') | 
					
						
							|  |  |  |         print('-d: more debugging output') | 
					
						
							|  |  |  |         print('-q: no debugging output') | 
					
						
							|  |  |  |         print('Input files (default stdin) are split in paragraphs') | 
					
						
							|  |  |  |         print('separated blank lines and each paragraph is split') | 
					
						
							|  |  |  |         print('in words by whitespace, then reconcatenated with') | 
					
						
							|  |  |  |         print('exactly one space separating words.') | 
					
						
							|  |  |  |         print('Output consists of paragraphs separated by blank') | 
					
						
							|  |  |  |         print('lines, where lines are no longer than 72 characters.') | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         sys.exit(2) | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     histsize = 2 | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |     do_words = False | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     debug = 1 | 
					
						
							|  |  |  |     for o, a in opts: | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         if '-0' <= o <= '-9': histsize = int(o[1:]) | 
					
						
							|  |  |  |         if o == '-c': do_words = False | 
					
						
							|  |  |  |         if o == '-d': debug += 1 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |         if o == '-q': debug = 0 | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         if o == '-w': do_words = True | 
					
						
							|  |  |  |     if not args: | 
					
						
							|  |  |  |         args = ['-'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     m = Markov(histsize, random.choice) | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         for filename in args: | 
					
						
							|  |  |  |             if filename == '-': | 
					
						
							|  |  |  |                 f = sys.stdin | 
					
						
							|  |  |  |                 if f.isatty(): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                     print('Sorry, need stdin from file') | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |                     continue | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 f = open(filename, 'r') | 
					
						
							| 
									
										
										
										
											2019-03-30 08:33:02 +02:00
										 |  |  |             with f: | 
					
						
							|  |  |  |                 if debug: print('processing', filename, '...') | 
					
						
							|  |  |  |                 text = f.read() | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |             paralist = text.split('\n\n') | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |             for para in paralist: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                 if debug > 1: print('feeding ...') | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |                 words = para.split() | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |                 if words: | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |                     if do_words: | 
					
						
							|  |  |  |                         data = tuple(words) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         data = ' '.join(words) | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |                     m.put(data) | 
					
						
							|  |  |  |     except KeyboardInterrupt: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print('Interrupted -- continue with data read so far') | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     if not m.trans: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print('No valid input files') | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |         return | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |     if debug: print('done.') | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |     if debug > 1: | 
					
						
							| 
									
										
										
										
											2007-08-06 21:07:53 +00:00
										 |  |  |         for key in m.trans.keys(): | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |             if key is None or len(key) < histsize: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                 print(repr(key), m.trans[key]) | 
					
						
							|  |  |  |         if histsize == 0: print(repr(''), m.trans['']) | 
					
						
							|  |  |  |         print() | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |     while True: | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |         data = m.get() | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |         if do_words: | 
					
						
							|  |  |  |             words = data | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             words = data.split() | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |         n = 0 | 
					
						
							|  |  |  |         limit = 72 | 
					
						
							|  |  |  |         for w in words: | 
					
						
							|  |  |  |             if n + len(w) > limit: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                 print() | 
					
						
							| 
									
										
										
										
											2004-07-18 05:56:09 +00:00
										 |  |  |                 n = 0 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |             print(w, end=' ') | 
					
						
							| 
									
										
										
										
											2009-10-11 08:42:09 +00:00
										 |  |  |             n += len(w) + 1 | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print() | 
					
						
							|  |  |  |         print() | 
					
						
							| 
									
										
										
										
											1993-12-14 10:08:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-09-11 16:34:35 +00:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     test() |