| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | """Conversion pipeline templates.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The problem: | 
					
						
							|  |  |  | ------------ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Suppose you have some data that you want to convert to another format | 
					
						
							|  |  |  | (e.g. from GIF image format to PPM image format).  Maybe the | 
					
						
							|  |  |  | conversion involves several steps (e.g. piping it through compress or | 
					
						
							|  |  |  | uuencode).  Some of the conversion steps may require that their input | 
					
						
							|  |  |  | is a disk file, others may be able to read standard input; similar for | 
					
						
							|  |  |  | their output.  The input to the entire conversion may also be read | 
					
						
							|  |  |  | from a disk file or from an open file, and similar for its output. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The module lets you construct a pipeline template by sticking one or | 
					
						
							|  |  |  | more conversion steps together.  It will take care of creating and | 
					
						
							|  |  |  | removing temporary files if they are necessary to hold intermediate | 
					
						
							|  |  |  | data.  You can then use the template to do conversions from many | 
					
						
							|  |  |  | different sources to many different destinations.  The temporary | 
					
						
							|  |  |  | file names used are different each time the template is used. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The templates are objects so you can create templates for many | 
					
						
							|  |  |  | different conversion steps and store them in a dictionary, for | 
					
						
							|  |  |  | instance. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Directions: | 
					
						
							|  |  |  | ----------- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To create a template: | 
					
						
							|  |  |  |     t = Template() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To add a conversion step to a template: | 
					
						
							|  |  |  |    t.append(command, kind) | 
					
						
							|  |  |  | where kind is a string of two characters: the first is '-' if the | 
					
						
							|  |  |  | command reads its standard input or 'f' if it requires a file; the | 
					
						
							|  |  |  | second likewise for the output. The command must be valid /bin/sh | 
					
						
							|  |  |  | syntax.  If input or output files are required, they are passed as | 
					
						
							|  |  |  | $IN and $OUT; otherwise, it must be possible to use the command in | 
					
						
							|  |  |  | a pipeline. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To add a conversion step at the beginning: | 
					
						
							|  |  |  |    t.prepend(command, kind) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To convert a file to another file using a template: | 
					
						
							|  |  |  |   sts = t.copy(infile, outfile) | 
					
						
							|  |  |  | If infile or outfile are the empty string, standard input is read or | 
					
						
							|  |  |  | standard output is written, respectively.  The return value is the | 
					
						
							|  |  |  | exit status of the conversion pipeline. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To open a file for reading or writing through a conversion pipeline: | 
					
						
							|  |  |  |    fp = t.open(file, mode) | 
					
						
							|  |  |  | where mode is 'r' to read the file, or 'w' to write it -- just like | 
					
						
							|  |  |  | for the built-in function open() or for os.popen(). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To create a new template object initialized to a given one: | 
					
						
							|  |  |  |    t2 = t.clone() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | For an example, see the function test() at the end of the file. | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import sys | 
					
						
							| 
									
										
										
										
											1997-10-22 21:00:49 +00:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import tempfile | 
					
						
							|  |  |  | import string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Conversion step kinds | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | FILEIN_FILEOUT = 'ff'			# Must read & write real files | 
					
						
							|  |  |  | STDIN_FILEOUT  = '-f'			# Must write a real file | 
					
						
							|  |  |  | FILEIN_STDOUT  = 'f-'			# Must read a real file | 
					
						
							|  |  |  | STDIN_STDOUT   = '--'			# Normal pipeline element | 
					
						
							|  |  |  | SOURCE         = '.-'			# Must be first, writes stdout | 
					
						
							|  |  |  | SINK           = '-.'			# Must be last, reads stdin | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ | 
					
						
							|  |  |  | 	     SOURCE, SINK] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Template: | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 	"""Class representing a pipeline template.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1993-12-17 15:25:27 +00:00
										 |  |  | 	def __init__(self): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""Template() returns a fresh pipeline template.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		self.debugging = 0 | 
					
						
							|  |  |  | 		self.reset() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def __repr__(self): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.__repr__() implements `t`.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		return '<Template instance, steps=' + `self.steps` + '>' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def reset(self): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.reset() restores a pipeline template to its initial state.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		self.steps = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def clone(self): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.clone() returns a new pipeline template with identical
 | 
					
						
							|  |  |  | 		initial state as the current one."""
 | 
					
						
							| 
									
										
										
										
											1993-12-17 15:25:27 +00:00
										 |  |  | 		t = Template() | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		t.steps = self.steps[:] | 
					
						
							|  |  |  | 		t.debugging = self.debugging | 
					
						
							|  |  |  | 		return t | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def debug(self, flag): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.debug(flag) turns debugging on or off.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		self.debugging = flag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def append(self, cmd, kind): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.append(cmd, kind) adds a new step at the end.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		if type(cmd) <> type(''): | 
					
						
							|  |  |  | 			raise TypeError, \ | 
					
						
							|  |  |  | 			      'Template.append: cmd must be a string' | 
					
						
							|  |  |  | 		if kind not in stepkinds: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.append: bad kind ' + `kind` | 
					
						
							|  |  |  | 		if kind == SOURCE: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.append: SOURCE can only be prepended' | 
					
						
							|  |  |  | 		if self.steps <> [] and self.steps[-1][1] == SINK: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.append: already ends with SINK' | 
					
						
							| 
									
										
										
										
											1997-10-22 21:00:49 +00:00
										 |  |  | 		if kind[0] == 'f' and not re.search('\$IN\b', cmd): | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.append: missing $IN in cmd' | 
					
						
							| 
									
										
										
										
											1997-10-22 21:00:49 +00:00
										 |  |  | 		if kind[1] == 'f' and not re.search('\$OUT\b', cmd): | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.append: missing $OUT in cmd' | 
					
						
							|  |  |  | 		self.steps.append((cmd, kind)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def prepend(self, cmd, kind): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.prepend(cmd, kind) adds a new step at the front.""" | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		if type(cmd) <> type(''): | 
					
						
							|  |  |  | 			raise TypeError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: cmd must be a string' | 
					
						
							|  |  |  | 		if kind not in stepkinds: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: bad kind ' + `kind` | 
					
						
							|  |  |  | 		if kind == SINK: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: SINK can only be appended' | 
					
						
							|  |  |  | 		if self.steps <> [] and self.steps[0][1] == SOURCE: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: already begins with SOURCE' | 
					
						
							| 
									
										
										
										
											1997-10-22 21:00:49 +00:00
										 |  |  | 		if kind[0] == 'f' and not re.search('\$IN\b', cmd): | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: missing $IN in cmd' | 
					
						
							| 
									
										
										
										
											1997-10-22 21:00:49 +00:00
										 |  |  | 		if kind[1] == 'f' and not re.search('\$OUT\b', cmd): | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.prepend: missing $OUT in cmd' | 
					
						
							|  |  |  | 		self.steps.insert(0, (cmd, kind)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def open(self, file, rw): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.open(file, rw) returns a pipe or file object open for
 | 
					
						
							|  |  |  | 		reading or writing; the file is the other end of the pipeline."""
 | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		if rw == 'r': | 
					
						
							|  |  |  | 			return self.open_r(file) | 
					
						
							|  |  |  | 		if rw == 'w': | 
					
						
							|  |  |  | 			return self.open_w(file) | 
					
						
							|  |  |  | 		raise ValueError, \ | 
					
						
							|  |  |  | 		      'Template.open: rw must be \'r\' or \'w\', not ' + `rw` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def open_r(self, file): | 
					
						
							| 
									
										
										
										
											2000-02-04 15:10:34 +00:00
										 |  |  | 		"""t.open_r(file) and t.open_w(file) implement
 | 
					
						
							|  |  |  | 		t.open(file, 'r') and t.open(file, 'w') respectively."""
 | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 		if self.steps == []: | 
					
						
							|  |  |  | 			return open(file, 'r') | 
					
						
							|  |  |  | 		if self.steps[-1][1] == SINK: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.open_r: pipeline ends width SINK' | 
					
						
							|  |  |  | 		cmd = self.makepipeline(file, '') | 
					
						
							|  |  |  | 		return os.popen(cmd, 'r') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def open_w(self, file): | 
					
						
							|  |  |  | 		if self.steps == []: | 
					
						
							|  |  |  | 			return open(file, 'w') | 
					
						
							|  |  |  | 		if self.steps[0][1] == SOURCE: | 
					
						
							|  |  |  | 			raise ValueError, \ | 
					
						
							|  |  |  | 			      'Template.open_w: pipeline begins with SOURCE' | 
					
						
							|  |  |  | 		cmd = self.makepipeline('', file) | 
					
						
							|  |  |  | 		return os.popen(cmd, 'w') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def copy(self, infile, outfile): | 
					
						
							|  |  |  | 		return os.system(self.makepipeline(infile, outfile)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	def makepipeline(self, infile, outfile): | 
					
						
							|  |  |  | 		cmd = makepipeline(infile, self.steps, outfile) | 
					
						
							|  |  |  | 		if self.debugging: | 
					
						
							|  |  |  | 			print cmd | 
					
						
							|  |  |  | 			cmd = 'set -x; ' + cmd | 
					
						
							|  |  |  | 		return cmd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def makepipeline(infile, steps, outfile): | 
					
						
							|  |  |  | 	# Build a list with for each command: | 
					
						
							|  |  |  | 	# [input filename or '', command string, kind, output filename or ''] | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	list = [] | 
					
						
							|  |  |  | 	for cmd, kind in steps: | 
					
						
							|  |  |  | 		list.append(['', cmd, kind, '']) | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	# Make sure there is at least one step | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	if list == []: | 
					
						
							|  |  |  | 		list.append(['', 'cat', '--', '']) | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	# Take care of the input and output ends | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	[cmd, kind] = list[0][1:3] | 
					
						
							|  |  |  | 	if kind[0] == 'f' and not infile: | 
					
						
							|  |  |  | 		list.insert(0, ['', 'cat', '--', '']) | 
					
						
							|  |  |  | 	list[0][0] = infile | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	[cmd, kind] = list[-1][1:3] | 
					
						
							|  |  |  | 	if kind[1] == 'f' and not outfile: | 
					
						
							|  |  |  | 		list.append(['', 'cat', '--', '']) | 
					
						
							|  |  |  | 	list[-1][-1] = outfile | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	# Invent temporary files to connect stages that need files | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	garbage = [] | 
					
						
							|  |  |  | 	for i in range(1, len(list)): | 
					
						
							|  |  |  | 		lkind = list[i-1][2] | 
					
						
							|  |  |  | 		rkind = list[i][2] | 
					
						
							|  |  |  | 		if lkind[1] == 'f' or rkind[0] == 'f': | 
					
						
							|  |  |  | 			temp = tempfile.mktemp() | 
					
						
							|  |  |  | 			garbage.append(temp) | 
					
						
							|  |  |  | 			list[i-1][-1] = list[i][0] = temp | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	for item in list: | 
					
						
							|  |  |  | 		[inf, cmd, kind, outf] = item | 
					
						
							|  |  |  | 		if kind[1] == 'f': | 
					
						
							|  |  |  | 			cmd = 'OUT=' + quote(outf) + '; ' + cmd | 
					
						
							|  |  |  | 		if kind[0] == 'f': | 
					
						
							|  |  |  | 			cmd = 'IN=' + quote(inf) + '; ' + cmd | 
					
						
							|  |  |  | 		if kind[0] == '-' and inf: | 
					
						
							|  |  |  | 			cmd = cmd + ' <' + quote(inf) | 
					
						
							|  |  |  | 		if kind[1] == '-' and outf: | 
					
						
							|  |  |  | 			cmd = cmd + ' >' + quote(outf) | 
					
						
							|  |  |  | 		item[1] = cmd | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	cmdlist = list[0][1] | 
					
						
							|  |  |  | 	for item in list[1:]: | 
					
						
							|  |  |  | 		[cmd, kind] = item[1:3] | 
					
						
							|  |  |  | 		if item[0] == '': | 
					
						
							|  |  |  | 			if 'f' in kind: | 
					
						
							|  |  |  | 				cmd = '{ ' + cmd + '; }' | 
					
						
							|  |  |  | 			cmdlist = cmdlist + ' |\n' + cmd | 
					
						
							|  |  |  | 		else: | 
					
						
							|  |  |  | 			cmdlist = cmdlist + '\n' + cmd | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	if garbage: | 
					
						
							|  |  |  | 		rmcmd = 'rm -f' | 
					
						
							|  |  |  | 		for file in garbage: | 
					
						
							|  |  |  | 			rmcmd = rmcmd + ' ' + quote(file) | 
					
						
							|  |  |  | 		trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15' | 
					
						
							|  |  |  | 		cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd | 
					
						
							|  |  |  | 	# | 
					
						
							|  |  |  | 	return cmdlist | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Reliably quote a string as a single argument for /bin/sh | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _safechars = string.letters + string.digits + '!@%_-+=:,./'	# Safe unquoted | 
					
						
							|  |  |  | _funnychars = '"`$\\'				# Unsafe inside "double quotes" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def quote(file): | 
					
						
							|  |  |  | 	for c in file: | 
					
						
							|  |  |  | 		if c not in _safechars: | 
					
						
							|  |  |  | 			break | 
					
						
							|  |  |  | 	else: | 
					
						
							|  |  |  | 		return file | 
					
						
							|  |  |  | 	if '\'' not in file: | 
					
						
							|  |  |  | 		return '\'' + file + '\'' | 
					
						
							|  |  |  | 	res = '' | 
					
						
							|  |  |  | 	for c in file: | 
					
						
							|  |  |  | 		if c in _funnychars: | 
					
						
							|  |  |  | 			c = '\\' + c | 
					
						
							|  |  |  | 		res = res + c | 
					
						
							|  |  |  | 	return '"' + res + '"' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Small test program and example | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test(): | 
					
						
							|  |  |  | 	print 'Testing...' | 
					
						
							| 
									
										
										
										
											1993-12-17 15:25:27 +00:00
										 |  |  | 	t = Template() | 
					
						
							| 
									
										
										
										
											1992-10-18 17:09:59 +00:00
										 |  |  | 	t.append('togif $IN $OUT', 'ff') | 
					
						
							|  |  |  | 	t.append('giftoppm', '--') | 
					
						
							|  |  |  | 	t.append('ppmtogif >$OUT', '-f') | 
					
						
							|  |  |  | 	t.append('fromgif $IN $OUT', 'ff') | 
					
						
							|  |  |  | 	t.debug(1) | 
					
						
							|  |  |  | 	FILE = '/usr/local/images/rgb/rogues/guido.rgb' | 
					
						
							|  |  |  | 	t.copy(FILE, '@temp') | 
					
						
							|  |  |  | 	print 'Done.' |