| 
									
										
										
										
											1996-07-30 19:07:18 +00:00
										 |  |  | # A parallelized "find(1)" using the thread module. | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # This demonstrates the use of a work queue and worker threads. | 
					
						
							|  |  |  | # It really does do more stats/sec when using multiple threads, | 
					
						
							|  |  |  | # although the improvement is only about 20-30 percent. | 
					
						
							| 
									
										
										
										
											2002-10-18 18:20:33 +00:00
										 |  |  | # (That was 8 years ago.  In 2002, on Linux, I can't measure | 
					
						
							|  |  |  | # a speedup. :-( ) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # I'm too lazy to write a command line parser for the full find(1) | 
					
						
							|  |  |  | # command line syntax, so the predicate it searches for is wired-in, | 
					
						
							|  |  |  | # see function selector() below.  (It currently searches for files with | 
					
						
							| 
									
										
										
										
											2002-10-18 18:20:33 +00:00
										 |  |  | # world write permission.) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Usage: parfind.py [-w nworkers] [directory] ... | 
					
						
							| 
									
										
										
										
											2002-10-18 18:20:33 +00:00
										 |  |  | # Default nworkers is 4 | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import getopt | 
					
						
							|  |  |  | import time | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | from stat import * | 
					
						
							| 
									
										
										
										
											2008-05-25 13:05:15 +00:00
										 |  |  | import _thread as thread | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Work queue class.  Usage: | 
					
						
							|  |  |  | #   wq = WorkQ() | 
					
						
							|  |  |  | #   wq.addwork(func, (arg1, arg2, ...)) # one or more calls | 
					
						
							|  |  |  | #   wq.run(nworkers) | 
					
						
							|  |  |  | # The work is done when wq.run() completes. | 
					
						
							|  |  |  | # The function calls executed by the workers may add more work. | 
					
						
							|  |  |  | # Don't use keyboard interrupts! | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class WorkQ: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     # Invariants: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # - busy and work are only modified when mutex is locked | 
					
						
							|  |  |  |     # - len(work) is the number of jobs ready to be taken | 
					
						
							|  |  |  |     # - busy is the number of jobs being done | 
					
						
							|  |  |  |     # - todo is locked iff there is no work and somebody is busy | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.mutex = thread.allocate() | 
					
						
							|  |  |  |         self.todo = thread.allocate() | 
					
						
							|  |  |  |         self.todo.acquire() | 
					
						
							|  |  |  |         self.work = [] | 
					
						
							|  |  |  |         self.busy = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def addwork(self, func, args): | 
					
						
							|  |  |  |         job = (func, args) | 
					
						
							|  |  |  |         self.mutex.acquire() | 
					
						
							|  |  |  |         self.work.append(job) | 
					
						
							|  |  |  |         self.mutex.release() | 
					
						
							|  |  |  |         if len(self.work) == 1: | 
					
						
							|  |  |  |             self.todo.release() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _getwork(self): | 
					
						
							|  |  |  |         self.todo.acquire() | 
					
						
							|  |  |  |         self.mutex.acquire() | 
					
						
							|  |  |  |         if self.busy == 0 and len(self.work) == 0: | 
					
						
							|  |  |  |             self.mutex.release() | 
					
						
							|  |  |  |             self.todo.release() | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  |         job = self.work[0] | 
					
						
							|  |  |  |         del self.work[0] | 
					
						
							|  |  |  |         self.busy = self.busy + 1 | 
					
						
							|  |  |  |         self.mutex.release() | 
					
						
							|  |  |  |         if len(self.work) > 0: | 
					
						
							|  |  |  |             self.todo.release() | 
					
						
							|  |  |  |         return job | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _donework(self): | 
					
						
							|  |  |  |         self.mutex.acquire() | 
					
						
							|  |  |  |         self.busy = self.busy - 1 | 
					
						
							|  |  |  |         if self.busy == 0 and len(self.work) == 0: | 
					
						
							|  |  |  |             self.todo.release() | 
					
						
							|  |  |  |         self.mutex.release() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _worker(self): | 
					
						
							|  |  |  |         time.sleep(0.00001)     # Let other threads run | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             job = self._getwork() | 
					
						
							|  |  |  |             if not job: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             func, args = job | 
					
						
							| 
									
										
										
										
											2006-03-17 08:00:19 +00:00
										 |  |  |             func(*args) | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |             self._donework() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def run(self, nworkers): | 
					
						
							|  |  |  |         if not self.work: | 
					
						
							|  |  |  |             return # Nothing to do | 
					
						
							|  |  |  |         for i in range(nworkers-1): | 
					
						
							|  |  |  |             thread.start_new(self._worker, ()) | 
					
						
							|  |  |  |         self._worker() | 
					
						
							|  |  |  |         self.todo.acquire() | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Main program | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     nworkers = 4 | 
					
						
							|  |  |  |     opts, args = getopt.getopt(sys.argv[1:], '-w:') | 
					
						
							|  |  |  |     for opt, arg in opts: | 
					
						
							|  |  |  |         if opt == '-w': | 
					
						
							| 
									
										
										
										
											2006-03-17 08:00:19 +00:00
										 |  |  |             nworkers = int(arg) | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     if not args: | 
					
						
							|  |  |  |         args = [os.curdir] | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     wq = WorkQ() | 
					
						
							|  |  |  |     for dir in args: | 
					
						
							|  |  |  |         wq.addwork(find, (dir, selector, wq)) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     t1 = time.time() | 
					
						
							|  |  |  |     wq.run(nworkers) | 
					
						
							|  |  |  |     t2 = time.time() | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |     sys.stderr.write('Total time %r sec.\n' % (t2-t1)) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # The predicate -- defines what files we look for. | 
					
						
							|  |  |  | # Feel free to change this to suit your purpose | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def selector(dir, name, fullname, stat): | 
					
						
							| 
									
										
										
										
											2002-10-18 18:20:33 +00:00
										 |  |  |     # Look for world writable files that are not symlinks | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |     return (stat[ST_MODE] & 0o002) != 0 and not S_ISLNK(stat[ST_MODE]) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # The find procedure -- calls wq.addwork() for subdirectories | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def find(dir, pred, wq): | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |     try: | 
					
						
							|  |  |  |         names = os.listdir(dir) | 
					
						
							| 
									
										
										
										
											2007-01-10 16:19:56 +00:00
										 |  |  |     except os.error as msg: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |         print(repr(dir), ':', msg) | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |         return | 
					
						
							|  |  |  |     for name in names: | 
					
						
							|  |  |  |         if name not in (os.curdir, os.pardir): | 
					
						
							|  |  |  |             fullname = os.path.join(dir, name) | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 stat = os.lstat(fullname) | 
					
						
							| 
									
										
										
										
											2007-01-10 16:19:56 +00:00
										 |  |  |             except os.error as msg: | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                 print(repr(fullname), ':', msg) | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |                 continue | 
					
						
							|  |  |  |             if pred(dir, name, fullname, stat): | 
					
						
							| 
									
										
										
										
											2007-07-17 20:59:35 +00:00
										 |  |  |                 print(fullname) | 
					
						
							| 
									
										
										
										
											2001-01-21 07:07:30 +00:00
										 |  |  |             if S_ISDIR(stat[ST_MODE]): | 
					
						
							|  |  |  |                 if not os.path.ismount(fullname): | 
					
						
							|  |  |  |                     wq.addwork(find, (fullname, pred, wq)) | 
					
						
							| 
									
										
										
										
											1993-12-17 14:45:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Call the main program | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | main() |