| 
									
										
										
										
											2010-08-02 22:53:22 +00:00
										 |  |  | #! /usr/bin/env python3 | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | """Show file statistics by extension.""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  | class Stats: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.stats = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def statargs(self, args): | 
					
						
							|  |  |  |         for arg in args: | 
					
						
							|  |  |  |             if os.path.isdir(arg): | 
					
						
							|  |  |  |                 self.statdir(arg) | 
					
						
							|  |  |  |             elif os.path.isfile(arg): | 
					
						
							|  |  |  |                 self.statfile(arg) | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |                 sys.stderr.write("Can't find %s\n" % arg) | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |                 self.addstats("<???>", "unknown", 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def statdir(self, dir): | 
					
						
							|  |  |  |         self.addstats("<dir>", "dirs", 1) | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             names = os.listdir(dir) | 
					
						
							| 
									
										
										
										
											2012-12-24 19:58:48 +02:00
										 |  |  |         except OSError as err: | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             sys.stderr.write("Can't list %s: %s\n" % (dir, err)) | 
					
						
							|  |  |  |             self.addstats("<dir>", "unlistable", 1) | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |             return | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |         for name in sorted(names): | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |             if name.startswith(".#"): | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |                 continue  # Skip CVS temp files | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |             if name.endswith("~"): | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |                 continue  # Skip Emacs backup files | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |             full = os.path.join(dir, name) | 
					
						
							|  |  |  |             if os.path.islink(full): | 
					
						
							|  |  |  |                 self.addstats("<lnk>", "links", 1) | 
					
						
							|  |  |  |             elif os.path.isdir(full): | 
					
						
							|  |  |  |                 self.statdir(full) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 self.statfile(full) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |     def statfile(self, filename): | 
					
						
							|  |  |  |         head, ext = os.path.splitext(filename) | 
					
						
							|  |  |  |         head, base = os.path.split(filename) | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |         if ext == base: | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |             ext = ""  # E.g. .cvsignore is deemed not to have an extension | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |         ext = os.path.normcase(ext) | 
					
						
							|  |  |  |         if not ext: | 
					
						
							|  |  |  |             ext = "<none>" | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |         self.addstats(ext, "files", 1) | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |             with open(filename, "rb") as f: | 
					
						
							|  |  |  |                 data = f.read() | 
					
						
							| 
									
										
										
										
											2007-01-10 16:19:56 +00:00
										 |  |  |         except IOError as err: | 
					
						
							| 
									
										
										
										
											2006-04-21 10:40:58 +00:00
										 |  |  |             sys.stderr.write("Can't open %s: %s\n" % (filename, err)) | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |             self.addstats(ext, "unopenable", 1) | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         self.addstats(ext, "bytes", len(data)) | 
					
						
							| 
									
										
										
										
											2007-09-27 22:39:12 +00:00
										 |  |  |         if b'\0' in data: | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |             self.addstats(ext, "binary", 1) | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         if not data: | 
					
						
							|  |  |  |             self.addstats(ext, "empty", 1) | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |         # self.addstats(ext, "chars", len(data)) | 
					
						
							| 
									
										
										
										
											2007-09-27 22:39:12 +00:00
										 |  |  |         lines = str(data, "latin-1").splitlines() | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |         self.addstats(ext, "lines", len(lines)) | 
					
						
							|  |  |  |         del lines | 
					
						
							|  |  |  |         words = data.split() | 
					
						
							|  |  |  |         self.addstats(ext, "words", len(words)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def addstats(self, ext, key, n): | 
					
						
							|  |  |  |         d = self.stats.setdefault(ext, {}) | 
					
						
							|  |  |  |         d[key] = d.get(key, 0) + n | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def report(self): | 
					
						
							| 
									
										
										
										
											2007-09-27 22:39:12 +00:00
										 |  |  |         exts = sorted(self.stats) | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |         # Get the column keys | 
					
						
							|  |  |  |         columns = {} | 
					
						
							|  |  |  |         for ext in exts: | 
					
						
							|  |  |  |             columns.update(self.stats[ext]) | 
					
						
							| 
									
										
										
										
											2007-09-27 22:39:12 +00:00
										 |  |  |         cols = sorted(columns) | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |         colwidth = {} | 
					
						
							| 
									
										
										
										
											2017-05-18 07:35:54 -07:00
										 |  |  |         colwidth["ext"] = max(map(len, exts)) | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |         minwidth = 6 | 
					
						
							|  |  |  |         self.stats["TOTAL"] = {} | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |         for col in cols: | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |             total = 0 | 
					
						
							|  |  |  |             cw = max(minwidth, len(col)) | 
					
						
							|  |  |  |             for ext in exts: | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |                 value = self.stats[ext].get(col) | 
					
						
							|  |  |  |                 if value is None: | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |                     w = 0 | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  |                 else: | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |                     w = len("%d" % value) | 
					
						
							|  |  |  |                     total += value | 
					
						
							|  |  |  |                 cw = max(cw, w) | 
					
						
							|  |  |  |             cw = max(cw, len(str(total))) | 
					
						
							|  |  |  |             colwidth[col] = cw | 
					
						
							|  |  |  |             self.stats["TOTAL"][col] = total | 
					
						
							|  |  |  |         exts.append("TOTAL") | 
					
						
							|  |  |  |         for ext in exts: | 
					
						
							|  |  |  |             self.stats[ext]["ext"] = ext | 
					
						
							|  |  |  |         cols.insert(0, "ext") | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |         def printheader(): | 
					
						
							|  |  |  |             for col in cols: | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |                 print("%*s" % (colwidth[col], col), end=' ') | 
					
						
							|  |  |  |             print() | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-04-09 19:10:46 +00:00
										 |  |  |         printheader() | 
					
						
							|  |  |  |         for ext in exts: | 
					
						
							|  |  |  |             for col in cols: | 
					
						
							|  |  |  |                 value = self.stats[ext].get(col, "") | 
					
						
							| 
									
										
										
										
											2007-08-03 17:06:41 +00:00
										 |  |  |                 print("%*s" % (colwidth[col], value), end=' ') | 
					
						
							|  |  |  |             print() | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  |         printheader()  # Another header at the bottom | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							|  |  |  |     args = sys.argv[1:] | 
					
						
							|  |  |  |     if not args: | 
					
						
							|  |  |  |         args = [os.curdir] | 
					
						
							|  |  |  |     s = Stats() | 
					
						
							|  |  |  |     s.statargs(args) | 
					
						
							|  |  |  |     s.report() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-09 12:24:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-01 14:41:25 +00:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |