mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	* Replaced list(<generator expression>) with list comprehension
	* Replaced dict(<generator expression>) with dict comprehension
	* Replaced set(<list literal>) with set literal
	* Replaced builtin func(<list comprehension>) with func(<generator expression>) when supported (e.g. any(), all(), tuple(), min(), and max())
		
			
				
	
	
		
			132 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			132 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
#! /usr/bin/env python3
 | 
						|
 | 
						|
"""Show file statistics by extension."""
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
 | 
						|
 | 
						|
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps a row label to a dict of counters keyed by column
    name.  Row labels are normalised file extensions (e.g. ``".py"``) or one
    of the pseudo-extensions ``"<none>"`` (file without extension),
    ``"<dir>"``, ``"<lnk>"`` and ``"<???>"`` (argument that was not found).
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively and regular files are analysed.
        Anything else is reported on stderr and counted under ``"<???>"``.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries.

        Entries are visited in sorted order.  Symbolic links are counted
        under ``"<lnk>"`` and never followed.  A directory that cannot be
        listed is reported on stderr and counted as ``unlistable``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect the counters for a single file.

        The whole file is read in binary mode.  A file containing a NUL byte
        is counted as ``binary`` and gets no line/word counts.  A file that
        cannot be opened is reported on stderr and counted as ``unopenable``.
        """
        # os.path.splitext() treats leading dots of the basename as part of
        # the root, so e.g. ".cvsignore" already yields an empty extension.
        ext = os.path.normcase(os.path.splitext(filename)[1])
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            with open(filename, "rb") as f:
                data = f.read()
        except OSError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # Decode as latin-1 so that every byte value is accepted; lines are
        # counted with str.splitlines() on the decoded text.
        self.addstats(ext, "lines", len(str(data, "latin-1").splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as a right-aligned table.

        One row is printed per extension (sorted), followed by a ``TOTAL``
        row; the header line is repeated at the bottom.  ``self.stats`` is
        left untouched, so the method may be called repeatedly, and it also
        works when nothing has been collected yet.
        """
        exts = sorted(self.stats)
        # The column keys are the union of all counters seen in any row.
        cols = sorted({key for ext in exts for key in self.stats[ext]})
        totals = {
            col: sum(self.stats[ext].get(col, 0) for ext in exts)
            for col in cols
        }
        # Build the rows locally instead of storing "TOTAL" in self.stats.
        rows = [(ext, self.stats[ext]) for ext in exts]
        rows.append(("TOTAL", totals))

        minwidth = 6
        # The label column must fit its header and the "TOTAL" label too.
        widths = [max(len("ext"), *(len(label) for label, _ in rows))]
        for col in cols:
            widths.append(max(
                minwidth,
                len(col),
                *(len("%d" % counts[col])
                  for _, counts in rows if col in counts)
            ))

        def printline(cells):
            for width, cell in zip(widths, cells):
                print("%*s" % (width, cell), end=' ')
            print()

        header = ["ext"] + cols
        printline(header)
        for label, counts in rows:
            # Counters missing from a row are shown as blank cells.
            printline([label] + [counts.get(col, "") for col in cols])
        printline(header)  # Another header at the bottom
 | 
						|
 | 
						|
 | 
						|
def main():
    """Report statistics for the command-line paths (default: current dir)."""
    # An empty argument list falls back to the current directory.
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()
 | 
						|
 | 
						|
 | 
						|
# Only run the report when executed as a script, not when imported.
if __name__ == "__main__":
    main()
 |