diff --git a/README.md b/README.md index 3138572..eb20f27 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,16 @@ Note that xxHash is not a cryptographic hash function and therefore may produce ## Usage -```console -Usage: sherly -f inputfolder1 inputfolder2 inputfolder3 [options]... - - -h / -help show this - -f / -folder all the folders you want to scan for (see example above!) - -c / -color enable colored messages - -t / -threads override default Thread number (default is usually number of cores * 2) - -p / -progress enable progress indicator - -d / -delete delete all dups except one without asking first - -n / -noinput skip all user input - -debug debug stuff +``` +usage: xxSherly.jar [options] folder1 folder2 ... + -c,--color enable colored output + -d,--delete delete all dups except one, without asking first + -h,--help show this help message + -n,--noinput skip all user input + -p,--progress enable progress indicator + -t,--threads override default thread number (defaults to the + number of cores) + -v,--verbose more verbose output ``` ## Build @@ -43,7 +42,7 @@ mvn package assembly:single ## Speed comparison -I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands: +I let Sherly v1.1.4 and xxSherly v1.0 find duplicates in my Music Library (containing `.wav` files) using the following commands: ```bash time java -jar Bin/sherly.jar -n -f ~/Music/ diff --git a/pom.xml b/pom.xml index d78e8dc..fd7aae9 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 +25,11 @@ commons-codec 1.15 + + commons-cli + commons-cli + 1.5.0 + diff --git a/src/main/java/net/chaoticbyte/xxsherly/App.java b/src/main/java/net/chaoticbyte/xxsherly/App.java index fe53f69..b35eae4 100644 --- a/src/main/java/net/chaoticbyte/xxsherly/App.java +++ b/src/main/java/net/chaoticbyte/xxsherly/App.java @@ -1,9 +1,9 @@ package net.chaoticbyte.xxsherly; +import java.io.File; import java.io.IOException; import java.nio.file.Files; import 
java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -11,130 +11,142 @@ import java.util.Scanner; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; public class App { + public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ..."; + public static int completedThreads = 0; public static int progress = 0; - public static HashMap> fileMap = new HashMap<>(); + public static HashMap> fileMap = new HashMap<>(); + + public static boolean doTheColorThingy = false; public static void main(String[] args) throws InterruptedException { - boolean doTheColorThingy = false; + + // Arguments + List folderList = new ArrayList<>(); boolean showProgress = false; boolean deleteDups = false; - boolean recordFolder = false; - boolean recordThreads = false; - int saidThreads = 0; - boolean showDebug = false; + boolean verbose = false; boolean noInput = false; - boolean help = false; + boolean displayHelp = false; + int requestedThreads = 0; - List paths = new ArrayList<>(); + // CLI - for(String i : args) { - if (recordFolder) { - if(Files.isDirectory(Paths.get(i))) { - paths.add(i); - } else {recordFolder = false;} + HelpFormatter helpFormatter = new HelpFormatter(); + + Options commandlineOptions = new Options(); + commandlineOptions.addOption("c", "color", false, "enable colored output"); + commandlineOptions.addOption("t", "threads", true, "override default thread number (defaults to the number of cores)"); + commandlineOptions.addOption("p", "progress", false, "enable progress indicator"); + commandlineOptions.addOption("d", "delete", false, "delete all dups except one, without asking first"); + 
commandlineOptions.addOption("n", "noinput", false, "skip all user input"); + commandlineOptions.addOption("v", "verbose", false, "more verbose output"); + commandlineOptions.addOption("h", "help", false, "show this help message"); + + try { + CommandLine arguments = new DefaultParser().parse(commandlineOptions, args, false); + // Get folder paths + for (String folderArgument : arguments.getArgList()) { + File folder = new File(folderArgument); + if (folder.isDirectory() && folder.canRead()) folderList.add(folder); + else System.err.println(folderArgument + " is not a folder or isn't readable."); } - if (recordThreads) { - saidThreads = Integer.parseInt(i); - recordThreads = false; - } - if (i.equalsIgnoreCase("-c") || i.equalsIgnoreCase("-color")) { doTheColorThingy = true;} - if (i.equalsIgnoreCase("-p") || i.equalsIgnoreCase("-progress")) { showProgress = true;} - if (i.equalsIgnoreCase("-f") || i.equalsIgnoreCase("-folder")) { recordFolder = true;} - if (i.equalsIgnoreCase("-t") || i.equalsIgnoreCase("-threads")) { recordThreads = true;} - if (i.equalsIgnoreCase("-d") || i.equalsIgnoreCase("-delete")) { deleteDups = true;} - if (i.equalsIgnoreCase("-n") || i.equalsIgnoreCase("-noinput")) { noInput = true; } - if (i.equalsIgnoreCase("-h") || i.equalsIgnoreCase("-help")) { help = true;} - if (i.equalsIgnoreCase("-debug")) { showDebug = true;} - + // Get arguments & options + doTheColorThingy = arguments.hasOption("c"); + showProgress = arguments.hasOption("p"); + deleteDups = arguments.hasOption("d"); + verbose = arguments.hasOption("v"); + noInput = arguments.hasOption("n"); + displayHelp = arguments.hasOption("h"); + requestedThreads = Integer.parseInt(arguments.getOptionValue("t", "0")); } - - if (help) { - System.out.println("Usage: sherly -f inputfolder1 inputfolder2 inputfolder3 [options]..."); - System.out.println(" "); - System.out.println(" -h / -help show this"); - System.out.println(" -f / -folder all the folders you want to scan for (see example 
above!)"); - System.out.println(" -c / -color enable colored messages"); - System.out.println(" -t / -threads override default Thread number (default is usually number of cores * 2)"); - System.out.println(" -p / -progress enable progress indicator"); - System.out.println(" -d / -delete delete all dups except one without asking first"); - System.out.println(" -n / -noinput skip all user input"); - System.out.println(" -debug debug stuff"); - return; - } - if (paths.size() == 0) { - System.out.println("Aborted, no Folders Found!"); + catch (ParseException | NumberFormatException e) { + helpFormatter.printHelp(usageHelp, commandlineOptions); + System.err.println(); + System.err.println(e.getMessage()); return; } - if (showDebug) { - System.out.println("Folders: " + paths.size()); - System.out.println("Color: " + doTheColorThingy); - System.out.println("Delete: " + deleteDups); - System.out.println("Progressbar: " + showProgress); - System.out.println("Commanded Threads " + saidThreads); + if (displayHelp) { + helpFormatter.printHelp(usageHelp, commandlineOptions); + return; } - List pathList = new ArrayList<>(); - List allFiles = new ArrayList<>(); - - for (String folder : paths) { - try (Stream stream = Files.walk(Paths.get(folder))) { - pathList = stream.map(Path::normalize).filter(Files::isRegularFile).collect(Collectors.toList()); - } catch (IOException e) { - e.printStackTrace(); - } - allFiles.addAll(pathList); + if (folderList.size() < 1) { + System.err.println("No valid folders specified."); + helpFormatter.printHelp(usageHelp, commandlineOptions); + return; } - // calculations for multithreading - //The number of Cores or better said Threads that can be used - int availableThreads = Runtime.getRuntime().availableProcessors(); - if (saidThreads != 0) {availableThreads = saidThreads;} + if (verbose) { + System.out.println("Arguments:");; + System.out.println(" Folders: " + folderList.size()); + System.out.println(" Color: " + doTheColorThingy); + 
System.out.println(" Delete: " + deleteDups); + System.out.println(" Progress: " + showProgress); + } - //just the number of All Files in all Folders taken from the Args - int filesToBeDone = allFiles.size(); + // Calculations for multithreading + // The number of Cores or better said Threads that can be used + int availableProcessors = Runtime.getRuntime().availableProcessors(); + int nThreads = availableProcessors; + if (requestedThreads > 0) nThreads = requestedThreads; + if (verbose) System.out.println("Threads: " + nThreads); - //Every Thread that is going to be started gets a range of files - //They are seperated and are called sections - int sections = filesToBeDone / availableThreads; - - for (int i = 1; i <= availableThreads; i++) { - - List sectionedList = new ArrayList<>(); - - //Here the different Threads are being started - //Usually the separation gives the first threads the same number of files to be working on and the last one is given all the files that could not be separetated - if (i == availableThreads) { - for (int x = (sections * i) - (sections); x < filesToBeDone; x++) { - sectionedList.add(allFiles.get(x)); - } - } else { - for (int x = (sections * i) - (sections); x < (sections * i); x++) { - sectionedList.add(allFiles.get(x)); - - } + // Find all files + List files = new ArrayList<>(); + for (File folder : folderList) { + try (Stream stream = Files.walk(folder.toPath())) { + List filePaths = stream + .filter(Files::isReadable) + .filter(Files::isRegularFile) + .filter(f -> !Files.isSymbolicLink(f)) + .collect(Collectors.toList()); + filePaths.forEach((filePath) -> { + files.add(filePath.toFile()); + }); } + catch (IOException e) { + System.out.println(e.getMessage()); + return; + } + } + int nFiles = files.size(); + if (verbose) System.out.println("Files: " + nFiles); - //Start Multithreading - //sectionedList gives the thread their Assigned Part of Files + // Every Thread that is going to be started gets a range of files + // They are 
separated and are called sections + int sections = nFiles / nThreads; + for (int i = 1; i <= nThreads; i++) { + List sectionedList = new ArrayList<>(); + // Here the different Threads are being started + // Usually the separation gives the first threads the same number of files to be working on and the last one is given all the files that could not be separated + if (i == nThreads) for (int x = (sections * i) - (sections); x < nFiles; x++) { + sectionedList.add(files.get(x)); + } else for (int x = (sections * i) - (sections); x < (sections * i); x++) { + sectionedList.add(files.get(x)); + } + // Start Multithreading + // sectionedList gives the thread their Assigned Part of Files ThreadedCompare threadedCompare = new ThreadedCompare(sectionedList); threadedCompare.start(); - } - //this updates if necessary the Progress bar and checks for Finished threads - - while (completedThreads < availableThreads) { + // This updates if necessary the Progress bar and checks for Finished threads + while (completedThreads < nThreads) { TimeUnit.MILLISECONDS.sleep(250); - if (showProgress && doTheColorThingy) { - System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + filesToBeDone + " | " + (progress * 100 / filesToBeDone) + "%" + ConsoleColors.RESET + "\r"); + System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + ConsoleColors.RESET + "\r"); } else if (showProgress) { - System.out.print("Progress: " + progress + " / " + filesToBeDone + " | " + (progress * 100 / filesToBeDone) + "%" + "\r"); + System.out.print("Progress: " + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + "\r"); } } @@ -146,68 +158,70 @@ public class App { } fileMap.keySet().removeAll(toRemove); - // now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view + // Now everything is finished and the
Filemap (hashmap with all Dups) can be printed out in a nice view + if (fileMap.size() > 0) System.out.println(); for (String checksum: fileMap.keySet()) { - if (doTheColorThingy) { - System.out.println(ConsoleColors.BLUE_BOLD + checksum + ConsoleColors.CYAN_BOLD + "\t--> " + ConsoleColors.GREEN_BOLD + fileMap.get(checksum) + ConsoleColors.RESET); - - } else { - System.out.println(checksum +"\t--> " + fileMap.get(checksum)); - } - + System.out.println( + ConsoleColors.BLUE_BOLD + checksum + + ConsoleColors.CYAN_BOLD + "\t--> " + + ConsoleColors.GREEN_BOLD + fileMap.get(checksum) + + ConsoleColors.RESET); + } else System.out.println(checksum +"\t--> " + fileMap.get(checksum)); } + if (fileMap.size() > 0) System.out.println(); - List allTheFilesWillBeDeleted = new ArrayList<>(); - + List toBeDeleted = new ArrayList<>(); long bytes = 0; - - for (String md5: fileMap.keySet()) { - App.fileMap.get(md5).remove(0); - for (Path file: App.fileMap.get(md5)) { - if (file != null) { - bytes += file.toFile().length(); - } + for (String checksum: fileMap.keySet()) { + App.fileMap.get(checksum).remove(0); + for (File file: App.fileMap.get(checksum)) { + if (file != null) bytes += file.length(); } - allTheFilesWillBeDeleted.addAll(App.fileMap.get(md5)); + toBeDeleted.addAll(App.fileMap.get(checksum)); } + if (doTheColorThingy) { + String color = ConsoleColors.RED_BOLD; + if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD; + System.out.println(color + (bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found." 
+ ConsoleColors.RESET); + } else System.out.println((bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found."); + + // Don't go further if there is nothing to delete + if (fileMap.size() < 1) return; + if (deleteDups) { - delete(allTheFilesWillBeDeleted); + System.out.println(); + delete(toBeDeleted); } else if (!noInput) { - ask(doTheColorThingy, bytes, allTheFilesWillBeDeleted); - } - - } - - // print files and ask user - public static void ask(boolean color, long bytes, List deleteThem) { - if (color) { - System.out.println(ConsoleColors.RED_BOLD + (bytes / 8000000) + " unnecessary MB in " + deleteThem.size() + " Files found, do you want to Delete them? Y / N" + ConsoleColors.RESET); - } else { - System.out.println((bytes / 8000000) + " unnecessary MB in " + deleteThem.size() + " Files found, do you want to Delete them? Y / N"); - } - Scanner input = new Scanner(System.in); - String answer = input.next(); - if (answer.toLowerCase().contains("y")) { - delete(deleteThem); + // Ask if the user wants to delete the file + Scanner input = new Scanner(System.in); + while (true) { + if (doTheColorThingy) System.out.print(ConsoleColors.RED_BOLD + "Do you want to delete them? [y/n] " + ConsoleColors.RESET); + else System.out.print("Do you want to delete them? 
[y/n] "); + String answer = input.next(); + if (answer.toLowerCase().contains("y")) { + System.out.println(); + delete(toBeDeleted); + break; + } + else if (answer.toLowerCase().contains("n")) break; + } input.close(); - - } else if (answer.toLowerCase().contains("n")) { - input.close(); - return; - } else { - ask(color, bytes, deleteThem); - } - input.close(); - } - - public static void delete(List deleteThem) { - for (Path file : deleteThem) { - if (file != null) {file.toFile().delete();} - } } + public static void delete(List fileList) { + for (File file : fileList) if (file != null) { + if (file.delete()) { + if (doTheColorThingy) System.out.println(ConsoleColors.RED_BOLD + "Deleted " + file.toPath() + ConsoleColors.RESET); + else System.out.println("Deleted " + file.toPath()); + } + else { + if (doTheColorThingy) System.err.println(ConsoleColors.RED_BOLD + "Couldn't delete " + ConsoleColors.RESET + file.toPath()); + else System.err.println("Couldn't delete " + file.toPath()); + } + } + } } diff --git a/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java b/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java index 1b97058..4aefd1f 100644 --- a/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java +++ b/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java @@ -1,7 +1,6 @@ package net.chaoticbyte.xxsherly; import java.io.*; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.zip.Checksum; @@ -9,30 +8,35 @@ import org.apache.commons.codec.digest.XXHash32; public class ThreadedCompare extends Thread { - private final List pathsToCompareTo; + private final List filesToCompare; - public ThreadedCompare (List pathsToCompareTo) { - this.pathsToCompareTo = pathsToCompareTo; + public ThreadedCompare (List pathsToCompare_) { + this.filesToCompare = pathsToCompare_; } @Override public void run() { - for (Path file : pathsToCompareTo) { - List fileArray = new ArrayList<>(); + for (File file : 
filesToCompare) { + + List fileArray = new ArrayList<>(); assert fileArray != null; fileArray.add(file); - String checksum; + + // Generate Checksum try { - checksum = getChecksum(file.toFile()); - } catch (IOException e) { - throw new RuntimeException(e); + String checksum = getChecksum(file); + if (App.fileMap.containsKey(checksum)) { + fileArray.addAll(App.fileMap.get(checksum)); + App.fileMap.put(checksum, fileArray); + } else { + App.fileMap.put(checksum, fileArray); + } } - if (App.fileMap.containsKey(checksum)) { - fileArray.addAll(App.fileMap.get(checksum)); - App.fileMap.put(checksum, fileArray); - } else { - App.fileMap.put(checksum, fileArray); + catch (IOException e) { + System.err.println("An exception occured while processing the file " + file.getPath()); + System.err.println(e.getMessage()); } + App.progress++; } App.completedThreads++; @@ -44,7 +48,6 @@ public class ThreadedCompare extends Thread { String digest = ""; // Calculate xxHash32 and add it's hexadecimal presentation to the digest - Checksum xxHash = new XXHash32(); FileInputStream inputStream = new FileInputStream(file); byte[] dataBytes = new byte[1024]; @@ -56,7 +59,6 @@ public class ThreadedCompare extends Thread { digest += Long.toHexString(xxHash.getValue()); // Add File length to the digest - digest += Long.toHexString(file.length()); // return result