diff --git a/README.md b/README.md index 4a26ac2..793bd97 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,9 @@ Note that xxHash is not a cryptographic hash function and therefore may produce ``` usage: xxSherly.jar [options] folder1 folder2 ... - -c,--color enable colored output - -h,--help show this help message - -p,--progress enable progress indicator - -t,--threads override default thread number (defaults to the - number of cores) - -v,--verbose more verbose output + -c,--color enable colored output + -h,--help show this help message + -v,--verbose more verbose output ``` ## Build @@ -44,15 +41,19 @@ mvn package assembly:single I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands: ```bash +# Sherly v1.1.4 time java -jar Bin/sherly.jar -n -f ~/Music/ -time java -jar target/xxSherly-x.y-jar-with-dependencies.jar -n -f ~/Music/ +# xxSherly v2.1 +time java -jar target/xxSherly-2.1-jar-with-dependencies.jar ~/Music/ +# xxSherly v3.0 +time java -jar target/xxSherly-3.0-jar-with-dependencies.jar ~/Music/ ``` The timings are measured using the Linux tool `time` (`real`). 
-| | Sherly v1.1.4 | xxSherly v1.0 | -| --------: | ------------: | --------------: | -| 1st run | 4.055s | 2.561s | -| 2nd run | 4.055s | 2.304s | -| 3rd run | 4.066s | 2.549s | -| **avg** | **4.059s** | **2.471s** | +| | Sherly v1.1.4 | xxSherly v2.1 | xxSherly v3.0 | +| --------: | ------------: | ------------: | ------------: | +| 1st run | 4.055s | 2.554s | 2.086s | +| 2nd run | 4.055s | 2.554s | 2.109s | +| 3rd run | 4.066s | 2.556s | 2.092s | +| **avg** | **4.059s** | **2.555s** | **2.096s** | diff --git a/images/screenshot.png b/images/screenshot.png index 5408966..9ee412b 100644 Binary files a/images/screenshot.png and b/images/screenshot.png differ diff --git a/pom.xml b/pom.xml index 46b9f95..35e45f0 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ net.chaoticbyte.xxsherly xxSherly - 2.1 + 3.0 xxSherly diff --git a/src/main/java/net/chaoticbyte/xxsherly/App.java b/src/main/java/net/chaoticbyte/xxsherly/App.java index c2ef3ed..855ffb2 100644 --- a/src/main/java/net/chaoticbyte/xxsherly/App.java +++ b/src/main/java/net/chaoticbyte/xxsherly/App.java @@ -5,9 +5,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.cli.CommandLine; @@ -20,29 +19,20 @@ public class App { public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ..."; - public static int completedThreads = 0; - public static int progress = 0; - public static HashMap> fileMap = new HashMap<>(); - public static boolean doTheColorThingy = false; + public static boolean verbose = false; public static void main(String[] args) throws InterruptedException { - // Arguments - List folderList = new ArrayList<>(); - boolean showProgress = false; - boolean verbose = false; - boolean displayHelp = 
false; - int requestedThreads = 0; - // CLI + List folderList = new ArrayList<>(); + boolean displayHelp = false; + HelpFormatter helpFormatter = new HelpFormatter(); Options commandlineOptions = new Options(); commandlineOptions.addOption("c", "color", false, "enable colored output"); - commandlineOptions.addOption("t", "threads", true, "override default thread number (defaults to the number of cores)"); - commandlineOptions.addOption("p", "progress", false, "enable progress indicator"); commandlineOptions.addOption("v", "verbose", false, "more verbose output"); commandlineOptions.addOption("h", "help", false, "show this help message"); @@ -56,10 +46,8 @@ public class App { } // Get arguments & options doTheColorThingy = arguments.hasOption("c"); - showProgress = arguments.hasOption("p"); verbose = arguments.hasOption("v"); displayHelp = arguments.hasOption("h"); - requestedThreads = Integer.parseInt(arguments.getOptionValue("t", "0")); } catch (ParseException | NumberFormatException e) { helpFormatter.printHelp(usageHelp, commandlineOptions); @@ -83,16 +71,8 @@ public class App { System.out.println("Arguments:");; System.out.println(" Folders: " + folderList.size()); System.out.println(" Color: " + doTheColorThingy); - System.out.println(" Progress: " + showProgress); } - // Calculations for multithreading - // The number of Cores or better said Threads that can be used - int availableProcessors = Runtime.getRuntime().availableProcessors(); - int nThreads = availableProcessors; - if (requestedThreads > 0) nThreads = requestedThreads; - if (verbose) System.out.println("Threads: " + nThreads); - // Find all files List files = new ArrayList<>(); for (File folder : folderList) { @@ -114,37 +94,35 @@ public class App { int nFiles = files.size(); if (verbose) System.out.println("Files: " + nFiles); - // Every Thread that is going to be started gets a range of files - // They are seperated and are called sections - int sections = nFiles / nThreads; - for (int i = 1; i 
<= nThreads; i++) { - List sectionedList = new ArrayList<>(); - // Here the different Threads are being started - // Usually the separation gives the first threads the same number of files to be working on and the last one is given all the files that could not be separetated - if (i == nThreads) for (int x = (sections * i) - (sections); x < nFiles; x++) { - sectionedList.add(files.get(x)); - } else for (int x = (sections * i) - (sections); x < (sections * i); x++) { - sectionedList.add(files.get(x)); - } - // Start Multithreading - // sectionedList gives the thread their Assigned Part of Files - ThreadedCompare threadedCompare = new ThreadedCompare(sectionedList); - threadedCompare.start(); - } + // Calculate Hashes - // This updates if necessary the Progress bar and checks for Finished threads - while (completedThreads < nThreads) { - TimeUnit.MILLISECONDS.sleep(250); - if (showProgress && doTheColorThingy) { - System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + ConsoleColors.RESET + "\r"); - } else if (showProgress) { - System.out.print("Progress: " + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + "\r"); + ConcurrentHashMap<String, List<File>> fileMap = new ConcurrentHashMap<>(); + + files.parallelStream().forEach(file -> { + + // Generate Checksum + try { + String checksum = FileChecksum.getChecksum(file); + // compute() is atomic per key, so two workers that hit the same checksum cannot overwrite each other's entry the way a containsKey/get/put sequence could. + // The list is only ever mutated inside compute() while the parallel stream is running. + fileMap.compute(checksum, (key, group) -> { + if (group == null) { + group = new ArrayList<>(); + } + group.add(file); + return group; + }); } - } + catch (IOException e) { + System.err.println("An exception occured while processing the file " + file.getPath()); + System.err.println(e.getMessage()); + } + }); ArrayList toRemove = new ArrayList(); for (String checksum: fileMap.keySet()) { - if (App.fileMap.get(checksum).size() == 1) { + if 
(fileMap.get(checksum).size() == 1) { toRemove.add(checksum); } } @@ -152,32 +130,36 @@ public class App { // Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view - if (fileMap.size() > 0) System.out.println(); - for (String checksum: fileMap.keySet()) { - if (doTheColorThingy) { - System.out.println( - ConsoleColors.BLUE_BOLD + checksum - + ConsoleColors.CYAN_BOLD + "\t--> " - + ConsoleColors.GREEN_BOLD + fileMap.get(checksum) - + ConsoleColors.RESET); - } else System.out.println(checksum +"\t--> " + fileMap.get(checksum)); + if (fileMap.size() > 0) { + System.out.println(); + for (String checksum: fileMap.keySet()) { + if (doTheColorThingy) { + System.out.println( + ConsoleColors.BLUE_BOLD + checksum + + ConsoleColors.CYAN_BOLD + "\t--> " + + ConsoleColors.GREEN_BOLD + fileMap.get(checksum) + + ConsoleColors.RESET); + } else System.out.println(checksum +"\t--> " + fileMap.get(checksum)); + } + System.out.println(); } - if (fileMap.size() > 0) System.out.println(); - List toBeDeleted = new ArrayList<>(); + // Count redundant files and bytes: every file of a duplicate group except the first one, which is the copy that is kept + + int toBeDeleted = 0; long bytes = 0; for (String checksum: fileMap.keySet()) { - App.fileMap.get(checksum).remove(0); - for (File file: App.fileMap.get(checksum)) { + fileMap.get(checksum).remove(0); + for (File file: fileMap.get(checksum)) { if (file != null) bytes += file.length(); } - toBeDeleted.addAll(App.fileMap.get(checksum)); + toBeDeleted += fileMap.get(checksum).size(); } if (doTheColorThingy) { String color = ConsoleColors.RED_BOLD; - if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD; - System.out.println(color + (bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found." 
+ ConsoleColors.RESET); - } else System.out.println((bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found."); + if (toBeDeleted < 1) color = ConsoleColors.GREEN_BOLD; + System.out.println(color + (bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found." + ConsoleColors.RESET); + } else System.out.println((bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found."); } } diff --git a/src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java b/src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java new file mode 100644 index 0000000..b91c362 --- /dev/null +++ b/src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java @@ -0,0 +1,33 @@ +package net.chaoticbyte.xxsherly; + +import java.io.*; +import java.util.zip.Checksum; +import org.apache.commons.codec.digest.XXHash32; + +public class FileChecksum { + + // Builds the duplicate-detection key of a file: its xxHash32 as 8 hex digits followed by its length in hex + public static String getChecksum (File file) throws IOException { + + // Calculate xxHash32 of the file content + Checksum xxHash = new XXHash32(); + // try-with-resources closes the stream even if read() throws + try (FileInputStream inputStream = new FileInputStream(file)) { + byte[] dataBytes = new byte[1024]; + int unread = 0; + while ((unread = inputStream.read(dataBytes)) != -1) { + xxHash.update(dataBytes, 0, unread); + } + } + + // Zero-pad the hash to a fixed width so that hash digits and length digits cannot run into each other + // (unpadded, hash 0x1 with length 0x23 and hash 0x12 with length 0x3 would both give "123") + String digest = String.format("%08x", xxHash.getValue()); + + // Add File length to the digest + digest += Long.toHexString(file.length()); + + // return result + return digest; + } +} diff --git a/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java b/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java deleted file mode 100644 index 4aefd1f..0000000 --- a/src/main/java/net/chaoticbyte/xxsherly/ThreadedCompare.java +++ /dev/null @@ -1,67 +0,0 @@ -package net.chaoticbyte.xxsherly; - -import java.io.*; -import java.util.ArrayList; -import 
java.util.List; -import java.util.zip.Checksum; -import org.apache.commons.codec.digest.XXHash32; - -public class ThreadedCompare extends Thread { - - private final List filesToCompare; - - public ThreadedCompare (List pathsToCompare_) { - this.filesToCompare = pathsToCompare_; - } - - @Override - public void run() { - for (File file : filesToCompare) { - - List fileArray = new ArrayList<>(); - assert fileArray != null; - fileArray.add(file); - - // Generate Checksum - try { - String checksum = getChecksum(file); - if (App.fileMap.containsKey(checksum)) { - fileArray.addAll(App.fileMap.get(checksum)); - App.fileMap.put(checksum, fileArray); - } else { - App.fileMap.put(checksum, fileArray); - } - } - catch (IOException e) { - System.err.println("An exception occured while processing the file " + file.getPath()); - System.err.println(e.getMessage()); - } - - App.progress++; - } - App.completedThreads++; - } - - //this is used to get the MD5 String of one of the files (one of them is just fine since they both have the same value) - private String getChecksum (File file) throws IOException { - - String digest = ""; - - // Calculate xxHash32 and add it's hexadecimal presentation to the digest - Checksum xxHash = new XXHash32(); - FileInputStream inputStream = new FileInputStream(file); - byte[] dataBytes = new byte[1024]; - int unread = 0; - while ((unread = inputStream.read(dataBytes)) != -1) { - xxHash.update(dataBytes, 0, unread); - } - inputStream.close(); - digest += Long.toHexString(xxHash.getValue()); - - // Add File length to the digest - digest += Long.toHexString(file.length()); - - // return result - return digest; - } -}