Increased performance by using parallelStream instead of manual threading, removed -t/--threads option, updated README, bumped version to 3.0

This commit is contained in:
Julian Müller (ChaoticByte) 2023-05-07 18:22:40 +02:00
parent 2770390df3
commit 37d7b5bd06
5 changed files with 89 additions and 142 deletions

View file

@ -16,12 +16,9 @@ Note that xxHash is not a cryptographic hash function and therefore may produce
``` ```
usage: xxSherly.jar [options] folder1 folder2 ... usage: xxSherly.jar [options] folder1 folder2 ...
-c,--color enable colored output -c,--color enable colored output
-h,--help show this help message -h,--help show this help message
-p,--progress enable progress indicator -v,--verbose more verbose output
-t,--threads <arg> override default thread number (defaults to the
number of cores)
-v,--verbose more verbose output
``` ```
## Build ## Build
@ -44,15 +41,19 @@ mvn package assembly:single
I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands: I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands:
```bash ```bash
# Sherly v1.1.4
time java -jar Bin/sherly.jar -n -f ~/Music/ time java -jar Bin/sherly.jar -n -f ~/Music/
time java -jar target/xxSherly-x.y-jar-with-dependencies.jar -n -f ~/Music/ # xxSherly v2.1
time java -jar target/xxSherly-2.1-jar-with-dependencies.jar ~/Music/
# xxSherly v3.0
time java -jar target/xxSherly-3.0-jar-with-dependencies.jar ~/Music/
``` ```
The timings are measured using the Linux tool `time` (`real`). The timings are measured using the Linux tool `time` (`real`).
| | Sherly v1.1.4 | xxSherly v1.0 | | | Sherly v1.1.4 | xxSherly v2.1 | xxSherly v3.0 |
| --------: | ------------: | --------------: | | --------: | ------------: | ------------: | ------------: |
| 1st run | 4.055s | 2.561s | | 1st run | 4.055s | 2.554s | 2.086s |
| 2nd run | 4.055s | 2.304s | | 2nd run | 4.055s | 2.554s | 2.109s |
| 3rd run | 4.066s | 2.549s | | 3rd run | 4.066s | 2.556s | 2.092s |
| **avg** | **4.059s** | **2.471s** | | **avg** | **4.059s** | **2.555s** | **2.096s** |

View file

@ -6,7 +6,7 @@
<groupId>net.chaoticbyte.xxsherly</groupId> <groupId>net.chaoticbyte.xxsherly</groupId>
<artifactId>xxSherly</artifactId> <artifactId>xxSherly</artifactId>
<version>2.1</version> <version>3.0</version>
<name>xxSherly</name> <name>xxSherly</name>
<!-- FIXME change it to the project's website --> <!-- FIXME change it to the project's website -->

View file

@ -7,7 +7,6 @@ import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLine;
@ -20,29 +19,22 @@ public class App {
public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ..."; public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ...";
public static int completedThreads = 0;
public static int progress = 0;
public static HashMap<String, List<File>> fileMap = new HashMap<>(); public static HashMap<String, List<File>> fileMap = new HashMap<>();
public static boolean doTheColorThingy = false; public static boolean doTheColorThingy = false;
public static boolean verbose = false;
public static void main(String[] args) throws InterruptedException { public static void main(String[] args) throws InterruptedException {
// Arguments
List<File> folderList = new ArrayList<>();
boolean showProgress = false;
boolean verbose = false;
boolean displayHelp = false;
int requestedThreads = 0;
// CLI // CLI
List<File> folderList = new ArrayList<>();
boolean displayHelp = false;
HelpFormatter helpFormatter = new HelpFormatter(); HelpFormatter helpFormatter = new HelpFormatter();
Options commandlineOptions = new Options(); Options commandlineOptions = new Options();
commandlineOptions.addOption("c", "color", false, "enable colored output"); commandlineOptions.addOption("c", "color", false, "enable colored output");
commandlineOptions.addOption("t", "threads", true, "override default thread number (defaults to the number of cores)");
commandlineOptions.addOption("p", "progress", false, "enable progress indicator");
commandlineOptions.addOption("v", "verbose", false, "more verbose output"); commandlineOptions.addOption("v", "verbose", false, "more verbose output");
commandlineOptions.addOption("h", "help", false, "show this help message"); commandlineOptions.addOption("h", "help", false, "show this help message");
@ -56,10 +48,8 @@ public class App {
} }
// Get arguments & options // Get arguments & options
doTheColorThingy = arguments.hasOption("c"); doTheColorThingy = arguments.hasOption("c");
showProgress = arguments.hasOption("p");
verbose = arguments.hasOption("v"); verbose = arguments.hasOption("v");
displayHelp = arguments.hasOption("h"); displayHelp = arguments.hasOption("h");
requestedThreads = Integer.parseInt(arguments.getOptionValue("t", "0"));
} }
catch (ParseException | NumberFormatException e) { catch (ParseException | NumberFormatException e) {
helpFormatter.printHelp(usageHelp, commandlineOptions); helpFormatter.printHelp(usageHelp, commandlineOptions);
@ -83,16 +73,8 @@ public class App {
System.out.println("Arguments:");; System.out.println("Arguments:");;
System.out.println(" Folders: " + folderList.size()); System.out.println(" Folders: " + folderList.size());
System.out.println(" Color: " + doTheColorThingy); System.out.println(" Color: " + doTheColorThingy);
System.out.println(" Progress: " + showProgress);
} }
// Calculations for multithreading
// The number of Cores or better said Threads that can be used
int availableProcessors = Runtime.getRuntime().availableProcessors();
int nThreads = availableProcessors;
if (requestedThreads > 0) nThreads = requestedThreads;
if (verbose) System.out.println("Threads: " + nThreads);
// Find all files // Find all files
List<File> files = new ArrayList<>(); List<File> files = new ArrayList<>();
for (File folder : folderList) { for (File folder : folderList) {
@ -114,33 +96,29 @@ public class App {
int nFiles = files.size(); int nFiles = files.size();
if (verbose) System.out.println("Files: " + nFiles); if (verbose) System.out.println("Files: " + nFiles);
// Every Thread that is going to be started gets a range of files // Calculate Hashes
// They are seperated and are called sections
int sections = nFiles / nThreads;
for (int i = 1; i <= nThreads; i++) {
List<File> sectionedList = new ArrayList<>();
// Here the different Threads are being started
// Usually the separation gives the first threads the same number of files to be working on and the last one is given all the files that could not be separetated
if (i == nThreads) for (int x = (sections * i) - (sections); x < nFiles; x++) {
sectionedList.add(files.get(x));
} else for (int x = (sections * i) - (sections); x < (sections * i); x++) {
sectionedList.add(files.get(x));
}
// Start Multithreading
// sectionedList gives the thread their Assigned Part of Files
ThreadedCompare threadedCompare = new ThreadedCompare(sectionedList);
threadedCompare.start();
}
// This updates if necessary the Progress bar and checks for Finished threads files.parallelStream().forEach(file -> {
while (completedThreads < nThreads) {
TimeUnit.MILLISECONDS.sleep(250); List<File> fileArray = new ArrayList<>();
if (showProgress && doTheColorThingy) { assert fileArray != null;
System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + ConsoleColors.RESET + "\r"); fileArray.add(file);
} else if (showProgress) {
System.out.print("Progress: " + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + "\r"); // Generate Checksum
try {
String checksum = FileChecksum.getChecksum(file);
if (App.fileMap.containsKey(checksum)) {
fileArray.addAll(App.fileMap.get(checksum));
App.fileMap.put(checksum, fileArray);
} else {
App.fileMap.put(checksum, fileArray);
}
} }
} catch (IOException e) {
System.err.println("An exception occured while processing the file " + file.getPath());
System.err.println(e.getMessage());
}
});
ArrayList<String> toRemove = new ArrayList<String>(); ArrayList<String> toRemove = new ArrayList<String>();
for (String checksum: fileMap.keySet()) { for (String checksum: fileMap.keySet()) {
@ -152,32 +130,36 @@ public class App {
// Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view // Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view
if (fileMap.size() > 0) System.out.println(); if (fileMap.size() > 0) {
for (String checksum: fileMap.keySet()) { System.out.println();
if (doTheColorThingy) { for (String checksum: fileMap.keySet()) {
System.out.println( if (doTheColorThingy) {
ConsoleColors.BLUE_BOLD + checksum System.out.println(
+ ConsoleColors.CYAN_BOLD + "\t--> " ConsoleColors.BLUE_BOLD + checksum
+ ConsoleColors.GREEN_BOLD + fileMap.get(checksum) + ConsoleColors.CYAN_BOLD + "\t--> "
+ ConsoleColors.RESET); + ConsoleColors.GREEN_BOLD + fileMap.get(checksum)
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum)); + ConsoleColors.RESET);
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum));
}
System.out.println();
} }
if (fileMap.size() > 0) System.out.println();
List<File> toBeDeleted = new ArrayList<>(); // Count redundant files and bytes
int toBeDeleted = 0;
long bytes = 0; long bytes = 0;
for (String checksum: fileMap.keySet()) { for (String checksum: fileMap.keySet()) {
App.fileMap.get(checksum).remove(0); App.fileMap.get(checksum).remove(0);
for (File file: App.fileMap.get(checksum)) { for (File file: App.fileMap.get(checksum)) {
if (file != null) bytes += file.length(); if (file != null) bytes += file.length();
} }
toBeDeleted.addAll(App.fileMap.get(checksum)); toBeDeleted++;
} }
if (doTheColorThingy) { if (doTheColorThingy) {
String color = ConsoleColors.RED_BOLD; String color = ConsoleColors.RED_BOLD;
if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD; if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD;
System.out.println(color + (bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found." + ConsoleColors.RESET); System.out.println(color + (bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found." + ConsoleColors.RESET);
} else System.out.println((bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found."); } else System.out.println((bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found.");
} }
} }

View file

@ -0,0 +1,31 @@
package net.chaoticbyte.xxsherly;
import java.io.*;
import java.util.zip.Checksum;
import org.apache.commons.codec.digest.XXHash32;
/**
 * Computes the content fingerprint used to detect duplicate files.
 */
public class FileChecksum {

    /**
     * Builds a fingerprint string for a file from its xxHash32 value and its length.
     *
     * <p>The result is the hexadecimal xxHash32 of the file contents immediately
     * followed by the hexadecimal file length. Two files with the same contents
     * therefore yield the same string.
     *
     * <p>NOTE(review): neither part is zero-padded and there is no separator, so the
     * boundary between hash and length cannot be recovered from the string alone.
     * Confirm whether that is acceptable before relying on the format.
     *
     * @param file the file to fingerprint
     * @return hexadecimal xxHash32 concatenated with the hexadecimal file length
     * @throws IOException if the file cannot be opened or read
     */
    public static String getChecksum (File file) throws IOException {
        Checksum xxHash = new XXHash32();
        byte[] dataBytes = new byte[1024];

        // try-with-resources closes the stream even when read() throws,
        // so a failing file no longer leaks its file descriptor.
        try (InputStream inputStream = new FileInputStream(file)) {
            int bytesRead;
            while ((bytesRead = inputStream.read(dataBytes)) != -1) {
                xxHash.update(dataBytes, 0, bytesRead);
            }
        }

        // Hash first, then the file length, both as hexadecimal.
        return Long.toHexString(xxHash.getValue()) + Long.toHexString(file.length());
    }
}

View file

@ -1,67 +0,0 @@
package net.chaoticbyte.xxsherly;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.Checksum;
import org.apache.commons.codec.digest.XXHash32;
/**
 * Worker thread that fingerprints an assigned list of files and records them
 * in the shared {@code App.fileMap}, keyed by checksum.
 *
 * <p>Several instances run at once and all write to the same static state in
 * {@code App}, so every access to that state from this class is guarded by
 * the {@code App.class} monitor.
 */
public class ThreadedCompare extends Thread {

    private final List<File> filesToCompare;

    /**
     * @param pathsToCompare_ the files this thread is responsible for
     */
    public ThreadedCompare (List<File> pathsToCompare_) {
        this.filesToCompare = pathsToCompare_;
    }

    /**
     * Hashes every assigned file, adds it to {@code App.fileMap} and bumps
     * {@code App.progress} once per file. {@code App.completedThreads} is
     * incremented exactly once when the thread ends, even on failure.
     */
    @Override
    public void run() {
        try {
            for (File file : filesToCompare) {
                try {
                    // Hashing is the expensive part; do it outside the lock.
                    String checksum = getChecksum(file);

                    // The lookup and the put must be one atomic step, otherwise two
                    // threads with the same checksum can overwrite each other's entry.
                    synchronized (App.class) {
                        List<File> fileArray = new ArrayList<>();
                        fileArray.add(file);
                        if (App.fileMap.containsKey(checksum)) {
                            fileArray.addAll(App.fileMap.get(checksum));
                        }
                        App.fileMap.put(checksum, fileArray);
                    }
                }
                catch (IOException e) {
                    System.err.println("An exception occured while processing the file " + file.getPath());
                    System.err.println(e.getMessage());
                }
                // int++ is not atomic; unguarded increments from several threads get lost.
                synchronized (App.class) {
                    App.progress++;
                }
            }
        } finally {
            // Always report completion so a waiter polling completedThreads is not
            // left hanging when this thread dies from an unchecked exception.
            // NOTE(review): readers of these counters should also synchronize on
            // App.class (or the fields should be atomic) to guarantee visibility.
            synchronized (App.class) {
                App.completedThreads++;
            }
        }
    }

    /**
     * Builds the fingerprint for one file: hexadecimal xxHash32 of the contents
     * followed by the hexadecimal file length.
     *
     * @param file the file to fingerprint
     * @return the fingerprint string
     * @throws IOException if the file cannot be opened or read
     */
    private String getChecksum (File file) throws IOException {
        Checksum xxHash = new XXHash32();
        byte[] dataBytes = new byte[1024];

        // try-with-resources closes the stream even when read() throws.
        try (InputStream inputStream = new FileInputStream(file)) {
            int bytesRead;
            while ((bytesRead = inputStream.read(dataBytes)) != -1) {
                xxHash.update(dataBytes, 0, bytesRead);
            }
        }

        // Hash first, then the file length, both as hexadecimal.
        return Long.toHexString(xxHash.getValue()) + Long.toHexString(file.length());
    }
}