Increased performance by using parallelStream instead of manual threading, removed -t/--threads option, updated README, bumped version to 3.0
This commit is contained in:
parent
2770390df3
commit
37d7b5bd06
5 changed files with 89 additions and 142 deletions
21
README.md
21
README.md
|
@ -18,9 +18,6 @@ Note that xxHash is not a cryptographic hash function and therefore may produce
|
||||||
usage: xxSherly.jar [options] folder1 folder2 ...
|
usage: xxSherly.jar [options] folder1 folder2 ...
|
||||||
-c,--color enable colored output
|
-c,--color enable colored output
|
||||||
-h,--help show this help message
|
-h,--help show this help message
|
||||||
-p,--progress enable progress indicator
|
|
||||||
-t,--threads <arg> override default thread number (defaults to the
|
|
||||||
number of cores)
|
|
||||||
-v,--verbose more verbose output
|
-v,--verbose more verbose output
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -44,15 +41,19 @@ mvn package assembly:single
|
||||||
I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands:
|
I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Sherly v1.1.4
|
||||||
time java -jar Bin/sherly.jar -n -f ~/Music/
|
time java -jar Bin/sherly.jar -n -f ~/Music/
|
||||||
time java -jar target/xxSherly-x.y-jar-with-dependencies.jar -n -f ~/Music/
|
# xxSherly v2.1
|
||||||
|
time java -jar target/xxSherly-2.1-jar-with-dependencies.jar ~/Music/
|
||||||
|
# xxSherly v3.0
|
||||||
|
time java -jar target/xxSherly-3.0-jar-with-dependencies.jar ~/Music/
|
||||||
```
|
```
|
||||||
|
|
||||||
The timings are measured using the Linux tool `time` (`real`).
|
The timings are measured using the Linux tool `time` (`real`).
|
||||||
|
|
||||||
| | Sherly v1.1.4 | xxSherly v1.0 |
|
| | Sherly v1.1.4 | xxSherly v2.1 | xxSherly v3.0 |
|
||||||
| --------: | ------------: | --------------: |
|
| --------: | ------------: | ------------: | ------------: |
|
||||||
| 1st run | 4.055s | 2.561s |
|
| 1st run | 4.055s | 2.554s | 2.086s |
|
||||||
| 2nd run | 4.055s | 2.304s |
|
| 2nd run | 4.055s | 2.554s | 2.109s |
|
||||||
| 3rd run | 4.066s | 2.549s |
|
| 3rd run | 4.066s | 2.556s | 2.092s |
|
||||||
| **avg** | **4.059s** | **2.471s** |
|
| **avg** | **4.059s** | **2.555s** | **2.096s** |
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
<groupId>net.chaoticbyte.xxsherly</groupId>
|
<groupId>net.chaoticbyte.xxsherly</groupId>
|
||||||
<artifactId>xxSherly</artifactId>
|
<artifactId>xxSherly</artifactId>
|
||||||
<version>2.1</version>
|
<version>3.0</version>
|
||||||
|
|
||||||
<name>xxSherly</name>
|
<name>xxSherly</name>
|
||||||
<!-- FIXME change it to the project's website -->
|
<!-- FIXME change it to the project's website -->
|
||||||
|
|
|
@ -7,7 +7,6 @@ import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import org.apache.commons.cli.CommandLine;
|
import org.apache.commons.cli.CommandLine;
|
||||||
|
@ -20,29 +19,22 @@ public class App {
|
||||||
|
|
||||||
public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ...";
|
public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ...";
|
||||||
|
|
||||||
public static int completedThreads = 0;
|
|
||||||
public static int progress = 0;
|
|
||||||
public static HashMap<String, List<File>> fileMap = new HashMap<>();
|
public static HashMap<String, List<File>> fileMap = new HashMap<>();
|
||||||
|
|
||||||
public static boolean doTheColorThingy = false;
|
public static boolean doTheColorThingy = false;
|
||||||
|
public static boolean verbose = false;
|
||||||
|
|
||||||
public static void main(String[] args) throws InterruptedException {
|
public static void main(String[] args) throws InterruptedException {
|
||||||
|
|
||||||
// Arguments
|
|
||||||
List<File> folderList = new ArrayList<>();
|
|
||||||
boolean showProgress = false;
|
|
||||||
boolean verbose = false;
|
|
||||||
boolean displayHelp = false;
|
|
||||||
int requestedThreads = 0;
|
|
||||||
|
|
||||||
// CLI
|
// CLI
|
||||||
|
|
||||||
|
List<File> folderList = new ArrayList<>();
|
||||||
|
boolean displayHelp = false;
|
||||||
|
|
||||||
HelpFormatter helpFormatter = new HelpFormatter();
|
HelpFormatter helpFormatter = new HelpFormatter();
|
||||||
|
|
||||||
Options commandlineOptions = new Options();
|
Options commandlineOptions = new Options();
|
||||||
commandlineOptions.addOption("c", "color", false, "enable colored output");
|
commandlineOptions.addOption("c", "color", false, "enable colored output");
|
||||||
commandlineOptions.addOption("t", "threads", true, "override default thread number (defaults to the number of cores)");
|
|
||||||
commandlineOptions.addOption("p", "progress", false, "enable progress indicator");
|
|
||||||
commandlineOptions.addOption("v", "verbose", false, "more verbose output");
|
commandlineOptions.addOption("v", "verbose", false, "more verbose output");
|
||||||
commandlineOptions.addOption("h", "help", false, "show this help message");
|
commandlineOptions.addOption("h", "help", false, "show this help message");
|
||||||
|
|
||||||
|
@ -56,10 +48,8 @@ public class App {
|
||||||
}
|
}
|
||||||
// Get arguments & options
|
// Get arguments & options
|
||||||
doTheColorThingy = arguments.hasOption("c");
|
doTheColorThingy = arguments.hasOption("c");
|
||||||
showProgress = arguments.hasOption("p");
|
|
||||||
verbose = arguments.hasOption("v");
|
verbose = arguments.hasOption("v");
|
||||||
displayHelp = arguments.hasOption("h");
|
displayHelp = arguments.hasOption("h");
|
||||||
requestedThreads = Integer.parseInt(arguments.getOptionValue("t", "0"));
|
|
||||||
}
|
}
|
||||||
catch (ParseException | NumberFormatException e) {
|
catch (ParseException | NumberFormatException e) {
|
||||||
helpFormatter.printHelp(usageHelp, commandlineOptions);
|
helpFormatter.printHelp(usageHelp, commandlineOptions);
|
||||||
|
@ -83,16 +73,8 @@ public class App {
|
||||||
System.out.println("Arguments:");;
|
System.out.println("Arguments:");;
|
||||||
System.out.println(" Folders: " + folderList.size());
|
System.out.println(" Folders: " + folderList.size());
|
||||||
System.out.println(" Color: " + doTheColorThingy);
|
System.out.println(" Color: " + doTheColorThingy);
|
||||||
System.out.println(" Progress: " + showProgress);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculations for multithreading
|
|
||||||
// The number of Cores or better said Threads that can be used
|
|
||||||
int availableProcessors = Runtime.getRuntime().availableProcessors();
|
|
||||||
int nThreads = availableProcessors;
|
|
||||||
if (requestedThreads > 0) nThreads = requestedThreads;
|
|
||||||
if (verbose) System.out.println("Threads: " + nThreads);
|
|
||||||
|
|
||||||
// Find all files
|
// Find all files
|
||||||
List<File> files = new ArrayList<>();
|
List<File> files = new ArrayList<>();
|
||||||
for (File folder : folderList) {
|
for (File folder : folderList) {
|
||||||
|
@ -114,33 +96,29 @@ public class App {
|
||||||
int nFiles = files.size();
|
int nFiles = files.size();
|
||||||
if (verbose) System.out.println("Files: " + nFiles);
|
if (verbose) System.out.println("Files: " + nFiles);
|
||||||
|
|
||||||
// Every Thread that is going to be started gets a range of files
|
// Calculate Hashes
|
||||||
// They are separated and are called sections
|
|
||||||
int sections = nFiles / nThreads;
|
|
||||||
for (int i = 1; i <= nThreads; i++) {
|
|
||||||
List<File> sectionedList = new ArrayList<>();
|
|
||||||
// Here the different Threads are being started
|
|
||||||
// Usually the separation gives the first threads the same number of files to work on, and the last one is given all the remaining files that could not be divided evenly
|
|
||||||
if (i == nThreads) for (int x = (sections * i) - (sections); x < nFiles; x++) {
|
|
||||||
sectionedList.add(files.get(x));
|
|
||||||
} else for (int x = (sections * i) - (sections); x < (sections * i); x++) {
|
|
||||||
sectionedList.add(files.get(x));
|
|
||||||
}
|
|
||||||
// Start Multithreading
|
|
||||||
// sectionedList gives the thread their Assigned Part of Files
|
|
||||||
ThreadedCompare threadedCompare = new ThreadedCompare(sectionedList);
|
|
||||||
threadedCompare.start();
|
|
||||||
}
|
|
||||||
|
|
||||||
// This updates if necessary the Progress bar and checks for Finished threads
|
files.parallelStream().forEach(file -> {
|
||||||
while (completedThreads < nThreads) {
|
|
||||||
TimeUnit.MILLISECONDS.sleep(250);
|
List<File> fileArray = new ArrayList<>();
|
||||||
if (showProgress && doTheColorThingy) {
|
assert fileArray != null;
|
||||||
System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + ConsoleColors.RESET + "\r");
|
fileArray.add(file);
|
||||||
} else if (showProgress) {
|
|
||||||
System.out.print("Progress: " + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + "\r");
|
// Generate Checksum
|
||||||
|
try {
|
||||||
|
String checksum = FileChecksum.getChecksum(file);
|
||||||
|
if (App.fileMap.containsKey(checksum)) {
|
||||||
|
fileArray.addAll(App.fileMap.get(checksum));
|
||||||
|
App.fileMap.put(checksum, fileArray);
|
||||||
|
} else {
|
||||||
|
App.fileMap.put(checksum, fileArray);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
System.err.println("An exception occured while processing the file " + file.getPath());
|
||||||
|
System.err.println(e.getMessage());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
ArrayList<String> toRemove = new ArrayList<String>();
|
ArrayList<String> toRemove = new ArrayList<String>();
|
||||||
for (String checksum: fileMap.keySet()) {
|
for (String checksum: fileMap.keySet()) {
|
||||||
|
@ -152,7 +130,8 @@ public class App {
|
||||||
|
|
||||||
// Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view
|
// Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view
|
||||||
|
|
||||||
if (fileMap.size() > 0) System.out.println();
|
if (fileMap.size() > 0) {
|
||||||
|
System.out.println();
|
||||||
for (String checksum: fileMap.keySet()) {
|
for (String checksum: fileMap.keySet()) {
|
||||||
if (doTheColorThingy) {
|
if (doTheColorThingy) {
|
||||||
System.out.println(
|
System.out.println(
|
||||||
|
@ -162,22 +141,25 @@ public class App {
|
||||||
+ ConsoleColors.RESET);
|
+ ConsoleColors.RESET);
|
||||||
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum));
|
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum));
|
||||||
}
|
}
|
||||||
if (fileMap.size() > 0) System.out.println();
|
System.out.println();
|
||||||
|
}
|
||||||
|
|
||||||
List<File> toBeDeleted = new ArrayList<>();
|
// Count redundant files and bytes
|
||||||
|
|
||||||
|
int toBeDeleted = 0;
|
||||||
long bytes = 0;
|
long bytes = 0;
|
||||||
for (String checksum: fileMap.keySet()) {
|
for (String checksum: fileMap.keySet()) {
|
||||||
App.fileMap.get(checksum).remove(0);
|
App.fileMap.get(checksum).remove(0);
|
||||||
for (File file: App.fileMap.get(checksum)) {
|
for (File file: App.fileMap.get(checksum)) {
|
||||||
if (file != null) bytes += file.length();
|
if (file != null) bytes += file.length();
|
||||||
}
|
}
|
||||||
toBeDeleted.addAll(App.fileMap.get(checksum));
|
toBeDeleted++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doTheColorThingy) {
|
if (doTheColorThingy) {
|
||||||
String color = ConsoleColors.RED_BOLD;
|
String color = ConsoleColors.RED_BOLD;
|
||||||
if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD;
|
if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD;
|
||||||
System.out.println(color + (bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found." + ConsoleColors.RESET);
|
System.out.println(color + (bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found." + ConsoleColors.RESET);
|
||||||
} else System.out.println((bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found.");
|
} else System.out.println((bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
31
src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java
Normal file
31
src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
package net.chaoticbyte.xxsherly;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.zip.Checksum;
|
||||||
|
import org.apache.commons.codec.digest.XXHash32;
|
||||||
|
|
||||||
|
public class FileChecksum {
|
||||||
|
|
||||||
|
//this is used to get the MD5 String of one of the files (one of them is just fine since they both have the same value)
|
||||||
|
public static String getChecksum (File file) throws IOException {
|
||||||
|
|
||||||
|
String digest = "";
|
||||||
|
|
||||||
|
// Calculate xxHash32 and add it's hexadecimal presentation to the digest
|
||||||
|
Checksum xxHash = new XXHash32();
|
||||||
|
FileInputStream inputStream = new FileInputStream(file);
|
||||||
|
byte[] dataBytes = new byte[1024];
|
||||||
|
int unread = 0;
|
||||||
|
while ((unread = inputStream.read(dataBytes)) != -1) {
|
||||||
|
xxHash.update(dataBytes, 0, unread);
|
||||||
|
}
|
||||||
|
inputStream.close();
|
||||||
|
digest += Long.toHexString(xxHash.getValue());
|
||||||
|
|
||||||
|
// Add File length to the digest
|
||||||
|
digest += Long.toHexString(file.length());
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return digest;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,67 +0,0 @@
|
||||||
package net.chaoticbyte.xxsherly;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.zip.Checksum;
|
|
||||||
import org.apache.commons.codec.digest.XXHash32;
|
|
||||||
|
|
||||||
public class ThreadedCompare extends Thread {
|
|
||||||
|
|
||||||
private final List<File> filesToCompare;
|
|
||||||
|
|
||||||
public ThreadedCompare (List<File> pathsToCompare_) {
|
|
||||||
this.filesToCompare = pathsToCompare_;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
for (File file : filesToCompare) {
|
|
||||||
|
|
||||||
List<File> fileArray = new ArrayList<>();
|
|
||||||
assert fileArray != null;
|
|
||||||
fileArray.add(file);
|
|
||||||
|
|
||||||
// Generate Checksum
|
|
||||||
try {
|
|
||||||
String checksum = getChecksum(file);
|
|
||||||
if (App.fileMap.containsKey(checksum)) {
|
|
||||||
fileArray.addAll(App.fileMap.get(checksum));
|
|
||||||
App.fileMap.put(checksum, fileArray);
|
|
||||||
} else {
|
|
||||||
App.fileMap.put(checksum, fileArray);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (IOException e) {
|
|
||||||
System.err.println("An exception occured while processing the file " + file.getPath());
|
|
||||||
System.err.println(e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
App.progress++;
|
|
||||||
}
|
|
||||||
App.completedThreads++;
|
|
||||||
}
|
|
||||||
|
|
||||||
//this is used to get the MD5 String of one of the files (one of them is just fine since they both have the same value)
|
|
||||||
private String getChecksum (File file) throws IOException {
|
|
||||||
|
|
||||||
String digest = "";
|
|
||||||
|
|
||||||
// Calculate xxHash32 and add it's hexadecimal presentation to the digest
|
|
||||||
Checksum xxHash = new XXHash32();
|
|
||||||
FileInputStream inputStream = new FileInputStream(file);
|
|
||||||
byte[] dataBytes = new byte[1024];
|
|
||||||
int unread = 0;
|
|
||||||
while ((unread = inputStream.read(dataBytes)) != -1) {
|
|
||||||
xxHash.update(dataBytes, 0, unread);
|
|
||||||
}
|
|
||||||
inputStream.close();
|
|
||||||
digest += Long.toHexString(xxHash.getValue());
|
|
||||||
|
|
||||||
// Add File length to the digest
|
|
||||||
digest += Long.toHexString(file.length());
|
|
||||||
|
|
||||||
// return result
|
|
||||||
return digest;
|
|
||||||
}
|
|
||||||
}
|
|
Reference in a new issue