Increased performance by using parallelStream instead of manual threading, removed -t/--threads option, updated README, bumped version to 3.0
This commit is contained in:
parent
2770390df3
commit
37d7b5bd06
5 changed files with 89 additions and 142 deletions
27
README.md
27
README.md
|
@ -16,12 +16,9 @@ Note that xxHash is not a cryptographic hash function and therefore may produce
|
|||
|
||||
```
|
||||
usage: xxSherly.jar [options] folder1 folder2 ...
|
||||
-c,--color enable colored output
|
||||
-h,--help show this help message
|
||||
-p,--progress enable progress indicator
|
||||
-t,--threads <arg> override default thread number (defaults to the
|
||||
number of cores)
|
||||
-v,--verbose more verbose output
|
||||
-c,--color enable colored output
|
||||
-h,--help show this help message
|
||||
-v,--verbose more verbose output
|
||||
```
|
||||
|
||||
## Build
|
||||
|
@ -44,15 +41,19 @@ mvn package assembly:single
|
|||
I let Sherly and xxSherly find duplicates in my Music Library (containing `.wav` files) using the following commands:
|
||||
|
||||
```bash
|
||||
# Sherly v1.1.4
|
||||
time java -jar Bin/sherly.jar -n -f ~/Music/
|
||||
time java -jar target/xxSherly-x.y-jar-with-dependencies.jar -n -f ~/Music/
|
||||
# xxSherly v2.1
|
||||
time java -jar target/xxSherly-2.1-jar-with-dependencies.jar ~/Music/
|
||||
# xxSherly v3.0
|
||||
time java -jar target/xxSherly-3.0-jar-with-dependencies.jar ~/Music/
|
||||
```
|
||||
|
||||
The timings are measured using the Linux tool `time` (`real`).
|
||||
|
||||
| | Sherly v1.1.4 | xxSherly v1.0 |
|
||||
| --------: | ------------: | --------------: |
|
||||
| 1st run | 4.055s | 2.561s |
|
||||
| 2nd run | 4.055s | 2.304s |
|
||||
| 3rd run | 4.066s | 2.549s |
|
||||
| **avg** | **4.059s** | **2.471s** |
|
||||
| | Sherly v1.1.4 | xxSherly v2.1 | xxSherly v3.0 |
|
||||
| --------: | ------------: | ------------: | ------------: |
|
||||
| 1st run | 4.055s | 2.554s | 2.086s |
|
||||
| 2nd run | 4.055s | 2.554s | 2.109s |
|
||||
| 3rd run | 4.066s | 2.556s | 2.092s |
|
||||
| **avg** | **4.059s** | **2.555s** | **2.096s** |
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -6,7 +6,7 @@
|
|||
|
||||
<groupId>net.chaoticbyte.xxsherly</groupId>
|
||||
<artifactId>xxSherly</artifactId>
|
||||
<version>2.1</version>
|
||||
<version>3.0</version>
|
||||
|
||||
<name>xxSherly</name>
|
||||
<!-- FIXME change it to the project's website -->
|
||||
|
|
|
@ -7,7 +7,6 @@ import java.nio.file.Path;
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
|
@ -20,29 +19,22 @@ public class App {
|
|||
|
||||
public static final String usageHelp = "xxSherly.jar [options] folder1 folder2 ...";
|
||||
|
||||
public static int completedThreads = 0;
|
||||
public static int progress = 0;
|
||||
public static HashMap<String, List<File>> fileMap = new HashMap<>();
|
||||
|
||||
public static boolean doTheColorThingy = false;
|
||||
public static boolean verbose = false;
|
||||
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
|
||||
// Arguments
|
||||
List<File> folderList = new ArrayList<>();
|
||||
boolean showProgress = false;
|
||||
boolean verbose = false;
|
||||
boolean displayHelp = false;
|
||||
int requestedThreads = 0;
|
||||
|
||||
// CLI
|
||||
|
||||
List<File> folderList = new ArrayList<>();
|
||||
boolean displayHelp = false;
|
||||
|
||||
HelpFormatter helpFormatter = new HelpFormatter();
|
||||
|
||||
Options commandlineOptions = new Options();
|
||||
commandlineOptions.addOption("c", "color", false, "enable colored output");
|
||||
commandlineOptions.addOption("t", "threads", true, "override default thread number (defaults to the number of cores)");
|
||||
commandlineOptions.addOption("p", "progress", false, "enable progress indicator");
|
||||
commandlineOptions.addOption("v", "verbose", false, "more verbose output");
|
||||
commandlineOptions.addOption("h", "help", false, "show this help message");
|
||||
|
||||
|
@ -56,10 +48,8 @@ public class App {
|
|||
}
|
||||
// Get arguments & options
|
||||
doTheColorThingy = arguments.hasOption("c");
|
||||
showProgress = arguments.hasOption("p");
|
||||
verbose = arguments.hasOption("v");
|
||||
displayHelp = arguments.hasOption("h");
|
||||
requestedThreads = Integer.parseInt(arguments.getOptionValue("t", "0"));
|
||||
}
|
||||
catch (ParseException | NumberFormatException e) {
|
||||
helpFormatter.printHelp(usageHelp, commandlineOptions);
|
||||
|
@ -83,16 +73,8 @@ public class App {
|
|||
System.out.println("Arguments:");;
|
||||
System.out.println(" Folders: " + folderList.size());
|
||||
System.out.println(" Color: " + doTheColorThingy);
|
||||
System.out.println(" Progress: " + showProgress);
|
||||
}
|
||||
|
||||
// Calculations for multithreading
|
||||
// The number of Cores or better said Threads that can be used
|
||||
int availableProcessors = Runtime.getRuntime().availableProcessors();
|
||||
int nThreads = availableProcessors;
|
||||
if (requestedThreads > 0) nThreads = requestedThreads;
|
||||
if (verbose) System.out.println("Threads: " + nThreads);
|
||||
|
||||
// Find all files
|
||||
List<File> files = new ArrayList<>();
|
||||
for (File folder : folderList) {
|
||||
|
@ -114,33 +96,29 @@ public class App {
|
|||
int nFiles = files.size();
|
||||
if (verbose) System.out.println("Files: " + nFiles);
|
||||
|
||||
// Every Thread that is going to be started gets a range of files
|
||||
// They are separated and are called sections
|
||||
int sections = nFiles / nThreads;
|
||||
for (int i = 1; i <= nThreads; i++) {
|
||||
List<File> sectionedList = new ArrayList<>();
|
||||
// Here the different Threads are being started
|
||||
// Usually the separation gives every thread except the last the same number of files to work on; the last one is additionally given all the files that could not be distributed evenly
|
||||
if (i == nThreads) for (int x = (sections * i) - (sections); x < nFiles; x++) {
|
||||
sectionedList.add(files.get(x));
|
||||
} else for (int x = (sections * i) - (sections); x < (sections * i); x++) {
|
||||
sectionedList.add(files.get(x));
|
||||
}
|
||||
// Start Multithreading
|
||||
// sectionedList gives the thread their Assigned Part of Files
|
||||
ThreadedCompare threadedCompare = new ThreadedCompare(sectionedList);
|
||||
threadedCompare.start();
|
||||
}
|
||||
// Calculate Hashes
|
||||
|
||||
// This updates if necessary the Progress bar and checks for Finished threads
|
||||
while (completedThreads < nThreads) {
|
||||
TimeUnit.MILLISECONDS.sleep(250);
|
||||
if (showProgress && doTheColorThingy) {
|
||||
System.out.print(ConsoleColors.BLUE_BOLD + "Progress: " + ConsoleColors.GREEN_BOLD + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + ConsoleColors.RESET + "\r");
|
||||
} else if (showProgress) {
|
||||
System.out.print("Progress: " + progress + " / " + nFiles + " | " + (progress * 100 / nFiles) + "%" + "\r");
|
||||
files.parallelStream().forEach(file -> {
|
||||
|
||||
List<File> fileArray = new ArrayList<>();
|
||||
assert fileArray != null;
|
||||
fileArray.add(file);
|
||||
|
||||
// Generate Checksum
|
||||
try {
|
||||
String checksum = FileChecksum.getChecksum(file);
|
||||
if (App.fileMap.containsKey(checksum)) {
|
||||
fileArray.addAll(App.fileMap.get(checksum));
|
||||
App.fileMap.put(checksum, fileArray);
|
||||
} else {
|
||||
App.fileMap.put(checksum, fileArray);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
System.err.println("An exception occured while processing the file " + file.getPath());
|
||||
System.err.println(e.getMessage());
|
||||
}
|
||||
});
|
||||
|
||||
ArrayList<String> toRemove = new ArrayList<String>();
|
||||
for (String checksum: fileMap.keySet()) {
|
||||
|
@ -152,32 +130,36 @@ public class App {
|
|||
|
||||
// Now everything is finished and the Filemap (hashmap with all Dups) can be printed out in a nice view
|
||||
|
||||
if (fileMap.size() > 0) System.out.println();
|
||||
for (String checksum: fileMap.keySet()) {
|
||||
if (doTheColorThingy) {
|
||||
System.out.println(
|
||||
ConsoleColors.BLUE_BOLD + checksum
|
||||
+ ConsoleColors.CYAN_BOLD + "\t--> "
|
||||
+ ConsoleColors.GREEN_BOLD + fileMap.get(checksum)
|
||||
+ ConsoleColors.RESET);
|
||||
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum));
|
||||
if (fileMap.size() > 0) {
|
||||
System.out.println();
|
||||
for (String checksum: fileMap.keySet()) {
|
||||
if (doTheColorThingy) {
|
||||
System.out.println(
|
||||
ConsoleColors.BLUE_BOLD + checksum
|
||||
+ ConsoleColors.CYAN_BOLD + "\t--> "
|
||||
+ ConsoleColors.GREEN_BOLD + fileMap.get(checksum)
|
||||
+ ConsoleColors.RESET);
|
||||
} else System.out.println(checksum +"\t--> " + fileMap.get(checksum));
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
if (fileMap.size() > 0) System.out.println();
|
||||
|
||||
List<File> toBeDeleted = new ArrayList<>();
|
||||
// Count redundant files and bytes
|
||||
|
||||
int toBeDeleted = 0;
|
||||
long bytes = 0;
|
||||
for (String checksum: fileMap.keySet()) {
|
||||
App.fileMap.get(checksum).remove(0);
|
||||
for (File file: App.fileMap.get(checksum)) {
|
||||
if (file != null) bytes += file.length();
|
||||
}
|
||||
toBeDeleted.addAll(App.fileMap.get(checksum));
|
||||
toBeDeleted++;
|
||||
}
|
||||
|
||||
if (doTheColorThingy) {
|
||||
String color = ConsoleColors.RED_BOLD;
|
||||
if (fileMap.size() < 1) color = ConsoleColors.GREEN_BOLD;
|
||||
System.out.println(color + (bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found." + ConsoleColors.RESET);
|
||||
} else System.out.println((bytes / 1000000.0) + " unnecessary MB in " + toBeDeleted.size() + " file(s) found.");
|
||||
System.out.println(color + (bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found." + ConsoleColors.RESET);
|
||||
} else System.out.println((bytes / 1000000.0) + " redundant MB in " + toBeDeleted + " file(s) found.");
|
||||
}
|
||||
}
|
||||
|
|
31
src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java
Normal file
31
src/main/java/net/chaoticbyte/xxsherly/FileChecksum.java
Normal file
|
@ -0,0 +1,31 @@
|
|||
package net.chaoticbyte.xxsherly;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.zip.Checksum;
|
||||
import org.apache.commons.codec.digest.XXHash32;
|
||||
|
||||
public class FileChecksum {
|
||||
|
||||
//this is used to get the MD5 String of one of the files (one of them is just fine since they both have the same value)
|
||||
public static String getChecksum (File file) throws IOException {
|
||||
|
||||
String digest = "";
|
||||
|
||||
// Calculate xxHash32 and add it's hexadecimal presentation to the digest
|
||||
Checksum xxHash = new XXHash32();
|
||||
FileInputStream inputStream = new FileInputStream(file);
|
||||
byte[] dataBytes = new byte[1024];
|
||||
int unread = 0;
|
||||
while ((unread = inputStream.read(dataBytes)) != -1) {
|
||||
xxHash.update(dataBytes, 0, unread);
|
||||
}
|
||||
inputStream.close();
|
||||
digest += Long.toHexString(xxHash.getValue());
|
||||
|
||||
// Add File length to the digest
|
||||
digest += Long.toHexString(file.length());
|
||||
|
||||
// return result
|
||||
return digest;
|
||||
}
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
package net.chaoticbyte.xxsherly;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.zip.Checksum;
|
||||
import org.apache.commons.codec.digest.XXHash32;
|
||||
|
||||
public class ThreadedCompare extends Thread {
|
||||
|
||||
private final List<File> filesToCompare;
|
||||
|
||||
public ThreadedCompare (List<File> pathsToCompare_) {
|
||||
this.filesToCompare = pathsToCompare_;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
for (File file : filesToCompare) {
|
||||
|
||||
List<File> fileArray = new ArrayList<>();
|
||||
assert fileArray != null;
|
||||
fileArray.add(file);
|
||||
|
||||
// Generate Checksum
|
||||
try {
|
||||
String checksum = getChecksum(file);
|
||||
if (App.fileMap.containsKey(checksum)) {
|
||||
fileArray.addAll(App.fileMap.get(checksum));
|
||||
App.fileMap.put(checksum, fileArray);
|
||||
} else {
|
||||
App.fileMap.put(checksum, fileArray);
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
System.err.println("An exception occured while processing the file " + file.getPath());
|
||||
System.err.println(e.getMessage());
|
||||
}
|
||||
|
||||
App.progress++;
|
||||
}
|
||||
App.completedThreads++;
|
||||
}
|
||||
|
||||
//this is used to get the MD5 String of one of the files (one of them is just fine since they both have the same value)
|
||||
private String getChecksum (File file) throws IOException {
|
||||
|
||||
String digest = "";
|
||||
|
||||
// Calculate xxHash32 and add it's hexadecimal presentation to the digest
|
||||
Checksum xxHash = new XXHash32();
|
||||
FileInputStream inputStream = new FileInputStream(file);
|
||||
byte[] dataBytes = new byte[1024];
|
||||
int unread = 0;
|
||||
while ((unread = inputStream.read(dataBytes)) != -1) {
|
||||
xxHash.update(dataBytes, 0, unread);
|
||||
}
|
||||
inputStream.close();
|
||||
digest += Long.toHexString(xxHash.getValue());
|
||||
|
||||
// Add File length to the digest
|
||||
digest += Long.toHexString(file.length());
|
||||
|
||||
// return result
|
||||
return digest;
|
||||
}
|
||||
}
|
Reference in a new issue