First, the Java code:
package org.bongiorno;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import static java.util.stream.Collectors.groupingBy;
/**
 * Spring Boot command-line tool that deletes duplicate files found under its root directories.
 *
 * <p>Candidate files are collected recursively (filtered by file-name suffix), grouped by
 * size, then grouped by a hex digest of their contents. In every digest group the first file
 * is kept and all remaining files are deleted.
 */
@SpringBootApplication
@EnableAutoConfiguration
public class DeDup {
    /** Size of the read buffer used while hashing file contents. */
    private static final int BUFFER_SIZE = 8192;

    // Never assigned anywhere in this class; kept so main() can short-circuit if it is ever wired up.
    private boolean help;
    private List<File> roots = new LinkedList<>();
    private List<String> fileTypes = Arrays.asList(".jpg", ".gif");
    private String hashAlgo = "MD5";
    // The three maps below are populated by execute() and are null before it has run.
    private Map<String, List<File>> hashes;
    private Map<Long, List<File>> sizes;
    private Map<Boolean, List<File>> deleted;

    /**
     * Boots the Spring context, runs the de-duplication and prints a short summary.
     *
     * @param args command-line arguments forwarded to Spring Boot
     * @throws Exception if {@link #execute()} fails
     */
    public static void main(String[] args) throws Exception {
        DeDup app = SpringApplication.run(DeDup.class, args).getBean(DeDup.class);
        if (app.help) {
            System.exit(0);
        }
        app.execute();
        System.out.println("Files found to process: " + app.hashes.values().parallelStream().mapToInt(List::size).sum());
        System.out.println("Files deleted: " + app.deleted.getOrDefault(Boolean.TRUE, new LinkedList<>()).size());
    }

    /** Creates an instance with no roots, the default suffixes (.jpg, .gif) and MD5 hashing. */
    public DeDup() {
    }

    /**
     * Creates an instance from configuration properties.
     *
     * @param root      directory (or single file) to scan, built from the {@code root} property
     * @param hashAlgo  {@link MessageDigest} algorithm name, from the {@code hash} property (default MD5)
     * @param fileTypes file-name suffixes to consider, from the comma-separated {@code ext}
     *                  property (default {@code .jpg,.gif})
     */
    @Autowired
    public DeDup(@Value("#{new java.io.File('${root}')}") File root,
                 @Value("${hash:MD5}") String hashAlgo,
                 // The default must be plain text split by SpEL: a T(...) expression placed
                 // outside of #{} is never evaluated and would be injected as literal text.
                 @Value("#{'${ext:.jpg,.gif}'.split(',')}") List<String> fileTypes) {
        this.roots = Arrays.asList(root);
        this.hashAlgo = hashAlgo;
        this.fileTypes = fileTypes;
    }

    /**
     * Scans all roots, groups the matching files by size and content digest, and deletes every
     * file except the first one of each digest group.
     *
     * @return the files that were candidates for deletion, keyed by whether deletion succeeded
     * @throws Exception declared for callers; hashing failures surface as {@link RuntimeException}
     */
    public Map<Boolean, List<File>> execute() throws Exception {
        // Directories always pass so that the recursion in getFiles can descend into them.
        FileFilter filter = file -> file.isDirectory()
                || fileTypes.stream().anyMatch(t -> file.getName().endsWith(t));
        List<File> files = new LinkedList<>();
        roots.forEach(f -> getFiles(f, filter, files));
        sizes = files.stream().filter(File::isFile).collect(groupingBy(File::length));
        // we could say don't hash if there is only 1 file, but that would make debugging harder as it wouldn't show up here
        hashes = sizes.values().stream()
                .flatMap(Collection::stream)
                .collect(groupingBy(this::hash));
        // Keep the first file of every digest group; everything after it is a duplicate.
        this.deleted = hashes.values().stream()
                .map(l -> l.subList(1, l.size()))
                .flatMap(Collection::stream)
                .collect(groupingBy(this::delete));
        return this.deleted;
    }

    /**
     * Deletes a file, reporting I/O failures on stderr instead of propagating them.
     *
     * @return {@code true} if the file existed and was deleted, {@code false} otherwise
     */
    private boolean delete(File f) {
        try {
            return Files.deleteIfExists(f.toPath());
        } catch (IOException e) {
            System.err.println(e.toString());
            return false;
        }
    }

    /**
     * Recursively collects files below {@code start} into {@code results}.
     *
     * @param start   directory to descend into, or a non-directory entry that is added as-is
     *                (the filter is only applied to directory listings)
     * @param filter  filter applied to the children of every visited directory
     * @param results collection that receives the files found
     * @return {@code results}, for call chaining
     */
    protected static Collection<File> getFiles(File start, FileFilter filter, Collection<File> results) {
        if (start.isDirectory()) {
            File[] files = start.listFiles(filter);
            // listFiles returns null when the directory cannot be read.
            if (files != null) {
                for (File file : files) {
                    getFiles(file, filter, results);
                }
            }
        } else {
            results.add(start);
        }
        return results;
    }

    /**
     * Computes the lower-case hex digest of a file's contents using {@link #hashAlgo}.
     *
     * @param f file to hash
     * @return the digest as a hex string
     * @throws RuntimeException wrapping {@link NoSuchAlgorithmException} or {@link IOException}
     */
    private String hash(File f) {
        MessageDigest digest;
        // try-with-resources closes the stream even when a read fails.
        try (FileInputStream input = new FileInputStream(f)) {
            digest = MessageDigest.getInstance(hashAlgo);
            byte[] buffer = new byte[BUFFER_SIZE];
            for (int read = input.read(buffer); read > -1; read = input.read(buffer)) {
                digest.update(buffer, 0, read);
            }
        } catch (NoSuchAlgorithmException | IOException e) {
            throw new RuntimeException(e);
        }
        StringBuilder hex = new StringBuilder();
        for (byte b : digest.digest()) {
            hex.append(String.format("%02x", b));
        }
        // Return the digest itself: returning the file path would make every file unique
        // and no duplicate would ever be detected.
        return hex.toString();
    }

    /** @return files grouped by size, or {@code null} before {@link #execute()} has run */
    public Map<Long, List<File>> getSizes() {
        return sizes;
    }

    /** @return the roots that are scanned */
    public List<File> getRoots() {
        return roots;
    }

    /** @return files grouped by content digest, or {@code null} before {@link #execute()} has run */
    public Map<String, List<File>> getHashes() {
        return hashes;
    }

    /** @return the configured digest algorithm name */
    public String getHashAlgo() {
        return hashAlgo;
    }

    /** @return the file-name suffixes that are considered */
    public List<String> getFileTypes() {
        return fileTypes;
    }
}
Kotlin:
package com.example.demo
import org.springframework.beans.factory.annotation.Value
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication
import java.io.File
import java.io.FileInputStream
import java.security.MessageDigest
/**
 * Spring Boot component that deletes duplicate files found below [root].
 *
 * Files are selected by extension, grouped by size and then by a hex digest of their
 * contents; in every digest group the first file is kept and the others are deleted.
 *
 * @property root directory to scan, built from the `root` property
 * @property hashAlgo [MessageDigest] algorithm name, taken from the `algo` property
 * @property fileTypes extensions to consider, taken from the `types` property; an entry may be
 * written with or without a leading dot (`jpg` or `.jpg`)
 */
@SpringBootApplication
open class DeDup(@Value("#{new java.io.File('\${root}')}") val root: File,
                 @Value("\${algo}") val hashAlgo: String,
                 @Value("\${types}") val fileTypes: List<String>) {

    /**
     * Scans [root], finds files with identical content and deletes all but the first of each group.
     *
     * @return the files that were candidates for deletion, keyed by whether deletion succeeded
     */
    fun execute(): Map<Boolean, List<File>> {
        // File.extension never contains the dot, so tolerate entries such as ".jpg".
        val wantedExtensions = fileTypes.map { it.removePrefix(".") }.toSet()
        val sizes = root.walkTopDown()
            .filter { it.isFile && it.extension in wantedExtensions }
            .groupBy { it.length() }
        val hashes = sizes.values.flatten().groupBy { hash(it) }
        // Keep the first file of every digest group; everything after it is a duplicate.
        return hashes.values
            .flatMap { it.drop(1) }
            .groupBy { it.exists() && it.delete() }
    }

    /**
     * Computes the lower-case hex digest of the contents of [f] using [hashAlgo].
     *
     * The file is streamed through a fixed-size buffer, and `use` closes the stream even when
     * reading fails.
     */
    private fun hash(f: File): String {
        val digest = MessageDigest.getInstance(hashAlgo)
        FileInputStream(f).use { input ->
            val buffer = ByteArray(DEFAULT_BUFFER_SIZE)
            while (true) {
                val read = input.read(buffer)
                if (read < 0) break
                digest.update(buffer, 0, read)
            }
        }
        return digest.digest().joinToString("") { "%02x".format(it) }
    }
}
/**
 * Application entry point: starts the Spring context, runs the [DeDup] bean once and prints
 * how many files were deleted.
 */
fun main(args: Array<String>) {
    val context = SpringApplication.run(DeDup::class.java, *args)
    val outcome = context.getBean(DeDup::class.java).execute()
    // Files whose deletion succeeded are grouped under the key `true`.
    val deletedCount = outcome.getOrDefault(true, listOf()).size
    println("$deletedCount files deleted")
}