diff --git a/.gitignore b/.gitignore
index 84d6087..e32e1df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,6 @@
*.dll
*.so
*.dylib
-Linux*
-Darwin*
# Config file
config.json
@@ -14,8 +12,8 @@ config.json
*.test
# Output
-*.out
-duplicates.json
+build/
+output.json
# IDEA configuration files
*.iws
diff --git a/LICENSE b/LICENSE
index 9ef0225..d5e90db 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2019 Дмитрий Васильев
+Copyright (c) 2019, 2020 Дмитрий Васильев
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index bbf89d2..eb56034 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,37 @@
-# go-file-copies
+go-file-copies
+==============
-A Go package to fetch duplicate files
+A Go program that finds duplicate files in the specified paths.
+
+Table of Contents
+=================
+
++ [Images](#Images)
++ [Flags](#Flags)
++ [Install](#Install)
++ [Run](#Run)
### Images
-data:image/s3,"s3://crabby-images/cb4f5/cb4f503f3a86d1393357388ba11beeba42fc6ce6" alt=""
+data:image/s3,"s3://crabby-images/a4cdf/a4cdfce2beda2d245e3a126252df74f1c28e1ee3" alt=""
+
+### Flags
+
+ * config - _specify the path to your config file, which lists the directories to scan for duplicates._
+ * output - _specify the path to the output file with results._
+
+### Install
+
+##### Compile it yourself
+Install [Go](https://golang.org/) and run [compile.sh](compile.sh) from the terminal.
+Binaries will be placed in the "build" directory.
-data:image/s3,"s3://crabby-images/c9766/c976682ef94fcccf84b6ecd00e0242830af8dad5" alt=""
+##### Use precompiled binaries
+Download the binary for your system from [releases](https://github.com/Dmitriy-Vas/go-file-copies/releases).
-### Usage
+### Run
-Clone repository and write in terminal: `cd go-dungeon-gen && go build main.go && main.exe`
-
-Or you can use precompiled binaries from [releases](https://github.com/Dmitriy-Vas/go-file-copies/releases) and run as default program.
-
-You'll get the file `duplicates.json` in the directory with program.
-
-**Files with 0 hash are empty.**
+Rename [config-sample.json](config-sample.json) to config.json and add the directories to scan for duplicates to "dirs".
+The program will take __all files recursively__ from the specified directories.
+You can specify the paths to the config and output files via [flags](#Flags).
+Go to the directory with the program and run it like any other binary.
diff --git a/compile.bat b/compile.bat
deleted file mode 100644
index 233d998..0000000
--- a/compile.bat
+++ /dev/null
@@ -1,6 +0,0 @@
-set GOARCH=386
-go build -o WindowsCopies.exe main.go
-set GOOS=darwin
-go build -o DarwinCopies main.go
-set GOOS=linux
-go build -o LinuxCopies main.go
diff --git a/compile.sh b/compile.sh
new file mode 100644
index 0000000..9f2acb8
--- /dev/null
+++ b/compile.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+OsArray=("linux" "darwin" "windows")
+
+for os in ${OsArray[*]}; do
+ name="go-file-copies"
+ if [ "$os" = "windows" ]; then
+ name+=".exe"
+ fi
+ GOOS=$os go build -o build/"$os"/$name
+ cp config-sample.json build/"$os"/
+done
diff --git a/config-sample.json b/config-sample.json
index e86f70c..4282c30 100644
--- a/config-sample.json
+++ b/config-sample.json
@@ -1,6 +1,7 @@
{
- "directories": [
- "C:\\Program Files",
- "C:\\Path\\To\\Your\\Directory"
+ "dirs": [
+ "/mnt/Data/Pictures",
+ "/home/Dmitriy/Pictures",
+ "./../../Downloads"
]
}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..c933533
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module github.com/Dmitriy-Vas/go-file-copies
+
+go 1.13
+
+require github.com/cespare/xxhash/v2 v2.1.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..3f603af
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,3 @@
+github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
+github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
+github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
diff --git a/main.go b/main.go
index acda3dd..b2cfe5d 100644
--- a/main.go
+++ b/main.go
@@ -1,36 +1,48 @@
package main
-//#region Header
import (
"encoding/json"
- "fmt"
- "hash/crc32"
+ "flag"
+ "github.com/cespare/xxhash/v2"
"io/ioutil"
"log"
"os"
"os/signal"
"path/filepath"
+ "sync"
+ "syscall"
)
-const (
- OS_PERMISSIONS os.FileMode = 0644
+var (
+ config *Config
)
-var (
- config Config
- duplicates map[uint32][]File
+const (
+ OsFilePermissions os.FileMode = 0644
+ DefaultConfigPath string = "./config.json"
+ DefaultOutputPath string = "./output.json"
)
-type Config struct {
- Directories []string `json:"directories"`
-}
+type (
+ Config struct {
+ ConfigPath string
+ OutputPath string
-type File struct {
- Hash uint32
- Path string
-}
+ // Paths to the directories that are scanned for duplicates
+ Directories []string `json:"dirs"`
+ }
+
+ File string
+
+ Executor struct {
+ mutex *sync.Mutex
+ wg *sync.WaitGroup
-//#endregion
+ // Results, which will be saved in JSON
+ // map[xxHash][]Path
+ Results map[uint64][]File
+ }
+)
func isErr(err error) {
if err != nil {
@@ -38,94 +50,101 @@ func isErr(err error) {
}
}
-//#region Storage
-func getConfig() {
- data, err := ioutil.ReadFile("config.json")
- isErr(err)
- err = json.Unmarshal(data, &config)
- isErr(err)
-}
-
-func saveConfig() {
- data, err := json.Marshal(&config)
- isErr(err)
- err = ioutil.WriteFile("config.json", data, OS_PERMISSIONS)
- isErr(err)
- fmt.Println("Config successfully saved!")
-}
-
-func saveResult() {
- data, err := json.Marshal(&duplicates)
- isErr(err)
- err = ioutil.WriteFile("duplicates.json", data, OS_PERMISSIONS)
- isErr(err)
-}
-
-//#endregion
-
-func getCRCHash(name string) uint32 {
- data, err := ioutil.ReadFile(name)
- isErr(err)
- return crc32.ChecksumIEEE(data)
+// IsDirsExists checks that every directory listed in c.Directories exists.
+// Returns the first directory for which os.Stat reports "not exist", or "" if there is none.
+func (c *Config) IsDirsExists() (dir string) {
+ for _, d := range c.Directories {
+ if _, err := os.Stat(d); os.IsNotExist(err) {
+ return d
+ }
+ }
+ return
}
-func getFilesToScan() {
- duplicates = make(map[uint32][]File)
- // По очереди берём пути из конфига и проходим по ним, собирая все найденные файлы
+// GetFiles walks through the configured directories and collects the paths of all files found.
+// Returns the slice of found paths; entries that cannot be read (err != nil) are skipped.
+func (c *Config) GetFiles() []File {
+ output := make([]File, 0)
for _, dir := range config.Directories {
- err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
- // Проверяем путь на директорию, добавляем только файлы
- if !info.IsDir() {
- // Получаем хэш файла
- hash := getCRCHash(path)
- f := File{
- Path: path,
- Hash: hash,
- }
- duplicates[hash] = append(duplicates[hash], f)
+ filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+ if err != nil || info.IsDir() {
+ return nil
}
+ output = append(output, File(path))
return nil
})
- isErr(err)
}
+ return output
+}
- fmt.Println("Found", len(duplicates), "hashes")
+// Get uint64 hash from a file, using xxHash algorithm
+// https://github.com/Cyan4973/xxHash#benchmarks
+func (f File) GetHash() uint64 {
+ raw, err := ioutil.ReadFile(string(f))
+ isErr(err)
+ return xxhash.Sum64(raw)
}
-func checkFiles() {
- result := make(map[uint32][]File)
+// Hash a single file and append its path to Results; the mutex is held while hashing, so files are hashed one at a time
+func (e *Executor) SaveFileHash(file File) {
+ e.mutex.Lock()
+ defer e.mutex.Unlock()
+ defer e.wg.Done()
- for h, v := range duplicates {
- if len(v) == 1 {
- continue
- }
- result[h] = duplicates[h]
- }
- duplicates = result
+ hash := file.GetHash()
+ e.Results[hash] = append(e.Results[hash], file)
+}
+
+func init() {
+ config = new(Config)
+
+ flag.StringVar(&config.ConfigPath, "config", DefaultConfigPath, "Use this flag to specify the path to your config file, which has paths to directories with duplicates.")
+ flag.StringVar(&config.OutputPath, "output", DefaultOutputPath, "Use this flag to specify the path to the output file with results.")
+ flag.Parse()
- fmt.Println("Found", len(duplicates), "copies")
+ raw, err := ioutil.ReadFile(config.ConfigPath)
+ isErr(err)
+ isErr(json.Unmarshal(raw, &config))
}
func main() {
- // Загружаем конфиг с диска
- getConfig()
-
- // Отлавливаем Ctrl^C для сохранения конфига
go func() {
signalChan := make(chan os.Signal, 1)
- signal.Notify(signalChan, os.Interrupt)
+ signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP)
+ // SIGKILL cannot be caught, so SIGTERM is handled instead; nothing is deferred because os.Exit skips defers
<-signalChan
-
- saveConfig()
+ log.Println("Shutting down the program...")
os.Exit(0)
}()
- // Сохраняем конфиг при ошибке или если программа закончила выполнение
- defer saveConfig()
- getFilesToScan()
+ if dir := config.IsDirsExists(); dir != "" {
+ log.Fatalf("Directory %s can not be found.", dir)
+ }
- checkFiles()
+ files := config.GetFiles()
+ log.Printf("Found %d files.", len(files))
- saveResult()
+ exec := &Executor{
+ mutex: new(sync.Mutex),
+ wg: new(sync.WaitGroup),
+ Results: make(map[uint64][]File),
+ }
+ for _, file := range files {
+ exec.wg.Add(1)
+ go exec.SaveFileHash(file)
+ }
+ exec.wg.Wait()
+
+ for hash, files := range exec.Results {
+ if len(files) <= 1 {
+ delete(exec.Results, hash)
+ }
+ }
+ log.Printf("Found %d hashes of duplicates.", len(exec.Results))
+
+ raw, err := json.Marshal(exec.Results)
+ isErr(err)
+ isErr(ioutil.WriteFile(config.OutputPath, raw, OsFilePermissions))
+ log.Printf("All results saved to %s", config.OutputPath)
}