Skip to content

Commit

Permalink
Merge pull request #1 from Alex99y/develop
Browse files Browse the repository at this point in the history
v0.1.0
  • Loading branch information
Alex99y authored Sep 12, 2020
2 parents 0dadb60 + ac49fb5 commit 5430b0d
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 149 deletions.
37 changes: 22 additions & 15 deletions pkg/cmd/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@ package cmd
import (
"fmt"

"github.com/Alex99y/duplicate-files/pkg/utils"

"github.com/spf13/cobra"
)

// CobraInterface represents the CMD interface
type CobraInterface struct {
RootCmd *cobra.Command
NumberOfThreads uint64
RootFolder string
RootCmd *cobra.Command
RootFolder string
}

func (cmd *CobraInterface) setRootCommand() {
cmd.RootCmd = &cobra.Command{
Short: "Short",
Long: "Long",
Short: "Application to search duplicate files inside a folder",
}
}

Expand All @@ -25,25 +25,32 @@ func (cmd *CobraInterface) setVersion() {
Use: "version",
Short: "Print app version",
Run: func(c *cobra.Command, arg []string) {
fmt.Print("v0.0.1")
fmt.Print("v0.1.0")
},
}
cmd.RootCmd.AddCommand(version)
}

func (cmd *CobraInterface) setStart() {
start := &cobra.Command{
Use: "start",
Short: "Execute duplicate files searcher",
Long: "Long description",
Run: func(c *cobra.Command, arg []string) {
cmd.RootFolder, _ = c.PersistentFlags().GetString("path")
cmd.NumberOfThreads, _ = c.PersistentFlags().GetUint64("threads")
Use: "start",
Short: "Search for duplicated files",
Example: "start [rootFolder]",
Long: "This command receives a folder, find recursively and print all duplicate files inside this folder and his subfolderss",
Run: func(c *cobra.Command, args []string) {
cmd.RootFolder = args[0]
},
Args: func(c *cobra.Command, args []string) error {
if len(args) != 1 {
return fmt.Errorf("No root folder provided")
}
isDir, err := utils.IsDirectory(args[0])
if err != nil || !isDir {
return fmt.Errorf("Invalid root folder provided")
}
return nil
},
}
start.PersistentFlags().Uint64P("threads", "t", 4, "--threads 2")
start.PersistentFlags().StringP("path", "f", "", "--path /home")
start.MarkPersistentFlagRequired("path")

cmd.RootCmd.AddCommand(start)
}
Expand Down
106 changes: 29 additions & 77 deletions pkg/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,113 +3,65 @@ package core
import (
"fmt"
"path"
"runtime"
"sync"
"time"

"github.com/Alex99y/duplicate-files/pkg/cmd"
"github.com/Alex99y/duplicate-files/pkg/crypto"
"github.com/Alex99y/duplicate-files/pkg/structures"
"github.com/Alex99y/duplicate-files/pkg/utils"
)

const threadRetryBeforeReturn = 1

var wg sync.WaitGroup

// StructureInfo contains the configuration to start the process
type StructureInfo struct {
folderQueue *structures.QueueWithSync
resultMap *structures.MapWithSync
}

func (s *StructureInfo) processFile() {
// Process file
}

func (s *StructureInfo) processFolder(id int) {
// Retries before return
retriesLeft := threadRetryBeforeReturn
for {
// Dequeue next file to process
nextFolderToProcess := s.folderQueue.Dequeue()

if nextFolderToProcess != nil {
retriesLeft = threadRetryBeforeReturn
file := nextFolderToProcess.(string)
isDir, err := utils.IsDirectory(file)
if err != nil {
fmt.Println(err)
continue
}
if isDir {
// Process folder
files := utils.ReadFilesFromDirectory(file)
for _, f := range files {
s.folderQueue.Enqueue(path.Join(file, f))
}
} else {
// Process regular file
fileContent := utils.ReadFile(file)
fileHash := crypto.GetFileHash(fileContent)
s.resultMap.AddElement(fileHash, file)
fileContent = nil
}
} else {
if retriesLeft == 0 {
break
} else {
time.Sleep(500 * time.Millisecond)
retriesLeft--
}
func processFolder(file string) {
isDir, err := utils.IsDirectory(file)
if err != nil {
fmt.Println(err)
wg.Done()
return
}
if isDir {
files := utils.ReadFilesFromDirectory(file)
for _, f := range files {
wg.Add(1)
go processFolder(path.Join(file, f))
}
} else {
fileContent := utils.ReadFile(file)
fileHash := crypto.GetFileHash(fileContent)
structures.AddElement(fileHash, file)
fileContent = nil
}

// End task
wg.Done()
}

// Start function will start the thread process
// Start function will begin the thread process
func Start(config cmd.CobraInterface) {

// Prepare queues
structure := StructureInfo{
// Contains the folder/files to process
folderQueue: structures.NewQueue(),
// Contains the result (duplicated files)
resultMap: structures.NewMap(),
}
structure.folderQueue.Enqueue(config.RootFolder)

// Total threads to improve paralellism
threads := runtime.NumCPU() / 2
if int(config.NumberOfThreads) < threads {
threads = int(config.NumberOfThreads)
}
runtime.GOMAXPROCS(threads)
wg.Add(threads)

// Start searching
for i := 0; i < threads; i++ {
go structure.processFolder(i)
}
// Start the first goroutine on the root folder
wg.Add(1)
go processFolder(config.RootFolder)

// Wait until goroutines ends
// Wait until all goroutines have finished
wg.Wait()

resultMap := structure.resultMap.GetMap()
resultMap := structures.GetMap()
gotDuplicates := false

for key, files := range resultMap {
resultMap.Range(func(key interface{}, value interface{}) bool {
files := value.([]string)
if len(files) > 1 {
gotDuplicates = true
fmt.Println("Duplicated files (" + key + "):")
fmt.Println("Duplicated files (" + key.(string) + "):")
for _, file := range files {
fmt.Println(file)
}
fmt.Print("\n")
}
}
return true
})

if gotDuplicates == false {
fmt.Println("No duplicated files found")
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,8 @@ import (
// main builds the command-line interface, executes it, and starts the
// duplicate search only when a root folder was recorded by the CLI.
func main() {
	cli := new(cmd.CobraInterface)
	cli.Execute()

	// An empty RootFolder means there is nothing to scan.
	// NOTE(review): presumably a command other than "start" ran, or argument
	// validation failed; confirm against the cmd package.
	if cli.RootFolder != "" {
		core.Start(*cli)
	}
}
36 changes: 16 additions & 20 deletions pkg/structures/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,25 @@ import (
"sync"
)

// MapWithSync have the map with mutex
type MapWithSync struct {
mutex sync.RWMutex
smap map[string][]string
}
// SMap is the structure that holds the results
var SMap sync.Map

// AddElement add an element to the map
func (c *MapWithSync) AddElement(key string, value string) {
c.mutex.Lock()
c.smap[key] = append(c.smap[key], value)
c.mutex.Unlock()
// sMapMu serializes the read-modify-write cycle in AddElement. sync.Map makes
// each individual Load and Store atomic, but not the Load-append-Store
// sequence as a whole.
var sMapMu sync.Mutex

// AddElement appends value to the string slice stored in SMap under key,
// creating the slice when key is not present yet.
//
// It is safe for concurrent use: without the lock, two goroutines adding to
// the same key could both load the same slice and one append would be lost.
func AddElement(key string, value string) {
	sMapMu.Lock()
	defer sMapMu.Unlock()

	// A nil slice is a valid append target, so a missing key needs no
	// separate branch.
	var files []string
	if stored, ok := SMap.Load(key); ok {
		files = stored.([]string)
	}
	SMap.Store(key, append(files, value))
}

// GetMap returns the map
func (c *MapWithSync) GetMap() map[string][]string {
c.mutex.RLock()
defer c.mutex.RUnlock()
return c.smap
}

// NewMap creates a new MapWithSync instance
func NewMap() *MapWithSync {
return &MapWithSync{
smap: make(map[string][]string),
}
func GetMap() *sync.Map {
return &SMap
}
37 changes: 0 additions & 37 deletions pkg/structures/queue.go

This file was deleted.

0 comments on commit 5430b0d

Please sign in to comment.