diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go index db2288e..a143636 100644 --- a/pkg/cmd/cli.go +++ b/pkg/cmd/cli.go @@ -3,20 +3,20 @@ package cmd import ( "fmt" + "github.com/Alex99y/duplicate-files/pkg/utils" + "github.com/spf13/cobra" ) // CobraInterface represents the CMD interface type CobraInterface struct { - RootCmd *cobra.Command - NumberOfThreads uint64 - RootFolder string + RootCmd *cobra.Command + RootFolder string } func (cmd *CobraInterface) setRootCommand() { cmd.RootCmd = &cobra.Command{ - Short: "Short", - Long: "Long", + Short: "Application to search duplicate files inside a folder", } } @@ -25,7 +25,7 @@ func (cmd *CobraInterface) setVersion() { Use: "version", Short: "Print app version", Run: func(c *cobra.Command, arg []string) { - fmt.Print("v0.0.1") + fmt.Print("v0.1.0") }, } cmd.RootCmd.AddCommand(version) @@ -33,17 +33,24 @@ func (cmd *CobraInterface) setVersion() { func (cmd *CobraInterface) setStart() { start := &cobra.Command{ - Use: "start", - Short: "Execute duplicate files searcher", - Long: "Long description", - Run: func(c *cobra.Command, arg []string) { - cmd.RootFolder, _ = c.PersistentFlags().GetString("path") - cmd.NumberOfThreads, _ = c.PersistentFlags().GetUint64("threads") + Use: "start", + Short: "Search for duplicated files", + Example: "start [rootFolder]", + Long: "This command receives a folder, find recursively and print all duplicate files inside this folder and his subfolderss", + Run: func(c *cobra.Command, args []string) { + cmd.RootFolder = args[0] + }, + Args: func(c *cobra.Command, args []string) error { + if len(args) != 1 { + return fmt.Errorf("No root folder provided") + } + isDir, err := utils.IsDirectory(args[0]) + if err != nil || !isDir { + return fmt.Errorf("Invalid root folder provided") + } + return nil }, } - start.PersistentFlags().Uint64P("threads", "t", 4, "--threads 2") - start.PersistentFlags().StringP("path", "f", "", "--path /home") - start.MarkPersistentFlagRequired("path") cmd.RootCmd.AddCommand(start) } diff --git a/pkg/core/core.go b/pkg/core/core.go index 9ba0923..3d76b75 100644 --- a/pkg/core/core.go +++ b/pkg/core/core.go @@ -3,9 +3,7 @@ package core import ( "fmt" "path" - "runtime" "sync" - "time" "github.com/Alex99y/duplicate-files/pkg/cmd" "github.com/Alex99y/duplicate-files/pkg/crypto" @@ -13,103 +11,57 @@ import ( "github.com/Alex99y/duplicate-files/pkg/utils" ) -const threadRetryBeforeReturn = 1 - var wg sync.WaitGroup -// StructureInfo contains the configuration to start the process -type StructureInfo struct { - folderQueue *structures.QueueWithSync - resultMap *structures.MapWithSync -} - -func (s *StructureInfo) processFile() { - // Process file -} - -func (s *StructureInfo) processFolder(id int) { - // Retries before return - retriesLeft := threadRetryBeforeReturn - for { - // Dequeue next file to process - nextFolderToProcess := s.folderQueue.Dequeue() - - if nextFolderToProcess != nil { - retriesLeft = threadRetryBeforeReturn - file := nextFolderToProcess.(string) - isDir, err := utils.IsDirectory(file) - if err != nil { - fmt.Println(err) - continue - } - if isDir { - // Process folder - files := utils.ReadFilesFromDirectory(file) - for _, f := range files { - s.folderQueue.Enqueue(path.Join(file, f)) - } - } else { - // Process regular file - fileContent := utils.ReadFile(file) - fileHash := crypto.GetFileHash(fileContent) - s.resultMap.AddElement(fileHash, file) - fileContent = nil - } - } else { - if retriesLeft == 0 { - break - } else { - time.Sleep(500 * time.Millisecond) - retriesLeft-- - } +func processFolder(file string) { + isDir, err := utils.IsDirectory(file) + if err != nil { + fmt.Println(err) + wg.Done() + return + } + if isDir { + files := utils.ReadFilesFromDirectory(file) + for _, f := range files { + wg.Add(1) + go processFolder(path.Join(file, f)) } + } else { + fileContent := utils.ReadFile(file) + fileHash := crypto.GetFileHash(fileContent) + structures.AddElement(fileHash, file) + fileContent = nil } - // End task wg.Done() } -// Start function will start the thread process +// Start function will begin the thread process func Start(config cmd.CobraInterface) { - // Prepare queues - structure := StructureInfo{ - // Contains the folder/files to process - folderQueue: structures.NewQueue(), - // Contains the result (duplicated files) - resultMap: structures.NewMap(), - } - structure.folderQueue.Enqueue(config.RootFolder) - - // Total threads to improve paralellism - threads := runtime.NumCPU() / 2 - if int(config.NumberOfThreads) < threads { - threads = int(config.NumberOfThreads) - } - runtime.GOMAXPROCS(threads) - wg.Add(threads) - - // Start searching - for i := 0; i < threads; i++ { - go structure.processFolder(i) - } + // Excecute first thread + wg.Add(1) + go processFolder(config.RootFolder) - // Wait until goroutines ends + // Wait until all goroutines ends wg.Wait() - resultMap := structure.resultMap.GetMap() + resultMap := structures.GetMap() gotDuplicates := false - for key, files := range resultMap { + resultMap.Range(func(key interface{}, value interface{}) bool { + files := value.([]string) if len(files) > 1 { gotDuplicates = true - fmt.Println("Duplicated files (" + key + "):") + fmt.Println("Duplicated files (" + key.(string) + "):") for _, file := range files { fmt.Println(file) } fmt.Print("\n") } - } + return true + }) + if gotDuplicates == false { fmt.Println("No duplicated files found") } diff --git a/pkg/main.go b/pkg/main.go index 784a6dd..d2dd602 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -8,5 +8,8 @@ import ( func main() { cobra := new(cmd.CobraInterface) cobra.Execute() + if cobra.RootFolder == "" { + return + } core.Start(*cobra) } diff --git a/pkg/structures/map.go b/pkg/structures/map.go index 2146999..18d9537 100644 --- a/pkg/structures/map.go +++ b/pkg/structures/map.go @@ -4,29 +4,25 @@ import ( "sync" ) -// MapWithSync have the map with mutex -type MapWithSync struct { - mutex sync.RWMutex - smap map[string][]string -} +// SMap is the structure that have the results +var SMap sync.Map // AddElement add an element to the map -func (c *MapWithSync) AddElement(key string, value string) { - c.mutex.Lock() - c.smap[key] = append(c.smap[key], value) - c.mutex.Unlock() +func AddElement(key string, value string) { + results, ok := SMap.Load(key) + if ok == true { + tempArray := results.([]string) + tempArray = append(tempArray, value) + SMap.Store(key, tempArray) + } else { + var newArray []string + newArray = make([]string, 0) + newArray = append(newArray, value) + SMap.Store(key, newArray) + } } // GetMap returns the map -func (c *MapWithSync) GetMap() map[string][]string { - c.mutex.RLock() - defer c.mutex.RUnlock() - return c.smap -} - -// NewMap creates a new MapWithSync instance -func NewMap() *MapWithSync { - return &MapWithSync{ - smap: make(map[string][]string), - } +func GetMap() *sync.Map { + return &SMap } diff --git a/pkg/structures/queue.go b/pkg/structures/queue.go deleted file mode 100644 index dff0e75..0000000 --- a/pkg/structures/queue.go +++ /dev/null @@ -1,37 +0,0 @@ -package structures - -import ( - "container/list" - "sync" -) - -// QueueWithSync is the queue interface -type QueueWithSync struct { - queue *list.List - mutex sync.Mutex -} - -// Enqueue add a new element in to the queue -func (f *QueueWithSync) Enqueue(element interface{}) { - f.mutex.Lock() - f.queue.PushBack(element) - f.mutex.Unlock() -} - -// Dequeue remove an element from the queue -func (f *QueueWithSync) Dequeue() interface{} { - f.mutex.Lock() - last := f.queue.Back() - defer f.mutex.Unlock() - if last == nil { - return nil - } - return f.queue.Remove(last) -} - -// NewQueue creates a new instance of QueueWithSync -func NewQueue() *QueueWithSync { - return &QueueWithSync{ - queue: list.New(), - } -}