-
Notifications
You must be signed in to change notification settings - Fork 0
/
md5sum.go
148 lines (137 loc) · 3.32 KB
/
md5sum.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
package main
import (
"crypto/md5"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"sync"
)
// MD5SumFile streams the contents of the file at path through MD5 and returns
// the digest as a lowercase hexadecimal string. If the file cannot be opened
// or read, the error is returned and sum is empty.
func MD5SumFile(path string) (sum string, err error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer file.Close()

	// io.Copy feeds the hash in chunks, so the file is never held in memory
	// in full.
	hasher := md5.New()
	if _, copyErr := io.Copy(hasher, file); copyErr != nil {
		return "", copyErr
	}
	return fmt.Sprintf("%x", hasher.Sum(nil)), nil
}
// walkFiles launches a goroutine that walks the file tree rooted at root and
// sends the path of every regular file on the returned string channel. The
// string channel is closed once the walk has finished, and the walk's final
// error (nil on success) is delivered on the returned error channel. Closing
// done makes the walk stop with a "walk canceled" error the next time it
// tries to send a path.
func walkFiles(done <-chan struct{}, root string) (<-chan string, <-chan error) {
	out := make(chan string)
	// Buffered so the walker can report its result and exit even if the
	// caller never reads it.
	walkErr := make(chan error, 1)

	visit := func(p string, fi os.FileInfo, err error) error {
		switch {
		case err != nil:
			// Propagate the traversal error; this aborts the walk.
			return err
		case !fi.Mode().IsRegular():
			// Directories, symlinks, devices, etc. are skipped.
			return nil
		}
		select {
		case out <- p:
			return nil
		case <-done:
			return errors.New("walk canceled")
		}
	}

	go func() {
		// The deferred close runs after the walk result has been sent.
		defer close(out)
		walkErr <- filepath.Walk(root, visit)
	}()

	return out, walkErr
}
// A result is the outcome of reading one file and summing it with MD5.
type result struct {
	path string // path of the file, exactly as received by digester
	sum  []byte // MD5 digest of the bytes hashed; not a complete file digest when err is non-nil
	err  error  // error from opening or reading the file; nil on success
}

// digestFile returns the MD5 digest of the file at path. The file is closed
// before digestFile returns, so a caller that processes many files holds at
// most one of them open at a time. If opening or reading fails, the error is
// returned together with the digest of whatever bytes were hashed before the
// failure (the digest of empty input if the file could not be opened).
func digestFile(path string) ([]byte, error) {
	h := md5.New()
	f, err := os.Open(path)
	if err == nil {
		// Only register the Close once the open has succeeded; the defer is
		// scoped to this helper and therefore runs once per file.
		defer f.Close()
		_, err = io.Copy(h, f)
	}
	return h.Sum(nil), err
}

// digester receives file paths from paths and sends one result per path on c.
// It returns when paths has been closed and drained, or as soon as done is
// closed while it is waiting to deliver a result.
//
// Each file is digested through digestFile so that it is closed before the
// next path is read. Deferring the Close inside this loop would keep every
// processed file open until digester itself returned.
func digester(done <-chan struct{}, paths <-chan string, c chan<- result) {
	for path := range paths {
		sum, err := digestFile(path)
		select {
		case c <- result{path, sum, err}:
		case <-done:
			return
		}
	}
}
// md5All digests every regular file in the tree rooted at root and returns a
// map from file path to the MD5 sum of that file's contents. It returns the
// first error reported by a digester, or otherwise the error from the
// directory walk, if any. On a digest error it returns immediately without
// waiting for in-flight reads to finish.
func md5All(root string) (map[string][]byte, error) {
	// Closing done on return (possibly before c and errc have been drained)
	// tells the walker and the digesters to stop.
	done := make(chan struct{})
	defer close(done)

	paths, errc := walkFiles(done, root)

	// A fixed pool of digesters shares the paths channel and reports on a
	// single results channel.
	const numDigesters = 20
	results := make(chan result)
	var workers sync.WaitGroup
	for n := numDigesters; n > 0; n-- {
		workers.Add(1)
		go func() {
			defer workers.Done()
			digester(done, paths, results)
		}()
	}

	// Close results once every digester has returned so the collection loop
	// below terminates.
	go func() {
		workers.Wait()
		close(results)
	}()

	sums := make(map[string][]byte)
	for res := range results {
		if res.err != nil {
			return nil, res.err
		}
		sums[res.path] = res.sum
	}

	// All digests succeeded; surface a failed walk, if there was one.
	if walkErr := <-errc; walkErr != nil {
		return nil, walkErr
	}
	return sums, nil
}
// MD5SumDir computes a single MD5 sum for the directory tree rooted at path.
// It obtains the per-file digests from md5All, visits the file paths in
// sorted order, and feeds each path followed by that file's digest into one
// MD5 hash. The result is returned as a lowercase hexadecimal string. If
// md5All fails, its error is returned and sum is empty.
func MD5SumDir(path string) (sum string, err error) {
	digests, allErr := md5All(path)
	if allErr != nil {
		return "", allErr
	}

	// Map iteration order is random; sorting the keys makes the combined
	// digest deterministic.
	names := make([]string, 0, len(digests))
	for name := range digests {
		names = append(names, name)
	}
	sort.Strings(names)

	combined := md5.New()
	for _, name := range names {
		io.WriteString(combined, name)
		combined.Write(digests[name])
	}
	return fmt.Sprintf("%x", combined.Sum(nil)), nil
}