-
Notifications
You must be signed in to change notification settings - Fork 22
/
rsync.go
164 lines (145 loc) · 4.46 KB
/
rsync.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// Copyright 2012 Julian Gutierrez Oschmann (github.com/julian-gutierrez-o).
// All rights reserved.
// Use of this source code is governed by a BSD-style license that can be found
// in the LICENSE file.
// A Golang implementation of the rsync algorithm.
// This package contains the algorithm for both client and server side.
package rsync
import "crypto/md5"
const (
BlockSize = 1024 * 64
M = 1 << 16
)
type BlockHash struct {
index int
strongHash []byte
weakHash uint32
}
// There are two kind of operations: BLOCK and DATA.
// If a block match is found on the server, a BLOCK operation is sent over the channel along with the block index.
// Modified data between two block matches is sent like a DATA operation.
const (
BLOCK = iota
DATA
)
// An rsync operation (typically to be sent across the network). It can be either a block of raw data or a block index.
type RSyncOp struct {
// Kind of operation: BLOCK | DATA.
opCode int
// The raw modificated (or misaligned) data. Iff opCode == DATA, nil otherwise.
data []byte
// The index of found block. Iff opCode == BLOCK. nil otherwise.
blockIndex int
}
// Returns weak and strong hashes for a given slice.
func CalculateBlockHashes(content []byte) []BlockHash {
blockHashes := make([]BlockHash, getBlocksNumber(content))
for i := range blockHashes {
initialByte := i * BlockSize
endingByte := min((i+1)*BlockSize, len(content))
block := content[initialByte:endingByte]
weak, _, _ := weakHash(block)
blockHashes[i] = BlockHash{index: i, strongHash: strongHash(block), weakHash: weak}
}
return blockHashes
}
// Returns the number of blocks for a given slice of content.
func getBlocksNumber(content []byte) int {
blockNumber := (len(content) / BlockSize)
if len(content)%BlockSize != 0 {
blockNumber += 1
}
return blockNumber
}
// Applies operations from the channel to the original content.
// Returns the modified content.
func ApplyOps(content []byte, ops chan RSyncOp, fileSize int) []byte {
var offset int
result := make([]byte, fileSize)
for op := range ops {
switch op.opCode {
case BLOCK:
copy(result[offset:offset+BlockSize], content[op.blockIndex*BlockSize:op.blockIndex*BlockSize+BlockSize])
offset += BlockSize
case DATA:
copy(result[offset:], op.data)
offset += len(op.data)
}
}
return result
}
// Computes all the operations needed to recreate content.
// All these operations are sent through a channel of RSyncOp.
func CalculateDifferences(content []byte, hashes []BlockHash, opsChannel chan RSyncOp) {
hashesMap := make(map[uint32][]BlockHash)
defer close(opsChannel)
for _, h := range hashes {
key := h.weakHash
hashesMap[key] = append(hashesMap[key], h)
}
var offset, previousMatch int
var aweak, bweak, weak uint32
var dirty, isRolling bool
for offset < len(content) {
endingByte := min(offset+BlockSize, len(content)-1)
block := content[offset:endingByte]
if !isRolling {
weak, aweak, bweak = weakHash(block)
isRolling = true
} else {
aweak = (aweak - uint32(content[offset-1]) + uint32(content[endingByte-1])) % M
bweak = (bweak - (uint32(endingByte-offset) * uint32(content[offset-1])) + aweak) % M
weak = aweak + (1 << 16 * bweak)
}
if l := hashesMap[weak]; l != nil {
blockFound, blockHash := searchStrongHash(l, strongHash(block))
if blockFound {
if dirty {
opsChannel <- RSyncOp{opCode: DATA, data: content[previousMatch:offset]}
dirty = false
}
opsChannel <- RSyncOp{opCode: BLOCK, blockIndex: blockHash.index}
previousMatch = endingByte
isRolling = false
offset += BlockSize
continue
}
}
dirty = true
offset++
}
if dirty {
opsChannel <- RSyncOp{opCode: DATA, data: content[previousMatch:]}
}
}
// Searches for a given strong hash among all strong hashes in this bucket.
func searchStrongHash(l []BlockHash, hashValue []byte) (bool, *BlockHash) {
for _, blockHash := range l {
if string(blockHash.strongHash) == string(hashValue) {
return true, &blockHash
}
}
return false, nil
}
// Returns a strong hash for a given block of data
func strongHash(v []byte) []byte {
h := md5.New()
h.Write(v)
return h.Sum(nil)
}
// Returns a weak hash for a given block of data.
func weakHash(v []byte) (uint32, uint32, uint32) {
var a, b uint32
for i := range v {
a += uint32(v[i])
b += (uint32(len(v)-1) - uint32(i) + 1) * uint32(v[i])
}
return (a % M) + (1 << 16 * (b % M)), a % M, b % M
}
// Returns the smaller of a or b.
func min(a, b int) int {
if a < b {
return a
}
return b
}