Skip to content

Commit

Permalink
Merge pull request #4 from buildbuddy-io/opt
Browse files Browse the repository at this point in the history
Improve performance for small inputs
  • Loading branch information
luluz66 authored Aug 16, 2023
2 parents 91f3cd5 + 2fb0045 commit bd4c286
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 5 deletions.
21 changes: 16 additions & 5 deletions fastcdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ type Chunker struct {

buf []byte
cursor int
end int
offset int
eof bool
}
Expand Down Expand Up @@ -167,13 +168,23 @@ func NewChunker(rd io.Reader, opts Options) (*Chunker, error) {
rd: rd,
buf: make([]byte, opts.BufSize),
cursor: opts.BufSize,
end: opts.BufSize,
}

return chunker, nil
}

// Reset points the chunker at rd and rewinds its bookkeeping so that chunks
// produced afterwards are reported starting from offset 0. The buffer that
// was allocated for the chunker is kept and reused.
func (c *Chunker) Reset(rd io.Reader) {
	// With cursor == end == len(buf), fillBuffer sees zero leftover bytes
	// (c.end - c.cursor == 0) on the next call.
	size := len(c.buf)
	c.cursor, c.end = size, size
	c.offset, c.eof = 0, false
	c.rd = rd
}

func (c *Chunker) fillBuffer() error {
n := len(c.buf) - c.cursor
n := c.end - c.cursor
if n >= c.maxSize {
return nil
}
Expand All @@ -183,14 +194,14 @@ func (c *Chunker) fillBuffer() error {
c.cursor = 0

if c.eof {
c.buf = c.buf[:n]
c.end = n
return nil
}

// Fill the rest of the buffer
m, err := io.ReadFull(c.rd, c.buf[n:])
if err == io.EOF || err == io.ErrUnexpectedEOF {
c.buf = c.buf[:n+m]
c.end = n+m
c.eof = true
} else if err != nil {
return err
Expand All @@ -204,11 +215,11 @@ func (c *Chunker) Next() (Chunk, error) {
if err := c.fillBuffer(); err != nil {
return Chunk{}, err
}
if len(c.buf) == 0 {
if c.end == 0 {
return Chunk{}, io.EOF
}

length, fp := c.nextChunk(c.buf[c.cursor:])
length, fp := c.nextChunk(c.buf[c.cursor:c.end])

chunk := Chunk{
Offset: c.offset,
Expand Down
104 changes: 104 additions & 0 deletions fastcdc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var defaultOpts = Options{
}

// defaultData is the input that TestChunking chunks with defaultOpts.
var defaultData = randBytes(4321, 7291)
// resetData is the input TestChunking feeds to the same chunker after calling
// Reset; the chunks it yields are compared against resetChunks.
// NOTE(review): the first randBytes argument looks like the byte count
// (resetChunks expects a single 1234-byte chunk) — confirm against randBytes.
var resetData = randBytes(1234, 1729)

// Chunks generated with defaultOpts from defaultData
var defaultChunks = []Chunk{
Expand Down Expand Up @@ -43,6 +44,14 @@ var defaultChunks = []Chunk{
Data: decodeHex("f137c8c9cc9d5bf8e9194dc4ca95e069c0c537b69e9f4d327bc39883a342c15d90c362fd68ab7443f58eeb18dc4a6f1834662ddab5800bdb3d95c49697667c027050fc53e0928a0047ff8dac3b41407456560f98b22805cf9dea6dd458fe0611351c2d8a9f738a38ad6336a9f54f167afde26ccb8bfa4e83261e58c79a880ab4dfd1d69556fca8d9b317a445dd4b5a4fe7ec483d01f3726f7402303abca81df9f1761029edb87e0c52a3ed1852193772ae582a883865b2a014cdde703295f1e413e9149ea44b52d82e4795c676a1046519dab66342d5ca2c28d526b08a70b36d968e3517762b5936486982e251bba6325a812fdeb14cc76f3e7c3e98e8fbabd9e2e61fa17029c946811371351ed0b294ac8013a534aa25bbba3b3c547f70649a66a36b948bcbfe6879f389dcfcf6a564c47e7a1abd051860f6426db6f23f815faa29b484d3323772b616da282253a321f369e1961e1304db3e484cb4357a184f087281955450da9f55bcc502876991f8863918152fe02ebb6ca13a06852fb1ef6d627e0387056bc2f5b0fb7758ede4d494be94860398eb6d8af22898b33e08628f6822a710a7c1775da705f02688e7647940beeabe09381688cd80623fbbe6720451a785d9dd010e03cc10f84aa291e0760940e3ff901742060832725730947783c15671e75b732e99d3e5f0360ef5715113bc9f63d9c5069550297a43f512195e48440ebc8523a71c3ed7bd71ab0718d70a4b8b059d54b278962c6bced7cbb398c9ea65791fd4cbede0bfa26f06a9e2d64a7b455e0e9618e66f4c552507f5bacad9936415ef170acafb69469f4f5abdc68e919024c9254a9c7257aa90eb9e86d7b742b87bad69a9b60559a9eaa1409e5e0faf1414884de6829a52399cd181f32be59b81911eece3e6c6ba33"),
},
}
var resetChunks []Chunk = []Chunk{
{
Offset: 0,
Length: 1234,
Fingerprint: 4311516274091363630,
Data: decodeHex("387a518e109f76ee1fedd636dbc7e8d3d240f2a68c16c15b128e80172106886c1789c62d80fbd67e24fc3dd28fb7bdec1c3461e1ea9ac7c90156244f25e8a14f2e25fa29ceb081bbd1bbf3ac0a6e1f0358f90d993e643844b0309c60b4622d3000ea729db93f149c87d558e04af5de4bfa497401ddfc403249ed9903c4097cd2244ffc73c448b56d7db4962dab55d2df6f74408baa19fc5b18c28e0d0c2bd07d733031381ee33b77403b227e49f55a53e1d2c561fcd2cb98c6188552d06e167d60a685a2e2d51139daedda5033ecf080ab86407be70312458dcb47c39cd8586ccdc9c3c4e08e731ef84d37e1f445d958e7580c6de64d8ce6358557e5089e27810b5c9c7fd15ccb8575e0616bb66cde78b67d399c866a9903fe5387f4a11984eec8607e795e51cae9bad177c5bd83862ddf6014465fc3beaa518016ae559f441cf736dffacf042b57587e72db0ebf68971749212873b1b4710e5144b39fa8ad76e7ce039fb92c9f3f63e296434f9e273b3a8cbce07431e2436af2fab0e2511f7a14acb683c5f1f073d51609333339b7fbb4a1c54abaca7e8e6d0ac10df2827af2ecfb993929db4295cf22675b6315b1db98584d4ee2a91afc9a666ed6cabeccdbaec6ac05d5ec18031db42e93264674a6ca4e5c4b4b180f1f9c1b12c7b5979591078449002aebc2d275ea859ea8c6d38087c558e3db28ea3db0730b6722dac763b55820e90cd0b76cf6ddeb1ade14af5824fb3faeb84e28baf5ac40a6490a58788dea56a8d662135fcf9f8f36352335476c437269fc75c185c6775a23e2b2669068e183353771be80efe5508ff88aae99db088daa42ccc0027f23bb7bef8f6c8769f8a1bfc67086fb105a32e35a7040883158d40d1ce1fba8647d731abcadd21a6f82d64b5cd5cb198588fe9fcd642f1c20e1a0e98a89805d03cba7bd4c05ff5105d11449e15f7ed6716412747ea79335929dd2aa03fda4f994e7bf105c5ccb194f67e75399cfbf0e7942035bb0fd535ba5a67d61edb1e9d1ac814ce65a7b427ad346f930918f204bbef63f148092608dd4b76faab5594c2715a7c464ff109479e272a5e995676c3ac035390d0b35ce2fecd6a97cc2a718ff7a3a14e0ef8ace998802c031641c3735b8416355851129e25a3ca476a15af03d6d59427b7f5fcab7ae3429db87e57d657c5fd428243ba33e752a06ba609bf5361aa66de3f7374cd8f60daac41248424c0c361fae92869b93e68c41b9503f686d63e320ee0449c9a0d9f4d8e540e29f533e4a876ee58694ada344e2df1d77e057aa4e5fb36f38b8f08102bf8dac67ad22ce5f95fd833b5bb98a922a1211464ce10707b1ef564005eea856dd7c46a12076ea79cb265761c9b5c095e1a7121cf12555923fdfceaaf26
215da9334d0e38b7292227c7649255ae48feba4761f7df85d8e639b4b0d145aced87e78b8b09d49fd62f9becc298cc0f6ba8cbf5d0d9d8e52a377aba200e47a38a801f9ad0da953ccd584d051db214284feecec28c05f0abfb5d383efc3bb94e5a6308541468b64e18fc90ecc7f90e3d78ef9531d46feacb0c1ec831e167e984aa2e4d9b58fefca1c37efd820ccf992d5206cf1565c2306da61fb6c4bd5c679543dddb7a7e4505fc5138939d9b833378369a14842337fb681247e10ad78306d5e763aa3956d904e838689f88de6f532264b070cf2eaa52ccf565e96f8685f7a7f57d7062e806beac9baa327d866e528a110a5"),
},
}

func TestChunking(t *testing.T) {
chunker, err := NewChunker(bytes.NewReader(defaultData), defaultOpts)
Expand All @@ -69,6 +78,29 @@ func TestChunking(t *testing.T) {
t.Errorf("chunk %d: data not equal to expected, actual: %s", i, hex.EncodeToString(c.Data))
}
}

chunker.Reset(bytes.NewReader(resetData))
for i := 0; ; i++ {
c, err := chunker.Next()
if err == io.EOF {
break
}
assertNoError(t, err)
cdef := resetChunks[i]

if c.Offset != cdef.Offset {
t.Errorf("chunk %d: expected offset %d but received %d", i, cdef.Offset, c.Offset)
}
if c.Length != cdef.Length {
t.Errorf("chunk %d: expected length %d but received %d", i, cdef.Length, c.Length)
}
if c.Fingerprint != cdef.Fingerprint {
t.Errorf("chunk %d: expected fp %d but received %d", i, cdef.Fingerprint, c.Fingerprint)
}
if !bytes.Equal(cdef.Data, c.Data) {
t.Errorf("chunk %d: data not equal to expected, actual: %s", i, hex.EncodeToString(c.Data))
}
}
}

func TestChunkingRandom(t *testing.T) {
Expand Down Expand Up @@ -180,6 +212,78 @@ func BenchmarkFastCDC(b *testing.B) {
b.SetBytes(int64(n * len(benchData)))
}

// bencSpec describes one sub-benchmark of BenchmarkFastCDCSize.
type bencSpec struct {
// size is passed to benchmarkFastCDCSize as the number of input bytes.
size int
// name is the sub-benchmark name handed to b.Run.
name string
}

// bSizes lists the input sizes exercised by BenchmarkFastCDCSize, from 1 KiB
// up to 1 GiB, each paired with the label used as its sub-benchmark name.
var bSizes = []bencSpec{
	{size: 1 << 10, name: "1k"},
	{size: 4 << 10, name: "4k"},
	{size: 16 << 10, name: "16k"},
	{size: 32 << 10, name: "32k"},
	{size: 64 << 10, name: "64k"},
	{size: 128 << 10, name: "128k"},
	{size: 256 << 10, name: "256k"},
	{size: 512 << 10, name: "512k"},
	{size: 1 << 20, name: "1M"},
	{size: 4 << 20, name: "4M"},
	{size: 16 << 20, name: "16M"},
	{size: 32 << 20, name: "32M"},
	{size: 64 << 20, name: "64M"},
	{size: 128 << 20, name: "128M"},
	{size: 512 << 20, name: "512M"},
	{size: 1 << 30, name: "1G"},
}

// BenchmarkFastCDCSize runs benchmarkFastCDCSize once for every entry in
// bSizes, each as a sub-benchmark named after the entry.
func BenchmarkFastCDCSize(b *testing.B) {
	for i := range bSizes {
		// Copy the entry so the closure below captures this iteration's value.
		spec := bSizes[i]
		run := func(sub *testing.B) {
			benchmarkFastCDCSize(sub, spec.size)
		}
		b.Run(spec.name, run)
	}
}

// benchmarkFastCDCSize measures chunking of a single input of size bytes.
//
// The input is pseudo-random data drawn from a fixed seed, so every run
// chunks the same bytes. One Chunker is created up front and re-armed with
// Reset on every iteration. The average number of chunks produced per
// iteration is reported as the custom "chunks" metric.
func benchmarkFastCDCSize(b *testing.B, size int) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, size)
	rng.Read(data)

	r := bytes.NewReader(data)

	// Build the chunker before resetting the timer so that its one-time
	// construction (including the buffer it allocates) is not charged to the
	// measured loop or to the reported allocation counts.
	cnkr, err := NewChunker(r, Options{
		AverageSize: 1 * miB,
	})
	if err != nil {
		b.Fatal(err)
	}

	b.SetBytes(int64(size))
	b.ReportAllocs()
	b.ResetTimer()

	// res is a running total of chunk bytes; it is accumulated but not
	// reported.
	var res uint64
	var nchks int64

	for i := 0; i < b.N; i++ {
		// Rewind the reader and hand it back to the chunker for a fresh pass.
		r.Reset(data)
		cnkr.Reset(r)

		for {
			chunk, err := cnkr.Next()
			if err == io.EOF {
				break
			}
			if err != nil {
				b.Fatal(err)
			}
			res = res + uint64(len(chunk.Data))
			nchks++
		}
	}
	b.ReportMetric(float64(nchks)/float64(b.N), "chunks")
}

// loopReader implements io.Reader, looping over a provided buffer a given number of
// times.
type loopReader struct {
Expand Down

0 comments on commit bd4c286

Please sign in to comment.