diff --git a/fastcdc.go b/fastcdc.go
index 334aa25..a36d474 100644
--- a/fastcdc.go
+++ b/fastcdc.go
@@ -68,6 +68,7 @@ type Chunker struct {
 
 	buf    []byte
 	cursor int
+	end    int
 	offset int
 	eof    bool
 }
@@ -167,13 +168,23 @@ func NewChunker(rd io.Reader, opts Options) (*Chunker, error) {
 		rd:       rd,
 		buf:      make([]byte, opts.BufSize),
 		cursor:   opts.BufSize,
+		end:      opts.BufSize,
 	}
 
 	return chunker, nil
 }
 
+// Reset points the chunker at rd and clears its read state, reusing the existing buffer instead of allocating a new one.
+func (c *Chunker) Reset(rd io.Reader) {
+	c.rd = rd
+	c.offset = 0
+	c.eof = false
+	c.cursor = len(c.buf)
+	c.end = len(c.buf)
+}
+
 func (c *Chunker) fillBuffer() error {
-	n := len(c.buf) - c.cursor
+	n := c.end - c.cursor
 	if n >= c.maxSize {
 		return nil
 	}
@@ -183,14 +194,14 @@ func (c *Chunker) fillBuffer() error {
 	c.cursor = 0
 
 	if c.eof {
-		c.buf = c.buf[:n]
+		c.end = n
 		return nil
 	}
 
 	// Fill the rest of the buffer
 	m, err := io.ReadFull(c.rd, c.buf[n:])
+	c.end = n + m // valid data ends here whether the read filled the buffer, hit EOF, or failed
 	if err == io.EOF || err == io.ErrUnexpectedEOF {
-		c.buf = c.buf[:n+m]
 		c.eof = true
 	} else if err != nil {
 		return err
@@ -204,11 +215,11 @@ func (c *Chunker) Next() (Chunk, error) {
 	if err := c.fillBuffer(); err != nil {
 		return Chunk{}, err
 	}
-	if len(c.buf) == 0 {
+	if c.end == 0 {
 		return Chunk{}, io.EOF
 	}
 
-	length, fp := c.nextChunk(c.buf[c.cursor:])
+	length, fp := c.nextChunk(c.buf[c.cursor:c.end])
 
 	chunk := Chunk{
 		Offset:      c.offset,
diff --git a/fastcdc_test.go b/fastcdc_test.go
index c6b1253..5df4d2b 100644
--- a/fastcdc_test.go
+++ b/fastcdc_test.go
@@ -15,6 +15,7 @@ var defaultOpts = Options{
 }
 
 var defaultData = randBytes(4321, 7291)
+var resetData = randBytes(1234, 1729)
 
 // Chunks generated with defaultOpts from defaultData
 var defaultChunks = []Chunk{
@@ -43,6 +44,14 @@ var defaultChunks = []Chunk{
 		Data:        
decodeHex("f137c8c9cc9d5bf8e9194dc4ca95e069c0c537b69e9f4d327bc39883a342c15d90c362fd68ab7443f58eeb18dc4a6f1834662ddab5800bdb3d95c49697667c027050fc53e0928a0047ff8dac3b41407456560f98b22805cf9dea6dd458fe0611351c2d8a9f738a38ad6336a9f54f167afde26ccb8bfa4e83261e58c79a880ab4dfd1d69556fca8d9b317a445dd4b5a4fe7ec483d01f3726f7402303abca81df9f1761029edb87e0c52a3ed1852193772ae582a883865b2a014cdde703295f1e413e9149ea44b52d82e4795c676a1046519dab66342d5ca2c28d526b08a70b36d968e3517762b5936486982e251bba6325a812fdeb14cc76f3e7c3e98e8fbabd9e2e61fa17029c946811371351ed0b294ac8013a534aa25bbba3b3c547f70649a66a36b948bcbfe6879f389dcfcf6a564c47e7a1abd051860f6426db6f23f815faa29b484d3323772b616da282253a321f369e1961e1304db3e484cb4357a184f087281955450da9f55bcc502876991f8863918152fe02ebb6ca13a06852fb1ef6d627e0387056bc2f5b0fb7758ede4d494be94860398eb6d8af22898b33e08628f6822a710a7c1775da705f02688e7647940beeabe09381688cd80623fbbe6720451a785d9dd010e03cc10f84aa291e0760940e3ff901742060832725730947783c15671e75b732e99d3e5f0360ef5715113bc9f63d9c5069550297a43f512195e48440ebc8523a71c3ed7bd71ab0718d70a4b8b059d54b278962c6bced7cbb398c9ea65791fd4cbede0bfa26f06a9e2d64a7b455e0e9618e66f4c552507f5bacad9936415ef170acafb69469f4f5abdc68e919024c9254a9c7257aa90eb9e86d7b742b87bad69a9b60559a9eaa1409e5e0faf1414884de6829a52399cd181f32be59b81911eece3e6c6ba33"), }, } +var resetChunks []Chunk = []Chunk{ + { + Offset: 0, + Length: 1234, + Fingerprint: 4311516274091363630, + Data: 
decodeHex("387a518e109f76ee1fedd636dbc7e8d3d240f2a68c16c15b128e80172106886c1789c62d80fbd67e24fc3dd28fb7bdec1c3461e1ea9ac7c90156244f25e8a14f2e25fa29ceb081bbd1bbf3ac0a6e1f0358f90d993e643844b0309c60b4622d3000ea729db93f149c87d558e04af5de4bfa497401ddfc403249ed9903c4097cd2244ffc73c448b56d7db4962dab55d2df6f74408baa19fc5b18c28e0d0c2bd07d733031381ee33b77403b227e49f55a53e1d2c561fcd2cb98c6188552d06e167d60a685a2e2d51139daedda5033ecf080ab86407be70312458dcb47c39cd8586ccdc9c3c4e08e731ef84d37e1f445d958e7580c6de64d8ce6358557e5089e27810b5c9c7fd15ccb8575e0616bb66cde78b67d399c866a9903fe5387f4a11984eec8607e795e51cae9bad177c5bd83862ddf6014465fc3beaa518016ae559f441cf736dffacf042b57587e72db0ebf68971749212873b1b4710e5144b39fa8ad76e7ce039fb92c9f3f63e296434f9e273b3a8cbce07431e2436af2fab0e2511f7a14acb683c5f1f073d51609333339b7fbb4a1c54abaca7e8e6d0ac10df2827af2ecfb993929db4295cf22675b6315b1db98584d4ee2a91afc9a666ed6cabeccdbaec6ac05d5ec18031db42e93264674a6ca4e5c4b4b180f1f9c1b12c7b5979591078449002aebc2d275ea859ea8c6d38087c558e3db28ea3db0730b6722dac763b55820e90cd0b76cf6ddeb1ade14af5824fb3faeb84e28baf5ac40a6490a58788dea56a8d662135fcf9f8f36352335476c437269fc75c185c6775a23e2b2669068e183353771be80efe5508ff88aae99db088daa42ccc0027f23bb7bef8f6c8769f8a1bfc67086fb105a32e35a7040883158d40d1ce1fba8647d731abcadd21a6f82d64b5cd5cb198588fe9fcd642f1c20e1a0e98a89805d03cba7bd4c05ff5105d11449e15f7ed6716412747ea79335929dd2aa03fda4f994e7bf105c5ccb194f67e75399cfbf0e7942035bb0fd535ba5a67d61edb1e9d1ac814ce65a7b427ad346f930918f204bbef63f148092608dd4b76faab5594c2715a7c464ff109479e272a5e995676c3ac035390d0b35ce2fecd6a97cc2a718ff7a3a14e0ef8ace998802c031641c3735b8416355851129e25a3ca476a15af03d6d59427b7f5fcab7ae3429db87e57d657c5fd428243ba33e752a06ba609bf5361aa66de3f7374cd8f60daac41248424c0c361fae92869b93e68c41b9503f686d63e320ee0449c9a0d9f4d8e540e29f533e4a876ee58694ada344e2df1d77e057aa4e5fb36f38b8f08102bf8dac67ad22ce5f95fd833b5bb98a922a1211464ce10707b1ef564005eea856dd7c46a12076ea79cb265761c9b5c095e1a7121cf12555923fdfceaaf26215da9
334d0e38b7292227c7649255ae48feba4761f7df85d8e639b4b0d145aced87e78b8b09d49fd62f9becc298cc0f6ba8cbf5d0d9d8e52a377aba200e47a38a801f9ad0da953ccd584d051db214284feecec28c05f0abfb5d383efc3bb94e5a6308541468b64e18fc90ecc7f90e3d78ef9531d46feacb0c1ec831e167e984aa2e4d9b58fefca1c37efd820ccf992d5206cf1565c2306da61fb6c4bd5c679543dddb7a7e4505fc5138939d9b833378369a14842337fb681247e10ad78306d5e763aa3956d904e838689f88de6f532264b070cf2eaa52ccf565e96f8685f7a7f57d7062e806beac9baa327d866e528a110a5"), + }, +} func TestChunking(t *testing.T) { chunker, err := NewChunker(bytes.NewReader(defaultData), defaultOpts) @@ -69,6 +78,29 @@ func TestChunking(t *testing.T) { t.Errorf("chunk %d: data not equal to expected, actual: %s", i, hex.EncodeToString(c.Data)) } } + + chunker.Reset(bytes.NewReader(resetData)) + for i := 0; ; i++ { + c, err := chunker.Next() + if err == io.EOF { + break + } + assertNoError(t, err) + cdef := resetChunks[i] + + if c.Offset != cdef.Offset { + t.Errorf("chunk %d: expected offset %d but received %d", i, cdef.Offset, c.Offset) + } + if c.Length != cdef.Length { + t.Errorf("chunk %d: expected length %d but received %d", i, cdef.Length, c.Length) + } + if c.Fingerprint != cdef.Fingerprint { + t.Errorf("chunk %d: expected fp %d but received %d", i, cdef.Fingerprint, c.Fingerprint) + } + if !bytes.Equal(cdef.Data, c.Data) { + t.Errorf("chunk %d: data not equal to expected, actual: %s", i, hex.EncodeToString(c.Data)) + } + } } func TestChunkingRandom(t *testing.T) { @@ -180,6 +212,78 @@ func BenchmarkFastCDC(b *testing.B) { b.SetBytes(int64(n * len(benchData))) } +type bencSpec struct { + size int + name string +} + +var bSizes = []bencSpec{ + {1 << 10, "1k"}, + {4 << 10, "4k"}, + {16 << 10, "16k"}, + {32 << 10, "32k"}, + {64 << 10, "64k"}, + {128 << 10, "128k"}, + {256 << 10, "256k"}, + {512 << 10, "512k"}, + {1 << 20, "1M"}, + {4 << 20, "4M"}, + {16 << 20, "16M"}, + {32 << 20, "32M"}, + {64 << 20, "64M"}, + {128 << 20, "128M"}, + {512 << 20, "512M"}, + {1 << 30, "1G"}, +} 
+
+// BenchmarkFastCDCSize runs benchmarkFastCDCSize as a named sub-benchmark for every entry in bSizes.
+func BenchmarkFastCDCSize(b *testing.B) {
+	for _, s := range bSizes {
+		s := s
+		b.Run(s.name, func(b *testing.B) {
+			benchmarkFastCDCSize(b, s.size)
+		})
+	}
+}
+
+// benchmarkFastCDCSize chunks size seeded-random bytes b.N times with one reused Chunker and reports chunks per iteration.
+func benchmarkFastCDCSize(b *testing.B, size int) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, size)
+	rng.Read(data)
+
+	r := bytes.NewReader(data)
+	cnkr, err := NewChunker(r, Options{
+		AverageSize: 1 * miB,
+	})
+	if err != nil {
+		b.Fatal(err)
+	}
+	var res uint64
+	var nchks int64
+
+	// Setup is done: ResetTimer zeroes elapsed time and the allocation counters, so only the loop below is measured.
+	b.SetBytes(int64(size))
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		r.Reset(data)
+		cnkr.Reset(r)
+		for {
+			chunk, err := cnkr.Next()
+			if err != nil {
+				if err == io.EOF {
+					break
+				}
+				b.Fatal(err)
+			}
+			res += uint64(len(chunk.Data))
+			nchks++
+		}
+	}
+	b.ReportMetric(float64(nchks)/float64(b.N), "chunks")
+}
+
 // loopReader implements io.Reader, looping over a provided buffer a given number of
 // times.
 type loopReader struct {