From f7387495c9ec51403843078f215ce4aef9eaeb61 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 15:01:21 +0100 Subject: [PATCH 01/63] Remove compactindex (8 byte) --- compactindex/LICENSE | 202 -------------------- compactindex/README.md | 132 ------------- compactindex/build.go | 301 ------------------------------ compactindex/build_test.go | 248 ------------------------ compactindex/compactindex.go | 277 --------------------------- compactindex/compactindex_test.go | 84 --------- compactindex/fallocate_fake.go | 27 --- compactindex/fallocate_generic.go | 11 -- compactindex/fallocate_linux.go | 17 -- compactindex/query.go | 212 --------------------- compactindex/query_test.go | 58 ------ 11 files changed, 1569 deletions(-) delete mode 100644 compactindex/LICENSE delete mode 100644 compactindex/README.md delete mode 100644 compactindex/build.go delete mode 100644 compactindex/build_test.go delete mode 100644 compactindex/compactindex.go delete mode 100644 compactindex/compactindex_test.go delete mode 100644 compactindex/fallocate_fake.go delete mode 100644 compactindex/fallocate_generic.go delete mode 100644 compactindex/fallocate_linux.go delete mode 100644 compactindex/query.go delete mode 100644 compactindex/query_test.go diff --git a/compactindex/LICENSE b/compactindex/LICENSE deleted file mode 100644 index d6456956..00000000 --- a/compactindex/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/compactindex/README.md b/compactindex/README.md deleted file mode 100644 index d6398818..00000000 --- a/compactindex/README.md +++ /dev/null @@ -1,132 +0,0 @@ -# a fast flat-file index for constant datasets - -This package specifies a file format and Go implementation for indexing constant datasets. 
-
-*`compactindex` …*
-- is an immutable file format;
-- maps arbitrary keys into offsets in an external flat file;
-- consumes a constant amount of space per entry
-  - ~6-8 bytes, regardless of key size
-  - 3 bytes per entry hash
-- `O(1)` complexity queries, with `2 + log2(10000)` lookups worst- & average-case (binary search);
-- during construction, requires near-constant memory space and `O(n)` scratch space with regard to entries per file;
-- during construction, features a constant >500k entry/s per-core write rate (2.5 GHz Intel laptop);
-- works on any storage supporting random reads (regular files, HTTP range requests, on-chain, ...);
-- is based on the "FKS method" which uses perfect (collision-free) hash functions in a two-level hashtable; [^1]
-- is inspired by D. J. Bernstein's "constant database"; [^2]
-- uses the xxHash64 non-cryptographic hash-function; [^3]
-
-Refer to the Go documentation for the algorithms used and implementation details.
-
-[![Go Reference](https://pkg.go.dev/badge/go.firedancer.io/radiance/pkg/compactindex.svg)](https://pkg.go.dev/go.firedancer.io/radiance/pkg/compactindex)
-
-[^1]: Fredman, M. L., Komlós, J., & Szemerédi, E. (1984). Storing a Sparse Table with O(1) Worst Case Access Time. Journal of the ACM, 31(3), 538–544. https://doi.org/10.1145/828.1884
-[^2]: cdb by D. J. Bernstein https://cr.yp.to/cdb.html
-[^3]: Go implementation of xxHash by @cespare: https://github.com/cespare/xxhash/
-
-## Interface
-
-In programming terms:
-
-```rs
-fn lookup(key: &[byte]) -> Option<u64>
-```
-
-Given an arbitrary key, the index
-- states whether the key exists in the index
-- if it exists, maps the key to an integer (usually an offset into a file)
-
-## Examples
-
-Here are some example scenarios where `compactindex` is useful:
-
-- When working with immutable data structures
-  - Example: Indexing [IPLD CAR files][3] carrying Merkle-DAGs of content-addressable data
-- When working with archived/constant data
-  - Example: Indexing files in `.tar` archives
-- When dealing with immutable remote storage such as S3-like object storage
-  - Example: Storing the index and target file in S3, then using [HTTP range requests][4] to efficiently query data
-
-[3]: https://ipld.io/specs/transport/car/
-[4]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
-
-Here are some things compactindex cannot do:
-
-- Cannot add more entries to an existing index
-  - Reason 1: indexes are tightly packed, so there is no space to insert new entries (though `fallocate(2)` with `FALLOC_FL_INSERT_RANGE` would technically work)
-  - Reason 2: the second-level hashtable uses a perfect hash function ensuring collision-free indexing of a subset of entries;
-    inserting new entries might cause a collision, requiring the affected bucket's hash function to be re-mined
-  - Reason 3: adding too many entries will eventually create an imbalance in the first-level hashtable;
-    fixing this imbalance effectively requires re-constructing the file from scratch
-- Cannot iterate over keys
-  - Reason: compactindex stores hashes, not the entries themselves.
-    This saves space but also allows for efficient random reads used during binary search
-
-## File Format (v0)
-
-**Encoding**
-
-The file format contains binary packed structures with byte alignment.
-
-Integers are encoded as little endian.
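Because the structures are packed, integer fields can have odd widths (the bucket headers below use a `u48` offset, and entry values are trimmed to the minimum width needed for `max_value`). A minimal Go sketch of decoding such a byte-aligned little-endian integer — `uintLE` here is illustrative, though the Go implementation in this patch ships equivalent `uintLe`/`putUintLe` helpers:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// uintLE decodes an unsigned little-endian integer of up to 8 bytes.
// Missing high bytes are treated as zero, so non-power-of-two widths
// such as u48 decode naturally.
func uintLE(buf []byte) uint64 {
	var full [8]byte
	copy(full[:], buf)
	return binary.LittleEndian.Uint64(full[:])
}

func main() {
	// A u48 value (6 bytes) as it would appear on disk.
	raw := []byte{0x66, 0x00, 0x00, 0x00, 0x00, 0x00}
	fmt.Println(uintLE(raw)) // 102 (0x66)
}
```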
-
-**File Header**
-
-The file begins with a 32-byte file header.
-
-```rust
-#[repr(packed)]
-struct FileHeader {
-    magic: [u8; 8],       // 0x00
-    max_value: u64,       // 0x08
-    num_buckets: u32,     // 0x10
-    padding_14: [u8; 12], // 0x14
-}
-```
-
-- `magic` is set to the UTF-8 string `"rdcecidx"`.
-  The reader should reject files that don't start with this string.
-- `num_buckets` is set to the number of hashtable buckets.
-- `max_value` indicates the integer width of index values.
-- `padding_14` must be zero. (reserved for future use)
-
-**Bucket Header Table**
-
-The file header is followed by a vector of bucket headers.
-The number of bucket headers is set by `num_buckets` in the file header.
-
-Each bucket header is 16 bytes long.
-
-```rust
-#[repr(packed)]
-struct BucketHeader {
-    hash_domain: u32, // 0x00
-    num_entries: u32, // 0x04
-    hash_len: u8,     // 0x08
-    padding_09: u8,   // 0x09
-    file_offset: u48, // 0x0a
-}
-```
-
-- `hash_domain` is a "salt" to the per-bucket hash function.
-- `num_entries` is set to the number of records in the bucket.
-- `hash_len` is the size of the per-record hash in bytes and currently hardcoded to `3`.
-- `padding_09` must be zero.
-- `file_offset` is an offset from the beginning of the file header to the start of the bucket entries.
-
-**Bucket Entry Table**
-
-Each bucket has a vector of entries with length `num_entries`.
-This structure makes up the vast majority of the index.
-
-```rust
-#[repr(packed)]
-struct Entry {
-    hash: u??,
-    value: u??,
-}
-```
-
-The size of an entry is constant within a bucket. It is determined by its components:
-- The size of `hash` in bytes equals `hash_len`
-- The size of `value` in bytes equals the byte-aligned integer width that is minimally required to represent `max_value`
diff --git a/compactindex/build.go b/compactindex/build.go
deleted file mode 100644
index 57eb06c0..00000000
--- a/compactindex/build.go
+++ /dev/null
@@ -1,301 +0,0 @@
-package compactindex
-
-import (
-	"bufio"
-	"context"
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"io"
-	"math"
-	"os"
-	"path/filepath"
-	"sort"
-	"syscall"
-)
-
-// Builder creates new compactindex files.
-type Builder struct {
-	Header
-	buckets []tempBucket
-	dir     string
-}
-
-// NewBuilder creates a new index builder.
-//
-// If dir is an empty string, a random temporary directory is used.
-//
-// numItems refers to the number of items in the index.
-//
-// targetFileSize is the size of the file that index entries point to.
-// Can be set to zero if unknown, which results in a less efficient (larger) index.
-func NewBuilder(dir string, numItems uint, targetFileSize uint64) (*Builder, error) {
-	if dir == "" {
-		var err error
-		dir, err = os.MkdirTemp("", "compactindex-")
-		if err != nil {
-			return nil, fmt.Errorf("failed to create temp dir: %w", err)
-		}
-	}
-	if targetFileSize == 0 {
-		targetFileSize = math.MaxUint64
-	}
-
-	numBuckets := (numItems + targetEntriesPerBucket - 1) / targetEntriesPerBucket
-	buckets := make([]tempBucket, numBuckets)
-	for i := range buckets {
-		name := filepath.Join(dir, fmt.Sprintf("keys-%d", i))
-		f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666)
-		if err != nil {
-			return nil, err
-		}
-		buckets[i].file = f
-		buckets[i].writer = bufio.NewWriter(f)
-	}
-
-	return &Builder{
-		Header: Header{
-			FileSize:   targetFileSize,
-			NumBuckets: uint32(numBuckets),
-		},
-		buckets: buckets,
-		dir:     dir,
-	}, nil
-}
-
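To make the writer-side flow concrete, here is a hedged usage sketch of the Builder API (`NewBuilder` above, `Insert` and `Seal` below); the file name and item counts are illustrative, and the import path is taken from the README badge:

```go
package main

import (
	"context"
	"log"
	"os"

	"go.firedancer.io/radiance/pkg/compactindex"
)

func main() {
	// One entry per record; values are byte offsets into some target file.
	builder, err := compactindex.NewBuilder("", 3 /*numItems*/, 1<<20 /*targetFileSize*/)
	if err != nil {
		log.Fatal(err)
	}
	defer builder.Close() // removes the temporary bucket files

	_ = builder.Insert([]byte("hello"), 1)
	_ = builder.Insert([]byte("world"), 2)

	out, err := os.Create("example.index")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	// Seal mines the per-bucket hash functions and writes the final file.
	if err := builder.Seal(context.Background(), out); err != nil {
		log.Fatal(err)
	}
}
```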
-// Insert writes a key-value mapping to the index.
-//
-// Index generation will fail if the same key is inserted twice.
-// The writer must not pass a value greater than targetFileSize.
-func (b *Builder) Insert(key []byte, value uint64) error {
-	return b.buckets[b.Header.BucketHash(key)].writeTuple(key, value)
-}
-
-// Seal writes the final index to the provided file.
-// This process is CPU-intensive; use the context to abort prematurely.
-//
-// The file should be opened with access mode os.O_RDWR.
-// Passing a non-empty file will result in a corrupted index.
-func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) {
-	// TODO support in-place writing.
-
-	// Write header.
-	var headerBuf [headerSize]byte
-	b.Header.Store(&headerBuf)
-	_, err = f.Write(headerBuf[:])
-	if err != nil {
-		return fmt.Errorf("failed to write header: %w", err)
-	}
-	// Create hole to leave space for bucket header table.
-	bucketTableLen := int64(b.NumBuckets) * bucketHdrLen
-	err = fallocate(f, headerSize, bucketTableLen)
-	if errors.Is(err, syscall.EOPNOTSUPP) {
-		// The underlying file system may not support fallocate
-		err = fake_fallocate(f, headerSize, bucketTableLen)
-		if err != nil {
-			return fmt.Errorf("failed to fake fallocate() bucket table: %w", err)
-		}
-	}
-	if err != nil {
-		return fmt.Errorf("failed to fallocate() bucket table: %w", err)
-	}
-	// Seal each bucket.
-	for i := range b.buckets {
-		if err := b.sealBucket(ctx, i, f); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// sealBucket will mine a bucket hashtable, write its entries to the file,
-// and then write the bucket header.
-func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error {
-	// Produce perfect hash table for bucket.
-	bucket := &b.buckets[i]
-	if err := bucket.flush(); err != nil {
-		return err
-	}
-	const mineAttempts uint32 = 1000
-	entries, domain, err := bucket.mine(ctx, mineAttempts)
-	if err != nil {
-		return fmt.Errorf("failed to mine bucket %d: %w", i, err)
-	}
-	// Find current file length.
-	offset, err := f.Seek(0, io.SeekEnd)
-	if err != nil {
-		return fmt.Errorf("failed to seek to EOF: %w", err)
-	}
-	if offset < 0 {
-		panic("os.File.Seek() < 0")
-	}
-	// Build the bucket descriptor.
-	desc := BucketDescriptor{
-		BucketHeader: BucketHeader{
-			HashDomain: domain,
-			NumEntries: uint32(bucket.records),
-			HashLen:    3, // TODO remove hardcoded constant
-			FileOffset: uint64(offset),
-		},
-		Stride:      3 + intWidth(b.FileSize), // TODO remove hardcoded constant
-		OffsetWidth: intWidth(b.FileSize),
-	}
-	// Write entries to file.
-	wr := bufio.NewWriter(f)
-	entryBuf := make([]byte, desc.HashLen+intWidth(b.FileSize)) // TODO remove hardcoded constant
-	for _, entry := range entries {
-		desc.marshalEntry(entryBuf, entry)
-		if _, err := wr.Write(entryBuf[:]); err != nil {
-			return fmt.Errorf("failed to write record to index: %w", err)
-		}
-	}
-	if err := wr.Flush(); err != nil {
-		return fmt.Errorf("failed to flush bucket to index: %w", err)
-	}
-	// Write header to file.
-	if err := desc.BucketHeader.writeTo(f, uint(i)); err != nil {
-		return fmt.Errorf("failed to write bucket header %d: %w", i, err)
-	}
-	return nil
-}
-
-func (b *Builder) Close() error {
-	return os.RemoveAll(b.dir)
-}
-
-// tempBucket represents the "temporary bucket" file,
-// a disk buffer containing a vector of key-value-tuples.
-type tempBucket struct {
-	records uint
-	file    *os.File
-	writer  *bufio.Writer
-}
-
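For reference, the temporary bucket files written by `writeTuple` (below) use a simple length-prefixed tuple encoding: a `u16` key length, a `u64` value, then the key bytes, all little endian. A hedged, self-contained sketch of that layout with an illustrative decoder — `readTuple` is not part of the package:

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// readTuple is an illustrative inverse of writeTuple:
// [0:2] key_len (u16 LE), [2:10] value (u64 LE), [10:] key bytes.
func readTuple(rd *bufio.Reader) (key []byte, value uint64, err error) {
	var static [10]byte
	if _, err = io.ReadFull(rd, static[:]); err != nil {
		return nil, 0, err
	}
	keyLen := binary.LittleEndian.Uint16(static[0:2])
	value = binary.LittleEndian.Uint64(static[2:10])
	key = make([]byte, keyLen)
	_, err = io.ReadFull(rd, key)
	return key, value, err
}

func main() {
	// Encode one tuple the same way writeTuple does.
	var buf bytes.Buffer
	var static [10]byte
	key := []byte("hello")
	binary.LittleEndian.PutUint16(static[0:2], uint16(len(key)))
	binary.LittleEndian.PutUint64(static[2:10], 42)
	buf.Write(static[:])
	buf.Write(key)

	k, v, err := readTuple(bufio.NewReader(&buf))
	fmt.Println(string(k), v, err) // hello 42 <nil>
}
```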
-// writeTuple performs a buffered write of a KV-tuple.
-func (b *tempBucket) writeTuple(key []byte, value uint64) (err error) {
-	b.records++
-	var static [10]byte
-	binary.LittleEndian.PutUint16(static[0:2], uint16(len(key)))
-	binary.LittleEndian.PutUint64(static[2:10], value)
-	if _, err = b.writer.Write(static[:]); err != nil {
-		return err
-	}
-	_, err = b.writer.Write(key)
-	return
-}
-
-// flush empties the in-memory write buffer to the file.
-func (b *tempBucket) flush() error {
-	if err := b.writer.Flush(); err != nil {
-		return fmt.Errorf("failed to flush writer: %w", err)
-	}
-	b.writer = nil
-	return nil
-}
-
-// mine repeatedly hashes the set of entries with different nonces.
-//
-// Returns the bucket's hashtable entries (laid out by hashBucket) upon finding a set of hashes without collisions.
-// If no collision-free assignment is found within the given number of attempts, returns ErrCollision instead.
-func (b *tempBucket) mine(ctx context.Context, attempts uint32) (entries []Entry, domain uint32, err error) {
-	entries = make([]Entry, b.records)
-	bitmap := make([]byte, 1<<21) // 2^21 bytes = 2^24 bits
-
-	rd := bufio.NewReader(b.file)
-	for domain = uint32(0); domain < attempts; domain++ {
-		if err = ctx.Err(); err != nil {
-			return
-		}
-		// Reset bitmap
-		for i := range bitmap {
-			bitmap[i] = 0
-		}
-		// Reset reader
-		if _, err = b.file.Seek(0, io.SeekStart); err != nil {
-			return
-		}
-		rd.Reset(b.file)
-
-		if hashErr := hashBucket(rd, entries, bitmap, domain); errors.Is(hashErr, ErrCollision) {
-			continue
-		} else if hashErr != nil {
-			return nil, 0, hashErr
-		}
-
-		return // ok
-	}
-
-	return nil, domain, ErrCollision
-}
-
-// hashBucket reads and hashes entries from a temporary bucket file.
-//
-// Uses a 2^24 wide bitmap to detect collisions.
-func hashBucket(rd *bufio.Reader, entries []Entry, bitmap []byte, nonce uint32) error {
-	// TODO Don't hardcode this, choose hash depth dynamically
-	mask := uint64(0xffffff)
-
-	// Scan provided reader for entries and hash along the way.
-	for i := range entries {
-		// Read next key from file (as defined by writeTuple)
-		var static [10]byte
-		if _, err := io.ReadFull(rd, static[:]); err != nil {
-			return err
-		}
-		keyLen := binary.LittleEndian.Uint16(static[0:2])
-		value := binary.LittleEndian.Uint64(static[2:10])
-		key := make([]byte, keyLen)
-		if _, err := io.ReadFull(rd, key); err != nil {
-			return err
-		}
-
-		// Hash to entry
-		hash := EntryHash64(nonce, key) & mask
-
-		// Check for collision in bitmap
-		bi, bj := hash/8, hash%8
-		chunk := bitmap[bi]
-		if (chunk>>bj)&1 == 1 {
-			return ErrCollision
-		}
-		bitmap[bi] = chunk | (1 << bj)
-
-		// Export entry
-		entries[i] = Entry{
-			Hash:  hash,
-			Value: value,
-		}
-	}
-
-	// Sort entries by hash, then lay them out in Eytzinger order (see sortWithCompare).
- sortWithCompare(entries, func(i, j int) int { - if entries[i].Hash < entries[j].Hash { - return -1 - } else if entries[i].Hash > entries[j].Hash { - return 1 - } - return 0 - }) - - return nil -} - -var ErrCollision = errors.New("hash collision") - -func sortWithCompare[T any](a []T, compare func(i, j int) int) { - sort.Slice(a, func(i, j int) bool { - return compare(i, j) < 0 - }) - sorted := make([]T, len(a)) - eytzinger(a, sorted, 0, 1) - copy(a, sorted) -} - -func eytzinger[T any](in, out []T, i, k int) int { - if k <= len(in) { - i = eytzinger(in, out, i, 2*k) - out[k-1] = in[i] - i++ - i = eytzinger(in, out, i, 2*k+1) - } - return i -} diff --git a/compactindex/build_test.go b/compactindex/build_test.go deleted file mode 100644 index ccf0b8a1..00000000 --- a/compactindex/build_test.go +++ /dev/null @@ -1,248 +0,0 @@ -package compactindex - -import ( - "context" - "encoding/binary" - "errors" - "io" - "io/fs" - "math" - "math/rand" - "os" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/vbauerster/mpb/v8/decor" -) - -func TestBuilder(t *testing.T) { - const numBuckets = 3 - const maxValue = math.MaxUint64 - - // Create a table with 3 buckets. - builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue) - require.NoError(t, err) - require.NotNil(t, builder) - assert.Len(t, builder.buckets, 3) - defer builder.Close() - - // Insert a few entries. - require.NoError(t, builder.Insert([]byte("hello"), 1)) - require.NoError(t, builder.Insert([]byte("world"), 2)) - require.NoError(t, builder.Insert([]byte("blub"), 3)) - - // Create index file. - targetFile, err := os.CreateTemp("", "compactindex-final-") - require.NoError(t, err) - defer os.Remove(targetFile.Name()) - defer targetFile.Close() - - // Seal index. - require.NoError(t, builder.Seal(context.TODO(), targetFile)) - - // Assert binary content. - buf, err := os.ReadFile(targetFile.Name()) - require.NoError(t, err) - assert.Equal(t, []byte{ - // --- File header - // magic - 0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78, - // max file size - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - // num buckets - 0x03, 0x00, 0x00, 0x00, - // padding - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket header 0 - // hash domain - 0x00, 0x00, 0x00, 0x00, - // num entries - 0x01, 0x00, 0x00, 0x00, - // hash len - 0x03, - // padding - 0x00, - // file offset - 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket header 1 - // hash domain - 0x00, 0x00, 0x00, 0x00, - // num entries - 0x01, 0x00, 0x00, 0x00, - // hash len - 0x03, - // padding - 0x00, - // file offset - 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket header 2 - // hash domain - 0x00, 0x00, 0x00, 0x00, - // num entries - 0x01, 0x00, 0x00, 0x00, - // hash len - 0x03, - // padding - 0x00, - // file offset - 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket 0 - // hash - 0xe2, 0xdb, 0x55, - // value - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket 1 - // hash - 0x92, 0xcd, 0xbb, - // value - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - // --- Bucket 2 - // hash - 0xe3, 0x09, 0x6b, - // value - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, buf) - - // Reset file offset. - _, seekErr := targetFile.Seek(0, io.SeekStart) - require.NoError(t, seekErr) - - // Open index. - db, err := Open(targetFile) - require.NoError(t, err, "Failed to open generated index") - require.NotNil(t, db) - - // File header assertions. 
- assert.Equal(t, Header{ - FileSize: maxValue, - NumBuckets: numBuckets, - }, db.Header) - - // Get bucket handles. - buckets := make([]*Bucket, numBuckets) - for i := range buckets { - buckets[i], err = db.GetBucket(uint(i)) - require.NoError(t, err) - } - - // Ensure out-of-bounds bucket accesses fail. - _, wantErr := db.GetBucket(numBuckets) - assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") - - // Bucket header assertions. - assert.Equal(t, BucketDescriptor{ - BucketHeader: BucketHeader{ - HashDomain: 0x00, - NumEntries: 1, - HashLen: 3, - FileOffset: 0x50, - }, - Stride: 11, // 3 + 8 - OffsetWidth: 8, - }, buckets[0].BucketDescriptor) - assert.Equal(t, BucketHeader{ - HashDomain: 0x00, - NumEntries: 1, - HashLen: 3, - FileOffset: 0x5b, - }, buckets[1].BucketHeader) - assert.Equal(t, BucketHeader{ - HashDomain: 0x00, - NumEntries: 1, - HashLen: 3, - FileOffset: 0x66, - }, buckets[2].BucketHeader) - - // Test lookups. - entries, err := buckets[2].Load( /*batchSize*/ 4) - require.NoError(t, err) - assert.Equal(t, []Entry{ - { - Hash: 0x6b09e3, - Value: 3, - }, - }, entries) -} - -func TestBuilder_Random(t *testing.T) { - if testing.Short() { - t.Skip("Skipping long test") - } - - const numKeys = uint(500000) - const keySize = uint(16) - const maxOffset = uint64(1000000) - const queries = int(10000) - - // Create new builder session. - builder, err := NewBuilder("", numKeys, maxOffset) - require.NoError(t, err) - require.NotNil(t, builder) - require.NotEmpty(t, builder.buckets) - - // Ensure we cleaned up after ourselves. - defer func() { - _, statErr := os.Stat(builder.dir) - assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) - }() - defer builder.Close() - - // Insert items to temp buckets. - preInsert := time.Now() - key := make([]byte, keySize) - for i := uint(0); i < numKeys; i++ { - binary.LittleEndian.PutUint64(key, uint64(i)) - err := builder.Insert(key, uint64(rand.Int63n(int64(maxOffset)))) - require.NoError(t, err) - } - t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) - - // Create file for final index. - targetFile, err := os.CreateTemp("", "compactindex-final-") - require.NoError(t, err) - defer os.Remove(targetFile.Name()) - defer targetFile.Close() - - // Seal to final index. - preSeal := time.Now() - sealErr := builder.Seal(context.TODO(), targetFile) - require.NoError(t, sealErr, "Seal failed") - t.Logf("Sealed in %s", time.Since(preSeal)) - - // Print some stats. - targetStat, err := targetFile.Stat() - require.NoError(t, err) - t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) - t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) - t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) - - // Open index. - _, seekErr := targetFile.Seek(0, io.SeekStart) - require.NoError(t, seekErr) - db, err := Open(targetFile) - require.NoError(t, err, "Failed to open generated index") - - // Run query benchmark. 
-	preQuery := time.Now()
-	for i := queries; i != 0; i-- {
-		keyN := uint64(rand.Int63n(int64(numKeys)))
-		binary.LittleEndian.PutUint64(key, keyN)
-
-		bucket, err := db.LookupBucket(key)
-		require.NoError(t, err)
-
-		value, err := bucket.Lookup(key)
-		require.NoError(t, err)
-		require.True(t, value < maxOffset) // inserted values are in [0, maxOffset)
-	}
-	t.Logf("Queried %d items", queries)
-	t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
-}
diff --git a/compactindex/compactindex.go b/compactindex/compactindex.go
deleted file mode 100644
index 1aeb18e7..00000000
--- a/compactindex/compactindex.go
+++ /dev/null
@@ -1,277 +0,0 @@
-// Package compactindex is an immutable hashtable index format inspired by djb's constant database (cdb).
-//
-// # Design
-//
-// Compactindex is used to create secondary indexes over arbitrary flat files.
-// Each index is a single, immutable flat file.
-//
-// Index files consist of a space-optimized and query-optimized key-value-like table.
-//
-// Instead of storing actual keys, the format stores FKS dynamic perfect hashes.
-// And instead of storing values, the format contains offsets into some file.
-//
-// As a result, the database effectively only supports two operations, similarly to cdb.
-// (Note that the actual Go interface is a bit more flexible).
-//
-//	func Create(kv map[[]byte]uint64) *Index
-//	func (*Index) Lookup(key []byte) (value uint64, exist bool)
-//
-// # Buckets
-//
-// The set of items is split into buckets of approx 10000 records.
-// The number of buckets is unlimited.
-//
-// The key-to-bucket assignment is determined by xxHash64 using uniform discrete hashing over the key space.
-//
-// The index file header also mentions the number of buckets and the file offset of each bucket.
-//
-// # Tables
-//
-// Each bucket contains a table of entries, indexed by a collision-free hash function.
-//
-// The hash function used in the entry table is xxHash.
-// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme).
-// This hash domain is also recorded at the bucket header.
-//
-// Each bucket entry is a constant-size record consisting of a 3-byte hash and an offset to the value.
-// The size of the offset integer is the minimal byte-aligned integer width that can represent the target file size.
-//
-// # Querying
-//
-// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads.
-// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc...
-//
-// The DB struct itself performs zero memory allocations and therefore also doesn't cache.
-// It is therefore recommended to provide an io.ReaderAt backed by a cache to improve performance.
-//
-// Given a key, the query strategy is simple:
-//
-//  1. Hash key to bucket using global hash function
-//  2. Retrieve bucket offset from bucket header table
-//  3. Hash key to entry using per-bucket hash function
-//  4. Search for entry in bucket (binary search)
-//
-// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium.
-//
-// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time.
-// When doing range requests against high-latency remote storage (e.g. S3 buckets),
-// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once.
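To ground those four steps, here is a hedged end-to-end usage sketch of the query API defined in query.go later in this patch (the index file name is illustrative; the import path is taken from the README badge):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"go.firedancer.io/radiance/pkg/compactindex"
)

func main() {
	f, err := os.Open("example.index")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	db, err := compactindex.Open(f) // reads and validates the 32-byte header
	if err != nil {
		log.Fatal(err)
	}

	// Lookup hashes the key to a bucket, loads that bucket's header,
	// then searches the bucket's entry table for the per-bucket hash.
	offset, err := db.Lookup([]byte("hello"))
	if err != nil {
		log.Fatal(err) // compactindex.ErrNotFound if the key is absent
	}
	fmt.Println("value/offset:", offset)
}
```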
-// -// # Construction -// -// Constructing a compactindex requires upfront knowledge of the number of items and highest possible target offset (read: target file size). -// -// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency. -// -// The process works as follows: -// -// 1. Determine number of buckets and offset integer width -// based on known input params (item count and target file size). -// 2. Linear pass over input data, populating temporary files that -// contain the unsorted entries of each bucket. -// 3. For each bucket, brute force a perfect hash function that -// defines a bijection between hash values and keys in the bucket. -// 4. For each bucket, sort by hash values. -// 5. Store to index. -// -// An alternative construction approach is available when the number of items or target file size is unknown. -// In this case, a set of keys is first serialized to a flat file. -package compactindex - -import ( - "encoding/binary" - "fmt" - "math" - "math/bits" - "sort" - - "github.com/cespare/xxhash/v2" -) - -// Magic are the first eight bytes of an index. -var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} - -const Version = uint8(1) - -// Header occurs once at the beginning of the index. -type Header struct { - FileSize uint64 - NumBuckets uint32 -} - -// headerSize is the size of the header at the beginning of the file. -const headerSize = 32 - -// Load checks the Magic sequence and loads the header fields. -func (h *Header) Load(buf *[headerSize]byte) error { - // Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream. - if *(*[8]byte)(buf[:8]) != Magic { - return fmt.Errorf("not a radiance compactindex file") - } - *h = Header{ - FileSize: binary.LittleEndian.Uint64(buf[8:16]), - NumBuckets: binary.LittleEndian.Uint32(buf[16:20]), - } - // Check version. - if buf[20] != Version { - return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20]) - } - // 11 bytes to spare for now. Might use it in the future. - // Force to zero for now. - for _, b := range buf[21:32] { - if b != 0x00 { - return fmt.Errorf("unsupported index version") - } - } - return nil -} - -func (h *Header) Store(buf *[headerSize]byte) { - copy(buf[0:8], Magic[:]) - binary.LittleEndian.PutUint64(buf[8:16], h.FileSize) - binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets) - buf[20] = Version - for i := 21; i < 32; i++ { - buf[i] = 0 - } -} - -// BucketHash returns the bucket index for the given key. -// -// Uses a truncated xxHash64 rotated until the result fits. -func (h *Header) BucketHash(key []byte) uint { - u := xxhash.Sum64(key) - n := uint64(h.NumBuckets) - r := (-n) % n - for u < r { - u = hashUint64(u) - } - return uint(u % n) -} - -// hashUint64 is a reversible uint64 permutation based on Google's -// Murmur3 hash finalizer (public domain) -func hashUint64(x uint64) uint64 { - x ^= x >> 33 - x *= 0xff51afd7ed558ccd - x ^= x >> 33 - x *= 0xc4ceb9fe1a85ec53 - x ^= x >> 33 - return x -} - -// BucketHeader occurs at the beginning of each bucket. -type BucketHeader struct { - HashDomain uint32 - NumEntries uint32 - HashLen uint8 - FileOffset uint64 -} - -// bucketHdrLen is the size of the header preceding the hash table entries. 
-const bucketHdrLen = 16
-
-func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) {
-	binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain)
-	binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries)
-	buf[8] = b.HashLen
-	buf[9] = 0
-	putUintLe(buf[10:16], b.FileOffset)
-}
-
-func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) {
-	b.HashDomain = binary.LittleEndian.Uint32(buf[0:4])
-	b.NumEntries = binary.LittleEndian.Uint32(buf[4:8])
-	b.HashLen = buf[8]
-	b.FileOffset = uintLe(buf[10:16])
-}
-
-// Hash returns the per-bucket hash of a key.
-func (b *BucketHeader) Hash(key []byte) uint64 {
-	xsum := EntryHash64(b.HashDomain, key)
-	// Mask sum by hash length.
-	return xsum & (math.MaxUint64 >> (64 - b.HashLen*8))
-}
-
-type BucketDescriptor struct {
-	BucketHeader
-	Stride      uint8 // size of one entry in bucket
-	OffsetWidth uint8 // width of offset field in bucket
-}
-
-func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) {
-	e.Hash = uintLe(buf[0:b.HashLen])
-	e.Value = uintLe(buf[b.HashLen : b.HashLen+b.OffsetWidth])
-	return
-}
-
-func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) {
-	if len(buf) < int(b.Stride) {
-		panic("serializeEntry: buf too small")
-	}
-	putUintLe(buf[0:b.HashLen], e.Hash)
-	putUintLe(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value)
-}
-
-// SearchSortedEntries performs an in-memory binary search for a given hash.
-func SearchSortedEntries(entries []Entry, hash uint64) *Entry {
-	i, found := sort.Find(len(entries), func(i int) int {
-		other := entries[i].Hash
-		// Note: This is safe because neither side exceeds 2^24.
-		return int(hash) - int(other)
-	})
-	if !found {
-		return nil
-	}
-	if i >= len(entries) || entries[i].Hash != hash {
-		return nil
-	}
-	return &entries[i]
-}
-
-// EntryHash64 is a xxHash-based hash function using an arbitrary prefix.
-func EntryHash64(prefix uint32, key []byte) uint64 {
-	const blockSize = 32
-	var prefixBlock [blockSize]byte
-	binary.LittleEndian.PutUint32(prefixBlock[:4], prefix)
-
-	var digest xxhash.Digest
-	digest.Reset()
-	digest.Write(prefixBlock[:])
-	digest.Write(key)
-	return digest.Sum64()
-}
-
-// Entry is a single element in a hash table.
-type Entry struct {
-	Hash  uint64
-	Value uint64
-}
-
-// intWidth returns the number of bytes minimally required to represent the given integer.
-func intWidth(n uint64) uint8 {
-	msb := 64 - bits.LeadingZeros64(n)
-	return uint8((msb + 7) / 8)
-}
-
-// maxCls64 returns the max integer that has the same amount of leading zeros as n.
-func maxCls64(n uint64) uint64 {
-	return math.MaxUint64 >> bits.LeadingZeros64(n)
-}
-
-// uintLe decodes an unsigned little-endian integer without bounds assertions.
-// Out-of-bounds bits are set to zero.
-func uintLe(buf []byte) uint64 {
-	var full [8]byte
-	copy(full[:], buf)
-	return binary.LittleEndian.Uint64(full[:])
-}
-
-// putUintLe encodes an unsigned little-endian integer without bounds assertions.
-// Returns true if the integer fully fits in the provided buffer.
-func putUintLe(buf []byte, x uint64) bool { - var full [8]byte - binary.LittleEndian.PutUint64(full[:], x) - copy(buf, full[:]) - return int(intWidth(x)) <= len(buf) -} diff --git a/compactindex/compactindex_test.go b/compactindex/compactindex_test.go deleted file mode 100644 index 75e56082..00000000 --- a/compactindex/compactindex_test.go +++ /dev/null @@ -1,84 +0,0 @@ -package compactindex - -import ( - "math" - "math/rand" - "sort" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMaxCls64(t *testing.T) { - cases := [][2]uint64{ - {0x0000_0000_0000_0000, 0x0000_0000_0000_0000}, - {0x0000_0000_0000_0001, 0x0000_0000_0000_0001}, - {0x0000_0000_0000_0003, 0x0000_0000_0000_0002}, - {0x0000_0000_0000_0003, 0x0000_0000_0000_0003}, - {0x0000_0000_0000_0007, 0x0000_0000_0000_0004}, - {0x0000_0000_FFFF_FFFF, 0x0000_0000_F000_000F}, - {0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF}, - } - for _, tc := range cases { - assert.Equal(t, tc[0], maxCls64(tc[1])) - } -} - -func TestHeader_BucketHash(t *testing.T) { - const numItems = 500000 - const numBuckets = 1000 - - header := Header{ - NumBuckets: numBuckets, - } - - keys := make([][]byte, numItems) - hits := make([]int, numBuckets) - for i := range keys { - var buf [16]byte - n, _ := rand.Read(buf[:]) - keys[i] = buf[:n] - } - - // Bounds check and count hits. - for _, key := range keys { - idx := header.BucketHash(key) - require.True(t, idx < numBuckets) - hits[idx]++ - } - - // Calculate standard deviation. - mean := float64(numItems) / float64(numBuckets) - var cumVariance float64 - for _, bucketHits := range hits { - delta := float64(bucketHits) - mean - cumVariance += (delta * delta) - } - variance := cumVariance / float64(len(hits)) - stddev := math.Sqrt(variance) - t.Logf("mean % 12.2f", mean) - normStddev := stddev / mean - t.Logf("stddev % 10.2f", stddev) - t.Logf("1σ / mean % 7.2f%%", 100*normStddev) - - const failNormStddev = 0.08 - if normStddev > failNormStddev { - t.Logf("FAIL: > %f%%", 100*failNormStddev) - t.Fail() - } else { - t.Logf(" OK: <= %f%%", 100*failNormStddev) - } - - // Print percentiles. 
-	sort.Ints(hits)
-	t.Logf("min % 10d", hits[0])
-	t.Logf("p01 % 10d", hits[int(math.Round(0.01*float64(len(hits))))])
-	t.Logf("p05 % 10d", hits[int(math.Round(0.05*float64(len(hits))))])
-	t.Logf("p10 % 10d", hits[int(math.Round(0.10*float64(len(hits))))])
-	t.Logf("p50 % 10d", hits[int(math.Round(0.50*float64(len(hits))))])
-	t.Logf("p90 % 10d", hits[int(math.Round(0.90*float64(len(hits))))])
-	t.Logf("p95 % 10d", hits[int(math.Round(0.95*float64(len(hits))))])
-	t.Logf("p99 % 10d", hits[int(math.Round(0.99*float64(len(hits))))])
-	t.Logf("max % 10d", hits[len(hits)-1])
-}
diff --git a/compactindex/fallocate_fake.go b/compactindex/fallocate_fake.go
deleted file mode 100644
index d345a40f..00000000
--- a/compactindex/fallocate_fake.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package compactindex
-
-import (
-	"fmt"
-	"os"
-)
-
-// fake_fallocate extends the file by writing `size` zero bytes at the current
-// file position; the `offset` parameter is not used for seeking.
-func fake_fallocate(f *os.File, offset int64, size int64) error {
-	const blockSize = 4096
-	var zero [blockSize]byte
-
-	for size > 0 {
-		step := size
-		if step > blockSize {
-			step = blockSize
-		}
-
-		if _, err := f.Write(zero[:step]); err != nil {
-			return fmt.Errorf("failure while generic fallocate: %w", err)
-		}
-
-		offset += step
-		size -= step
-	}
-
-	return nil
-}
diff --git a/compactindex/fallocate_generic.go b/compactindex/fallocate_generic.go
deleted file mode 100644
index e0fb1b33..00000000
--- a/compactindex/fallocate_generic.go
+++ /dev/null
@@ -1,11 +0,0 @@
-//go:build !linux
-
-package compactindex
-
-import (
-	"os"
-)
-
-func fallocate(f *os.File, offset int64, size int64) error {
-	return fake_fallocate(f, offset, size)
-}
diff --git a/compactindex/fallocate_linux.go b/compactindex/fallocate_linux.go
deleted file mode 100644
index 5cdde837..00000000
--- a/compactindex/fallocate_linux.go
+++ /dev/null
@@ -1,17 +0,0 @@
-//go:build linux
-
-package compactindex
-
-import (
-	"fmt"
-	"os"
-	"syscall"
-)
-
-func fallocate(f *os.File, offset int64, size int64) error {
-	err := syscall.Fallocate(int(f.Fd()), 0, offset, size)
-	if err != nil {
-		return fmt.Errorf("failure while linux fallocate: %w", err)
-	}
-	return nil
-}
diff --git a/compactindex/query.go b/compactindex/query.go
deleted file mode 100644
index 49b5b31d..00000000
--- a/compactindex/query.go
+++ /dev/null
@@ -1,212 +0,0 @@
-package compactindex
-
-import (
-	"errors"
-	"fmt"
-	"io"
-)
-
-// DB is a compactindex handle.
-type DB struct {
-	Header
-	Stream   io.ReaderAt
-	prefetch bool
-}
-
-// Open returns a handle to access a compactindex.
-//
-// The provided stream must start with the Magic byte sequence.
-// Tip: Use io.NewSectionReader to create aligned substreams when dealing with a file that contains multiple indexes.
-func Open(stream io.ReaderAt) (*DB, error) {
-	// Read the static 32-byte header.
-	// Ignore errors if the read fails after filling the buffer (e.g. EOF).
-	var fileHeader [headerSize]byte
-	n, readErr := stream.ReadAt(fileHeader[:], 0)
-	if n < len(fileHeader) {
-		// ReadAt must return a non-nil error here.
-		return nil, readErr
-	}
-	db := new(DB)
-	if err := db.Header.Load(&fileHeader); err != nil {
-		return nil, err
-	}
-	db.Stream = stream
-	return db, nil
-}
-
-func (db *DB) Prefetch(yes bool) {
-	db.prefetch = yes
-}
-
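Since `Open` only needs an `io.ReaderAt`, the index can live on remote storage as well. A hedged sketch of a range-request-backed reader — `httpReaderAt` is hypothetical, not part of this package, assumes the server honors the `Range` header, and skips edge cases such as reads past end-of-file:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

// httpReaderAt is an illustrative io.ReaderAt that fetches each
// requested byte range with an HTTP GET and a Range header.
type httpReaderAt struct {
	client *http.Client
	url    string
}

func (r httpReaderAt) ReadAt(p []byte, off int64) (int, error) {
	req, err := http.NewRequest(http.MethodGet, r.url, nil)
	if err != nil {
		return 0, err
	}
	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, off+int64(len(p))-1))
	resp, err := r.client.Do(req)
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusPartialContent {
		return 0, fmt.Errorf("unexpected status: %s", resp.Status)
	}
	return io.ReadFull(resp.Body, p)
}
```

Such a reader could then be passed straight to `Open`, e.g. `compactindex.Open(httpReaderAt{http.DefaultClient, url})`, ideally wrapped in a caching layer as the package documentation recommends.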
-// Lookup queries for a key in the index and returns the value (offset), if any.
-//
-// Returns ErrNotFound if the key is unknown.
-func (db *DB) Lookup(key []byte) (uint64, error) {
-	bucket, err := db.LookupBucket(key)
-	if err != nil {
-		return 0, err
-	}
-	return bucket.Lookup(key)
-}
-
-// LookupBucket returns a handle to the bucket that might contain the given key.
-func (db *DB) LookupBucket(key []byte) (*Bucket, error) {
-	return db.GetBucket(db.Header.BucketHash(key))
-}
-
-// GetBucket returns a handle to the bucket at the given index.
-func (db *DB) GetBucket(i uint) (*Bucket, error) {
-	if i >= uint(db.Header.NumBuckets) {
-		return nil, fmt.Errorf("out of bounds bucket index: %d >= %d", i, db.Header.NumBuckets)
-	}
-
-	// Fill bucket handle.
-	bucket := &Bucket{
-		BucketDescriptor: BucketDescriptor{
-			Stride:      db.entryStride(),
-			OffsetWidth: intWidth(db.FileSize),
-		},
-	}
-	// Read bucket header.
-	readErr := bucket.BucketHeader.readFrom(db.Stream, i)
-	if readErr != nil {
-		return nil, readErr
-	}
-	bucket.Entries = io.NewSectionReader(db.Stream, int64(bucket.FileOffset), int64(bucket.NumEntries)*int64(bucket.Stride))
-	if db.prefetch {
-		// TODO: find good value for numEntriesToPrefetch
-		numEntriesToPrefetch := minInt64(3_000, int64(bucket.NumEntries))
-		prefetchSize := (4 + 3) * numEntriesToPrefetch // NOTE: assumes a 7-byte stride (3-byte hash + 4-byte offset)
-		buf := make([]byte, prefetchSize)
-		_, err := bucket.Entries.ReadAt(buf, 0)
-		if err != nil && !errors.Is(err, io.EOF) {
-			return nil, err
-		}
-	}
-	return bucket, nil
-}
-
-func minInt64(a, b int64) int64 {
-	if a < b {
-		return a
-	}
-	return b
-}
-
-func (db *DB) entryStride() uint8 {
-	hashSize := 3 // TODO remove hardcoded constant
-	offsetSize := intWidth(db.FileSize)
-	return uint8(hashSize) + offsetSize
-}
-
-func bucketOffset(i uint) int64 {
-	return headerSize + int64(i)*bucketHdrLen
-}
-
-func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error {
-	var buf [bucketHdrLen]byte
-	n, err := rd.ReadAt(buf[:], bucketOffset(i))
-	if n < len(buf) {
-		return err
-	}
-	b.Load(&buf)
-	return nil
-}
-
-func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error {
-	var buf [bucketHdrLen]byte
-	b.Store(&buf)
-	_, err := wr.WriteAt(buf[:], bucketOffset(i))
-	return err
-}
-
-// Bucket is a database handle pointing to a subset of the index.
-type Bucket struct {
-	BucketDescriptor
-	Entries *io.SectionReader
-}
-
-// maxEntriesPerBucket is the hardcoded maximum permitted number of entries per bucket.
-const maxEntriesPerBucket = 1 << 24 // (16 * stride) MiB
-
-// targetEntriesPerBucket is the average number of records in each hashtable bucket we aim for.
-const targetEntriesPerBucket = 10000
-
-// Load retrieves all entries in the hashtable.
-func (b *Bucket) Load(batchSize int) ([]Entry, error) {
-	if batchSize <= 0 {
-		batchSize = 512 // default to reasonable batch size
-	}
-	// TODO bounds check
-	if b.NumEntries > maxEntriesPerBucket {
-		return nil, fmt.Errorf("refusing to load bucket with %d entries", b.NumEntries)
-	}
-	entries := make([]Entry, 0, b.NumEntries)
-
-	stride := int(b.Stride)
-	buf := make([]byte, batchSize*stride)
-	off := int64(0)
-	for {
-		// Read another chunk.
-		n, err := b.Entries.ReadAt(buf, off)
-		// Decode all entries in it.
-		sub := buf[:n]
-		for len(sub) >= stride {
-			entries = append(entries, b.unmarshalEntry(sub))
-			sub = sub[stride:]
-			off += int64(stride)
-		}
-		// Handle error.
-		if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
-			break
-		} else if err != nil {
-			return nil, err
-		}
-	}
-
-	return entries, nil
-}
-
-// TODO: This binary search algo is not optimized for high-latency remotes yet.
-
-// Lookup queries for a key using binary search.
-func (b *Bucket) Lookup(key []byte) (uint64, error) { - return b.binarySearch(b.Hash(key)) -} - -func (b *Bucket) binarySearch(target uint64) (uint64, error) { - low := 0 - high := int(b.NumEntries) - return searchEytzinger(low, high, target, b.loadEntry) -} - -func searchEytzinger(min int, max int, x uint64, getter func(int) (Entry, error)) (uint64, error) { - var index int - for index < max { - k, err := getter(index) - if err != nil { - return 0, err - } - if k.Hash == x { - return k.Value, nil - } - index = index<<1 | 1 - if k.Hash < x { - index++ - } - } - return 0, ErrNotFound -} - -func (b *Bucket) loadEntry(i int) (Entry, error) { - off := int64(i) * int64(b.Stride) - buf := make([]byte, b.Stride) - n, err := b.Entries.ReadAt(buf, off) - if n != len(buf) { - return Entry{}, err - } - return b.unmarshalEntry(buf), nil -} - -// ErrNotFound marks a missing entry. -var ErrNotFound = errors.New("not found") diff --git a/compactindex/query_test.go b/compactindex/query_test.go deleted file mode 100644 index 0908372a..00000000 --- a/compactindex/query_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package compactindex - -import ( - "bytes" - "errors" - "math/rand" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -type failReader struct{ err error } - -func (rd failReader) ReadAt([]byte, int64) (int, error) { - return 0, rd.err -} - -func TestOpen_ReadFail(t *testing.T) { - err := errors.New("oh no!") - db, dbErr := Open(failReader{err}) - require.Nil(t, db) - require.Same(t, err, dbErr) -} - -func TestOpen_InvalidMagic(t *testing.T) { - var buf [32]byte - rand.Read(buf[:]) - buf[1] = '.' // make test deterministic - - db, dbErr := Open(bytes.NewReader(buf[:])) - require.Nil(t, db) - require.EqualError(t, dbErr, "not a radiance compactindex file") -} - -func TestOpen_HeaderOnly(t *testing.T) { - buf := [32]byte{ - // Magic - 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x', - // FileSize - 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // NumBuckets - 0x42, 0x00, 0x00, 0x00, - // Padding - 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - } - - db, dbErr := Open(bytes.NewReader(buf[:])) - require.NotNil(t, db) - require.NoError(t, dbErr) - - assert.NotNil(t, db.Stream) - assert.Equal(t, Header{ - FileSize: 0x1337, - NumBuckets: 0x42, - }, db.Header) -} From 0dae7ce29bdb8b35867b3ecfd73f0dd9436bca45 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 15:02:05 +0100 Subject: [PATCH 02/63] Add compactindexsized --- compactindexsized/LICENSE | 202 +++++++++++ compactindexsized/README.md | 137 ++++++++ compactindexsized/build.go | 340 +++++++++++++++++++ compactindexsized/build36_test.go | 448 +++++++++++++++++++++++++ compactindexsized/build48_test.go | 410 ++++++++++++++++++++++ compactindexsized/build8_test.go | 257 ++++++++++++++ compactindexsized/compactindex.go | 278 +++++++++++++++ compactindexsized/compactindex_test.go | 89 +++++ compactindexsized/fallocate_fake.go | 27 ++ compactindexsized/fallocate_generic.go | 11 + compactindexsized/fallocate_linux.go | 17 + compactindexsized/query.go | 228 +++++++++++++ compactindexsized/query_test.go | 58 ++++ compactindexsized/sort_test.go | 23 ++ 14 files changed, 2525 insertions(+) create mode 100644 compactindexsized/LICENSE create mode 100644 compactindexsized/README.md create mode 100644 compactindexsized/build.go create mode 100644 compactindexsized/build36_test.go create mode 100644 compactindexsized/build48_test.go create mode 100644 compactindexsized/build8_test.go 
create mode 100644 compactindexsized/compactindex.go create mode 100644 compactindexsized/compactindex_test.go create mode 100644 compactindexsized/fallocate_fake.go create mode 100644 compactindexsized/fallocate_generic.go create mode 100644 compactindexsized/fallocate_linux.go create mode 100644 compactindexsized/query.go create mode 100644 compactindexsized/query_test.go create mode 100644 compactindexsized/sort_test.go diff --git a/compactindexsized/LICENSE b/compactindexsized/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/compactindexsized/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/compactindexsized/README.md b/compactindexsized/README.md
new file mode 100644
index 00000000..ef24d1e4
--- /dev/null
+++ b/compactindexsized/README.md
@@ -0,0 +1,137 @@
+# a fast flat-file index for constant datasets
+
+This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+The following changes have been made:
+ - The package has been renamed to `compactindexsized` to avoid conflicts with the original package
+ - The values it indexes are N-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs), and other fixed-size values, directly.
+
+This package specifies a file format and Go implementation for indexing constant datasets.
+
+*`compactindex` …*
+- is an immutable file format;
+- maps arbitrary keys to fixed-size values, such as offsets in an external flat file;
+- consumes a constant amount of space per entry
+  - 3 bytes per entry hash, plus the fixed value size, regardless of key size
+- `O(1)` complexity queries, with `2 + log2(10000)` lookups worst- & average-case (binary search);
+- during construction, requires near-constant memory space and `O(n)` scratch space with regard to entries per file;
+- during construction, features a constant >500k entry/s per-core write rate (2.5 GHz Intel laptop);
+- works on any storage supporting random reads (regular files, HTTP range requests, on-chain, ...);
+- is based on the "FKS method" which uses perfect (collision-free) hash functions in a two-level hashtable; [^1]
+- is inspired by D. J. Bernstein's "constant database"; [^2]
+- uses the xxHash64 non-cryptographic hash-function; [^3]
+
+Refer to the Go documentation for the algorithms used and implementation details.
+
+[![Go Reference](https://pkg.go.dev/badge/go.firedancer.io/radiance/pkg/compactindex.svg)](https://pkg.go.dev/go.firedancer.io/radiance/pkg/compactindex)
+
+[^1]: Fredman, M. L., Komlós, J., & Szemerédi, E. (1984). Storing a Sparse Table with 0 (1) Worst Case Access Time. Journal of the ACM, 31(3), 538–544. https://doi.org/10.1145/828.1884
+[^2]: cdb by D. J. Bernstein https://cr.yp.to/cdb.html
+[^3]: Go implementation of xxHash by @cespare: https://github.com/cespare/xxhash/
+
+## Interface
+
+In programming terms:
+
+```rs
+fn lookup(key: &[byte]) -> Option<[u8; value_size]>
+```
+
+Given an arbitrary key, the index
+- states whether the key exists in the index
+- if it exists, maps the key to a fixed-size value (in the original package an integer file offset; in this fork, an N-byte value such as a CID)
+
+## Examples
+
+Here are some example scenarios where `compactindex` is useful:
+
+- When working with immutable data structures
+  - Example: Indexing [IPLD CAR files][3] carrying Merkle-DAGs of content-addressable data
+- When working with archived/constant data
+  - Example: Indexing files in `.tar` archives
+- When dealing with immutable remote storage such as S3-like object storage
+  - Example: Storing the index and target file in S3, then using [HTTP range requests][4] to efficiently query data
+
+[3]: https://ipld.io/specs/transport/car/
+[4]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
+
+Here are some things compactindex cannot do:
+
+- Cannot add more entries to an existing index
+  - Reason 1: indexes are tightly packed, so there is no space to insert new entries (though `fallocate(2)` with `FALLOC_FL_INSERT_RANGE` would technically work)
+  - Reason 2: the second-level hashtable uses a perfect hash function ensuring collision-free indexing of a subset of entries;
+    inserting new entries might cause a collision, requiring the affected bucket's hash function to be re-mined
+  - Reason 3: adding too many entries will eventually create an imbalance in the first-level hashtable;
+    fixing this imbalance effectively requires re-constructing the file from scratch
+- Cannot iterate over keys
+  - Reason: compactindex stores hashes, not the entries themselves.
+    This saves space but also allows for efficient random reads used during binary search
+
+## File Format (v0)
+
+**Encoding**
+
+The file format contains binary packed structures with byte alignment.
+
+Integers are encoded as little endian.
+
+**File Header**
+
+The file begins with a 32 byte file header.
+
+```rust
+#[repr(packed)]
+struct FileHeader {
+    magic: [u8; 8],       // 0x00
+    value_size: u64,      // 0x08
+    num_buckets: u32,     // 0x10
+    version: u8,          // 0x14
+    kind: u8,             // 0x15
+    padding_16: [u8; 10], // 0x16
+}
+```
+
+- `magic` is set to the UTF-8 string `"rdcecidx"`.
+  The reader should reject files that don't start with this string.
+- `value_size` is the size in bytes of each value stored in the index.
+- `num_buckets` is set to the number of hashtable buckets.
+- `version` is the format version (currently `1`).
+- `kind` is a user-defined tag describing what the index maps to (see `Builder.SetKind`).
+- `padding_16` must be zero. (reserved for future use)
+
+**Bucket Header Table**
+
+The file header is followed by a vector of bucket headers.
+The number of headers is set by `num_buckets` in the file header.
+
+Each bucket header is 16 bytes long.
+
+```rust
+#[repr(packed)]
+struct BucketHeader {
+    hash_domain: u32, // 0x00
+    num_entries: u32, // 0x04
+    hash_len: u8,     // 0x08
+    padding_09: u8,   // 0x09
+    file_offset: u48, // 0x0a
+}
+```
+
+- `hash_domain` is a "salt" to the per-bucket hash function.
+- `num_entries` is set to the number of records in the bucket.
+- `hash_len` is the size of the per-record hash in bytes and currently hardcoded to `3`.
+- `padding_09` must be zero.
+- `file_offset` is an offset from the beginning of the file header to the start of the bucket entries.
+
+**Bucket Entry Table**
+
+Each bucket has a vector of entries with length `num_entries`.
+This structure makes up the vast majority of the index.
+
+```rust
+#[repr(packed)]
+struct Entry {
+    hash: u??,
+    value: u??,
+}
+```
+
+The size of entry is static within a bucket. It is determined by its components:
+- The size of `hash` in bytes equals `hash_len`
+- The size of `value` in bytes equals `value_size`
diff --git a/compactindexsized/build.go b/compactindexsized/build.go
new file mode 100644
index 00000000..3ece8ce2
--- /dev/null
+++ b/compactindexsized/build.go
@@ -0,0 +1,340 @@
+package compactindexsized
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package
+// - The values it indexes are N-byte values instead of 8-byte values. This allows to index CIDs (in particular sha256+CBOR CIDs), and other values, directly.
+
+import (
+	"bufio"
+	"context"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"sort"
+	"syscall"
+)
+
+// Builder creates new compactindex files.
+type Builder struct {
+	Header
+	dir     string
+	closers []io.Closer
+	buckets []tempBucket
+}
+
+// NewBuilderSized creates a new index builder.
+//
+// If dir is an empty string, a random temporary directory is used.
+//
+// numItems refers to the number of items in the index.
+//
+// valueSize is the size of each value in bytes. It must be > 0 and <= 255.
+// All values must be of the same size.
+func NewBuilderSized(
+	dir string,
+	numItems uint,
+	valueSize uint,
+) (*Builder, error) {
+	if dir == "" {
+		var err error
+		dir, err = os.MkdirTemp("", "compactindex-")
+		if err != nil {
+			return nil, fmt.Errorf("failed to create temp dir: %w", err)
+		}
+	}
+	if valueSize == 0 {
+		return nil, fmt.Errorf("valueSize must be > 0")
+	}
+	if valueSize > 255 {
+		return nil, fmt.Errorf("valueSize must be <= 255")
+	}
+	if numItems == 0 {
+		return nil, fmt.Errorf("numItems must be > 0")
+	}
+
+	numBuckets := (numItems + targetEntriesPerBucket - 1) / targetEntriesPerBucket
+	buckets := make([]tempBucket, numBuckets)
+	closers := make([]io.Closer, 0, numBuckets)
+	for i := range buckets {
+		name := filepath.Join(dir, fmt.Sprintf("keys-%d", i))
+		f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666)
+		if err != nil {
+			for _, c := range closers {
+				c.Close()
+			}
+			return nil, err
+		}
+		closers = append(closers, f)
+		buckets[i].file = f
+		buckets[i].writer = bufio.NewWriter(f)
+		buckets[i].valueSize = uint(valueSize)
+	}
+
+	return &Builder{
+		Header: Header{
+			ValueSize:  uint64(valueSize),
+			NumBuckets: uint32(numBuckets),
+		},
+		closers: closers,
+		buckets: buckets,
+		dir:     dir,
+	}, nil
+}
+
+func (b *Builder) SetKind(kind uint8) {
+	b.Header.Kind = kind
+}
+
+func (b *Builder) getValueSize() int {
+	return int(b.ValueSize)
+}
+
+// Insert writes a key-value mapping to the index.
+//
+// Index generation will fail if the same key is inserted twice.
+// Each value must be exactly valueSize bytes long.
+func (b *Builder) Insert(key []byte, value []byte) error {
+	return b.buckets[b.Header.BucketHash(key)].writeTuple(key, value)
+}
+
+// Seal writes the final index to the provided file.
+// This process is CPU-intensive, use context to abort prematurely.
+//
+// The file should be opened with access mode os.O_RDWR.
+// Passing a non-empty file will result in a corrupted index.
+func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) {
+	// TODO support in-place writing.
+
+	// Write header.
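+	// Resulting layout, as pinned down by the expected bytes in build36_test.go:
+	//   magic (8 B) | value size u64 (8 B) | num buckets u32 (4 B) | version (1 B) | kind (1 B) | padding (10 B)
+	// followed by NumBuckets fixed-size bucket headers, then each bucket's entry table.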
+	var headerBuf [headerSize]byte
+	b.Header.Store(&headerBuf)
+	_, err = f.Write(headerBuf[:])
+	if err != nil {
+		return fmt.Errorf("failed to write header: %w", err)
+	}
+	// Create hole to leave space for bucket header table.
+	bucketTableLen := int64(b.NumBuckets) * bucketHdrLen
+	err = fallocate(f, headerSize, bucketTableLen)
+	if errors.Is(err, syscall.EOPNOTSUPP) {
+		// The underlying file system may not support fallocate
+		err = fake_fallocate(f, headerSize, bucketTableLen)
+		if err != nil {
+			return fmt.Errorf("failed to fake fallocate() bucket table: %w", err)
+		}
+	}
+	if err != nil {
+		return fmt.Errorf("failed to fallocate() bucket table: %w", err)
+	}
+	// Seal each bucket.
+	for i := range b.buckets {
+		if err := b.sealBucket(ctx, i, f); err != nil {
+			return fmt.Errorf("failed to seal bucket %d: %w", i, err)
+		}
+	}
+	return nil
+}
+
+// sealBucket will mine a bucket hashtable, write the entries to the file,
+// and then write the bucket header.
+func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error {
+	// Produce perfect hash table for bucket.
+	bucket := &b.buckets[i]
+	if err := bucket.flush(); err != nil {
+		return fmt.Errorf("failed to flush bucket %d: %w", i, err)
+	}
+	const mineAttempts uint32 = 1000
+	entries, domain, err := bucket.mine(ctx, mineAttempts)
+	if err != nil {
+		return fmt.Errorf("failed to mine bucket %d: %w", i, err)
+	}
+	// Find current file length.
+	offset, err := f.Seek(0, io.SeekEnd)
+	if err != nil {
+		return fmt.Errorf("failed to seek to EOF: %w", err)
+	}
+	if offset < 0 {
+		panic("os.File.Seek() < 0")
+	}
+	// Assemble bucket descriptor.
+	desc := BucketDescriptor{
+		BucketHeader: BucketHeader{
+			HashDomain: domain,
+			NumEntries: uint32(bucket.records),
+			HashLen:    HashSize,
+			FileOffset: uint64(offset),
+		},
+		Stride:      b.getEntryStride(),
+		OffsetWidth: uint8(b.getValueSize()),
+	}
+	// Write entries to file.
+	wr := bufio.NewWriter(f)
+	entryBuf := make([]byte, b.getEntryStride())
+	for _, entry := range entries {
+		desc.marshalEntry(entryBuf, entry)
+		if _, err := wr.Write(entryBuf[:]); err != nil {
+			return fmt.Errorf("failed to write record to index: %w", err)
+		}
+	}
+	if err := wr.Flush(); err != nil {
+		return fmt.Errorf("failed to flush bucket to index: %w", err)
+	}
+	// Write header to file.
+	if err := desc.BucketHeader.writeTo(f, uint(i)); err != nil {
+		return fmt.Errorf("failed to write bucket header %d: %w", i, err)
+	}
+	return nil
+}
+
+func (b *Builder) getEntryStride() uint8 {
+	offsetSize := b.getValueSize()
+	return uint8(HashSize) + uint8(offsetSize)
+}
+
+func (b *Builder) Close() error {
+	for _, c := range b.closers {
+		c.Close()
+	}
+	return os.RemoveAll(b.dir)
+}
+
+// tempBucket represents the "temporary bucket" file,
+// a disk buffer containing a vector of key-value-tuples.
+type tempBucket struct {
+	records   uint
+	valueSize uint
+	file      *os.File
+	writer    *bufio.Writer
+}
+
+// writeTuple performs a buffered write of a KV-tuple.
+func (b *tempBucket) writeTuple(key []byte, value []byte) (err error) {
+	b.records++
+	static := make([]byte, 2+b.valueSize)
+	binary.LittleEndian.PutUint16(static[0:2], uint16(len(key)))
+	copy(static[2:], value[:])
+	if _, err = b.writer.Write(static[:]); err != nil {
+		return err
+	}
+	_, err = b.writer.Write(key)
+	return
+}
+
+// flush empties the in-memory write buffer to the file.
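+// Once flushed, the temp bucket is only read back (mine re-reads the file
+// from the start on every attempt), so the buffered writer is released.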
+func (b *tempBucket) flush() error { + if err := b.writer.Flush(); err != nil { + return fmt.Errorf("failed to flush writer: %w", err) + } + b.writer = nil + return nil +} + +// mine repeatedly hashes the set of entries with different nonces. +// +// Returns a sorted list of hashtable entries upon finding a set of hashes without collisions. +// If a number of attempts was made without success, returns ErrCollision instead. +func (b *tempBucket) mine(ctx context.Context, attempts uint32) (entries []Entry, domain uint32, err error) { + entries = make([]Entry, b.records) + bitmap := make([]byte, 1<<21) + + rd := bufio.NewReader(b.file) + for domain = uint32(0); domain < attempts; domain++ { + if err = ctx.Err(); err != nil { + return + } + // Reset bitmap + for i := range bitmap { + bitmap[i] = 0 + } + // Reset reader + if _, err = b.file.Seek(0, io.SeekStart); err != nil { + return + } + rd.Reset(b.file) + + if hashErr := hashBucket(b.valueSize, rd, entries, bitmap, domain); errors.Is(hashErr, ErrCollision) { + continue + } else if hashErr != nil { + return nil, 0, hashErr + } + + return // ok + } + + return nil, domain, ErrCollision +} + +var ErrCollision = errors.New("hash collision") + +// hashBucket reads and hashes entries from a temporary bucket file. +// +// Uses a 2^24 wide bitmap to detect collisions. +func hashBucket( + valueSize uint, + rd *bufio.Reader, + entries []Entry, + bitmap []byte, + nonce uint32, +) error { + // TODO Don't hardcode this, choose hash depth dynamically + mask := uint64(0xffffff) + + // Scan provided reader for entries and hash along the way. + static := make([]byte, 2+valueSize) + for i := range entries { + // Read next key from file (as defined by writeTuple) + if _, err := io.ReadFull(rd, static[:]); err != nil { + return err + } + keyLen := binary.LittleEndian.Uint16(static[0:2]) + value := make([]byte, valueSize) + copy(value[:], static[2:]) + key := make([]byte, keyLen) + if _, err := io.ReadFull(rd, key); err != nil { + return err + } + + // Hash to entry + hash := EntryHash64(nonce, key) & mask + + // Check for collision in bitmap + bi, bj := hash/8, hash%8 + chunk := bitmap[bi] + if (chunk>>bj)&1 == 1 { + return ErrCollision + } + bitmap[bi] = chunk | (1 << bj) + + // Export entry + entries[i] = Entry{ + Hash: hash, + Value: value, + } + } + + // Sort entries. + sortWithCompare(entries, func(i, j int) bool { + return entries[i].Hash < entries[j].Hash + }) + + return nil +} + +func sortWithCompare[T any](a []T, compare func(i, j int) bool) { + sort.Slice(a, compare) + sorted := make([]T, len(a)) + eytzinger(a, sorted, 0, 1) + copy(a, sorted) +} + +func eytzinger[T any](in, out []T, i, k int) int { + if k <= len(in) { + i = eytzinger(in, out, i, 2*k) + out[k-1] = in[i] + i++ + i = eytzinger(in, out, i, 2*k+1) + } + return i +} diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go new file mode 100644 index 00000000..2d3d7394 --- /dev/null +++ b/compactindexsized/build36_test.go @@ -0,0 +1,448 @@ +package compactindexsized + +// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex +// The following changes have been made: +// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package +// - The values it indexes are N-byte values instead of 8-byte values. This allows to index CIDs (in particular sha256+CBOR CIDs), and other values, directly. 
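+//
+// Rough usage sketch of the API exercised by these tests (illustrative
+// pseudocode only, not part of the package):
+//
+//	builder, _ := NewBuilderSized("", numItems, 36)
+//	_ = builder.Insert(key, value) // value must be 36 bytes here
+//	_ = builder.Seal(ctx, indexFile)
+//	db, _ := Open(indexFile)
+//	bucket, _ := db.LookupBucket(key)
+//	value, _ = bucket.Lookup(key)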
+ +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "math/rand" + "os" + "strings" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/ipfs/go-cid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbauerster/mpb/v8/decor" +) + +var testCidStrings = []string{ + "bafyreiba5kzq6wf6neax6ascsh5khxhuy7zc6vqsu6zac32i7ilv4u62nm", + "bafyreie42alzugm43fiqv64ss3h5fh3xriaeamul7d7qmrrbxe6fpjo5b4", + "bafyreidam5koitaftfx7sydge5ta3ig2j5qbabqcql4umpom3yuia4sbm4", + "bafyreia3pebgypo4oqgdg4pqpjfybmcdbsbavcdscotji4wj2gfc3r4icm", + "bafyreigudmeashua4432mbq3tawwnsz3qfpmm5tjpwahopn7cxttotqdge", + "bafyreic3azak2ds4fomkw35pmvsznu46sgonmketlnfaqnoc6owi4t64my", + "bafyreib6t4ooiajnebkwgk4z57fhcvejc663a6haq6cb6tjjluj4fuulla", + "bafyreidmohyrgchkgavx7wubebip5agb4ngisnlkqaot4kz2eo635ny5m4", + "bafyreicpmxvpxwjemofmic6aka72dliueqxtsklrilkofwbqgn6ffuz7ka", + "bafyreifkjdmj3kmi2hkoqcqweunbktouxo6sy362rysl34ffyjinebylim", + "bafyreidzql2rmbs3chtq2cmbnncvfyz2tjclwqx4vnowvyph77fomh26qi", + "bafyreig4kpaq6rf5y46qgqhdzgr5uauubfqyevbmj6pmtaxxhh3tkyzury", + "bafyreianxqyomvh6dl533cs25z7yfda2z62ity3w7sdqf3kk4tmogu7t24", + "bafyreicaq6dv5jsq4du2tqiefr3baepnj4ei3bpxvg5g6np7ygacgbw5aq", + "bafyreia4b2nleifcp54w4scrjy7fgctsoy6zz4mkot3gw6xydqkrc2wdtq", + "bafyreierpgsryprxfgshtzjarnb662d5akhg7om6utubggjwtlg6qwwj5i", + "bafyreidufcwvs7fvot2blqnwciaxre35s3ip6xxkncrus4voci3ktots2q", + "bafyreif23uzartrw62g5pywtrsz3xsl2wdw73o4fvtsf76gqgx37mfpqjm", + "bafyreianu4oifizvqyop753ao4hrocftlbnn6kzm7xtsm4ryaz6uawkgmu", + "bafyreidekyir7cti4jch652nnmimrftoqynjxl6vzjimfkqxh42rx27yiy", + "bafyreia3zuym3akg4gp5ewlmdwxnybrsqrab4m6tpsgxq65az6z7r5jtba", + "bafyreihherovnppygar6h5hu4we4hkxrvoqtpkawwcmn7hkyeq6cisouyu", + "bafyreicmqd5dhn2hv2qcskf27vsml36p6srz6zoxjfjkmnu7ltczqtbkbe", + "bafyreihy2c7lomf3i3nucd5bbwvox3luhtnzujkybchgmyxenmanon7rxe", + "bafyreicld6buy3mr4ibs2jzakoaptdj7xvpjo4pwhwiuywnrzfzoh5ahqi", + "bafyreifyapa5a5ii72hfmqiwgsjto6iarshfwwvrrxdw3bhr62ucuutl4e", + "bafyreigrlvwdaivwthwvihcbyrnl5pl7jfor72xlaivi2f6ajypy4yku3a", + "bafyreiamvgkbpuahegu3mhxxujzvxk2t5hrykhrfw47yurlxqumkv243iy", + "bafyreib4qf7qpjmpr2eqi7mqwqxw2fznnkvhzkpj3udiloxqay5fhk5wui", + "bafyreidbol6tdhj42rdpchpafszgmnmg7tgvi2uwou7s2whiamznzawhk4", + "bafyreidrpejzimhuwq6j74jzv2odzriuitwmdkp2ibojzcax6jdpqiztti", + "bafyreidrgb4vmgvsreebrj6apscopszfbgw5e7llh22kk2cdayyeoyggwy", + "bafyreigpzlopkl2ttxfdf6n5sgxyda4bvlglre7nkjq37uecmvf47f6ttm", + "bafyreidcq3csrifsyeed42fbky42w7bxhvg6fd42l7qkw3cnxliab4e7nu", + "bafyreibchdux4qchrrz67kikde273mjth475fedjisvoazf3zhmodlkx7a", + "bafyreie4rdlgpfcrrdlonofkwlrefh6z5hcwieasatkddozvyknwqahh4q", + "bafyreibhwuih7ekso6zypyr4uwl37xewyu7foy2clqvz4l7lbgwxpslyyu", + "bafyreigltijqq3m6h7h6du5o4ynqwmimtslnsmyu3njwlnpuyadyev6awa", + "bafyreihwtszo3p7ujg2wsuhsqon5tidxxnyin2t42uhj7zq6xta7fo2suy", + "bafyreie2uggjajncn2lna6ytq2sw2uu4xw724pe6wj4ihhiawnnjm5sgwa", + "bafyreignb5gdw7fwfycoipjqbkvkve7dkuugr3s5ylkaucn3ks7klxh4te", + "bafyreib3iwnufpnoxgf7z5w3vtygu2z2kcqxj3quxypupfgmr53tyt6wdq", + "bafyreic7kxsh7nmfpxmrm727yug2rfnrhfuavmpll3cms4r6cpnbbuwgqm", + "bafyreig2o4yrzlwo74eom4v65tenr6yjh2v23vbl7sjffrppzceenxs3eq", + "bafyreidletnh5bxnc6k2p3idnul5qatfcf4qqrgmkjxolgpu7wolye47hm", + "bafyreigv2nni66nb6be5dchkonpb2t556qplv5xz4vdolwmyz4m32aufdi", + "bafyreid66pezal5svaidpvxc3zz6w5eksxcjn6omelhsqhj5jmcmxhgjhm", + "bafyreihjhwpvm2soq5syyovsiqrchsuojsdk4imj2gqk6pikc4rxdqtmny", + "bafyreidt3oveadwf5jrmxatrwa5bdxvfyxnrucypmtqwiu2pvrrztrj5xe", + 
"bafyreid6y6r44wqcwql5yyitmw5mpfmrrlsois2unbqzmtlvyeefqahnnu", + "bafyreic6evvtf3y3slkbwhzbjuvspqu2jxf7qr267rhigmox6f4a5a36eq", + "bafyreiekep5a55yvebqzzi6x7xyotse57zfwcpyeh2xermqkvxlkvpxh24", + "bafyreigwb22sgfg56dc2jnnvxttjyhwfp4itevlukqj2wfz5ebru72elv4", + "bafyreiebz2fxh64dqvbiwmqnyj5rj63txl5u7abmets2imhn2su6tcuvyu", + "bafyreigcm7wkxlsyc26acgb7nfjho2twh6au2pbk35w6bsbv2qt7rt7iaq", + "bafyreieiuq6g74i25huoumvey7oynljndt2d4qvbddqkhpysrexu7ixsuy", + "bafyreihuhj5slybgbqzdr4mpkyo5dwvqjxfhicardbph6htiyeut2frol4", + "bafyreiaskg4kwqrpdcatnymvno4xf54uewysdiz3357fdct2tlnx2gpkqq", + "bafyreicakit2lbmg3wo4uoox4rc2gv3odzrrkrr32zwk7qaolpoc7uyz5u", + "bafyreih5jcnhw4evhq5j4n75miruqfofo2dv46hdtqyd5ht2eqeu7g5cme", + "bafyreicwtl6ulct4ckjnq57gmctw3wjo6ctvjbbr7l4bwfbzpj3y3g6unm", + "bafyreiebgoqj3nawzcwjy4t67uljnmvfh55fiqaxsskld6qpjvd2majesq", + "bafyreif472dxwhnyjhxmxoto3czfblhssgmhrpsqcmrwzprywk45wqdtmi", + "bafyreiaz444on546zihfuygqchlw4r4vu2tuw5xnelm6dsodqcno23pvzu", + "bafyreidgzghcd2lfdcylsccvlj43f5ujj7xtriu6ojp7jog5iainecagka", + "bafyreiehvi56dn3zm2ltfgecss2ydfmcb2hmf6hk76b6ebpoxhquajawze", + "bafyreie4wcortvdsirbontddokin6wgm25xg46lu3qxcyyjj6rgkuk5cca", + "bafyreicurlgiukht7wnxy3za3hz5fzs2a62ggc6i3rqhzhck4p2lgt5754", + "bafyreihn2zwm7m3tqfwa53me4qxiit66yiny5sxtkvvjewjfkbjrgmeswu", + "bafyreid7m33qok7d66vsyc5mq257rya5sg24rzv5qwbghwsimclt5ll7pi", +} + +var testCids = func() []cid.Cid { + var cids []cid.Cid + for _, s := range testCidStrings { + c, err := cid.Decode(s) + if err != nil { + panic(err) + } + cids = append(cids, c) + } + return cids +}() + +func concatBytes(bs ...[]byte) []byte { + var out []byte + for _, b := range bs { + out = append(out, b...) + } + return out +} + +func numberToHexBytes(n int) string { + return (fmt.Sprintf("0x%02x", n)) +} + +func FormatByteSlice(buf []byte) string { + elems := make([]string, 0) + for _, v := range buf { + elems = append(elems, numberToHexBytes(int(v))) + } + + return "{" + strings.Join(elems, ", ") + "}" + fmt.Sprintf("(len=%v)", len(elems)) +} + +func splitBufferWithProvidedSizes(buf []byte, sizes []int) [][]byte { + var out [][]byte + var offset int + for _, size := range sizes { + out = append(out, buf[offset:offset+size]) + offset += size + } + return out +} + +func compareBufferArrays(a, b [][]byte) []bool { + var out []bool + + for i := 0; i < len(a); i++ { + out = append(out, bytes.Equal(a[i], b[i])) + } + + return out +} + +func TestBuilder36(t *testing.T) { + const numBuckets = 3 + const valueSize = 36 + + // Create a table with 3 buckets. + builder, err := NewBuilderSized("", numBuckets*targetEntriesPerBucket, valueSize) + require.NoError(t, err) + require.NotNil(t, builder) + assert.Len(t, builder.buckets, 3) + defer builder.Close() + + kindSomething := uint8(0x42) + builder.SetKind(kindSomething) + + // Insert a few entries. + keys := []string{"hello", "world", "blub", "foo"} + for i, key := range keys { + require.NoError(t, builder.Insert([]byte(key), []byte(testCids[i].Bytes()))) + } + { + // print test values + for _, tc := range testCids { + spew.Dump(FormatByteSlice(tc.Bytes())) + } + } + + // Create index file. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal index. + require.NoError(t, builder.Seal(context.TODO(), targetFile)) + + // Assert binary content. 
+ buf, err := os.ReadFile(targetFile.Name()) + require.NoError(t, err) + expected := concatBytes( + // --- File header + // magic + []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0 + // value size (36 bytes in this case) + []byte{0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 1 + // num buckets + []byte{0x03, 0x00, 0x00, 0x00}, // 2 + []byte{ + 0x01, // version + 0x42, // kind + // padding + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, // 3 + + // --- Bucket header 0 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 4 + // num entries + []byte{0x01, 0x00, 0x00, 0x00}, // 5 + // hash len + []byte{0x03}, // 6 + // padding + []byte{0x00}, // 7 + // file offset + []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 + + // --- Bucket header 1 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 9 + // num entries + []byte{0x01, 0x00, 0x00, 0x00}, // 10 + // hash len + []byte{0x03}, // 11 + // padding + []byte{0x00}, // 12 + // file offset + []byte{0x77, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13 + + // --- Bucket header 2 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 14 + // num entries + []byte{0x02, 0x00, 0x00, 0x00}, // 15 + // hash len + []byte{0x03}, // 16 + // padding + []byte{0x00}, // 17 + // file offset + []byte{0x9e, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18 + + // --- Bucket 0 + // hash + []byte{0xe2, 0xdb, 0x55}, // 19 + // value + []byte{0x1, 0x71, 0x12, 0x20, 0x20, 0xea, 0xb3, 0xf, 0x58, 0xbe, 0x69, 0x1, 0x7f, 0x2, 0x42, 0x91, 0xfa, 0xa3, 0xdc, 0xf4, 0xc7, 0xf2, 0x2f, 0x56, 0x12, 0xa7, 0xb2, 0x1, 0x6f, 0x48, 0xfa, 0x17, 0x5e, 0x53, 0xda, 0x6b}, // 20 + + // --- Bucket 2 + // hash + []byte{0x92, 0xcd, 0xbb}, // 21 + // value + []byte{0x01, 0x71, 0x12, 0x20, 0x9c, 0xd0, 0x17, 0x9a, 0x19, 0x9c, 0xd9, 0x51, 0x0a, 0xfb, 0x92, 0x96, 0xcf, 0xd2, 0x9f, 0x77, 0x8a, 0x00, 0x40, 0x32, 0x8b, 0xf8, 0xff, 0x06, 0x46, 0x21, 0xb9, 0x3c, 0x57, 0xa5, 0xdd, 0x0f}, // 22 + // hash + []byte{0x98, 0x3d, 0xbd}, // 25 + // value + []byte{0x01, 0x71, 0x12, 0x20, 0x1b, 0x79, 0x02, 0x6c, 0x3d, 0xdc, 0x74, 0x0c, 0x33, 0x71, 0xf0, 0x7a, 0x4b, 0x80, 0xb0, 0x43, 0x0c, 0x82, 0x0a, 0x88, 0x72, 0x13, 0xa6, 0x94, 0x72, 0xc9, 0xd1, 0x8a, 0x2d, 0xc7, 0x88, 0x13}, // 26 + // hash + []byte{0xe3, 0x09, 0x6b}, // 23 + // value + []byte{0x1, 0x71, 0x12, 0x20, 0x60, 0x67, 0x54, 0xe4, 0x4c, 0x5, 0x99, 0x6f, 0xf9, 0x60, 0x66, 0x27, 0x66, 0xd, 0xa0, 0xda, 0x4f, 0x60, 0x10, 0x6, 0x2, 0x82, 0xf9, 0x46, 0x3d, 0xcc, 0xde, 0x28, 0x80, 0x72, 0x41, 0x67}, // 24 + ) + assert.Equal(t, expected, buf) + + { + splitSizes := []int{ + // --- File header + 8, 8, 4, 12, + // --- Bucket header 0 + 4, 4, 1, 1, 6, + // --- Bucket header 1 + 4, 4, 1, 1, 6, + // --- Bucket header 2 + 4, 4, 1, 1, 6, + // --- Bucket 0 + 3, valueSize, + // --- Bucket 2 + 3, valueSize, 3, valueSize, 3, valueSize, + } + splitExpected := splitBufferWithProvidedSizes(expected, splitSizes) + splitGot := splitBufferWithProvidedSizes(buf, splitSizes) + + comparations := compareBufferArrays(splitExpected, splitGot) + + for i, equal := range comparations { + if !equal { + t.Errorf("%d: \nexpected: %v, \n got: %v", i, FormatByteSlice(splitExpected[i]), FormatByteSlice(splitGot[i])) + } + } + } + + // Reset file offset. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + + // Open index. 
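+	// The byte-level assertions above pin down the exact on-disk layout
+	// (32-byte file header, 16-byte bucket headers, 39-byte entries); from
+	// here on the same file is exercised through the public query API.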
+ db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + require.NotNil(t, db) + + assert.Equal(t, kindSomething, db.Header.Kind) + assert.Equal(t, kindSomething, db.GetKind()) + + // File header assertions. + assert.Equal(t, Header{ + ValueSize: valueSize, + NumBuckets: numBuckets, + Kind: kindSomething, + }, db.Header) + + // Get bucket handles. + buckets := make([]*Bucket, numBuckets) + for i := range buckets { + buckets[i], err = db.GetBucket(uint(i)) + require.NoError(t, err) + } + + // Ensure out-of-bounds bucket accesses fail. + _, wantErr := db.GetBucket(numBuckets) + assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") + + // Bucket header assertions. + assert.Equal(t, BucketDescriptor{ + BucketHeader: BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x50, + }, + Stride: 3 + valueSize, // 3 + 36 + OffsetWidth: valueSize, + }, buckets[0].BucketDescriptor) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 119, + }, buckets[1].BucketHeader) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 2, + HashLen: 3, + FileOffset: 158, + }, buckets[2].BucketHeader) + + assert.Equal(t, uint8(3+valueSize), buckets[2].Stride) + // Test lookups. + entries, err := buckets[2].Load( /*batchSize*/ 3) + require.NoError(t, err) + assert.Equal(t, []Entry{ + { + Hash: 12402072, + Value: []byte(testCids[3].Bytes()), + }, + { + Hash: 7014883, + Value: []byte(testCids[2].Bytes()), + }, + }, entries) + + { + for i, keyString := range keys { + key := []byte(keyString) + bucket, err := db.LookupBucket(key) + require.NoError(t, err) + + value, err := bucket.Lookup(key) + require.NoError(t, err) + assert.Equal(t, []byte(testCids[i].Bytes()), value) + } + } +} + +func TestBuilder36_Random(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long test") + } + + numKeys := uint(len(testCids)) + const keySize = uint(16) + const valueSize = 36 + const queries = int(10000) + + // Create new builder session. + builder, err := NewBuilderSized("", numKeys, valueSize) + require.NoError(t, err) + require.NotNil(t, builder) + require.NotEmpty(t, builder.buckets) + + // Ensure we cleaned up after ourselves. + defer func() { + _, statErr := os.Stat(builder.dir) + assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) + }() + defer builder.Close() + + // Insert items to temp buckets. + preInsert := time.Now() + key := make([]byte, keySize) + for i := uint(0); i < numKeys; i++ { + binary.LittleEndian.PutUint64(key, uint64(i)) + err := builder.Insert(key, []byte(testCids[i].Bytes())) + require.NoError(t, err) + } + t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) + + // Create file for final index. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal to final index. + preSeal := time.Now() + sealErr := builder.Seal(context.TODO(), targetFile) + require.NoError(t, sealErr, "Seal failed") + t.Logf("Sealed in %s", time.Since(preSeal)) + + // Print some stats. + targetStat, err := targetFile.Stat() + require.NoError(t, err) + t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) + t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) + t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) + + // Open index. 
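+	// Same pattern as TestBuilder36: rewind so the reader starts at the
+	// header, reopen, then issue random lookups against the sealed index.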
+ _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + + // Run query benchmark. + preQuery := time.Now() + for i := queries; i != 0; i-- { + keyN := uint64(rand.Int63n(int64(numKeys))) + binary.LittleEndian.PutUint64(key, keyN) + + bucket, err := db.LookupBucket(key) + require.NoError(t, err) + + value, err := bucket.Lookup(key) + require.NoError(t, err) + require.Equal(t, []byte(testCids[keyN].Bytes()), value) + } + t.Logf("Queried %d items", queries) + t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds()) +} diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go new file mode 100644 index 00000000..403fe597 --- /dev/null +++ b/compactindexsized/build48_test.go @@ -0,0 +1,410 @@ +package compactindexsized + +// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex +// The following changes have been made: +// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package +// - The values it indexes are N-byte values instead of 8-byte values. This allows to index CIDs (in particular sha256+CBOR CIDs), and other values, directly. + +import ( + "context" + "encoding/binary" + "errors" + "io" + "io/fs" + "math/rand" + "os" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbauerster/mpb/v8/decor" +) + +var testValues48 = [][]byte{ + {0xcc, 0x0a, 0xd4, 0x66, 0x32, 0x50, 0xc3, 0x96, 0x8b, 0x5c, 0x77, 0x7e, 0xb8, 0xfd, 0x9c, 0x78, 0xea, 0xfb, 0xd3, 0x4f, 0x1a, 0x59, 0x4e, 0xda, 0x1d, 0x90, 0x2a, 0xcd, 0x79, 0xb6, 0x0b, 0x2d, 0xea, 0x76, 0x36, 0x54, 0x65, 0xe6, 0x53, 0x1b, 0x70, 0x38, 0x84, 0xb2, 0xbf, 0x5d, 0xf9, 0x30}, + {0x7c, 0x18, 0x51, 0xd7, 0x63, 0x83, 0xf9, 0xc5, 0xaa, 0x48, 0x3c, 0x8e, 0xff, 0xf0, 0xf1, 0xab, 0xee, 0xda, 0xb0, 0x2f, 0x92, 0xcc, 0xb8, 0x78, 0x11, 0x5b, 0xa0, 0xb9, 0xfa, 0xf5, 0x2e, 0xb4, 0xd7, 0x10, 0x2d, 0x7b, 0xe5, 0xb6, 0x9f, 0xd0, 0xb1, 0xff, 0xd0, 0xf2, 0xef, 0xcd, 0x72, 0x1a}, + {0x0b, 0x2f, 0xc2, 0x4d, 0xc5, 0x98, 0x8b, 0x13, 0xd9, 0x17, 0xf8, 0xc1, 0xb8, 0x59, 0xd4, 0x24, 0xad, 0xef, 0xe5, 0xb6, 0xb8, 0xb9, 0xba, 0x01, 0x9c, 0xe0, 0x7f, 0x96, 0x25, 0x83, 0xd6, 0xbf, 0xa3, 0xb2, 0xf2, 0x29, 0xb9, 0xa1, 0xa1, 0x92, 0xd0, 0xc0, 0xe5, 0x06, 0x94, 0xea, 0x6c, 0xb3}, + {0xbb, 0x12, 0x08, 0x5f, 0x73, 0xee, 0x39, 0x69, 0x9f, 0x6e, 0x5a, 0xd8, 0x21, 0x2d, 0x43, 0xbe, 0x01, 0xc1, 0x3f, 0xc5, 0xfa, 0x86, 0x09, 0x7e, 0x97, 0x61, 0x59, 0xb8, 0xc9, 0x16, 0x47, 0xe3, 0x18, 0xfe, 0x52, 0x1e, 0xa2, 0x98, 0x59, 0x83, 0x16, 0x88, 0x5b, 0x46, 0x83, 0x2b, 0xa3, 0x2a}, + {0xe5, 0x8f, 0x27, 0xfd, 0x2f, 0x24, 0xf3, 0x40, 0xe4, 0x0b, 0xb4, 0xcf, 0x8d, 0x5d, 0xc1, 0x36, 0x84, 0x2b, 0x64, 0x11, 0x8b, 0x29, 0x8c, 0x17, 0xe2, 0xa6, 0x8c, 0xfb, 0x57, 0xe7, 0xc7, 0x48, 0x38, 0x4e, 0x3a, 0xad, 0xd4, 0xac, 0xed, 0x65, 0x6c, 0xd5, 0xd3, 0x2d, 0x3d, 0x44, 0xea, 0xb0}, + {0xc6, 0x73, 0xd8, 0x4d, 0x55, 0xae, 0x7d, 0x0b, 0x2a, 0xe7, 0x21, 0x58, 0x0e, 0x11, 0xb5, 0x31, 0xff, 0xb1, 0x5c, 0xb2, 0x22, 0x89, 0xa5, 0x3e, 0x7a, 0x94, 0x48, 0xc5, 0x5c, 0x41, 0x3b, 0x2e, 0x2b, 0x44, 0xa4, 0x60, 0xc8, 0x78, 0xab, 0xb8, 0xac, 0x94, 0xcb, 0x4b, 0x17, 0x6f, 0x7c, 0x14}, + {0x5b, 0x60, 0x10, 0x51, 0x44, 0x61, 0xf8, 0x08, 0x24, 0xca, 0x38, 0x25, 0xf1, 0x03, 0x9a, 0x09, 0x9c, 0xa4, 0xf5, 0x6f, 0x7b, 0x78, 0x98, 0x00, 0xaf, 0xdb, 0x29, 0x5d, 0xdb, 0x8d, 0xc8, 
0x89, 0x5e, 0xd0, 0x35, 0x7c, 0x8a, 0x4c, 0x61, 0x19, 0x7c, 0xa5, 0xe3, 0x19, 0xf1, 0x27, 0x11, 0x4b}, + {0x05, 0xfb, 0x22, 0xef, 0xc3, 0x75, 0xa4, 0x0c, 0x17, 0xa8, 0x3d, 0x55, 0xfb, 0x9c, 0x6b, 0xf5, 0xed, 0xc0, 0x23, 0x19, 0x3a, 0x90, 0x81, 0x9e, 0xa0, 0x64, 0x36, 0x2f, 0x17, 0xd7, 0xd1, 0x05, 0x65, 0x58, 0xe9, 0x0b, 0xcf, 0xbb, 0xcf, 0x91, 0xf7, 0x99, 0x26, 0x00, 0x2d, 0x41, 0x28, 0xf4}, + {0xa6, 0xdd, 0x09, 0x1e, 0x59, 0x8c, 0xf8, 0x5b, 0xa4, 0x52, 0x67, 0xa9, 0x9d, 0xbc, 0x4b, 0x3e, 0x85, 0x52, 0xf0, 0x1c, 0xda, 0xf8, 0x50, 0xee, 0x57, 0x19, 0xe4, 0xad, 0x96, 0xb9, 0xef, 0x2e, 0x8e, 0xba, 0x80, 0xa6, 0xd8, 0xdd, 0x3a, 0xd1, 0x4c, 0xe2, 0x74, 0xd9, 0xb3, 0xcb, 0xf5, 0x81}, + {0xe8, 0x94, 0x5f, 0xc8, 0x35, 0xf6, 0x80, 0x82, 0xe0, 0xdb, 0xbe, 0x5d, 0x6d, 0x9b, 0x98, 0x13, 0xe5, 0xd2, 0x4b, 0xa5, 0x66, 0x9c, 0x0f, 0x50, 0x74, 0x9e, 0x6f, 0xfe, 0xeb, 0x52, 0xd5, 0xfc, 0x35, 0x02, 0x2e, 0xfd, 0xc5, 0xf8, 0x14, 0xb8, 0x72, 0xb9, 0xb8, 0xd6, 0xc8, 0x71, 0x6c, 0x9b}, + {0x72, 0x75, 0xab, 0xc9, 0xfd, 0x20, 0x50, 0xb8, 0x65, 0x3f, 0x9f, 0x0d, 0xc7, 0xd4, 0xd3, 0x05, 0x9e, 0xf8, 0x83, 0x29, 0x53, 0x48, 0x60, 0xc8, 0x68, 0xb9, 0x27, 0x45, 0xdc, 0x98, 0x45, 0x8b, 0x4b, 0x50, 0xb4, 0x2b, 0xee, 0xd9, 0x40, 0x9d, 0x91, 0x48, 0x55, 0x22, 0xdd, 0x4e, 0x85, 0xe6}, + {0x80, 0xdf, 0x02, 0x03, 0xc9, 0x71, 0x99, 0x8d, 0x87, 0x77, 0x9c, 0xed, 0x06, 0xd9, 0x41, 0xe8, 0x27, 0xcb, 0xd0, 0xce, 0xb3, 0x17, 0x6f, 0x23, 0x51, 0xe0, 0x68, 0x1e, 0xac, 0x28, 0x60, 0x84, 0xa3, 0x9f, 0x7c, 0x50, 0xe8, 0xd8, 0xcf, 0x4d, 0xde, 0x1d, 0xbb, 0x1c, 0x36, 0xac, 0xbb, 0x19}, + {0xfd, 0xe3, 0x3b, 0x9d, 0x0b, 0xb8, 0x70, 0xa5, 0xd7, 0x27, 0x0a, 0x05, 0x3a, 0x21, 0x2d, 0x74, 0xfd, 0xe2, 0xed, 0x2f, 0x33, 0x33, 0x42, 0x75, 0xf8, 0x69, 0x66, 0xc7, 0xf4, 0xf5, 0xf9, 0x8c, 0x74, 0xe2, 0x84, 0x77, 0x88, 0x34, 0x20, 0x9f, 0x1f, 0xef, 0x69, 0xfd, 0x23, 0x0b, 0x2d, 0x59}, + {0x8c, 0x5c, 0xc0, 0x72, 0xde, 0xca, 0x10, 0x62, 0xdd, 0x43, 0xe2, 0x02, 0x52, 0xe7, 0x64, 0x55, 0xf8, 0xa9, 0xf9, 0x0b, 0x98, 0x0f, 0xc0, 0x1c, 0x17, 0xc4, 0x60, 0xa6, 0x7c, 0x15, 0x8f, 0xa0, 0xa9, 0x92, 0xf5, 0xb3, 0x65, 0x31, 0x06, 0xd6, 0x20, 0xb1, 0x46, 0xe3, 0x90, 0x03, 0xe1, 0x0a}, + {0x30, 0xc3, 0x6e, 0x7b, 0x6f, 0xf1, 0x65, 0x0b, 0x8f, 0x7e, 0xa4, 0xaf, 0x65, 0x49, 0x67, 0xc6, 0x5b, 0x55, 0xfe, 0x58, 0xde, 0x41, 0x42, 0x8b, 0x6c, 0x84, 0x6e, 0xac, 0x9d, 0xb4, 0xe5, 0x61, 0x57, 0x0b, 0x94, 0xb8, 0x19, 0xc2, 0x9d, 0x17, 0xcd, 0xd0, 0x09, 0xd9, 0x36, 0x2c, 0xe0, 0x44}, + {0x16, 0x47, 0xf2, 0xba, 0x4c, 0xeb, 0xdf, 0x74, 0x5c, 0x33, 0x6b, 0xae, 0xb6, 0xd5, 0x0c, 0x5a, 0x1a, 0xb0, 0x9c, 0xf8, 0xa8, 0x46, 0xc2, 0x8c, 0x1e, 0x26, 0x8c, 0x8f, 0xc1, 0xfe, 0xd8, 0x18, 0x35, 0x27, 0xbd, 0xf7, 0x6d, 0x0d, 0xb1, 0xbb, 0x7f, 0xc4, 0x40, 0xd1, 0xa9, 0x15, 0xd3, 0xf2}, + {0xc5, 0x6f, 0x90, 0x80, 0x3c, 0x70, 0x98, 0xc3, 0xb8, 0x43, 0x5e, 0xe9, 0x3a, 0xbd, 0xe9, 0xcb, 0x0c, 0x54, 0xd1, 0xd2, 0x2b, 0x0e, 0xa3, 0x11, 0x48, 0xfc, 0x6e, 0x8f, 0xb3, 0x63, 0x02, 0xcf, 0x4c, 0x74, 0x85, 0x5f, 0x70, 0x1d, 0x05, 0xb2, 0x83, 0x92, 0x7b, 0x18, 0x9b, 0x8f, 0x7c, 0x96}, + {0x9d, 0xdb, 0x06, 0x39, 0x04, 0xf3, 0x25, 0x8e, 0xe1, 0xcc, 0xfa, 0xfc, 0xda, 0x97, 0xee, 0x3a, 0x81, 0x57, 0x7d, 0x69, 0x34, 0x76, 0x0e, 0x10, 0xc2, 0x61, 0xd5, 0xa0, 0x6d, 0xfd, 0x30, 0x42, 0x5f, 0x34, 0x24, 0xb0, 0x90, 0x7e, 0x29, 0x6f, 0x9b, 0x12, 0x71, 0xd6, 0x8b, 0x9e, 0x9e, 0x80}, + {0xe1, 0xf6, 0x85, 0x83, 0x84, 0x17, 0x6c, 0xcf, 0x47, 0x2f, 0x45, 0x42, 0x10, 0xef, 0x45, 0xe2, 0x6b, 0x6c, 0x36, 0x0c, 0x6a, 0x02, 0x15, 0xb2, 0x84, 0x7a, 0x81, 0xe9, 0xd0, 0x78, 0xf3, 0x8e, 0x2a, 0x9f, 0xf5, 0x3c, 0xeb, 0x4c, 0xb9, 
0x8d, 0xd1, 0x7b, 0x66, 0xae, 0xf2, 0x10, 0x52, 0x62}, + {0x53, 0x53, 0x35, 0x69, 0xa0, 0x5b, 0x02, 0x0b, 0x0c, 0xb1, 0xc0, 0x37, 0xfc, 0xe0, 0xf3, 0xfa, 0xcc, 0x7f, 0x77, 0x6b, 0x14, 0xb3, 0xd5, 0xfb, 0x5a, 0x8e, 0x5e, 0x1a, 0xbd, 0xf5, 0xd5, 0x80, 0xf1, 0x33, 0x2c, 0x23, 0x63, 0x7d, 0x2e, 0xbb, 0x6d, 0x29, 0x00, 0x84, 0xee, 0x81, 0xa3, 0x42}, + {0x7e, 0xf7, 0x84, 0xd5, 0x4a, 0x59, 0xb2, 0x0f, 0xea, 0x5c, 0x41, 0x13, 0xb5, 0x6e, 0x36, 0x59, 0x46, 0x81, 0xfe, 0x2a, 0x73, 0xc7, 0x01, 0x84, 0x6b, 0x12, 0xcc, 0xb3, 0xe2, 0x79, 0x75, 0x7d, 0x0a, 0x01, 0xc3, 0xae, 0xf2, 0xb5, 0x52, 0x12, 0x5f, 0xe0, 0xe9, 0x9c, 0x1b, 0x95, 0x7d, 0x31}, + {0xb6, 0xdc, 0xc4, 0xc0, 0xfb, 0xbb, 0xe3, 0x24, 0x62, 0xa5, 0x4f, 0x11, 0x17, 0x0a, 0x0c, 0x58, 0x2f, 0x32, 0xf1, 0x26, 0x54, 0xb2, 0x0a, 0xff, 0xfd, 0xb0, 0x2d, 0xd2, 0x67, 0xad, 0x48, 0x50, 0x3e, 0x9d, 0x26, 0x34, 0xc3, 0xbb, 0x32, 0x81, 0x8f, 0xf8, 0x83, 0xe8, 0x5c, 0x8c, 0xd4, 0x39}, + {0x15, 0x5a, 0xb0, 0xda, 0x0d, 0xbb, 0xa5, 0xa2, 0x66, 0xf9, 0x22, 0x33, 0xef, 0xc7, 0x59, 0x50, 0x7e, 0xaa, 0xb3, 0xe8, 0x0a, 0x42, 0xe7, 0xab, 0xa0, 0x29, 0xa2, 0x9f, 0x4e, 0x53, 0x9d, 0x95, 0x4a, 0xea, 0x63, 0xd2, 0xd3, 0xd1, 0x63, 0x2e, 0x18, 0x59, 0x6e, 0xdd, 0xa4, 0xc7, 0x67, 0xeb}, + {0x78, 0x0e, 0xba, 0x3e, 0x10, 0x6b, 0x27, 0xf8, 0x39, 0x92, 0x4a, 0x01, 0x6d, 0x20, 0xc8, 0x70, 0xd6, 0x40, 0xcd, 0xc0, 0x05, 0x91, 0x09, 0xa6, 0xb2, 0x84, 0xba, 0x53, 0x36, 0xe4, 0x00, 0x01, 0x02, 0xd9, 0x17, 0xaf, 0xe5, 0x0c, 0xfd, 0xae, 0xf6, 0x03, 0x69, 0x72, 0x34, 0x35, 0x31, 0x6c}, + {0xb9, 0x4c, 0xa5, 0x09, 0x6c, 0x9d, 0x52, 0x7b, 0xb9, 0x2c, 0x2c, 0x3e, 0xc5, 0x49, 0x80, 0x14, 0xe1, 0x8a, 0xf1, 0x2e, 0xa2, 0x1e, 0x9a, 0x11, 0x00, 0x85, 0xed, 0x43, 0x4f, 0x00, 0xf9, 0x2b, 0x26, 0x77, 0x2c, 0xe0, 0x5e, 0x63, 0x66, 0x53, 0x86, 0x87, 0xa2, 0x68, 0x71, 0xdc, 0x32, 0x41}, + {0xd5, 0xa1, 0x1a, 0x05, 0xba, 0xba, 0x33, 0x00, 0x55, 0x36, 0x2a, 0xfe, 0x8b, 0x80, 0xb1, 0x54, 0x08, 0x6f, 0x6f, 0x8c, 0x0a, 0x64, 0x80, 0xeb, 0x68, 0xc7, 0xba, 0x12, 0x4a, 0xa1, 0x42, 0xba, 0xac, 0x5e, 0x1d, 0xfc, 0xa0, 0x5c, 0x98, 0x84, 0x76, 0xd1, 0xa4, 0x25, 0xd5, 0xd2, 0x97, 0x77}, + {0x20, 0x99, 0xf6, 0x3d, 0xbc, 0xc8, 0x7a, 0x51, 0x18, 0x8c, 0xc3, 0x36, 0xbe, 0x04, 0xc0, 0x85, 0xfe, 0x2e, 0x89, 0xad, 0x2a, 0x7d, 0x77, 0x53, 0x12, 0x5c, 0x41, 0x2b, 0xc7, 0x41, 0x93, 0x54, 0xd8, 0x5c, 0xc4, 0xe9, 0xe0, 0x8d, 0xba, 0x2d, 0xc6, 0x8a, 0xf8, 0x7e, 0x55, 0xfa, 0x26, 0xb5}, + {0xfd, 0x0c, 0x70, 0xe7, 0x89, 0x89, 0xcd, 0x34, 0x28, 0x71, 0x74, 0xde, 0xf8, 0x82, 0xd3, 0xb9, 0x4e, 0xae, 0x30, 0x88, 0xc4, 0x42, 0xc8, 0x75, 0x54, 0x6e, 0x58, 0x8c, 0xea, 0x62, 0x15, 0x8c, 0x2d, 0xd2, 0x92, 0x55, 0xdb, 0xf4, 0x62, 0xe6, 0xae, 0x42, 0xf9, 0xb5, 0xd7, 0xe8, 0x74, 0xef}, + {0x69, 0xb4, 0x06, 0x18, 0x27, 0x7a, 0x55, 0x2a, 0x7e, 0x19, 0xb0, 0xab, 0xff, 0xf3, 0x4b, 0xeb, 0x0d, 0xd8, 0x67, 0x51, 0x9e, 0x9b, 0x9e, 0x99, 0x33, 0x2b, 0xf5, 0xa2, 0x65, 0x7d, 0x8a, 0x37, 0xde, 0x7c, 0x48, 0x94, 0x74, 0xc1, 0xe1, 0xcf, 0x60, 0x70, 0x92, 0xdf, 0x78, 0xe1, 0xac, 0x34}, + {0x20, 0x2e, 0x04, 0x8b, 0x9a, 0xe9, 0x50, 0x20, 0x44, 0x59, 0xb7, 0xc6, 0xd7, 0xd3, 0x1a, 0xa5, 0x2d, 0xb7, 0x7b, 0x4d, 0x6e, 0x73, 0x07, 0x80, 0xdf, 0x46, 0xeb, 0x25, 0xc2, 0xf0, 0xc4, 0x84, 0x28, 0x23, 0x80, 0x01, 0x69, 0x8a, 0x4d, 0x5c, 0x81, 0x2e, 0xeb, 0x81, 0xde, 0xe6, 0x9c, 0xe7}, + {0xe1, 0xc2, 0x69, 0x76, 0x6e, 0x0e, 0x04, 0x01, 0xfc, 0xc4, 0x9c, 0xc0, 0xad, 0x14, 0x39, 0xc1, 0x30, 0xeb, 0xf2, 0x80, 0x1d, 0x9d, 0xf2, 0x2e, 0x3a, 0x1b, 0x60, 0x6b, 0x5b, 0xde, 0xd9, 0xca, 0xc5, 0x74, 0x57, 0xc2, 0x30, 0x05, 0xf4, 0x91, 0x4a, 0xf1, 0xae, 0x5f, 0x4f, 0x95, 0x2c, 
0xac}, + {0x8f, 0x59, 0xc3, 0xb3, 0x06, 0x3c, 0x0f, 0x4e, 0x5a, 0x19, 0xb8, 0x58, 0xc1, 0x7d, 0x77, 0xf8, 0xaa, 0xf3, 0xff, 0x96, 0xbe, 0x4e, 0x10, 0xff, 0x30, 0x94, 0x95, 0x3a, 0x27, 0xcd, 0xba, 0x4c, 0x18, 0x2b, 0x08, 0x74, 0xa5, 0x39, 0xcf, 0xc2, 0x32, 0x46, 0x58, 0x4e, 0x31, 0x89, 0x0c, 0xc9}, + {0x5e, 0x5e, 0x84, 0xdb, 0xc4, 0x3e, 0xd8, 0xcc, 0x85, 0x3b, 0x49, 0xf1, 0x0f, 0x11, 0x02, 0xa9, 0x84, 0xbe, 0x1c, 0x48, 0xd2, 0xda, 0xd6, 0x93, 0xd2, 0x7f, 0x46, 0xb9, 0xb4, 0x8f, 0xd6, 0x80, 0x31, 0x9f, 0x51, 0x78, 0x63, 0xcf, 0x04, 0x07, 0x0f, 0xed, 0xe6, 0x7a, 0xfe, 0xd0, 0x46, 0x2f}, + {0x09, 0x66, 0x2f, 0x64, 0x9a, 0x02, 0x60, 0xb6, 0xf5, 0x37, 0xc2, 0x89, 0x5e, 0xf9, 0xbf, 0x02, 0xc6, 0x8b, 0x7a, 0xfe, 0xec, 0x50, 0xc2, 0x9a, 0xc7, 0xf2, 0x47, 0x00, 0x72, 0x13, 0x38, 0x05, 0x52, 0xcd, 0x00, 0x70, 0x4f, 0x3b, 0x58, 0xe8, 0x35, 0x7e, 0xc1, 0x24, 0x70, 0x19, 0x36, 0xf0}, + {0xb3, 0x2e, 0xe9, 0x6c, 0xa9, 0x3c, 0x94, 0x8d, 0x6c, 0xdf, 0x18, 0x57, 0xcd, 0x28, 0x5f, 0x90, 0x2f, 0x87, 0xc0, 0xf1, 0x76, 0xb4, 0x91, 0x2a, 0xdb, 0x9e, 0xea, 0x66, 0x08, 0x39, 0x2a, 0xbe, 0xf8, 0x03, 0x4d, 0x26, 0x4b, 0xe3, 0x16, 0xa7, 0xd3, 0xe7, 0x45, 0x8d, 0x71, 0xb8, 0xd3, 0x66}, + {0x5f, 0x07, 0x86, 0xb0, 0x81, 0x09, 0x75, 0x43, 0x0e, 0x66, 0x2e, 0x1d, 0x11, 0x9b, 0x75, 0x71, 0x46, 0xa9, 0x71, 0x89, 0x7f, 0xf4, 0x73, 0x1a, 0x0b, 0xa5, 0x17, 0x2c, 0xb8, 0x6c, 0xdf, 0x19, 0xe4, 0x1d, 0x72, 0xc8, 0x63, 0x2e, 0xc1, 0x57, 0x38, 0x5a, 0x8c, 0x3f, 0x6f, 0x54, 0xdb, 0x2b}, + {0x57, 0xde, 0x52, 0x20, 0x82, 0x3e, 0x40, 0xa3, 0x84, 0xe0, 0xd0, 0x1f, 0x1a, 0xd8, 0x9f, 0x8a, 0x6d, 0xf9, 0x33, 0xd3, 0x49, 0x1f, 0x0f, 0x69, 0x11, 0xa7, 0x69, 0xdd, 0x05, 0xed, 0xce, 0x5a, 0x52, 0xa5, 0x9d, 0xf8, 0x1e, 0xcb, 0xdf, 0xda, 0x6d, 0x58, 0x90, 0x59, 0x10, 0xe0, 0xfa, 0x72}, + {0xae, 0x98, 0x20, 0x94, 0xfe, 0xfa, 0xe5, 0x20, 0x99, 0xf0, 0xc3, 0xe1, 0xed, 0x97, 0x8d, 0x94, 0x23, 0x05, 0xaf, 0x5b, 0x00, 0x68, 0x57, 0xcd, 0xf6, 0x55, 0x0d, 0xe0, 0x83, 0x13, 0x22, 0xf3, 0xbf, 0x3e, 0xe4, 0xb8, 0x5d, 0xbd, 0x5f, 0x02, 0xac, 0x63, 0x42, 0xed, 0x71, 0xcd, 0xa6, 0x45}, + {0xdf, 0x7f, 0xa3, 0x9c, 0x91, 0x63, 0xee, 0x5a, 0x03, 0x6c, 0x16, 0x9b, 0xc3, 0x9e, 0x8e, 0xfb, 0x57, 0x24, 0xbc, 0x58, 0xa4, 0xda, 0x3c, 0x93, 0xbd, 0x29, 0xd7, 0xc9, 0x4d, 0x22, 0xbe, 0x8b, 0x7a, 0xe0, 0x3f, 0x12, 0x1c, 0x5f, 0xf1, 0x91, 0xb0, 0xe0, 0x53, 0xf1, 0xac, 0xc4, 0x55, 0x6b}, + {0xae, 0x41, 0xe7, 0x29, 0x1d, 0x56, 0x4d, 0x68, 0x19, 0xa3, 0xfe, 0xe6, 0xc5, 0xb7, 0x12, 0x22, 0x52, 0x4f, 0x79, 0x9c, 0x35, 0xef, 0x89, 0x1e, 0xbf, 0xca, 0xb9, 0x7d, 0x72, 0x55, 0xc6, 0x8c, 0x28, 0x2f, 0x71, 0xbc, 0x0a, 0x69, 0xef, 0x53, 0x96, 0x63, 0x1f, 0x2b, 0xed, 0xc0, 0xec, 0x56}, + {0xbe, 0xfa, 0x1e, 0x04, 0x44, 0x4a, 0x73, 0x35, 0x82, 0xf2, 0xe7, 0x65, 0xe9, 0x67, 0x78, 0x56, 0x01, 0xe1, 0x62, 0x45, 0x3c, 0xd7, 0x62, 0xf5, 0xd1, 0x29, 0xbd, 0x98, 0x4f, 0x57, 0xfa, 0x58, 0xea, 0x9d, 0xc5, 0x41, 0xca, 0x11, 0x11, 0x15, 0x2a, 0xff, 0xa1, 0x84, 0x5f, 0x94, 0x7f, 0x8f}, + {0x92, 0x1e, 0xef, 0x68, 0x4b, 0x75, 0x5e, 0x0a, 0x92, 0xbb, 0xe6, 0x2b, 0x06, 0x1b, 0x38, 0xf4, 0x89, 0x03, 0x88, 0x9f, 0x61, 0x96, 0xc2, 0x55, 0xa0, 0x27, 0x6d, 0x02, 0x70, 0x0b, 0x94, 0xce, 0x47, 0x4e, 0x4c, 0xe0, 0x55, 0xa0, 0xcc, 0x47, 0xc8, 0xee, 0xb1, 0x51, 0x80, 0x01, 0x30, 0x29}, + {0xcf, 0x98, 0xf1, 0x22, 0x83, 0x4d, 0x90, 0x94, 0x49, 0xf8, 0xbc, 0xa3, 0x81, 0xb4, 0x3e, 0x11, 0x9e, 0x78, 0xe6, 0xd4, 0x26, 0xdf, 0x79, 0xdb, 0xe2, 0x5b, 0xee, 0x76, 0x1b, 0x65, 0x82, 0x8d, 0x9d, 0x59, 0x14, 0xd0, 0x11, 0x2b, 0xd4, 0x9a, 0xfd, 0x09, 0xe2, 0x0e, 0x57, 0xe3, 0xa3, 0xdb}, + {0x90, 0xd9, 0x58, 0x8c, 0xc9, 0x24, 
0x52, 0xc0, 0x88, 0xac, 0x8e, 0xd3, 0x63, 0x36, 0xda, 0x8a, 0xf7, 0xf8, 0x30, 0xfd, 0xc5, 0x30, 0x32, 0x1b, 0x4a, 0x8c, 0xcf, 0x82, 0xd9, 0x54, 0xf1, 0xce, 0xfe, 0x55, 0x05, 0x27, 0x96, 0x15, 0x8e, 0x46, 0xa6, 0xf5, 0x44, 0xd0, 0x94, 0xf7, 0x97, 0x63}, + {0x07, 0x9c, 0x80, 0x15, 0xe7, 0x31, 0xba, 0x9c, 0xeb, 0xb2, 0x80, 0x40, 0xd2, 0x67, 0x3a, 0x02, 0xce, 0x4c, 0xbe, 0xe6, 0x6f, 0xea, 0xec, 0x62, 0x86, 0x9b, 0x3e, 0xde, 0x14, 0xcf, 0xd0, 0x8d, 0xaf, 0xeb, 0x7b, 0x84, 0x78, 0xab, 0x79, 0x2a, 0xc7, 0x4b, 0x54, 0x99, 0xc6, 0x2e, 0xb0, 0x5d}, + {0x82, 0x2d, 0x0c, 0x6a, 0x7f, 0x5b, 0x0a, 0xd1, 0xb4, 0x4a, 0xe7, 0x36, 0xc0, 0xc5, 0xcb, 0x90, 0x55, 0x8b, 0x36, 0x4e, 0x33, 0x8a, 0xef, 0xf9, 0x7a, 0x9f, 0x29, 0xf7, 0x18, 0xad, 0xd4, 0x3a, 0xfc, 0x03, 0x55, 0xf5, 0x41, 0xca, 0xbd, 0xe2, 0x82, 0xc8, 0xae, 0x8e, 0x84, 0x6d, 0xda, 0x42}, + {0xc3, 0x74, 0xbd, 0x74, 0x87, 0xd0, 0x85, 0xd6, 0x2f, 0x48, 0xd8, 0xb8, 0x0b, 0xf1, 0x89, 0xb9, 0x53, 0x1a, 0xf0, 0x72, 0x34, 0x77, 0x2e, 0x8f, 0x09, 0x48, 0xd9, 0x15, 0xdf, 0xe1, 0x64, 0xfd, 0xfd, 0xa5, 0x42, 0xb2, 0x66, 0xbe, 0x72, 0x76, 0x36, 0xcb, 0x4c, 0xa5, 0xf4, 0x85, 0xf8, 0x91}, + {0x45, 0x66, 0x51, 0x2d, 0x7a, 0x47, 0xc9, 0x73, 0xc3, 0x35, 0x70, 0x4f, 0xce, 0x06, 0x7e, 0xd6, 0x1e, 0x67, 0x1c, 0x10, 0xc9, 0x9c, 0x0a, 0x87, 0x95, 0x73, 0x97, 0x1a, 0xfd, 0x2a, 0xce, 0xc8, 0xf2, 0x4f, 0x03, 0x30, 0xc7, 0x26, 0xd8, 0xb4, 0x29, 0xf8, 0xa4, 0x29, 0xf1, 0xdb, 0x3a, 0x42}, + {0xfa, 0x9b, 0x9a, 0xa1, 0x7f, 0xce, 0x65, 0x5a, 0x72, 0x4c, 0x02, 0x86, 0x52, 0x1f, 0x5a, 0x6b, 0x0d, 0xa6, 0x15, 0xdb, 0x4e, 0x6a, 0xea, 0xc9, 0x8b, 0xde, 0xa2, 0x51, 0xcf, 0x88, 0xfb, 0xcb, 0x14, 0x67, 0x9d, 0x34, 0x76, 0x6e, 0x6e, 0x12, 0x44, 0x22, 0xb9, 0x44, 0xe6, 0xea, 0x1d, 0xa0}, + {0x22, 0xb6, 0x78, 0x74, 0x37, 0x8b, 0x63, 0x92, 0x2a, 0x00, 0xf5, 0x7a, 0xf3, 0x15, 0xa9, 0xf8, 0x51, 0xd0, 0x92, 0x60, 0x2d, 0x44, 0x28, 0x04, 0x2b, 0x8f, 0x8f, 0xfe, 0x7a, 0x1f, 0x32, 0xe0, 0x24, 0x05, 0x36, 0x13, 0x02, 0x49, 0xd5, 0x11, 0x47, 0x7d, 0x7c, 0xe4, 0x02, 0x82, 0xfc, 0x6b}, + {0x88, 0x3c, 0x96, 0xda, 0x83, 0x2f, 0x6f, 0xc5, 0xf2, 0xb4, 0x6c, 0xab, 0x78, 0x9d, 0x7c, 0x4d, 0x83, 0x44, 0x74, 0x9e, 0x0a, 0x10, 0xd7, 0xf9, 0x3b, 0x39, 0xb0, 0xc0, 0xc8, 0x20, 0x6e, 0x62, 0xd7, 0x18, 0x13, 0x49, 0xde, 0x7e, 0x33, 0x90, 0x03, 0x84, 0x64, 0x84, 0xfa, 0x9b, 0x68, 0x9a}, + {0x2e, 0xd3, 0x4f, 0xe1, 0x7f, 0x60, 0x5c, 0x9e, 0x99, 0xdf, 0x34, 0x8b, 0xe9, 0xc6, 0x63, 0xa7, 0x2e, 0x02, 0xd3, 0xe9, 0x73, 0xc6, 0xf7, 0x23, 0xf5, 0xe6, 0xb9, 0x08, 0x4e, 0x9e, 0xe7, 0xf7, 0x9b, 0xd5, 0x57, 0x7a, 0xf6, 0x4e, 0x42, 0x07, 0x97, 0x0b, 0xfe, 0xc2, 0xd1, 0xa5, 0xe7, 0xba}, + {0x90, 0x05, 0xc9, 0x5a, 0x1b, 0x93, 0x8c, 0xda, 0xd2, 0x34, 0xcc, 0xac, 0x4f, 0xa6, 0x11, 0x4c, 0xef, 0x3f, 0xe3, 0xcc, 0x5d, 0x5a, 0x9a, 0x5e, 0xe9, 0xa1, 0x05, 0x29, 0x8c, 0x1d, 0x48, 0xb2, 0x5a, 0xcf, 0xac, 0x83, 0x40, 0xdf, 0xc3, 0x4c, 0xdf, 0xa0, 0x1e, 0x25, 0x73, 0x20, 0x2f, 0x54}, + {0x33, 0x7e, 0x2c, 0xc0, 0x41, 0x73, 0xb1, 0x50, 0x44, 0x55, 0x9c, 0x46, 0x0e, 0x5b, 0x35, 0x68, 0x67, 0x88, 0x8c, 0x41, 0x9a, 0x51, 0x38, 0xf0, 0xe1, 0xf6, 0xdb, 0x06, 0xae, 0x8e, 0xed, 0x6c, 0x53, 0x02, 0xf5, 0xd3, 0xcb, 0x76, 0x36, 0xdf, 0x88, 0x6e, 0xaf, 0xc4, 0xc0, 0x5e, 0x52, 0x09}, + {0x6f, 0x40, 0xfc, 0xc3, 0x2d, 0x48, 0xa6, 0x90, 0x25, 0x27, 0x21, 0x73, 0xb4, 0x48, 0xce, 0x51, 0x06, 0x2d, 0x51, 0xb9, 0xb7, 0xd6, 0x1a, 0x6a, 0x17, 0xb0, 0x5c, 0xf0, 0x36, 0x91, 0xfc, 0x6e, 0x10, 0xde, 0x97, 0x60, 0x2a, 0x75, 0x74, 0xd2, 0x13, 0xe8, 0xf8, 0x8b, 0xe3, 0xee, 0x71, 0x40}, + {0x7e, 0x8e, 0x7d, 0x45, 0xeb, 0x49, 0xcd, 0x3c, 0x35, 0x24, 0x68, 0x16, 0xaf, 0x2d, 
0xe7, 0x62, 0xe0, 0x89, 0x23, 0x8e, 0xde, 0x76, 0xf8, 0x85, 0xc4, 0x06, 0xb1, 0x9e, 0xc7, 0xdb, 0x32, 0x6f, 0x22, 0xe8, 0x4a, 0xd5, 0x69, 0x04, 0xf0, 0xe6, 0x41, 0x6b, 0xf1, 0xd3, 0x78, 0xcc, 0x05, 0x93}, + {0xc4, 0xe2, 0x4d, 0xa7, 0x69, 0xae, 0x0c, 0xdd, 0xd4, 0xc8, 0x3e, 0x54, 0x76, 0xbf, 0x33, 0xf1, 0xe0, 0x91, 0x6e, 0x02, 0x20, 0x82, 0x95, 0x53, 0xa1, 0x73, 0x93, 0x63, 0x35, 0x35, 0x16, 0x60, 0x36, 0xdb, 0xe0, 0xf0, 0x85, 0x11, 0xc8, 0xe0, 0x84, 0xde, 0x9d, 0xf1, 0x62, 0xe0, 0xad, 0x3b}, + {0x3c, 0xf8, 0x5d, 0xf3, 0x8e, 0xb4, 0x26, 0x18, 0x0c, 0x2c, 0xdf, 0x50, 0xa7, 0x25, 0x6d, 0xaa, 0x8e, 0x6e, 0x2e, 0x45, 0xa4, 0x77, 0xa6, 0x6a, 0x78, 0x58, 0xf7, 0x3b, 0x5e, 0x6f, 0x92, 0xa5, 0x09, 0x5c, 0x53, 0x99, 0xbe, 0x24, 0xa1, 0xda, 0xf8, 0xee, 0x41, 0x4b, 0x36, 0xbf, 0x02, 0xef}, + {0x6d, 0x3e, 0x80, 0x33, 0xb4, 0x47, 0xb8, 0xc1, 0x36, 0x27, 0xe4, 0xe1, 0x04, 0x9e, 0x11, 0xa1, 0x5a, 0x41, 0xbd, 0x7c, 0x3d, 0x26, 0x71, 0xc0, 0xa1, 0xed, 0x03, 0xd9, 0x3f, 0x4c, 0x09, 0x59, 0xb5, 0xe3, 0xd7, 0xfb, 0x0c, 0x32, 0xa6, 0x6b, 0x36, 0xfd, 0x05, 0xe1, 0xd5, 0x94, 0xf9, 0xd6}, + {0x77, 0x50, 0x30, 0xc2, 0x72, 0x38, 0xc0, 0x3d, 0xa8, 0x2e, 0xe8, 0x32, 0x18, 0xfb, 0x84, 0x8d, 0xe5, 0x5d, 0xac, 0x17, 0xb5, 0x68, 0xdd, 0x31, 0x6a, 0x4b, 0xea, 0xee, 0xa2, 0x7d, 0x61, 0x0d, 0xb0, 0x86, 0x4f, 0x60, 0xe4, 0x3f, 0x3b, 0x97, 0xc8, 0xb6, 0x40, 0xc9, 0x5c, 0x0b, 0x02, 0xc5}, + {0x0a, 0x1f, 0x1c, 0xc7, 0xb5, 0xea, 0xda, 0xcb, 0x08, 0xc3, 0x8a, 0x9b, 0x6e, 0x3c, 0x55, 0x4b, 0xb8, 0x4f, 0x71, 0x8d, 0x31, 0xef, 0xc7, 0x0f, 0xa7, 0x17, 0xa2, 0xdd, 0xa2, 0xf8, 0xf3, 0xa3, 0x6e, 0x6e, 0xf9, 0xa1, 0x53, 0xe7, 0x9a, 0xc1, 0xa0, 0xbe, 0x5f, 0x5b, 0xe5, 0xfa, 0x0c, 0x4d}, + {0x26, 0xcd, 0xba, 0x61, 0xef, 0x79, 0xc1, 0x3a, 0x61, 0xbd, 0x85, 0x0d, 0xb7, 0x2e, 0x14, 0x3d, 0x3e, 0x4a, 0x07, 0x3d, 0x01, 0xc8, 0x1f, 0x92, 0xfc, 0x73, 0x24, 0xcd, 0xe4, 0x23, 0x99, 0xb2, 0x2c, 0xba, 0x43, 0x73, 0xcd, 0x01, 0x49, 0xcb, 0x26, 0x2f, 0x1c, 0x01, 0xcc, 0x96, 0x57, 0xc8}, + {0x6a, 0x68, 0x23, 0x0c, 0xb6, 0x0f, 0xff, 0x28, 0x6e, 0x22, 0xb6, 0xc6, 0x5e, 0xc3, 0xda, 0x39, 0xde, 0xe5, 0x10, 0x24, 0x36, 0x80, 0x8d, 0x0a, 0x97, 0xfc, 0xc2, 0x5c, 0x0d, 0xa5, 0x55, 0x0f, 0x6f, 0x10, 0x28, 0x35, 0x75, 0xfe, 0xf9, 0x76, 0xac, 0x90, 0x2f, 0xac, 0x1c, 0x1e, 0x26, 0xa7}, + {0x89, 0x04, 0xc8, 0xcc, 0x4c, 0x22, 0xe2, 0x69, 0x9d, 0xa3, 0x13, 0x86, 0x10, 0xf2, 0xd8, 0x70, 0x1f, 0xb4, 0x5e, 0x3c, 0x60, 0xbf, 0xa4, 0x11, 0x27, 0x41, 0xf6, 0x19, 0xcb, 0x85, 0x96, 0xfd, 0x2b, 0x4e, 0xb3, 0x96, 0x0e, 0x78, 0x8b, 0x9c, 0xd6, 0x3b, 0xff, 0x4c, 0x1e, 0x7e, 0xcb, 0xb0}, + {0x7e, 0x31, 0x6e, 0xb8, 0x5d, 0xc6, 0xdd, 0x2b, 0xf5, 0xbe, 0x4d, 0x65, 0xc9, 0x88, 0x7b, 0x65, 0xa8, 0xeb, 0xef, 0x7a, 0x99, 0x27, 0x62, 0xb5, 0x52, 0xe5, 0x2d, 0xce, 0x07, 0x53, 0xe2, 0x6d, 0x77, 0xe5, 0x0f, 0xc5, 0x18, 0x0b, 0x52, 0x9b, 0xb4, 0xc3, 0x1c, 0xbe, 0x16, 0x2b, 0xca, 0x64}, + {0x2c, 0xb8, 0xca, 0x33, 0xb1, 0xf8, 0x20, 0x23, 0x48, 0xbf, 0xf3, 0x0d, 0xd3, 0x32, 0x9d, 0x58, 0xa2, 0x90, 0x1c, 0x8f, 0x20, 0x07, 0x2b, 0xb0, 0x74, 0x45, 0x58, 0xf0, 0x37, 0x95, 0xbb, 0x03, 0x1d, 0x42, 0x5a, 0xae, 0x76, 0x7f, 0x8f, 0x01, 0x70, 0x4b, 0xa1, 0xa4, 0xd2, 0xb2, 0x80, 0x0e}, + {0x2c, 0x98, 0x59, 0x79, 0xfe, 0xa7, 0x48, 0xdd, 0xfa, 0x71, 0xaa, 0x85, 0xa9, 0xa4, 0x8b, 0x5c, 0x26, 0x08, 0x3a, 0xbd, 0x0c, 0x2a, 0xf1, 0xa4, 0x07, 0x34, 0x87, 0xa9, 0xe0, 0xa8, 0x94, 0x41, 0x62, 0x9e, 0x62, 0x72, 0xd8, 0x09, 0x98, 0x0e, 0x37, 0xd1, 0x5c, 0xc9, 0x66, 0x47, 0x5b, 0xd6}, + {0x4a, 0x66, 0x7d, 0x63, 0x48, 0xe7, 0xfb, 0x34, 0xf7, 0x9b, 0x25, 0x23, 0xe4, 0x87, 0x3b, 0x55, 0x13, 0x58, 0xcb, 0x2a, 0x4b, 0x64, 
0xe3, 0xff, 0x29, 0x95, 0xa2, 0x1a, 0xfa, 0x74, 0xf8, 0x99, 0x42, 0xe6, 0x3b, 0x4d, 0xb8, 0x4a, 0x37, 0xd9, 0x87, 0x46, 0x07, 0x93, 0x20, 0xd6, 0xa4, 0x88}, + {0x5c, 0x57, 0x90, 0x8f, 0x5d, 0x49, 0xc2, 0xd6, 0x64, 0x97, 0xd5, 0xd1, 0xd8, 0x31, 0x30, 0xe8, 0x96, 0x7c, 0xdc, 0xbe, 0xca, 0x35, 0x05, 0x74, 0x53, 0xaf, 0x4a, 0xae, 0xd7, 0xc4, 0x88, 0x1f, 0xf3, 0x1f, 0xe4, 0x0e, 0xfe, 0x35, 0x8e, 0x2d, 0x64, 0x6a, 0x32, 0x9c, 0x46, 0x12, 0xf4, 0xd0}, + {0x85, 0xdb, 0x16, 0x44, 0xae, 0xbf, 0xcf, 0x7a, 0x84, 0x1e, 0x32, 0x94, 0x48, 0x08, 0x91, 0x02, 0xa4, 0xb7, 0xd1, 0xfc, 0xd7, 0x27, 0x70, 0xa8, 0xff, 0x1d, 0x5f, 0x87, 0x72, 0x96, 0x2e, 0xfd, 0xcc, 0x17, 0x14, 0x20, 0xcb, 0xb6, 0xff, 0x1f, 0xe2, 0xc7, 0xec, 0x05, 0x95, 0x04, 0x30, 0x06}, + {0x46, 0x60, 0xf7, 0x14, 0x85, 0x27, 0xce, 0x78, 0xc2, 0x54, 0xc3, 0x0d, 0x10, 0xc0, 0x64, 0x79, 0xd7, 0xdc, 0x42, 0x94, 0x5f, 0x0d, 0xad, 0xdb, 0x40, 0x78, 0x0c, 0x18, 0xa7, 0xcc, 0x90, 0xa4, 0xd8, 0xef, 0x9c, 0xa6, 0x6a, 0xa1, 0x8d, 0xdb, 0xe9, 0x21, 0xc2, 0x28, 0x17, 0x67, 0x07, 0x6f}, + {0xbc, 0x4a, 0x8e, 0x7a, 0x60, 0xb1, 0xf3, 0x48, 0x85, 0x63, 0x13, 0xd8, 0x25, 0x55, 0xeb, 0xed, 0xbd, 0x0c, 0x4b, 0x1d, 0x40, 0x53, 0xfd, 0xca, 0xb2, 0x43, 0x6a, 0x96, 0x5b, 0x96, 0xb2, 0x32, 0x66, 0x8c, 0x9b, 0xfc, 0x46, 0x07, 0xec, 0xb2, 0xaa, 0x7f, 0x27, 0x5b, 0x84, 0xf5, 0xc9, 0x86}, + {0x9a, 0xe9, 0xc6, 0xd5, 0x46, 0xbe, 0x9f, 0xb6, 0xd4, 0xc4, 0x5f, 0x45, 0xf0, 0xf2, 0x28, 0x0c, 0xdb, 0xa0, 0x0e, 0x4f, 0xe6, 0xac, 0x93, 0x0e, 0x06, 0xad, 0xd5, 0x70, 0x86, 0x7c, 0x3e, 0x82, 0x04, 0x8a, 0x84, 0x87, 0x2c, 0x7e, 0xf7, 0xf6, 0xd2, 0xfd, 0x09, 0x63, 0x5f, 0x20, 0xe6, 0x03}, + {0xde, 0x29, 0xf0, 0xa7, 0x98, 0x1c, 0x10, 0xe3, 0x5f, 0x7d, 0x95, 0x06, 0xb1, 0x71, 0xa8, 0x69, 0x9b, 0x4b, 0x0e, 0x0e, 0x32, 0xb0, 0xb8, 0x2f, 0x3c, 0xd6, 0x28, 0xb0, 0x4e, 0x0b, 0xd1, 0x09, 0x36, 0x60, 0x61, 0x67, 0xb5, 0xf1, 0xe9, 0x87, 0xbb, 0xed, 0xdf, 0x38, 0x9c, 0xf7, 0x58, 0xc3}, + {0x7f, 0xe5, 0xfa, 0xc4, 0xf8, 0x5a, 0x14, 0x5c, 0x33, 0x7d, 0xb2, 0x87, 0x26, 0xaf, 0x52, 0xf7, 0xf2, 0x5e, 0xeb, 0x63, 0x9a, 0x38, 0xc2, 0x03, 0x46, 0x61, 0xc8, 0xbd, 0x37, 0x1f, 0x67, 0x04, 0x27, 0xd6, 0xf2, 0x85, 0xfa, 0x9a, 0xab, 0x36, 0xc0, 0xc0, 0x68, 0x7b, 0x70, 0x3a, 0x01, 0x65}, + {0x79, 0x86, 0x8b, 0xb8, 0x0d, 0x75, 0x16, 0xa7, 0x9b, 0x6a, 0xe0, 0x82, 0x95, 0xb7, 0xe3, 0x9b, 0xde, 0x66, 0x7f, 0xa7, 0xe4, 0x45, 0x92, 0xaf, 0xe8, 0xf7, 0x6c, 0xa7, 0x5e, 0xdf, 0x1b, 0xc5, 0x99, 0xa5, 0xbc, 0x4a, 0x77, 0x97, 0x91, 0x2c, 0x43, 0xc5, 0xc2, 0xfa, 0xcb, 0xd3, 0x5f, 0xd6}, + {0x8e, 0xe7, 0xb2, 0x60, 0x10, 0xa2, 0x55, 0x3a, 0x52, 0xee, 0x21, 0xc4, 0x7a, 0x90, 0x07, 0x60, 0xb5, 0x8e, 0xbb, 0x1a, 0x5f, 0x30, 0x59, 0x1e, 0x85, 0xef, 0x00, 0xff, 0x23, 0x5c, 0x7a, 0xa7, 0x02, 0xbf, 0x72, 0xde, 0x49, 0x21, 0xd7, 0xfc, 0x29, 0x2c, 0x9e, 0x7f, 0x8b, 0xe8, 0xb3, 0x5e}, + {0x1b, 0x16, 0x75, 0x6f, 0xfb, 0xac, 0x84, 0x6c, 0x36, 0x3a, 0xde, 0x95, 0xf2, 0x7a, 0xa5, 0x09, 0x79, 0x34, 0xfd, 0x0d, 0xd1, 0x1e, 0x34, 0x3e, 0x29, 0x94, 0x2a, 0x00, 0xf0, 0x81, 0xfe, 0x8b, 0xef, 0xc9, 0x19, 0x62, 0xae, 0x96, 0x6a, 0x1e, 0xc5, 0x23, 0x79, 0x96, 0x26, 0x26, 0xb9, 0xf8}, + {0xc2, 0xec, 0xc9, 0x6c, 0xf5, 0xb3, 0x0e, 0xa1, 0x70, 0x29, 0x38, 0xc9, 0xcc, 0x63, 0xf1, 0xce, 0xf4, 0x76, 0x5b, 0x67, 0x13, 0xec, 0x83, 0xb3, 0xcb, 0xd5, 0x05, 0x51, 0xad, 0x1e, 0x17, 0xce, 0xf6, 0x80, 0x4d, 0x5f, 0x55, 0xed, 0x8c, 0x4e, 0x4e, 0xe7, 0xd6, 0x2f, 0xff, 0x4f, 0x83, 0x74}, + {0x8f, 0x9e, 0x50, 0xba, 0x45, 0x1c, 0xf3, 0x04, 0x5d, 0x5f, 0xc3, 0x0e, 0x1e, 0xe2, 0x6c, 0x9d, 0x38, 0x36, 0x3e, 0xe7, 0xbb, 0x17, 0x75, 0x54, 0x12, 0xb6, 0xc4, 0x8f, 0xd3, 0x70, 
0xbc, 0x87, 0xd6, 0x4e, 0xad, 0x46, 0x7d, 0x58, 0x4f, 0x68, 0x6e, 0x28, 0xfe, 0x36, 0x5a, 0xc3, 0x72, 0x84}, + {0x4a, 0xac, 0x21, 0x69, 0x08, 0xef, 0x62, 0x93, 0x54, 0x22, 0x9e, 0xb3, 0x0e, 0x72, 0x41, 0x91, 0x0f, 0x0a, 0x63, 0xef, 0x9e, 0x28, 0xf6, 0x85, 0x7a, 0x65, 0x3a, 0x41, 0xe9, 0x6b, 0x98, 0x00, 0xd6, 0x06, 0x12, 0x9c, 0xdb, 0xf2, 0xe5, 0x41, 0xc6, 0x54, 0xf6, 0x05, 0x16, 0xd6, 0x38, 0x6e}, + {0xe6, 0xad, 0xe6, 0x59, 0x28, 0xd3, 0x7f, 0x76, 0x59, 0x32, 0x32, 0x13, 0xea, 0xf3, 0xf8, 0xee, 0xcd, 0x98, 0x73, 0x90, 0xc6, 0x3e, 0xfa, 0x8e, 0xd8, 0xff, 0xec, 0xd7, 0xbf, 0x5a, 0x17, 0x18, 0x33, 0x80, 0x7d, 0x54, 0x37, 0x6c, 0xc3, 0x1a, 0x71, 0x90, 0xf9, 0x68, 0xca, 0x1e, 0x43, 0x25}, + {0x51, 0xcf, 0x34, 0x61, 0x60, 0xa7, 0xf8, 0xf5, 0xb8, 0xcf, 0xa0, 0x12, 0xa2, 0x4b, 0xf7, 0x0b, 0x18, 0xed, 0xd4, 0xad, 0x77, 0xb3, 0x78, 0x1d, 0x2f, 0x4c, 0xe7, 0x73, 0x73, 0x08, 0x47, 0x3d, 0x2e, 0x03, 0x65, 0x7e, 0xe4, 0xbb, 0x26, 0xdb, 0xb2, 0x4d, 0xb4, 0x8c, 0x5e, 0x01, 0x1e, 0x49}, + {0x6e, 0x1b, 0x19, 0x59, 0x7c, 0x2b, 0xe3, 0x00, 0x12, 0x28, 0x43, 0x49, 0xbf, 0xd8, 0xfe, 0x34, 0x73, 0x26, 0x89, 0xbb, 0x7f, 0x59, 0xca, 0xc2, 0xc7, 0x1a, 0x88, 0x4a, 0x5a, 0x97, 0xcb, 0xb4, 0xa4, 0xa0, 0x19, 0x5f, 0xaa, 0x6e, 0x9f, 0x48, 0x97, 0xfd, 0xec, 0x0a, 0xcf, 0x9e, 0xcc, 0x10}, + {0x46, 0x74, 0x91, 0x9d, 0xf7, 0x61, 0x82, 0xa6, 0xb2, 0xd4, 0x07, 0x68, 0x88, 0x7e, 0x1c, 0xbe, 0x07, 0xac, 0x7e, 0xc1, 0xf7, 0xf1, 0x6f, 0x6f, 0x10, 0x3a, 0xdd, 0xd3, 0x82, 0x27, 0x13, 0x2c, 0xde, 0xbd, 0x00, 0x0c, 0xa5, 0xd7, 0x89, 0xbc, 0x91, 0xd2, 0x20, 0xfd, 0x0c, 0x62, 0xcd, 0x9a}, + {0xf5, 0xb7, 0xcf, 0x31, 0x31, 0x7f, 0x79, 0x51, 0x54, 0xf8, 0x50, 0xa3, 0xb7, 0x88, 0x27, 0x42, 0x61, 0x74, 0x29, 0xfd, 0x00, 0x0c, 0x32, 0x84, 0xfe, 0x69, 0x2c, 0xb1, 0xdc, 0x66, 0x33, 0x70, 0x89, 0x9d, 0xd6, 0xc5, 0xef, 0x51, 0xa7, 0x01, 0x22, 0x73, 0x9c, 0x22, 0xfa, 0xfd, 0xb7, 0x00}, + {0x01, 0x59, 0x8f, 0x63, 0x9d, 0x37, 0x57, 0x3d, 0x20, 0x76, 0x78, 0xe1, 0xe2, 0x26, 0x6a, 0x7b, 0xe3, 0x4f, 0x25, 0xc5, 0x18, 0x7a, 0xda, 0xb0, 0xd4, 0x34, 0x88, 0x24, 0xd8, 0xbb, 0x30, 0x40, 0x2f, 0x4b, 0x2f, 0xab, 0x6d, 0x47, 0x7b, 0x51, 0x76, 0xad, 0xd6, 0xac, 0xd4, 0xf1, 0x31, 0x10}, + {0x9a, 0xdd, 0x5d, 0x4e, 0xc5, 0xc9, 0xaf, 0xef, 0x50, 0x9a, 0x0e, 0xb3, 0x97, 0x2d, 0x93, 0x0a, 0x36, 0xe2, 0x86, 0xdd, 0xe4, 0x2d, 0x3f, 0x58, 0x96, 0x68, 0x14, 0x66, 0x92, 0x83, 0x05, 0x59, 0xcf, 0xac, 0x59, 0x66, 0x85, 0xce, 0x71, 0x81, 0x1b, 0xa5, 0x18, 0x57, 0x01, 0x72, 0x49, 0xbf}, + {0x57, 0xaf, 0x78, 0x35, 0xd3, 0xcf, 0x9c, 0x33, 0x91, 0xe4, 0x23, 0x3f, 0xa5, 0x42, 0x4b, 0x67, 0x9f, 0x9f, 0x38, 0xad, 0xc2, 0x9a, 0xa2, 0x7e, 0x53, 0x69, 0x3f, 0x4a, 0xd5, 0xa0, 0x62, 0x07, 0xb8, 0x44, 0xa1, 0x5d, 0x69, 0xd6, 0x9d, 0xbe, 0x2e, 0x63, 0x70, 0x0b, 0xdb, 0x7d, 0xeb, 0x78}, + {0xda, 0x12, 0x0b, 0x1e, 0xe3, 0x22, 0xaf, 0x5d, 0xfd, 0x7c, 0xd7, 0x63, 0x98, 0x8d, 0xea, 0x7e, 0xaa, 0xff, 0x36, 0xfe, 0xd1, 0xf1, 0x3d, 0x1e, 0x36, 0xb2, 0x2b, 0x53, 0x20, 0x39, 0x95, 0x40, 0x43, 0x26, 0x2d, 0x3f, 0x10, 0xfc, 0x5c, 0x7a, 0xe7, 0x84, 0xb5, 0x34, 0x5f, 0x01, 0x92, 0xaa}, + {0x2d, 0x7e, 0x06, 0x19, 0x57, 0x77, 0x42, 0xb2, 0xf8, 0x4b, 0xaf, 0x37, 0x8b, 0xbc, 0xb0, 0xdb, 0x09, 0x62, 0xc0, 0x99, 0x12, 0x5e, 0x30, 0x4a, 0x33, 0x1e, 0x78, 0xa0, 0xcc, 0xf8, 0x28, 0x8f, 0x97, 0x3d, 0x9b, 0xf2, 0x5d, 0x21, 0xf4, 0x02, 0xf8, 0xfc, 0xd0, 0x65, 0x17, 0x15, 0x58, 0x54}, + {0xe0, 0xd0, 0x6b, 0x69, 0x5d, 0x89, 0x2a, 0x04, 0xc2, 0x76, 0x9c, 0x66, 0xf0, 0xb0, 0xc4, 0x96, 0x79, 0x8a, 0xc9, 0x33, 0xa5, 0x7b, 0xe2, 0x08, 0x89, 0x13, 0x8d, 0xfe, 0xad, 0xb5, 0xf5, 0xd0, 0x74, 0x9d, 0x31, 0x2b, 0x7e, 0x09, 
0xe8, 0xd3, 0xc3, 0xca, 0x1b, 0x5b, 0x58, 0x87, 0x61, 0xdc}, + {0xb2, 0x63, 0x0e, 0xa9, 0x5e, 0xdb, 0x62, 0x36, 0x22, 0xfc, 0xca, 0xb0, 0x78, 0x3c, 0xf7, 0x42, 0x25, 0x1c, 0xb1, 0xe7, 0x63, 0xcb, 0xdd, 0xfd, 0xf9, 0x91, 0x13, 0x68, 0xdc, 0xf2, 0x70, 0xd2, 0xe1, 0x02, 0x31, 0x27, 0xef, 0xbc, 0xb1, 0xf4, 0xb4, 0xb0, 0xeb, 0xa5, 0x84, 0x3c, 0x7a, 0xd0}, + {0x6c, 0x7e, 0xec, 0x9a, 0x56, 0x8d, 0x4f, 0x2f, 0xd1, 0xc4, 0x8f, 0xd4, 0xfe, 0x0c, 0x9d, 0xcf, 0x5b, 0x48, 0xdc, 0x81, 0xbc, 0x2a, 0xb1, 0x3d, 0xb3, 0xbb, 0x47, 0xa4, 0xc4, 0x8b, 0x8d, 0x06, 0x7a, 0xd9, 0xab, 0xe2, 0xb6, 0x60, 0x1a, 0x24, 0x72, 0xfb, 0x75, 0x8a, 0xfa, 0xc8, 0x19, 0xcb}, + {0x65, 0x9b, 0x07, 0xfc, 0x0d, 0x2b, 0x93, 0x41, 0x5b, 0x7c, 0xfd, 0x5b, 0x37, 0xe3, 0xe2, 0xc2, 0x68, 0x08, 0x85, 0x02, 0xdd, 0x13, 0xde, 0x8c, 0xf2, 0xf2, 0xc0, 0x3a, 0xc6, 0xb8, 0x33, 0x0b, 0xf1, 0x1c, 0x56, 0x1c, 0x14, 0x63, 0x43, 0x18, 0x98, 0x19, 0xea, 0xe0, 0xb9, 0x48, 0xb5, 0xcd}, + {0x24, 0x17, 0xf2, 0xd8, 0x8a, 0xc2, 0xf0, 0xa8, 0x13, 0x82, 0x5e, 0x13, 0xf3, 0x82, 0xd2, 0x25, 0x20, 0x2d, 0x43, 0xab, 0xf5, 0x23, 0x97, 0x33, 0x33, 0x4f, 0xd0, 0x8c, 0xf3, 0xb3, 0x5c, 0xe6, 0x77, 0x08, 0x7c, 0xb6, 0x21, 0x01, 0x30, 0x2f, 0xd8, 0x3b, 0x68, 0x8d, 0xc7, 0x46, 0x6d, 0x56}, + {0x67, 0xdf, 0xb8, 0xac, 0xee, 0x4a, 0xc8, 0xb2, 0x97, 0xd3, 0x5a, 0x09, 0x8d, 0x64, 0x0f, 0x71, 0x05, 0x8b, 0xfd, 0x5b, 0x35, 0x43, 0xa1, 0x5e, 0xcf, 0x87, 0xcd, 0x8f, 0x2f, 0x21, 0x6d, 0x7d, 0x3a, 0x41, 0x44, 0xca, 0x89, 0x4b, 0xf8, 0x87, 0x36, 0xe3, 0x45, 0x7f, 0xd1, 0x00, 0x11, 0x5c}, + {0x1a, 0x37, 0x6a, 0x8b, 0xd3, 0x03, 0x63, 0x7e, 0x9d, 0xbc, 0xff, 0x24, 0x1e, 0x7e, 0x48, 0xb3, 0x29, 0x44, 0x1e, 0xd3, 0x48, 0x46, 0x71, 0xff, 0xdd, 0x3b, 0x2e, 0x1d, 0xed, 0xd6, 0xd0, 0x79, 0x71, 0x22, 0x26, 0xf5, 0x1c, 0x70, 0x8a, 0x06, 0x52, 0x65, 0x06, 0x00, 0xed, 0x1e, 0x5e, 0x46}, + {0x77, 0x58, 0x80, 0x74, 0x9c, 0xfd, 0xf3, 0x2f, 0x3e, 0xf3, 0x7e, 0x6b, 0x23, 0x37, 0xb8, 0xb6, 0xa5, 0x76, 0x92, 0xf8, 0x22, 0xd1, 0xca, 0x21, 0x7f, 0x4a, 0x71, 0xb8, 0xfb, 0xa3, 0x8c, 0x86, 0xb5, 0x6b, 0x20, 0x74, 0xa2, 0xc0, 0xab, 0x4b, 0x56, 0xce, 0xba, 0xf6, 0x61, 0x13, 0x86, 0x08}, + {0xf7, 0x50, 0xec, 0x3f, 0x40, 0x61, 0x26, 0x2b, 0xe8, 0xf7, 0x67, 0x69, 0x94, 0xcb, 0x0b, 0xaa, 0xe8, 0x7b, 0x37, 0xf8, 0x9f, 0x3e, 0x5b, 0x35, 0x6d, 0xd0, 0x3f, 0xe7, 0x42, 0x04, 0x09, 0x62, 0x44, 0xc8, 0x40, 0xf9, 0xf8, 0x08, 0x9e, 0x4c, 0x07, 0x44, 0x92, 0x85, 0xb9, 0x8e, 0xe3, 0x77}, +} + +func TestBuilder48(t *testing.T) { + const numBuckets = 3 + const valueSize = 48 + + // Create a table with 3 buckets. + builder, err := NewBuilderSized("", numBuckets*targetEntriesPerBucket, valueSize) + require.NoError(t, err) + require.NotNil(t, builder) + assert.Len(t, builder.buckets, 3) + defer builder.Close() + + // Insert a few entries. + keys := []string{"hello", "world", "blub", "foo"} + for i, key := range keys { + require.NoError(t, builder.Insert([]byte(key), []byte(testValues48[i]))) + } + { + // print test values + for _, tc := range testValues48 { + spew.Dump(FormatByteSlice(tc)) + } + } + + // Create index file. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal index. + require.NoError(t, builder.Seal(context.TODO(), targetFile)) + + // Assert binary content. 
+ buf, err := os.ReadFile(targetFile.Name())
+ require.NoError(t, err)
+ expected := concatBytes(
+ // --- File header
+ // magic
+ []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0
+ // value size (48 bytes in this case)
+ []byte{0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 1
+ // num buckets
+ []byte{0x03, 0x00, 0x00, 0x00}, // 2
+ // version (1) + padding
+ []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 3
+
+ // --- Bucket header 0
+ // hash domain
+ []byte{0x00, 0x00, 0x00, 0x00}, // 4
+ // num entries
+ []byte{0x01, 0x00, 0x00, 0x00}, // 5
+ // hash len
+ []byte{0x03}, // 6
+ // padding
+ []byte{0x00}, // 7
+ // file offset
+ []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8
+
+ // --- Bucket header 1
+ // hash domain
+ []byte{0x00, 0x00, 0x00, 0x00}, // 9
+ // num entries
+ []byte{0x01, 0x00, 0x00, 0x00}, // 10
+ // hash len
+ []byte{0x03}, // 11
+ // padding
+ []byte{0x00}, // 12
+ // file offset
+ []byte{0x83, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13
+
+ // --- Bucket header 2
+ // hash domain
+ []byte{0x00, 0x00, 0x00, 0x00}, // 14
+ // num entries
+ []byte{0x02, 0x00, 0x00, 0x00}, // 15
+ // hash len
+ []byte{0x03}, // 16
+ // padding
+ []byte{0x00}, // 17
+ // file offset
+ []byte{0xb6, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18
+
+ // --- Bucket 0
+ // hash
+ []byte{0xe2, 0xdb, 0x55}, // 19
+ // value
+ []byte{0xcc, 0x0a, 0xd4, 0x66, 0x32, 0x50, 0xc3, 0x96, 0x8b, 0x5c, 0x77, 0x7e, 0xb8, 0xfd, 0x9c, 0x78, 0xea, 0xfb, 0xd3, 0x4f, 0x1a, 0x59, 0x4e, 0xda, 0x1d, 0x90, 0x2a, 0xcd, 0x79, 0xb6, 0x0b, 0x2d, 0xea, 0x76, 0x36, 0x54, 0x65, 0xe6, 0x53, 0x1b, 0x70, 0x38, 0x84, 0xb2, 0xbf, 0x5d, 0xf9, 0x30}, // 20
+
+ // --- Bucket 1
+ // hash
+ []byte{0x92, 0xcd, 0xbb}, // 21
+ // value
+ []byte{0x7c, 0x18, 0x51, 0xd7, 0x63, 0x83, 0xf9, 0xc5, 0xaa, 0x48, 0x3c, 0x8e, 0xff, 0xf0, 0xf1, 0xab, 0xee, 0xda, 0xb0, 0x2f, 0x92, 0xcc, 0xb8, 0x78, 0x11, 0x5b, 0xa0, 0xb9, 0xfa, 0xf5, 0x2e, 0xb4, 0xd7, 0x10, 0x2d, 0x7b, 0xe5, 0xb6, 0x9f, 0xd0, 0xb1, 0xff, 0xd0, 0xf2, 0xef, 0xcd, 0x72, 0x1a}, // 22
+
+ // --- Bucket 2
+ // hash
+ []byte{0x98, 0x3d, 0xbd}, // 23
+ // value
+ []byte{0xbb, 0x12, 0x08, 0x5f, 0x73, 0xee, 0x39, 0x69, 0x9f, 0x6e, 0x5a, 0xd8, 0x21, 0x2d, 0x43, 0xbe, 0x01, 0xc1, 0x3f, 0xc5, 0xfa, 0x86, 0x09, 0x7e, 0x97, 0x61, 0x59, 0xb8, 0xc9, 0x16, 0x47, 0xe3, 0x18, 0xfe, 0x52, 0x1e, 0xa2, 0x98, 0x59, 0x83, 0x16, 0x88, 0x5b, 0x46, 0x83, 0x2b, 0xa3, 0x2a}, // 24
+ // hash
+ []byte{0xe3, 0x09, 0x6b}, // 25
+ // value
+ []byte{0x0b, 0x2f, 0xc2, 0x4d, 0xc5, 0x98, 0x8b, 0x13, 0xd9, 0x17, 0xf8, 0xc1, 0xb8, 0x59, 0xd4, 0x24, 0xad, 0xef, 0xe5, 0xb6, 0xb8, 0xb9, 0xba, 0x01, 0x9c, 0xe0, 0x7f, 0x96, 0x25, 0x83, 0xd6, 0xbf, 0xa3, 0xb2, 0xf2, 0x29, 0xb9, 0xa1, 0xa1, 0x92, 0xd0, 0xc0, 0xe5, 0x06, 0x94, 0xea, 0x6c, 0xb3}, // 26
+ )
+ assert.Equal(t, expected, buf)
+
+ {
+ splitSizes := []int{
+ // --- File header
+ 8, 8, 4, 12,
+ // --- Bucket header 0
+ 4, 4, 1, 1, 6,
+ // --- Bucket header 1
+ 4, 4, 1, 1, 6,
+ // --- Bucket header 2
+ 4, 4, 1, 1, 6,
+ // --- Bucket 0
+ 3, valueSize,
+ // --- Bucket 1
+ 3, valueSize,
+ // --- Bucket 2
+ 3, valueSize, 3, valueSize,
+ }
+ splitExpected := splitBufferWithProvidedSizes(expected, splitSizes)
+ splitGot := splitBufferWithProvidedSizes(buf, splitSizes)
+
+ comparisons := compareBufferArrays(splitExpected, splitGot)
+
+ for i, equal := range comparisons {
+ if !equal {
+ t.Errorf("%d: \nexpected: %v, \n got: %v", i, FormatByteSlice(splitExpected[i]), FormatByteSlice(splitGot[i]))
+ }
+ }
+ }
+
+ // Reset file offset.
+ _, seekErr := targetFile.Seek(0, io.SeekStart)
+ require.NoError(t, seekErr)
+
+ // Open index.
+ db, err := Open(targetFile)
+ require.NoError(t, err, "Failed to open generated index")
+ require.NotNil(t, db)
+
+ // File header assertions.
+ assert.Equal(t, Header{
+ ValueSize: valueSize,
+ NumBuckets: numBuckets,
+ }, db.Header)
+
+ // Get bucket handles.
+ buckets := make([]*Bucket, numBuckets)
+ for i := range buckets {
+ buckets[i], err = db.GetBucket(uint(i))
+ require.NoError(t, err)
+ }
+
+ // Ensure out-of-bounds bucket accesses fail.
+ _, wantErr := db.GetBucket(numBuckets)
+ assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3")
+
+ // Bucket header assertions.
+ assert.Equal(t, BucketDescriptor{
+ BucketHeader: BucketHeader{
+ HashDomain: 0x00,
+ NumEntries: 1,
+ HashLen: 3,
+ FileOffset: 0x50,
+ },
+ Stride: 3 + valueSize, // 3 + 48
+ OffsetWidth: valueSize,
+ }, buckets[0].BucketDescriptor)
+ assert.Equal(t, BucketHeader{
+ HashDomain: 0x00,
+ NumEntries: 1,
+ HashLen: 3,
+ FileOffset: 131,
+ }, buckets[1].BucketHeader)
+ assert.Equal(t, BucketHeader{
+ HashDomain: 0x00,
+ NumEntries: 2,
+ HashLen: 3,
+ FileOffset: 182,
+ }, buckets[2].BucketHeader)
+
+ assert.Equal(t, uint8(3+valueSize), buckets[2].Stride)
+ // Test lookups.
+ entries, err := buckets[2].Load( /*batchSize*/ 3)
+ require.NoError(t, err)
+ assert.Equal(t, []Entry{
+ {
+ Hash: 12402072,
+ Value: []byte(testValues48[3]),
+ },
+ {
+ Hash: 7014883,
+ Value: []byte(testValues48[2]),
+ },
+ }, entries)
+
+ {
+ for i, keyString := range keys {
+ key := []byte(keyString)
+ bucket, err := db.LookupBucket(key)
+ require.NoError(t, err)
+
+ value, err := bucket.Lookup(key)
+ require.NoError(t, err)
+ assert.Equal(t, []byte(testValues48[i]), value)
+ }
+ }
+}
+
+func TestBuilder48_Random(t *testing.T) {
+ if testing.Short() {
+ t.Skip("Skipping long test")
+ }
+
+ numKeys := uint(len(testValues48))
+ const keySize = uint(16)
+ const valueSize = 48
+ const queries = int(10000)
+
+ // Create new builder session.
+ builder, err := NewBuilderSized("", numKeys, valueSize)
+ require.NoError(t, err)
+ require.NotNil(t, builder)
+ require.NotEmpty(t, builder.buckets)
+
+ // Ensure we cleaned up after ourselves.
+ defer func() {
+ _, statErr := os.Stat(builder.dir)
+ assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr)
+ }()
+ defer builder.Close()
+
+ // Insert items to temp buckets.
+ preInsert := time.Now()
+ key := make([]byte, keySize)
+ for i := uint(0); i < numKeys; i++ {
+ binary.LittleEndian.PutUint64(key, uint64(i))
+ err := builder.Insert(key, []byte(testValues48[i]))
+ require.NoError(t, err)
+ }
+ t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert))
+
+ // Create file for final index.
+ targetFile, err := os.CreateTemp("", "compactindex-final-")
+ require.NoError(t, err)
+ defer os.Remove(targetFile.Name())
+ defer targetFile.Close()
+
+ // Seal to final index.
+ preSeal := time.Now()
+ sealErr := builder.Seal(context.TODO(), targetFile)
+ require.NoError(t, sealErr, "Seal failed")
+ t.Logf("Sealed in %s", time.Since(preSeal))
+
+ // Print some stats.
+ targetStat, err := targetFile.Stat()
+ require.NoError(t, err)
+ t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size()))
+ t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys))
+ t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds())
+
+ // Open index.
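+ // (Open only needs an io.ReaderAt, so the seek below is defensive rather than required.)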
+ _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + + // Run query benchmark. + preQuery := time.Now() + for i := queries; i != 0; i-- { + keyN := uint64(rand.Int63n(int64(numKeys))) + binary.LittleEndian.PutUint64(key, keyN) + + bucket, err := db.LookupBucket(key) + require.NoError(t, err) + + value, err := bucket.Lookup(key) + require.NoError(t, err) + require.Equal(t, []byte(testValues48[keyN]), value) + } + t.Logf("Queried %d items", queries) + t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds()) +} diff --git a/compactindexsized/build8_test.go b/compactindexsized/build8_test.go new file mode 100644 index 00000000..b7901426 --- /dev/null +++ b/compactindexsized/build8_test.go @@ -0,0 +1,257 @@ +package compactindexsized + +import ( + "context" + "encoding/binary" + "errors" + "io" + "io/fs" + "math/rand" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbauerster/mpb/v8/decor" +) + +func itob(i uint64) []byte { + b := make([]byte, 8) + binary.LittleEndian.PutUint64(b, i) + return b +} + +func btoi(b []byte) uint64 { + return binary.LittleEndian.Uint64(b) +} + +func TestBuilder8(t *testing.T) { + const numBuckets = 3 + const valueSize = 8 + + // Create a table with 3 buckets. + builder, err := NewBuilderSized("", numBuckets*targetEntriesPerBucket, valueSize) + require.NoError(t, err) + require.NotNil(t, builder) + assert.Len(t, builder.buckets, 3) + defer builder.Close() + + // Insert a few entries. + require.NoError(t, builder.Insert([]byte("hello"), itob(1))) + require.NoError(t, builder.Insert([]byte("world"), itob(2))) + require.NoError(t, builder.Insert([]byte("blub"), itob(3))) + + // Create index file. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal index. + require.NoError(t, builder.Seal(context.TODO(), targetFile)) + + // Assert binary content. + buf, err := os.ReadFile(targetFile.Name()) + require.NoError(t, err) + assert.Equal(t, []byte{ + // --- File header + // magic + 0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78, + // value size + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // num buckets + 0x03, 0x00, 0x00, 0x00, + // padding + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 0 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 1 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 2 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 0 + // hash + 0xe2, 0xdb, 0x55, + // value + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 1 + // hash + 0x92, 0xcd, 0xbb, + // value + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 2 + // hash + 0xe3, 0x09, 0x6b, + // value + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, buf) + + // Reset file offset. 
+ _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + + // Open index. + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + require.NotNil(t, db) + + // File header assertions. + assert.Equal(t, Header{ + ValueSize: valueSize, + NumBuckets: numBuckets, + }, db.Header) + + // Get bucket handles. + buckets := make([]*Bucket, numBuckets) + for i := range buckets { + buckets[i], err = db.GetBucket(uint(i)) + require.NoError(t, err) + } + + // Ensure out-of-bounds bucket accesses fail. + _, wantErr := db.GetBucket(numBuckets) + assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") + + // Bucket header assertions. + assert.Equal(t, BucketDescriptor{ + BucketHeader: BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x50, + }, + Stride: 11, // 3 + 8 + OffsetWidth: 8, + }, buckets[0].BucketDescriptor) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x5b, + }, buckets[1].BucketHeader) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x66, + }, buckets[2].BucketHeader) + + // Test lookups. + entries, err := buckets[2].Load( /*batchSize*/ 4) + require.NoError(t, err) + assert.Equal(t, []Entry{ + { + Hash: 0x6b09e3, + Value: itob(3), + }, + }, entries) +} + +func TestBuilder8_Random(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long test") + } + + const numKeys = uint(500000) + const keySize = uint(16) + const valueSize = 8 + const queries = int(10000) + + // Create new builder session. + builder, err := NewBuilderSized("", numKeys, valueSize) + require.NoError(t, err) + require.NotNil(t, builder) + require.NotEmpty(t, builder.buckets) + + // Ensure we cleaned up after ourselves. + defer func() { + _, statErr := os.Stat(builder.dir) + assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) + }() + defer builder.Close() + + // Insert items to temp buckets. + preInsert := time.Now() + key := make([]byte, keySize) + for i := uint(0); i < numKeys; i++ { + binary.LittleEndian.PutUint64(key, uint64(i)) + err := builder.Insert(key, itob(uint64(rand.Int63n(int64(100000))))) + require.NoError(t, err) + } + t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) + + // Create file for final index. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal to final index. + preSeal := time.Now() + sealErr := builder.Seal(context.TODO(), targetFile) + require.NoError(t, sealErr, "Seal failed") + t.Logf("Sealed in %s", time.Since(preSeal)) + + // Print some stats. + targetStat, err := targetFile.Stat() + require.NoError(t, err) + t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) + t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) + t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) + + // Open index. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + + // Run query benchmark. 
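+ // (Each iteration exercises the same two-step LookupBucket + Lookup path that a real caller would use.)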
+ preQuery := time.Now()
+ for i := queries; i != 0; i-- {
+ keyN := uint64(rand.Int63n(int64(numKeys)))
+ binary.LittleEndian.PutUint64(key, keyN)
+
+ bucket, err := db.LookupBucket(key)
+ require.NoError(t, err)
+
+ value, err := bucket.Lookup(key)
+ require.NoError(t, err)
+ // Values were drawn from [0, 100000), so zero is possible; a "> 0" assertion would flake.
+ require.True(t, btoi(value) < 100000)
+ }
+ t.Logf("Queried %d items", queries)
+ t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
+}
diff --git a/compactindexsized/compactindex.go b/compactindexsized/compactindex.go
new file mode 100644
index 00000000..18aa43ca
--- /dev/null
+++ b/compactindexsized/compactindex.go
@@ -0,0 +1,278 @@
+// Package compactindexsized is an immutable hashtable index format inspired by djb's constant database (cdb).
+//
+// # Design
+//
+// Compactindex is used to create secondary indexes over arbitrary flat files.
+// Each index is a single, immutable flat file.
+//
+// Index files consist of a space-optimized and query-optimized key-value-like table.
+//
+// Instead of storing actual keys, the format stores FKS dynamic perfect hashes.
+// And instead of variable-length values, the format stores fixed-size byte strings (in the original compactindex, these were 8-byte file offsets).
+//
+// As a result, the database effectively only supports two operations, similarly to cdb.
+// (Note that the actual Go interface is a bit more flexible.)
+//
+// func Create(kv map[[]byte][]byte) *Index
+// func (*Index) Lookup(key []byte) (value []byte, exist bool)
+//
+// # Buckets
+//
+// The set of items is split into buckets of approx 10000 records.
+// The number of buckets is unlimited.
+//
+// The key-to-bucket assignment is determined by 64-bit xxHash using uniform discrete hashing over the key space.
+//
+// The index file header also mentions the number of buckets and the file offset of each bucket.
+//
+// # Tables
+//
+// Each bucket contains a table of entries, indexed by a collision-free hash function.
+//
+// The hash function used in the entry table is xxHash.
+// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme).
+// This hash domain is also recorded at the bucket header.
+//
+// Each bucket entry is a constant-size record consisting of a 3-byte hash and the value itself.
+// The width of the value field is fixed and recorded in the file header.
+//
+// # Querying
+//
+// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads.
+// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc...
+//
+// The DB struct itself performs zero memory allocations and therefore also doesn't cache.
+// It is therefore recommended to provide an io.ReaderAt backed by a cache to improve performance.
+//
+// Given a key, the query strategy is simple:
+//
+// 1. Hash key to bucket using global hash function
+// 2. Retrieve bucket offset from bucket header table
+// 3. Hash key to entry using per-bucket hash function
+// 4. Search for entry in bucket (binary search)
+//
+// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium.
+//
+// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time.
+// When doing range requests against high-latency remote storage (e.g. S3 buckets),
+// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once.
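+//
+// A minimal usage sketch (illustrative only; error handling is omitted, and
+// tmpDir, numItems, valueSize, key, and value are placeholder variables):
+//
+//    builder, _ := NewBuilderSized(tmpDir, numItems, valueSize)
+//    _ = builder.Insert(key, value) // len(value) must equal valueSize
+//    _ = builder.Seal(ctx, indexFile)
+//
+//    db, _ := Open(indexFile)
+//    got, _ := db.Lookup(key) // returns ErrNotFound for unknown keys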
+//
+// # Construction
+//
+// Constructing a compactindex requires upfront knowledge of the number of items and the fixed value size.
+//
+// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency.
+//
+// The process works as follows:
+//
+// 1. Determine the number of buckets from the known item count
+// (the entry width follows from the fixed value size).
+// 2. Linear pass over input data, populating temporary files that
+// contain the unsorted entries of each bucket.
+// 3. For each bucket, brute force a perfect hash function that
+// defines a bijection between hash values and keys in the bucket.
+// 4. For each bucket, sort by hash values.
+// 5. Store to index.
+//
+// An alternative construction approach is available when the number of items is unknown upfront.
+// In this case, the set of keys is first serialized to a flat file.
+package compactindexsized
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package
+// - The values it indexes are N-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) and other values directly.
+
+import (
+ "encoding/binary"
+ "fmt"
+ "math"
+ "math/bits"
+ "sort"
+
+ "github.com/cespare/xxhash/v2"
+)
+
+// Magic are the first eight bytes of an index.
+var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'}
+
+const Version = uint8(1)
+
+// Header occurs once at the beginning of the index.
+type Header struct {
+ ValueSize uint64
+ NumBuckets uint32
+ Kind uint8
+}
+
+// headerSize is the size of the header at the beginning of the file.
+const headerSize = 32
+
+// Load checks the Magic sequence and loads the header fields.
+func (h *Header) Load(buf *[headerSize]byte) error {
+ // Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream.
+ if *(*[8]byte)(buf[:8]) != Magic {
+ return fmt.Errorf("not a radiance compactindex file")
+ }
+ *h = Header{
+ ValueSize: binary.LittleEndian.Uint64(buf[8:16]),
+ NumBuckets: binary.LittleEndian.Uint32(buf[16:20]),
+ }
+ // Check version.
+ if buf[20] != Version {
+ return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20])
+ }
+ h.Kind = buf[21]
+ // 10 bytes to spare for now. Might use it in the future.
+ // Force to zero for now.
+ for _, b := range buf[22:32] {
+ if b != 0x00 {
+ return fmt.Errorf("unsupported index: reserved header bytes are not zero")
+ }
+ }
+ return nil
+}
+
+func (h *Header) Store(buf *[headerSize]byte) {
+ copy(buf[0:8], Magic[:])
+ binary.LittleEndian.PutUint64(buf[8:16], h.ValueSize)
+ binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets)
+ buf[20] = Version
+ buf[21] = h.Kind
+ for i := 22; i < 32; i++ {
+ buf[i] = 0
+ }
+}
+
+// BucketHash returns the bucket index for the given key.
+//
+// Uses a truncated xxHash64 rotated until the result fits.
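+//
+// (NumBuckets rarely divides 2^64 evenly, so the first 2^64 mod n hash values are
+// re-permuted before the final modulo; this keeps the bucket assignment unbiased.)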
+func (h *Header) BucketHash(key []byte) uint {
+ u := xxhash.Sum64(key)
+ n := uint64(h.NumBuckets)
+ r := (-n) % n
+ for u < r {
+ u = hashUint64(u)
+ }
+ return uint(u % n)
+}
+
+// hashUint64 is a reversible uint64 permutation based on Google's
+// Murmur3 hash finalizer (public domain).
+func hashUint64(x uint64) uint64 {
+ x ^= x >> 33
+ x *= 0xff51afd7ed558ccd
+ x ^= x >> 33
+ x *= 0xc4ceb9fe1a85ec53
+ x ^= x >> 33
+ return x
+}
+
+// BucketHeader occurs at the beginning of each bucket.
+type BucketHeader struct {
+ HashDomain uint32
+ NumEntries uint32
+ HashLen uint8
+ FileOffset uint64
+}
+
+// bucketHdrLen is the size of the header preceding the hash table entries.
+const bucketHdrLen = 16
+
+func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) {
+ binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain)
+ binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries)
+ buf[8] = b.HashLen
+ buf[9] = 0
+ putUintLe(buf[10:16], b.FileOffset)
+}
+
+func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) {
+ b.HashDomain = binary.LittleEndian.Uint32(buf[0:4])
+ b.NumEntries = binary.LittleEndian.Uint32(buf[4:8])
+ b.HashLen = buf[8]
+ b.FileOffset = uintLe(buf[10:16])
+}
+
+// Hash returns the per-bucket hash of a key.
+func (b *BucketHeader) Hash(key []byte) uint64 {
+ xsum := EntryHash64(b.HashDomain, key)
+ // Mask sum by hash length.
+ return xsum & (math.MaxUint64 >> (64 - b.HashLen*8))
+}
+
+type BucketDescriptor struct {
+ BucketHeader
+ Stride uint8 // size of one entry in bucket
+ OffsetWidth uint8 // width of the value (offset) field in bucket
+}
+
+func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) {
+ e.Hash = uintLe(buf[0:b.HashLen])
+ e.Value = make([]byte, b.OffsetWidth)
+ copy(e.Value[:], buf[b.HashLen:b.HashLen+b.OffsetWidth])
+ return
+}
+
+func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) {
+ if len(buf) < int(b.Stride) {
+ panic("marshalEntry: buf too small")
+ }
+ putUintLe(buf[0:b.HashLen], e.Hash)
+ copy(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value[:])
+}
+
+// SearchSortedEntries performs an in-memory binary search for a given hash.
+func SearchSortedEntries(entries []Entry, hash uint64) *Entry {
+ i, found := sort.Find(len(entries), func(i int) int {
+ other := entries[i].Hash
+ // Note: This is safe because neither side exceeds 2^24.
+ return int(hash) - int(other)
+ })
+ if !found {
+ return nil
+ }
+ if i >= len(entries) || entries[i].Hash != hash {
+ return nil
+ }
+ return &entries[i]
+}
+
+// EntryHash64 is an xxHash-based hash function using an arbitrary prefix.
+func EntryHash64(prefix uint32, key []byte) uint64 {
+ const blockSize = 32
+ var prefixBlock [blockSize]byte
+ binary.LittleEndian.PutUint32(prefixBlock[:4], prefix)
+
+ var digest xxhash.Digest
+ digest.Reset()
+ digest.Write(prefixBlock[:])
+ digest.Write(key)
+ return digest.Sum64()
+}
+
+// Entry is a single element in a hash table.
+type Entry struct {
+ Hash uint64
+ Value []byte
+}
+
+// maxCls64 returns the max integer that has the same number of leading zeros as n.
+func maxCls64(n uint64) uint64 {
+ return math.MaxUint64 >> bits.LeadingZeros64(n)
+}
+
+// uintLe decodes an unsigned little-endian integer without bounds assertions.
+// Out-of-bounds bytes are treated as zero.
+func uintLe(buf []byte) uint64 {
+ var full [8]byte
+ copy(full[:], buf)
+ return binary.LittleEndian.Uint64(full[:])
+}
+
+// putUintLe encodes an unsigned little-endian integer without bounds assertions.
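+// Only the len(buf) least-significant bytes of x are written; higher bytes are dropped.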
+func putUintLe(buf []byte, x uint64) {
+ var full [8]byte
+ binary.LittleEndian.PutUint64(full[:], x)
+ copy(buf, full[:])
+}
diff --git a/compactindexsized/compactindex_test.go b/compactindexsized/compactindex_test.go
new file mode 100644
index 00000000..c85fc9c3
--- /dev/null
+++ b/compactindexsized/compactindex_test.go
@@ -0,0 +1,89 @@
+package compactindexsized
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package
+// - The values it indexes are N-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) and other values directly.
+
+import (
+ "math"
+ "math/rand"
+ "sort"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestMaxCls64(t *testing.T) {
+ cases := [][2]uint64{
+ {0x0000_0000_0000_0000, 0x0000_0000_0000_0000},
+ {0x0000_0000_0000_0001, 0x0000_0000_0000_0001},
+ {0x0000_0000_0000_0003, 0x0000_0000_0000_0002},
+ {0x0000_0000_0000_0003, 0x0000_0000_0000_0003},
+ {0x0000_0000_0000_0007, 0x0000_0000_0000_0004},
+ {0x0000_0000_FFFF_FFFF, 0x0000_0000_F000_000F},
+ {0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF},
+ }
+ for _, tc := range cases {
+ assert.Equal(t, tc[0], maxCls64(tc[1]))
+ }
+}
+
+func TestHeader_BucketHash(t *testing.T) {
+ const numItems = 500000
+ const numBuckets = 1000
+
+ header := Header{
+ NumBuckets: numBuckets,
+ }
+
+ keys := make([][]byte, numItems)
+ hits := make([]int, numBuckets)
+ for i := range keys {
+ var buf [16]byte
+ n, _ := rand.Read(buf[:])
+ keys[i] = buf[:n]
+ }
+
+ // Bounds check and count hits.
+ for _, key := range keys {
+ idx := header.BucketHash(key)
+ require.True(t, idx < numBuckets)
+ hits[idx]++
+ }
+
+ // Calculate standard deviation.
+ mean := float64(numItems) / float64(numBuckets)
+ var cumVariance float64
+ for _, bucketHits := range hits {
+ delta := float64(bucketHits) - mean
+ cumVariance += (delta * delta)
+ }
+ variance := cumVariance / float64(len(hits))
+ stddev := math.Sqrt(variance)
+ t.Logf("mean % 12.2f", mean)
+ normStddev := stddev / mean
+ t.Logf("stddev % 10.2f", stddev)
+ t.Logf("1σ / mean % 7.2f%%", 100*normStddev)
+
+ const failNormStddev = 0.08
+ if normStddev > failNormStddev {
+ t.Logf("FAIL: > %f%%", 100*failNormStddev)
+ t.Fail()
+ } else {
+ t.Logf(" OK: <= %f%%", 100*failNormStddev)
+ }
+
+ // Print percentiles.
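+ // (hits is sorted ascending first, so hits[round(q*len(hits))] reads off the q-th percentile.)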
+ sort.Ints(hits)
+ t.Logf("min % 10d", hits[0])
+ t.Logf("p01 % 10d", hits[int(math.Round(0.01*float64(len(hits))))])
+ t.Logf("p05 % 10d", hits[int(math.Round(0.05*float64(len(hits))))])
+ t.Logf("p10 % 10d", hits[int(math.Round(0.10*float64(len(hits))))])
+ t.Logf("p50 % 10d", hits[int(math.Round(0.50*float64(len(hits))))])
+ t.Logf("p90 % 10d", hits[int(math.Round(0.90*float64(len(hits))))])
+ t.Logf("p95 % 10d", hits[int(math.Round(0.95*float64(len(hits))))])
+ t.Logf("p99 % 10d", hits[int(math.Round(0.99*float64(len(hits))))])
+ t.Logf("max % 10d", hits[len(hits)-1])
+}
diff --git a/compactindexsized/fallocate_fake.go b/compactindexsized/fallocate_fake.go
new file mode 100644
index 00000000..8bc9b7cf
--- /dev/null
+++ b/compactindexsized/fallocate_fake.go
@@ -0,0 +1,27 @@
+package compactindexsized
+
+import (
+ "fmt"
+ "os"
+)
+
+func fake_fallocate(f *os.File, offset int64, size int64) error {
+ const blockSize = 4096
+ var zero [blockSize]byte
+
+ for size > 0 {
+ step := size
+ if step > blockSize {
+ step = blockSize
+ }
+
+ if _, err := f.Write(zero[:step]); err != nil {
+ return fmt.Errorf("failure during generic fallocate: %w", err)
+ }
+
+ offset += step
+ size -= step
+ }
+
+ return nil
+}
diff --git a/compactindexsized/fallocate_generic.go b/compactindexsized/fallocate_generic.go
new file mode 100644
index 00000000..cde99d7f
--- /dev/null
+++ b/compactindexsized/fallocate_generic.go
@@ -0,0 +1,11 @@
+//go:build !linux
+
+package compactindexsized
+
+import (
+ "os"
+)
+
+func fallocate(f *os.File, offset int64, size int64) error {
+ return fake_fallocate(f, offset, size)
+}
diff --git a/compactindexsized/fallocate_linux.go b/compactindexsized/fallocate_linux.go
new file mode 100644
index 00000000..03939627
--- /dev/null
+++ b/compactindexsized/fallocate_linux.go
@@ -0,0 +1,17 @@
+//go:build linux
+
+package compactindexsized
+
+import (
+ "fmt"
+ "os"
+ "syscall"
+)
+
+func fallocate(f *os.File, offset int64, size int64) error {
+ err := syscall.Fallocate(int(f.Fd()), 0, offset, size)
+ if err != nil {
+ return fmt.Errorf("failure during linux fallocate: %w", err)
+ }
+ return nil
+}
diff --git a/compactindexsized/query.go b/compactindexsized/query.go
new file mode 100644
index 00000000..54ad0102
--- /dev/null
+++ b/compactindexsized/query.go
@@ -0,0 +1,228 @@
+package compactindexsized
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindexsized` to avoid conflicts with the original package
+// - The values it indexes are N-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) and other values directly.
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+// DB is a compactindex handle.
+type DB struct {
+ Header
+ Stream io.ReaderAt
+ prefetch bool
+}
+
+// Open returns a handle to access a compactindex.
+//
+// The provided stream must start with the Magic byte sequence.
+// Tip: Use io.NewSectionReader to create aligned substreams when dealing with a file that contains multiple indexes.
+func Open(stream io.ReaderAt) (*DB, error) {
+ // Read the static 32-byte header.
+ // Ignore errors if the read fails after filling the buffer (e.g. EOF).
+ var fileHeader [headerSize]byte
+ n, readErr := stream.ReadAt(fileHeader[:], 0)
+ if n < len(fileHeader) {
+ // ReadAt must return non-nil error here.
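+ // (The io.ReaderAt contract guarantees err != nil whenever n < len(p).)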
+ return nil, readErr + } + db := new(DB) + if err := db.Header.Load(&fileHeader); err != nil { + return nil, err + } + db.Stream = stream + return db, nil +} + +func (db *DB) Prefetch(yes bool) { + db.prefetch = yes +} + +// GetKind returns the kind of the index. +func (db *DB) GetKind() uint8 { + return db.Header.Kind +} + +func (db *DB) GetValueSize() uint64 { + value := db.Header.ValueSize + if value == 0 { + panic("value size not set") + } + return value +} + +// Lookup queries for a key in the index and returns the value (offset), if any. +// +// Returns ErrNotFound if the key is unknown. +func (db *DB) Lookup(key []byte) ([]byte, error) { + bucket, err := db.LookupBucket(key) + if err != nil { + return nil, err + } + return bucket.Lookup(key) +} + +// LookupBucket returns a handle to the bucket that might contain the given key. +func (db *DB) LookupBucket(key []byte) (*Bucket, error) { + return db.GetBucket(db.Header.BucketHash(key)) +} + +// GetBucket returns a handle to the bucket at the given index. +func (db *DB) GetBucket(i uint) (*Bucket, error) { + if i >= uint(db.Header.NumBuckets) { + return nil, fmt.Errorf("out of bounds bucket index: %d >= %d", i, db.Header.NumBuckets) + } + + // Fill bucket handle. + bucket := &Bucket{ + BucketDescriptor: BucketDescriptor{ + Stride: db.entryStride(), + OffsetWidth: uint8(db.GetValueSize()), + }, + } + // Read bucket header. + readErr := bucket.BucketHeader.readFrom(db.Stream, i) + if readErr != nil { + return nil, readErr + } + bucket.Entries = io.NewSectionReader(db.Stream, int64(bucket.FileOffset), int64(bucket.NumEntries)*int64(bucket.Stride)) + if db.prefetch { + // TODO: find good value for numEntriesToPrefetch + numEntriesToPrefetch := minInt64(3_000, int64(bucket.NumEntries)) + prefetchSize := int64(db.entryStride()) * numEntriesToPrefetch + buf := make([]byte, prefetchSize) + _, err := bucket.Entries.ReadAt(buf, 0) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + } + return bucket, nil +} + +func minInt64(a, b int64) int64 { + if a < b { + return a + } + return b +} + +const HashSize = 3 + +func (db *DB) entryStride() uint8 { + offsetSize := db.GetValueSize() + return uint8(HashSize) + uint8(offsetSize) +} + +func bucketOffset(i uint) int64 { + return headerSize + int64(i)*bucketHdrLen +} + +func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { + var buf [bucketHdrLen]byte + n, err := rd.ReadAt(buf[:], bucketOffset(i)) + if n < len(buf) { + return err + } + b.Load(&buf) + return nil +} + +func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error { + var buf [bucketHdrLen]byte + b.Store(&buf) + _, err := wr.WriteAt(buf[:], bucketOffset(i)) + return err +} + +// Bucket is a database handle pointing to a subset of the index. +type Bucket struct { + BucketDescriptor + Entries *io.SectionReader +} + +// maxEntriesPerBucket is the hardcoded maximum permitted number of entries per bucket. +const maxEntriesPerBucket = 1 << 24 // (16 * stride) MiB + +// targetEntriesPerBucket is the average number of records in each hashtable bucket we aim for. +const targetEntriesPerBucket = 10000 + +// Load retrieves all entries in the hashtable. 
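+// Entries are read in chunks of batchSize records (a batchSize <= 0 falls back to a
+// default), and buckets claiming more than maxEntriesPerBucket entries are rejected.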
+func (b *Bucket) Load(batchSize int) ([]Entry, error) { + if batchSize <= 0 { + batchSize = 512 // default to reasonable batch size + } + // TODO bounds check + if b.NumEntries > maxEntriesPerBucket { + return nil, fmt.Errorf("refusing to load bucket with %d entries", b.NumEntries) + } + entries := make([]Entry, 0, b.NumEntries) + + stride := int(b.Stride) + buf := make([]byte, batchSize*stride) + off := int64(0) + for { + // Read another chunk. + n, err := b.Entries.ReadAt(buf, off) + // Decode all entries in it. + sub := buf[:n] + for len(sub) >= stride { + entries = append(entries, b.unmarshalEntry(sub)) + sub = sub[stride:] + off += int64(stride) + } + // Handle error. + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + break + } else if err != nil { + return nil, err + } + } + + return entries, nil +} + +// TODO: This binary search algo is not optimized for high-latency remotes yet. + +// Lookup queries for a key using binary search. +func (b *Bucket) Lookup(key []byte) ([]byte, error) { + target := b.Hash(key) + low := 0 + high := int(b.NumEntries) + return searchEytzinger(low, high, target, b.loadEntry) +} + +func (b *Bucket) loadEntry(i int) (Entry, error) { + off := int64(i) * int64(b.Stride) + buf := make([]byte, b.Stride) + n, err := b.Entries.ReadAt(buf, off) + if n != len(buf) { + return Entry{}, err + } + return b.unmarshalEntry(buf), nil +} + +// ErrNotFound marks a missing entry. +var ErrNotFound = errors.New("not found") + +func searchEytzinger(min int, max int, x uint64, getter func(int) (Entry, error)) ([]byte, error) { + var index int + for index < max { + k, err := getter(index) + if err != nil { + return nil, err + } + if k.Hash == x { + return k.Value, nil + } + index = index<<1 | 1 + if k.Hash < x { + index++ + } + } + return nil, ErrNotFound +} diff --git a/compactindexsized/query_test.go b/compactindexsized/query_test.go new file mode 100644 index 00000000..d6b5cd88 --- /dev/null +++ b/compactindexsized/query_test.go @@ -0,0 +1,58 @@ +package compactindexsized + +import ( + "bytes" + "errors" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type failReader struct{ err error } + +func (rd failReader) ReadAt([]byte, int64) (int, error) { + return 0, rd.err +} + +func TestOpen_ReadFail(t *testing.T) { + err := errors.New("oh no!") + db, dbErr := Open(failReader{err}) + require.Nil(t, db) + require.Same(t, err, dbErr) +} + +func TestOpen_InvalidMagic(t *testing.T) { + var buf [32]byte + rand.Read(buf[:]) + buf[1] = '.' 
// make test deterministic
+
+ db, dbErr := Open(bytes.NewReader(buf[:]))
+ require.Nil(t, db)
+ require.EqualError(t, dbErr, "not a radiance compactindex file")
+}
+
+func TestOpen_HeaderOnly(t *testing.T) {
+ buf := [32]byte{
+ // Magic
+ 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x',
+ // ValueSize
+ 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ // NumBuckets
+ 0x42, 0x00, 0x00, 0x00,
+ // Version (1) + padding
+ 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }
+
+ db, dbErr := Open(bytes.NewReader(buf[:]))
+ require.NotNil(t, db)
+ require.NoError(t, dbErr)
+
+ assert.NotNil(t, db.Stream)
+ assert.Equal(t, Header{
+ ValueSize: 0x1337,
+ NumBuckets: 0x42,
+ }, db.Header)
+}
diff --git a/compactindexsized/sort_test.go b/compactindexsized/sort_test.go
new file mode 100644
index 00000000..47e6e777
--- /dev/null
+++ b/compactindexsized/sort_test.go
@@ -0,0 +1,23 @@
+package compactindexsized
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestSort(t *testing.T) {
+ entries := make([]uint, 50)
+ for i := range entries {
+ entries[i] = uint(i)
+ }
+
+ // Sort entries. Note: sortWithCompare arranges them in the Eytzinger layout
+ // expected by searchEytzinger (see query.go), not in plain ascending order.
+ sortWithCompare(entries, func(i, j int) bool {
+ return entries[i] < entries[j]
+ })
+
+ sorted := []uint{0x1f, 0xf, 0x2a, 0x7, 0x17, 0x26, 0x2e, 0x3, 0xb, 0x13, 0x1b, 0x23, 0x28, 0x2c, 0x30, 0x1, 0x5, 0x9, 0xd, 0x11, 0x15, 0x19, 0x1d, 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x2f, 0x31, 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24}
+
+ assert.Equal(t, sorted, entries)
+}
From 78d8860b6bc0f19ae730ecde2d7986b1be4c185e Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Tue, 14 Nov 2023 15:05:53 +0100
Subject: [PATCH 03/63] Replace compactindex with compactindexsized (8 bytes)

---
 car-dag-traverser.go | 18 +++++++++---------
 cmd-rpc-server-car.go | 8 ++++----
 cmd-x-index-all.go | 16 +++++++---------
 epoch.go | 6 +++---
 index-cid-to-offset.go | 14 +++++++-------
 tools.go | 13 +++++++++++++
 6 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/car-dag-traverser.go b/car-dag-traverser.go
index 788b2ed6..6c8140cd 100644
--- a/car-dag-traverser.go
+++ b/car-dag-traverser.go
@@ -13,7 +13,7 @@ import (
 "github.com/ipld/go-car"
 "github.com/ipld/go-car/util"
 carv2 "github.com/ipld/go-car/v2"
- "github.com/rpcpool/yellowstone-faithful/compactindex"
+ "github.com/rpcpool/yellowstone-faithful/compactindexsized"
 "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
 "github.com/rpcpool/yellowstone-faithful/iplddecoders"
 "k8s.io/klog/v2"
@@ -88,7 +88,7 @@ func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIte
 }
 klog.Infof("Reading index from %s", indexFilePath)
- c2o, err := compactindex.Open(indexFile)
+ c2o, err := compactindexsized.Open(indexFile)
 if err != nil {
 return nil, fmt.Errorf("failed to open index: %w", err)
 }
@@ -126,9 +126,9 @@ func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIte
 }
 type SimpleIterator struct {
- c2o *compactindex.DB // index from cid to offset in the CAR file
- cr *carv2.Reader // the CAR file
- indexFile *os.File // the index file
+ c2o *compactindexsized.DB // index from cid to offset in the CAR file
+ cr *carv2.Reader // the CAR file
+ indexFile *os.File // the index file
 }
 func NewSimpleCarIterator(carPath string, indexFilePath string) (*SimpleIterator, error) {
@@ -153,23 +153,23 @@ func (t *SimpleIterator) Get(ctx context.Context, c cid.Cid) (*blocks.BasicBlock
 return node, err
 }
-func newOffsetFinderFunc(c2o *compactindex.DB) func(ctx
context.Context, c cid.Cid) (uint64, error) { +func newOffsetFinderFunc(c2o *compactindexsized.DB) func(ctx context.Context, c cid.Cid) (uint64, error) { return func(ctx context.Context, c cid.Cid) (uint64, error) { bucket, err := c2o.LookupBucket(c.Bytes()) if err != nil { - if err == compactindex.ErrNotFound { + if err == compactindexsized.ErrNotFound { return 0, ErrNotFound } return 0, fmt.Errorf("failed to lookup bucket: %w", err) } offset, err := bucket.Lookup(c.Bytes()) if err != nil { - if err == compactindex.ErrNotFound { + if err == compactindexsized.ErrNotFound { return 0, ErrNotFound } return 0, fmt.Errorf("failed to lookup offset: %w", err) } - return offset, nil + return btoi(offset), nil } } diff --git a/cmd-rpc-server-car.go b/cmd-rpc-server-car.go index 4bf3ff83..738d419e 100644 --- a/cmd-rpc-server-car.go +++ b/cmd-rpc-server-car.go @@ -12,8 +12,8 @@ import ( "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" "github.com/patrickmn/go-cache" - "github.com/rpcpool/yellowstone-faithful/compactindex" "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" @@ -75,7 +75,7 @@ func newCmd_rpcServerCar() *cli.Command { } defer cidToOffsetIndexFile.Close() - cidToOffsetIndex, err := compactindex.Open(cidToOffsetIndexFile) + cidToOffsetIndex, err := compactindexsized.Open(cidToOffsetIndexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -158,7 +158,7 @@ func createAndStartRPCServer_withCar( options *RpcServerOptions, carReader *carv2.Reader, remoteCarReader ReaderAtCloser, - cidToOffsetIndex *compactindex.DB, + cidToOffsetIndex *compactindexsized.DB, slotToCidIndex *compactindex36.DB, sigToCidIndex *compactindex36.DB, gsfaReader *gsfa.GsfaReader, @@ -224,7 +224,7 @@ type deprecatedRPCServer struct { lassieFetcher *lassieWrapper localCarReader *carv2.Reader remoteCarReader ReaderAtCloser - cidToOffsetIndex *compactindex.DB + cidToOffsetIndex *compactindexsized.DB slotToCidIndex *compactindex36.DB sigToCidIndex *compactindex36.DB gsfaReader *gsfa.GsfaReader diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index 1eae0589..c724ef94 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -17,8 +17,8 @@ import ( "github.com/ipfs/go-cid" carv1 "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex" "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" "k8s.io/klog/v2" @@ -157,7 +157,6 @@ func createAllIndexes( tmpDir, indexDir, numTotalItems, - targetFileSize, ) if err != nil { return nil, fmt.Errorf("failed to create cid_to_offset index: %w", err) @@ -363,23 +362,22 @@ type Builder_CidToOffset struct { tmpDir string indexDir string carPath string - index *compactindex.Builder + index *compactindexsized.Builder } func NewBuilder_CidToOffset( tmpDir string, indexDir string, numItems uint64, - targetFileSize uint64, ) (*Builder_CidToOffset, error) { tmpDir = filepath.Join(tmpDir, "index-cid-to-offset-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to create cid_to_offset tmp dir: %w", 
err) } - index, err := compactindex.NewBuilder( + index, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), - (targetFileSize), + 8, ) if err != nil { return nil, fmt.Errorf("failed to create cid_to_offset index: %w", err) @@ -392,7 +390,7 @@ func NewBuilder_CidToOffset( } func (b *Builder_CidToOffset) Put(c cid.Cid, offset uint64) error { - return b.index.Insert(c.Bytes(), offset) + return b.index.Insert(c.Bytes(), itob(offset)) } func (b *Builder_CidToOffset) Close() error { @@ -709,7 +707,7 @@ func verifyAllIndexes( type Index_CidToOffset struct { file *os.File - db *compactindex.DB + db *compactindexsized.DB } func OpenIndex_CidToOffset( @@ -720,7 +718,7 @@ func OpenIndex_CidToOffset( return nil, fmt.Errorf("failed to open index file: %w", err) } - index, err := compactindex.Open(indexFile) + index, err := compactindexsized.Open(indexFile) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } diff --git a/epoch.go b/epoch.go index 73eed471..f1cf9e5e 100644 --- a/epoch.go +++ b/epoch.go @@ -17,8 +17,8 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/patrickmn/go-cache" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex" "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" @@ -35,7 +35,7 @@ type Epoch struct { localCarReader *carv2.Reader remoteCarReader ReaderAtCloser remoteCarHeaderSize uint64 - cidToOffsetIndex *compactindex.DB + cidToOffsetIndex *compactindexsized.DB slotToCidIndex *compactindex36.DB sigToCidIndex *compactindex36.DB sigExists *bucketteer.Reader @@ -118,7 +118,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, cidToOffsetIndexFile.Close) - cidToOffsetIndex, err := compactindex.Open(cidToOffsetIndexFile) + cidToOffsetIndex, err := compactindexsized.Open(cidToOffsetIndexFile) if err != nil { return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) } diff --git a/index-cid-to-offset.go b/index-cid-to-offset.go index 2907f1db..b54e4c50 100644 --- a/index-cid-to-offset.go +++ b/index-cid-to-offset.go @@ -17,7 +17,7 @@ import ( carv1 "github.com/ipld/go-car" "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" - "github.com/rpcpool/yellowstone-faithful/compactindex" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "k8s.io/klog/v2" ) @@ -67,10 +67,10 @@ func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, } klog.Infof("Creating builder with %d items and target file size %d", numItems, targetFileSize) - c2o, err := compactindex.NewBuilder( + c2o, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), - (targetFileSize), + 8, ) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) @@ -97,7 +97,7 @@ func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, // klog.Infof("key: %s, offset: %d", bin.FormatByteSlice(c.Bytes()), totalOffset) - err = c2o.Insert(c.Bytes(), uint64(totalOffset)) + err = c2o.Insert(c.Bytes(), itob(uint64(totalOffset))) if err != nil { return "", fmt.Errorf("failed to put cid to offset: %w", err) } @@ -174,7 +174,7 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath string, 
indexFilePath s } defer indexFile.Close() - c2o, err := compactindex.Open(indexFile) + c2o, err := compactindexsized.Open(indexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -252,7 +252,7 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath string, indexFilePath s return nil } -func findOffsetFromCid(db *compactindex.DB, c cid.Cid) (uint64, error) { +func findOffsetFromCid(db *compactindexsized.DB, c cid.Cid) (uint64, error) { bucket, err := db.LookupBucket(c.Bytes()) if err != nil { return 0, fmt.Errorf("failed to lookup bucket for %s: %w", c, err) @@ -261,5 +261,5 @@ func findOffsetFromCid(db *compactindex.DB, c cid.Cid) (uint64, error) { if err != nil { return 0, fmt.Errorf("failed to lookup offset for %s: %w", c, err) } - return offset, nil + return btoi(offset), nil } diff --git a/tools.go b/tools.go index 623091a4..8bd6c8d4 100644 --- a/tools.go +++ b/tools.go @@ -1,6 +1,7 @@ package main import ( + "encoding/binary" "encoding/json" "fmt" "os" @@ -69,3 +70,15 @@ func loadFromYAML(configFilepath string, dst any) error { return yaml.NewDecoder(file).Decode(dst) } + +// btoi converts a byte slice of length 8 to a uint64. +func btoi(b []byte) uint64 { + return binary.LittleEndian.Uint64(b) +} + +// itob converts a uint64 to a byte slice of length 8. +func itob(v uint64) []byte { + var buf [8]byte + binary.LittleEndian.PutUint64(buf[:], v) + return buf[:] +} From c4d050e41d802f24b5985e2da0f4c8e5a72ff2ef Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 15:06:32 +0100 Subject: [PATCH 04/63] Delete compactindex36 --- compactindex36/LICENSE | 202 ------------- compactindex36/README.md | 137 --------- compactindex36/build.go | 310 -------------------- compactindex36/build_test.go | 438 ---------------------------- compactindex36/compactindex.go | 280 ------------------ compactindex36/compactindex_test.go | 89 ------ compactindex36/fallocate_fake.go | 27 -- compactindex36/fallocate_generic.go | 11 - compactindex36/fallocate_linux.go | 17 -- compactindex36/query.go | 219 -------------- compactindex36/query_test.go | 58 ---- 11 files changed, 1788 deletions(-) delete mode 100644 compactindex36/LICENSE delete mode 100644 compactindex36/README.md delete mode 100644 compactindex36/build.go delete mode 100644 compactindex36/build_test.go delete mode 100644 compactindex36/compactindex.go delete mode 100644 compactindex36/compactindex_test.go delete mode 100644 compactindex36/fallocate_fake.go delete mode 100644 compactindex36/fallocate_generic.go delete mode 100644 compactindex36/fallocate_linux.go delete mode 100644 compactindex36/query.go delete mode 100644 compactindex36/query_test.go diff --git a/compactindex36/LICENSE b/compactindex36/LICENSE deleted file mode 100644 index d6456956..00000000 --- a/compactindex36/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
-   [The remainder of the standard Apache License, Version 2.0 text is omitted here; it is identical, word for word, to the compactindex/LICENSE copy removed in PATCH 01 above.]
-   limitations under the License.
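To make the 8-byte-value migration from PATCH 01 concrete, here is a minimal, non-authoritative sketch of the new round trip: the value size (8 bytes) is declared at build time, and offsets travel through the index as little-endian bytes via the `itob`/`btoi` helpers added to `tools.go`. `NewBuilderSized`, `Insert`, `Open`, `LookupBucket`, and `Lookup` all appear in the hunks above; the `Seal(ctx, *os.File)` call and the empty-string-means-temp-dir behavior are assumptions, carried over from the forks deleted in this series.

```go
package main

import (
	"context"
	"encoding/binary"
	"fmt"
	"os"

	"github.com/rpcpool/yellowstone-faithful/compactindexsized"
)

// itob/btoi mirror the helpers added to tools.go in PATCH 01.
func itob(v uint64) []byte {
	var buf [8]byte
	binary.LittleEndian.PutUint64(buf[:], v)
	return buf[:]
}

func btoi(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }

func check(err error) {
	if err != nil {
		panic(err)
	}
}

func main() {
	// Value size (8 bytes) is now fixed up front instead of being derived
	// from a target file size. "" means: use a temp dir (assumed to match
	// the behavior of the deleted forks).
	builder, err := compactindexsized.NewBuilderSized("", 1, 8)
	check(err)
	defer builder.Close()

	check(builder.Insert([]byte("my-key"), itob(1234)))

	f, err := os.CreateTemp("", "index-")
	check(err)
	defer os.Remove(f.Name())
	// Assumption: Seal(ctx, *os.File) carries over from the deleted forks.
	check(builder.Seal(context.TODO(), f))

	// Query path, exactly as in findOffsetFromCid above: open the index,
	// find the candidate bucket, look up the key, decode the 8-byte value.
	db, err := compactindexsized.Open(f)
	check(err)
	bucket, err := db.LookupBucket([]byte("my-key"))
	check(err)
	value, err := bucket.Lookup([]byte("my-key"))
	check(err)
	fmt.Println(btoi(value)) // 1234
}
```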
diff --git a/compactindex36/README.md b/compactindex36/README.md
deleted file mode 100644
index ef24d1e4..00000000
--- a/compactindex36/README.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# a fast flat-file index for constant datasets
-
-This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
-The following changes have been made:
- - The package has been renamed to `compactindex36` to avoid conflicts with the original package
- - The values it indexes are 36-byte values instead of 8-byte values. This makes it possible to index CIDs (in particular sha256+CBOR CIDs) directly.
-
-This package specifies a file format and Go implementation for indexing constant datasets.
-
-*`compactindex` …*
-- is an immutable file format;
-- maps arbitrary keys into offsets in an external flat file;
-- consumes a constant amount of space per entry
-  - ~39 bytes in this fork, regardless of key size
-  - 3 bytes of entry hash, plus the 36-byte value
-- `O(1)` complexity queries, with `2 + log2(10000)` lookups worst- & average-case (binary search);
-- during construction, requires near-constant memory space and `O(n)` scratch space with regard to entries per file;
-- during construction, features a constant >500k entry/s per-core write rate (2.5 GHz Intel laptop);
-- works on any storage supporting random reads (regular files, HTTP range requests, on-chain, ...);
-- is based on the "FKS method" which uses perfect (collision-free) hash functions in a two-level hashtable; [^1]
-- is inspired by D. J. Bernstein's "constant database"; [^2]
-- uses the xxHash64 non-cryptographic hash function; [^3]
-
-Refer to the Go documentation for the algorithms used and implementation details.
-
-[![Go Reference](https://pkg.go.dev/badge/go.firedancer.io/radiance/pkg/compactindex.svg)](https://pkg.go.dev/go.firedancer.io/radiance/pkg/compactindex)
-
-[^1]: Fredman, M. L., Komlós, J., & Szemerédi, E. (1984). Storing a Sparse Table with O(1) Worst Case Access Time. Journal of the ACM, 31(3), 538–544. https://doi.org/10.1145/828.1884
-[^2]: cdb by D. J. Bernstein https://cr.yp.to/cdb.html
-[^3]: Go implementation of xxHash by @cespare: https://github.com/cespare/xxhash/
-
-## Interface
-
-In programming terms:
-
-```rs
-fn lookup(key: &[byte]) -> Option<u64>
-```
-
-Given an arbitrary key, the index
-- states whether the key exists in the index
-- if it exists, maps the key to an integer (usually an offset into a file)
-
-## Examples
-
-Here are some example scenarios where `compactindex` is useful:
-
-- When working with immutable data structures
-  - Example: Indexing [IPLD CAR files][3] carrying Merkle-DAGs of content-addressable data
-- When working with archived/constant data
-  - Example: Indexing files in `.tar` archives
-- When dealing with immutable remote storage such as S3-like object storage
  - Example: Storing the index and target file in S3, then using [HTTP range requests][4] to efficiently query data
-
-[3]: https://ipld.io/specs/transport/car/
-[4]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
-
-Here are some things compactindex cannot do:
-
-- Cannot add more entries to an existing index
-  - Reason 1: indexes are tightly packed, so there is no space to insert new entries (though `fallocate(2)` with `FALLOC_FL_INSERT_RANGE` would technically work)
-  - Reason 2: the second-level hashtable uses a perfect hash function ensuring collision-free indexing of a subset of entries; inserting new entries might cause a collision, requiring the affected bucket's perfect hash function to be mined again
-  - Reason 3: adding too many entries will eventually create an imbalance in the first-level hashtable; fixing this imbalance effectively requires re-constructing the file from scratch
-- Cannot iterate over keys
-  - Reason: compactindex stores hashes, not the entries themselves. This saves space but also allows for efficient random reads used during binary search
-
-## File Format (v0)
-
-**Encoding**
-
-The file format contains binary packed structures with byte alignment.
-
-Integers are encoded as little endian.
-
-**File Header**
-
-The file begins with a 32-byte file header.
-
-```rust
-#[repr(packed)]
-struct FileHeader {
-    magic: [u8; 8],       // 0x00
-    max_value: u64,       // 0x08
-    num_buckets: u32,     // 0x10
-    padding_14: [u8; 12], // 0x14
-}
-```
-
-- `magic` is set to the UTF-8 string `"rdcecidx"`.
-  The reader should reject files that don't start with this string.
-- `num_buckets` is set to the number of hashtable buckets.
-- `max_value` indicates the integer width of index values.
-- `padding_14` must be zero. (reserved for future use)
-
-**Bucket Header Table**
-
-The file header is followed by a vector of bucket headers.
-The number of bucket headers is set by `num_buckets` in the file header.
-
-Each bucket header is 16 bytes long.
-
-```rust
-#[repr(packed)]
-struct BucketHeader {
-    hash_domain: u32, // 0x00
-    num_entries: u32, // 0x04
-    hash_len: u8,     // 0x08
-    padding_09: u8,   // 0x09
-    file_offset: u48, // 0x0a
-}
-```
-
-- `hash_domain` is a "salt" to the per-bucket hash function.
-- `num_entries` is set to the number of records in the bucket.
-- `hash_len` is the size of the per-record hash in bytes and currently hardcoded to `3`.
-- `padding_09` must be zero.
-- `file_offset` is an offset from the beginning of the file header to the start of the bucket entries.
-
-**Bucket Entry Table**
-
-Each bucket has a vector of entries with length `num_entries`.
-This structure makes up the vast majority of the index.
-
-```rust
-#[repr(packed)]
-struct Entry {
-    hash: u??,
-    value: u??,
-}
-```
-
-The size of an entry is static within a bucket.
It is determined by its components: -- The size of `hash` in bytes equals `hash_len` -- The size of `value` in bytes equals the byte aligned integer width that is minimally required to represent `max_value` diff --git a/compactindex36/build.go b/compactindex36/build.go deleted file mode 100644 index c1499673..00000000 --- a/compactindex36/build.go +++ /dev/null @@ -1,310 +0,0 @@ -package compactindex36 - -// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex -// The following changes have been made: -// - The package has been renamed to `compactindex36` to avoid conflicts with the original package -// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. - -import ( - "bufio" - "context" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "os" - "path/filepath" - "sort" - "syscall" -) - -// Builder creates new compactindex files. -type Builder struct { - Header - buckets []tempBucket - dir string -} - -// NewBuilder creates a new index builder. -// -// If dir is an empty string, a random temporary directory is used. -// -// numItems refers to the number of items in the index. -// -// targetFileSize is the size of the file that index entries point to. -// Can be set to zero if unknown, which results in a less efficient (larger) index. -func NewBuilder(dir string, numItems uint, targetFileSize uint64) (*Builder, error) { - if dir == "" { - var err error - dir, err = os.MkdirTemp("", "compactindex-") - if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) - } - } - if targetFileSize == 0 { - targetFileSize = math.MaxUint64 - } - - numBuckets := (numItems + targetEntriesPerBucket - 1) / targetEntriesPerBucket - buckets := make([]tempBucket, numBuckets) - for i := range buckets { - name := filepath.Join(dir, fmt.Sprintf("keys-%d", i)) - f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666) - if err != nil { - return nil, err - } - buckets[i].file = f - buckets[i].writer = bufio.NewWriter(f) - } - - return &Builder{ - Header: Header{ - FileSize: targetFileSize, - NumBuckets: uint32(numBuckets), - }, - buckets: buckets, - dir: dir, - }, nil -} - -// Insert writes a key-value mapping to the index. -// -// Index generation will fail if the same key is inserted twice. -// The writer must not pass a value greater than targetFileSize. -func (b *Builder) Insert(key []byte, value [36]byte) error { - return b.buckets[b.Header.BucketHash(key)].writeTuple(key, value) -} - -// Seal writes the final index to the provided file. -// This process is CPU-intensive, use context to abort prematurely. -// -// The file should be opened with access mode os.O_RDWR. -// Passing a non-empty file will result in a corrupted index. -func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) { - // TODO support in-place writing. - - // Write header. - var headerBuf [headerSize]byte - b.Header.Store(&headerBuf) - _, err = f.Write(headerBuf[:]) - if err != nil { - return fmt.Errorf("failed to write header: %w", err) - } - // Create hole to leave space for bucket header table. 
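- // The hole is NumBuckets * bucketHdrLen bytes, directly after the 32-byte
- // file header; entry data is appended after it, and sealBucket back-fills
- // each bucket header once that bucket's entries have been written out.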
- bucketTableLen := int64(b.NumBuckets) * bucketHdrLen - err = fallocate(f, headerSize, bucketTableLen) - if errors.Is(err, syscall.EOPNOTSUPP) { - // The underlying file system may not support fallocate - err = fake_fallocate(f, headerSize, bucketTableLen) - if err != nil { - return fmt.Errorf("failed to fake fallocate() bucket table: %w", err) - } - } - if err != nil { - return fmt.Errorf("failed to fallocate() bucket table: %w", err) - } - // Seal each bucket. - for i := range b.buckets { - if err := b.sealBucket(ctx, i, f); err != nil { - return err - } - } - return nil -} - -// sealBucket will mine a bucket hashtable, write entries to a file, a -func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error { - // Produce perfect hash table for bucket. - bucket := &b.buckets[i] - if err := bucket.flush(); err != nil { - return err - } - const mineAttempts uint32 = 1000 - entries, domain, err := bucket.mine(ctx, mineAttempts) - if err != nil { - return fmt.Errorf("failed to mine bucket %d: %w", i, err) - } - // Find current file length. - offset, err := f.Seek(0, io.SeekEnd) - if err != nil { - return fmt.Errorf("failed to seek to EOF: %w", err) - } - if offset < 0 { - panic("os.File.Seek() < 0") - } - // Write header to file. - desc := BucketDescriptor{ - BucketHeader: BucketHeader{ - HashDomain: domain, - NumEntries: uint32(bucket.records), - HashLen: 3, // TODO remove hardcoded constant - FileOffset: uint64(offset), - }, - Stride: 3 + valueLength(), // TODO remove hardcoded constant - OffsetWidth: valueLength(), - } - // Write entries to file. - wr := bufio.NewWriter(f) - entryBuf := make([]byte, desc.HashLen+valueLength()) // TODO remove hardcoded constant - for _, entry := range entries { - desc.marshalEntry(entryBuf, entry) - if _, err := wr.Write(entryBuf[:]); err != nil { - return fmt.Errorf("failed to write record to index: %w", err) - } - } - if err := wr.Flush(); err != nil { - return fmt.Errorf("failed to flush bucket to index: %w", err) - } - // Write header to file. - if err := desc.BucketHeader.writeTo(f, uint(i)); err != nil { - return fmt.Errorf("failed to write bucket header %d: %w", i, err) - } - return nil -} - -func (b *Builder) Close() error { - return os.RemoveAll(b.dir) -} - -// tempBucket represents the "temporary bucket" file, -// a disk buffer containing a vector of key-value-tuples. -type tempBucket struct { - records uint - file *os.File - writer *bufio.Writer -} - -// writeTuple performs a buffered write of a KV-tuple. -func (b *tempBucket) writeTuple(key []byte, value [36]byte) (err error) { - b.records++ - var static [38]byte - binary.LittleEndian.PutUint16(static[0:2], uint16(len(key))) - copy(static[2:38], value[:]) - if _, err = b.writer.Write(static[:]); err != nil { - return err - } - _, err = b.writer.Write(key) - return -} - -// flush empties the in-memory write buffer to the file. -func (b *tempBucket) flush() error { - if err := b.writer.Flush(); err != nil { - return fmt.Errorf("failed to flush writer: %w", err) - } - b.writer = nil - return nil -} - -// mine repeatedly hashes the set of entries with different nonces. -// -// Returns a sorted list of hashtable entries upon finding a set of hashes without collisions. -// If a number of attempts was made without success, returns ErrCollision instead. 
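-//
-// Each attempt rewinds the temp bucket file, hashes every key with the
-// candidate hash domain (nonce), and records the 24-bit hashes in a bitmap;
-// the first domain that produces no duplicate hash wins.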
-func (b *tempBucket) mine(ctx context.Context, attempts uint32) (entries []Entry, domain uint32, err error) { - entries = make([]Entry, b.records) - bitmap := make([]byte, 1<<21) - - rd := bufio.NewReader(b.file) - for domain = uint32(0); domain < attempts; domain++ { - if err = ctx.Err(); err != nil { - return - } - // Reset bitmap - for i := range bitmap { - bitmap[i] = 0 - } - // Reset reader - if _, err = b.file.Seek(0, io.SeekStart); err != nil { - return - } - rd.Reset(b.file) - - if hashErr := hashBucket(rd, entries, bitmap, domain); errors.Is(hashErr, ErrCollision) { - continue - } else if hashErr != nil { - return nil, 0, hashErr - } - - return // ok - } - - return nil, domain, ErrCollision -} - -// hashBucket reads and hashes entries from a temporary bucket file. -// -// Uses a 2^24 wide bitmap to detect collisions. -func hashBucket(rd *bufio.Reader, entries []Entry, bitmap []byte, nonce uint32) error { - // TODO Don't hardcode this, choose hash depth dynamically - mask := uint64(0xffffff) - - // Scan provided reader for entries and hash along the way. - for i := range entries { - // Read next key from file (as defined by writeTuple) - var static [38]byte - if _, err := io.ReadFull(rd, static[:]); err != nil { - return err - } - keyLen := binary.LittleEndian.Uint16(static[0:2]) - var value [36]byte - copy(value[:], static[2:38]) - key := make([]byte, keyLen) - if _, err := io.ReadFull(rd, key); err != nil { - return err - } - - // Hash to entry - hash := EntryHash64(nonce, key) & mask - - // Check for collision in bitmap - bi, bj := hash/8, hash%8 - chunk := bitmap[bi] - if (chunk>>bj)&1 == 1 { - return ErrCollision - } - bitmap[bi] = chunk | (1 << bj) - - // Export entry - entries[i] = Entry{ - Hash: hash, - Value: value, - } - } - - // Sort entries. - // sort.Slice(entries, func(i, j int) bool { - // return entries[i].Hash < entries[j].Hash - // }) - sortWithCompare(entries, func(i, j int) int { - if entries[i].Hash < entries[j].Hash { - return -1 - } else if entries[i].Hash > entries[j].Hash { - return 1 - } - return 0 - }) - - return nil -} - -var ErrCollision = errors.New("hash collision") - -func sortWithCompare[T any](a []T, compare func(i, j int) int) { - sort.Slice(a, func(i, j int) bool { - return compare(i, j) < 0 - }) - sorted := make([]T, len(a)) - eytzinger(a, sorted, 0, 1) - copy(a, sorted) -} - -func eytzinger[T any](in, out []T, i, k int) int { - if k <= len(in) { - i = eytzinger(in, out, i, 2*k) - out[k-1] = in[i] - i++ - i = eytzinger(in, out, i, 2*k+1) - } - return i -} diff --git a/compactindex36/build_test.go b/compactindex36/build_test.go deleted file mode 100644 index 46fa70b1..00000000 --- a/compactindex36/build_test.go +++ /dev/null @@ -1,438 +0,0 @@ -package compactindex36 - -// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex -// The following changes have been made: -// - The package has been renamed to `compactindex36` to avoid conflicts with the original package -// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. 
- -import ( - "bytes" - "context" - "encoding/binary" - "errors" - "fmt" - "io" - "io/fs" - "math" - "math/rand" - "os" - "strings" - "testing" - "time" - - "github.com/davecgh/go-spew/spew" - "github.com/ipfs/go-cid" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/vbauerster/mpb/v8/decor" -) - -var testCidStrings = []string{ - "bafyreiba5kzq6wf6neax6ascsh5khxhuy7zc6vqsu6zac32i7ilv4u62nm", - "bafyreie42alzugm43fiqv64ss3h5fh3xriaeamul7d7qmrrbxe6fpjo5b4", - "bafyreidam5koitaftfx7sydge5ta3ig2j5qbabqcql4umpom3yuia4sbm4", - "bafyreia3pebgypo4oqgdg4pqpjfybmcdbsbavcdscotji4wj2gfc3r4icm", - "bafyreigudmeashua4432mbq3tawwnsz3qfpmm5tjpwahopn7cxttotqdge", - "bafyreic3azak2ds4fomkw35pmvsznu46sgonmketlnfaqnoc6owi4t64my", - "bafyreib6t4ooiajnebkwgk4z57fhcvejc663a6haq6cb6tjjluj4fuulla", - "bafyreidmohyrgchkgavx7wubebip5agb4ngisnlkqaot4kz2eo635ny5m4", - "bafyreicpmxvpxwjemofmic6aka72dliueqxtsklrilkofwbqgn6ffuz7ka", - "bafyreifkjdmj3kmi2hkoqcqweunbktouxo6sy362rysl34ffyjinebylim", - "bafyreidzql2rmbs3chtq2cmbnncvfyz2tjclwqx4vnowvyph77fomh26qi", - "bafyreig4kpaq6rf5y46qgqhdzgr5uauubfqyevbmj6pmtaxxhh3tkyzury", - "bafyreianxqyomvh6dl533cs25z7yfda2z62ity3w7sdqf3kk4tmogu7t24", - "bafyreicaq6dv5jsq4du2tqiefr3baepnj4ei3bpxvg5g6np7ygacgbw5aq", - "bafyreia4b2nleifcp54w4scrjy7fgctsoy6zz4mkot3gw6xydqkrc2wdtq", - "bafyreierpgsryprxfgshtzjarnb662d5akhg7om6utubggjwtlg6qwwj5i", - "bafyreidufcwvs7fvot2blqnwciaxre35s3ip6xxkncrus4voci3ktots2q", - "bafyreif23uzartrw62g5pywtrsz3xsl2wdw73o4fvtsf76gqgx37mfpqjm", - "bafyreianu4oifizvqyop753ao4hrocftlbnn6kzm7xtsm4ryaz6uawkgmu", - "bafyreidekyir7cti4jch652nnmimrftoqynjxl6vzjimfkqxh42rx27yiy", - "bafyreia3zuym3akg4gp5ewlmdwxnybrsqrab4m6tpsgxq65az6z7r5jtba", - "bafyreihherovnppygar6h5hu4we4hkxrvoqtpkawwcmn7hkyeq6cisouyu", - "bafyreicmqd5dhn2hv2qcskf27vsml36p6srz6zoxjfjkmnu7ltczqtbkbe", - "bafyreihy2c7lomf3i3nucd5bbwvox3luhtnzujkybchgmyxenmanon7rxe", - "bafyreicld6buy3mr4ibs2jzakoaptdj7xvpjo4pwhwiuywnrzfzoh5ahqi", - "bafyreifyapa5a5ii72hfmqiwgsjto6iarshfwwvrrxdw3bhr62ucuutl4e", - "bafyreigrlvwdaivwthwvihcbyrnl5pl7jfor72xlaivi2f6ajypy4yku3a", - "bafyreiamvgkbpuahegu3mhxxujzvxk2t5hrykhrfw47yurlxqumkv243iy", - "bafyreib4qf7qpjmpr2eqi7mqwqxw2fznnkvhzkpj3udiloxqay5fhk5wui", - "bafyreidbol6tdhj42rdpchpafszgmnmg7tgvi2uwou7s2whiamznzawhk4", - "bafyreidrpejzimhuwq6j74jzv2odzriuitwmdkp2ibojzcax6jdpqiztti", - "bafyreidrgb4vmgvsreebrj6apscopszfbgw5e7llh22kk2cdayyeoyggwy", - "bafyreigpzlopkl2ttxfdf6n5sgxyda4bvlglre7nkjq37uecmvf47f6ttm", - "bafyreidcq3csrifsyeed42fbky42w7bxhvg6fd42l7qkw3cnxliab4e7nu", - "bafyreibchdux4qchrrz67kikde273mjth475fedjisvoazf3zhmodlkx7a", - "bafyreie4rdlgpfcrrdlonofkwlrefh6z5hcwieasatkddozvyknwqahh4q", - "bafyreibhwuih7ekso6zypyr4uwl37xewyu7foy2clqvz4l7lbgwxpslyyu", - "bafyreigltijqq3m6h7h6du5o4ynqwmimtslnsmyu3njwlnpuyadyev6awa", - "bafyreihwtszo3p7ujg2wsuhsqon5tidxxnyin2t42uhj7zq6xta7fo2suy", - "bafyreie2uggjajncn2lna6ytq2sw2uu4xw724pe6wj4ihhiawnnjm5sgwa", - "bafyreignb5gdw7fwfycoipjqbkvkve7dkuugr3s5ylkaucn3ks7klxh4te", - "bafyreib3iwnufpnoxgf7z5w3vtygu2z2kcqxj3quxypupfgmr53tyt6wdq", - "bafyreic7kxsh7nmfpxmrm727yug2rfnrhfuavmpll3cms4r6cpnbbuwgqm", - "bafyreig2o4yrzlwo74eom4v65tenr6yjh2v23vbl7sjffrppzceenxs3eq", - "bafyreidletnh5bxnc6k2p3idnul5qatfcf4qqrgmkjxolgpu7wolye47hm", - "bafyreigv2nni66nb6be5dchkonpb2t556qplv5xz4vdolwmyz4m32aufdi", - "bafyreid66pezal5svaidpvxc3zz6w5eksxcjn6omelhsqhj5jmcmxhgjhm", - "bafyreihjhwpvm2soq5syyovsiqrchsuojsdk4imj2gqk6pikc4rxdqtmny", - 
"bafyreidt3oveadwf5jrmxatrwa5bdxvfyxnrucypmtqwiu2pvrrztrj5xe", - "bafyreid6y6r44wqcwql5yyitmw5mpfmrrlsois2unbqzmtlvyeefqahnnu", - "bafyreic6evvtf3y3slkbwhzbjuvspqu2jxf7qr267rhigmox6f4a5a36eq", - "bafyreiekep5a55yvebqzzi6x7xyotse57zfwcpyeh2xermqkvxlkvpxh24", - "bafyreigwb22sgfg56dc2jnnvxttjyhwfp4itevlukqj2wfz5ebru72elv4", - "bafyreiebz2fxh64dqvbiwmqnyj5rj63txl5u7abmets2imhn2su6tcuvyu", - "bafyreigcm7wkxlsyc26acgb7nfjho2twh6au2pbk35w6bsbv2qt7rt7iaq", - "bafyreieiuq6g74i25huoumvey7oynljndt2d4qvbddqkhpysrexu7ixsuy", - "bafyreihuhj5slybgbqzdr4mpkyo5dwvqjxfhicardbph6htiyeut2frol4", - "bafyreiaskg4kwqrpdcatnymvno4xf54uewysdiz3357fdct2tlnx2gpkqq", - "bafyreicakit2lbmg3wo4uoox4rc2gv3odzrrkrr32zwk7qaolpoc7uyz5u", - "bafyreih5jcnhw4evhq5j4n75miruqfofo2dv46hdtqyd5ht2eqeu7g5cme", - "bafyreicwtl6ulct4ckjnq57gmctw3wjo6ctvjbbr7l4bwfbzpj3y3g6unm", - "bafyreiebgoqj3nawzcwjy4t67uljnmvfh55fiqaxsskld6qpjvd2majesq", - "bafyreif472dxwhnyjhxmxoto3czfblhssgmhrpsqcmrwzprywk45wqdtmi", - "bafyreiaz444on546zihfuygqchlw4r4vu2tuw5xnelm6dsodqcno23pvzu", - "bafyreidgzghcd2lfdcylsccvlj43f5ujj7xtriu6ojp7jog5iainecagka", - "bafyreiehvi56dn3zm2ltfgecss2ydfmcb2hmf6hk76b6ebpoxhquajawze", - "bafyreie4wcortvdsirbontddokin6wgm25xg46lu3qxcyyjj6rgkuk5cca", - "bafyreicurlgiukht7wnxy3za3hz5fzs2a62ggc6i3rqhzhck4p2lgt5754", - "bafyreihn2zwm7m3tqfwa53me4qxiit66yiny5sxtkvvjewjfkbjrgmeswu", - "bafyreid7m33qok7d66vsyc5mq257rya5sg24rzv5qwbghwsimclt5ll7pi", -} - -var testCids = func() []cid.Cid { - var cids []cid.Cid - for _, s := range testCidStrings { - c, err := cid.Decode(s) - if err != nil { - panic(err) - } - cids = append(cids, c) - } - return cids -}() - -func concatBytes(bs ...[]byte) []byte { - var out []byte - for _, b := range bs { - out = append(out, b...) - } - return out -} - -func numberToHexBytes(n int) string { - return (fmt.Sprintf("0x%02x", n)) -} - -func FormatByteSlice(buf []byte) string { - elems := make([]string, 0) - for _, v := range buf { - elems = append(elems, numberToHexBytes(int(v))) - } - - return "{" + strings.Join(elems, ", ") + "}" + fmt.Sprintf("(len=%v)", len(elems)) -} - -func splitBufferWithProvidedSizes(buf []byte, sizes []int) [][]byte { - var out [][]byte - var offset int - for _, size := range sizes { - out = append(out, buf[offset:offset+size]) - offset += size - } - return out -} - -func compareBufferArrays(a, b [][]byte) []bool { - var out []bool - - for i := 0; i < len(a); i++ { - out = append(out, bytes.Equal(a[i], b[i])) - } - - return out -} - -func TestBuilder(t *testing.T) { - const numBuckets = 3 - const maxValue = math.MaxUint64 - - // Create a table with 3 buckets. - builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue) - require.NoError(t, err) - require.NotNil(t, builder) - assert.Len(t, builder.buckets, 3) - defer builder.Close() - - // Insert a few entries. - keys := []string{"hello", "world", "blub", "foo"} - for i, key := range keys { - require.NoError(t, builder.Insert([]byte(key), [36]byte(testCids[i].Bytes()))) - } - { - // print test values - for _, tc := range testCids { - spew.Dump(FormatByteSlice(tc.Bytes())) - } - } - - // Create index file. - targetFile, err := os.CreateTemp("", "compactindex-final-") - require.NoError(t, err) - defer os.Remove(targetFile.Name()) - defer targetFile.Close() - - // Seal index. - require.NoError(t, builder.Seal(context.TODO(), targetFile)) - - // Assert binary content. 
- buf, err := os.ReadFile(targetFile.Name()) - require.NoError(t, err) - expected := concatBytes( - // --- File header - // magic - []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0 - // max file size - []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // 1 - // num buckets - []byte{0x03, 0x00, 0x00, 0x00}, // 2 - // padding - []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 3 - - // --- Bucket header 0 - // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 4 - // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 5 - // hash len - []byte{0x03}, // 6 - // padding - []byte{0x00}, // 7 - // file offset - []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 - - // --- Bucket header 1 - // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 9 - // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 10 - // hash len - []byte{0x03}, // 11 - // padding - []byte{0x00}, // 12 - // file offset - []byte{0x77, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13 - - // --- Bucket header 2 - // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 14 - // num entries - []byte{0x02, 0x00, 0x00, 0x00}, // 15 - // hash len - []byte{0x03}, // 16 - // padding - []byte{0x00}, // 17 - // file offset - []byte{0x9e, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18 - - // --- Bucket 0 - // hash - []byte{0xe2, 0xdb, 0x55}, // 19 - // value - []byte{0x1, 0x71, 0x12, 0x20, 0x20, 0xea, 0xb3, 0xf, 0x58, 0xbe, 0x69, 0x1, 0x7f, 0x2, 0x42, 0x91, 0xfa, 0xa3, 0xdc, 0xf4, 0xc7, 0xf2, 0x2f, 0x56, 0x12, 0xa7, 0xb2, 0x1, 0x6f, 0x48, 0xfa, 0x17, 0x5e, 0x53, 0xda, 0x6b}, // 20 - - // --- Bucket 2 - // hash - []byte{0x92, 0xcd, 0xbb}, // 21 - // value - []byte{0x01, 0x71, 0x12, 0x20, 0x9c, 0xd0, 0x17, 0x9a, 0x19, 0x9c, 0xd9, 0x51, 0x0a, 0xfb, 0x92, 0x96, 0xcf, 0xd2, 0x9f, 0x77, 0x8a, 0x00, 0x40, 0x32, 0x8b, 0xf8, 0xff, 0x06, 0x46, 0x21, 0xb9, 0x3c, 0x57, 0xa5, 0xdd, 0x0f}, // 22 - // hash - []byte{0x98, 0x3d, 0xbd}, // 25 - // value - []byte{0x01, 0x71, 0x12, 0x20, 0x1b, 0x79, 0x02, 0x6c, 0x3d, 0xdc, 0x74, 0x0c, 0x33, 0x71, 0xf0, 0x7a, 0x4b, 0x80, 0xb0, 0x43, 0x0c, 0x82, 0x0a, 0x88, 0x72, 0x13, 0xa6, 0x94, 0x72, 0xc9, 0xd1, 0x8a, 0x2d, 0xc7, 0x88, 0x13}, // 26 - // hash - []byte{0xe3, 0x09, 0x6b}, // 23 - // value - []byte{0x1, 0x71, 0x12, 0x20, 0x60, 0x67, 0x54, 0xe4, 0x4c, 0x5, 0x99, 0x6f, 0xf9, 0x60, 0x66, 0x27, 0x66, 0xd, 0xa0, 0xda, 0x4f, 0x60, 0x10, 0x6, 0x2, 0x82, 0xf9, 0x46, 0x3d, 0xcc, 0xde, 0x28, 0x80, 0x72, 0x41, 0x67}, // 24 - ) - assert.Equal(t, expected, buf) - - { - splitSizes := []int{ - // --- File header - 8, 8, 4, 12, - // --- Bucket header 0 - 4, 4, 1, 1, 6, - // --- Bucket header 1 - 4, 4, 1, 1, 6, - // --- Bucket header 2 - 4, 4, 1, 1, 6, - // --- Bucket 0 - 3, 36, - // --- Bucket 2 - 3, 36, 3, 36, 3, 36, - } - splitExpected := splitBufferWithProvidedSizes(expected, splitSizes) - splitGot := splitBufferWithProvidedSizes(buf, splitSizes) - - comparations := compareBufferArrays(splitExpected, splitGot) - - for i, equal := range comparations { - if !equal { - t.Errorf("%d: \nexpected: %v, \n got: %v", i, FormatByteSlice(splitExpected[i]), FormatByteSlice(splitGot[i])) - } - } - - } - - // Reset file offset. - _, seekErr := targetFile.Seek(0, io.SeekStart) - require.NoError(t, seekErr) - - // Open index. - db, err := Open(targetFile) - require.NoError(t, err, "Failed to open generated index") - require.NotNil(t, db) - - // File header assertions. - assert.Equal(t, Header{ - FileSize: maxValue, - NumBuckets: numBuckets, - }, db.Header) - - // Get bucket handles. 
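- // Each handle carries the bucket's descriptor (stride = 39 bytes here:
- // 3-byte hash + 36-byte value) plus a SectionReader over its entry table.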
- buckets := make([]*Bucket, numBuckets) - for i := range buckets { - buckets[i], err = db.GetBucket(uint(i)) - require.NoError(t, err) - } - - // Ensure out-of-bounds bucket accesses fail. - _, wantErr := db.GetBucket(numBuckets) - assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") - - // Bucket header assertions. - assert.Equal(t, BucketDescriptor{ - BucketHeader: BucketHeader{ - HashDomain: 0x00, - NumEntries: 1, - HashLen: 3, - FileOffset: 0x50, - }, - Stride: 39, // 3 + 36 - OffsetWidth: 36, - }, buckets[0].BucketDescriptor) - assert.Equal(t, BucketHeader{ - HashDomain: 0x00, - NumEntries: 1, - HashLen: 3, - FileOffset: 119, - }, buckets[1].BucketHeader) - assert.Equal(t, BucketHeader{ - HashDomain: 0x00, - NumEntries: 2, - HashLen: 3, - FileOffset: 158, - }, buckets[2].BucketHeader) - - // Test lookups. - entries, err := buckets[2].Load( /*batchSize*/ 3) - require.NoError(t, err) - assert.Equal(t, []Entry{ - { - Hash: 12402072, - Value: [36]byte(testCids[3].Bytes()), - }, - { - Hash: 7014883, - Value: [36]byte(testCids[2].Bytes()), - }, - }, entries) - - { - for i, keyString := range keys { - key := []byte(keyString) - bucket, err := db.LookupBucket(key) - require.NoError(t, err) - - value, err := bucket.Lookup(key) - require.NoError(t, err) - assert.Equal(t, [36]byte(testCids[i].Bytes()), value) - } - } -} - -func TestBuilder_Random(t *testing.T) { - if testing.Short() { - t.Skip("Skipping long test") - } - - numKeys := uint(len(testCids)) - const keySize = uint(16) - const maxOffset = uint64(1000000) - const queries = int(10000) - - // Create new builder session. - builder, err := NewBuilder("", numKeys, maxOffset) - require.NoError(t, err) - require.NotNil(t, builder) - require.NotEmpty(t, builder.buckets) - - // Ensure we cleaned up after ourselves. - defer func() { - _, statErr := os.Stat(builder.dir) - assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) - }() - defer builder.Close() - - // Insert items to temp buckets. - preInsert := time.Now() - key := make([]byte, keySize) - for i := uint(0); i < numKeys; i++ { - binary.LittleEndian.PutUint64(key, uint64(i)) - err := builder.Insert(key, [36]byte(testCids[i].Bytes())) - require.NoError(t, err) - } - t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) - - // Create file for final index. - targetFile, err := os.CreateTemp("", "compactindex-final-") - require.NoError(t, err) - defer os.Remove(targetFile.Name()) - defer targetFile.Close() - - // Seal to final index. - preSeal := time.Now() - sealErr := builder.Seal(context.TODO(), targetFile) - require.NoError(t, sealErr, "Seal failed") - t.Logf("Sealed in %s", time.Since(preSeal)) - - // Print some stats. - targetStat, err := targetFile.Stat() - require.NoError(t, err) - t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) - t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) - t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) - - // Open index. - _, seekErr := targetFile.Seek(0, io.SeekStart) - require.NoError(t, seekErr) - db, err := Open(targetFile) - require.NoError(t, err, "Failed to open generated index") - - // Run query benchmark. 
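- // Re-derive each inserted key, resolve it through the public two-step path
- // (LookupBucket, then Lookup), and check the stored CID round-trips intact.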
- preQuery := time.Now() - for i := queries; i != 0; i-- { - keyN := uint64(rand.Int63n(int64(numKeys))) - binary.LittleEndian.PutUint64(key, keyN) - - bucket, err := db.LookupBucket(key) - require.NoError(t, err) - - value, err := bucket.Lookup(key) - require.NoError(t, err) - require.Equal(t, [36]byte(testCids[keyN].Bytes()), value) - } - t.Logf("Queried %d items", queries) - t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds()) -} diff --git a/compactindex36/compactindex.go b/compactindex36/compactindex.go deleted file mode 100644 index 4720795d..00000000 --- a/compactindex36/compactindex.go +++ /dev/null @@ -1,280 +0,0 @@ -// Package compactindex is an immutable hashtable index format inspired by djb's constant database (cdb). -// -// # Design -// -// Compactindex is used to create secondary indexes over arbitrary flat files. -// Each index is a single, immutable flat file. -// -// Index files consist of a space-optimized and query-optimized key-value-like table. -// -// Instead of storing actual keys, the format stores FKS dynamic perfect hashes. -// And instead of storing values, the format contains offsets into some file. -// -// As a result, the database effectively only supports two operations, similarly to cdb. -// (Note that the actual Go interface is a bit more flexible). -// -// func Create(kv map[[]byte]uint64) *Index -// func (*Index) Lookup(key []byte) (value uint64, exist bool) -// -// # Buckets -// -// The set of items is split into buckets of approx 10000 records. -// The number of buckets is unlimited. -// -// The key-to-bucket assignment is determined by xxHash3 using uniform discrete hashing over the key space. -// -// The index file header also mentions the number of buckets and the file offset of each bucket. -// -// # Tables -// -// Each bucket contains a table of entries, indexed by a collision-free hash function. -// -// The hash function used in the entry table is xxHash. -// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme). -// This hash domain is also recorded at the bucket header. -// -// Each bucket entry is a constant-size record consisting of a 3-byte hash and an offset to the value. -// The size of the offset integer is the minimal byte-aligned integer width that can represent the target file size. -// -// # Querying -// -// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads. -// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc... -// -// The DB struct itself performs zero memory allocations and therefore also doesn't cache. -// It is therefore recommended to provide a io.ReaderAt backed by a cache to improve performance. -// -// Given a key, the query strategy is simple: -// -// 1. Hash key to bucket using global hash function -// 2. Retrieve bucket offset from bucket header table -// 3. Hash key to entry using per-bucket hash function -// 4. Search for entry in bucket (binary search) -// -// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium. -// -// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time. -// When doing range requests against high-latency remote storage (e.g. S3 buckets), -// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once. 
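-//
-// As an illustrative aside (not part of the original package), any type with
-// a ReadAt method works as a backend. A bare-bones io.ReaderAt over HTTP
-// range requests, assuming the server honors the Range header and that
-// net/http, fmt, and io are imported, could look like:
-//
-//	type httpReaderAt struct{ url string }
-//
-//	func (r httpReaderAt) ReadAt(p []byte, off int64) (int, error) {
-//		req, err := http.NewRequest("GET", r.url, nil)
-//		if err != nil {
-//			return 0, err
-//		}
-//		req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, off+int64(len(p))-1))
-//		resp, err := http.DefaultClient.Do(req)
-//		if err != nil {
-//			return 0, err
-//		}
-//		defer resp.Body.Close()
-//		return io.ReadFull(resp.Body, p)
-//	}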
-// -// # Construction -// -// Constructing a compactindex requires upfront knowledge of the number of items and highest possible target offset (read: target file size). -// -// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency. -// -// The process works as follows: -// -// 1. Determine number of buckets and offset integer width -// based on known input params (item count and target file size). -// 2. Linear pass over input data, populating temporary files that -// contain the unsorted entries of each bucket. -// 3. For each bucket, brute force a perfect hash function that -// defines a bijection between hash values and keys in the bucket. -// 4. For each bucket, sort by hash values. -// 5. Store to index. -// -// An alternative construction approach is available when the number of items or target file size is unknown. -// In this case, a set of keys is first serialized to a flat file. -package compactindex36 - -// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex -// The following changes have been made: -// - The package has been renamed to `compactindex36` to avoid conflicts with the original package -// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. - -import ( - "encoding/binary" - "fmt" - "math" - "math/bits" - "sort" - - "github.com/cespare/xxhash/v2" -) - -// Magic are the first eight bytes of an index. -var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} - -const Version = uint8(1) - -// Header occurs once at the beginning of the index. -type Header struct { - FileSize uint64 - NumBuckets uint32 -} - -// headerSize is the size of the header at the beginning of the file. -const headerSize = 32 - -// Load checks the Magic sequence and loads the header fields. -func (h *Header) Load(buf *[headerSize]byte) error { - // Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream. - if *(*[8]byte)(buf[:8]) != Magic { - return fmt.Errorf("not a radiance compactindex file") - } - *h = Header{ - FileSize: binary.LittleEndian.Uint64(buf[8:16]), - NumBuckets: binary.LittleEndian.Uint32(buf[16:20]), - } - // Check version. - if buf[20] != Version { - return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20]) - } - // 11 bytes to spare for now. Might use it in the future. - // Force to zero for now. - for _, b := range buf[21:32] { - if b != 0x00 { - return fmt.Errorf("unsupported index version") - } - } - return nil -} - -func (h *Header) Store(buf *[headerSize]byte) { - copy(buf[0:8], Magic[:]) - binary.LittleEndian.PutUint64(buf[8:16], h.FileSize) - binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets) - buf[20] = Version - for i := 21; i < 32; i++ { - buf[i] = 0 - } -} - -// BucketHash returns the bucket index for the given key. -// -// Uses a truncated xxHash64 rotated until the result fits. -func (h *Header) BucketHash(key []byte) uint { - u := xxhash.Sum64(key) - n := uint64(h.NumBuckets) - r := (-n) % n - for u < r { - u = hashUint64(u) - } - return uint(u % n) -} - -// hashUint64 is a reversible uint64 permutation based on Google's -// Murmur3 hash finalizer (public domain) -func hashUint64(x uint64) uint64 { - x ^= x >> 33 - x *= 0xff51afd7ed558ccd - x ^= x >> 33 - x *= 0xc4ceb9fe1a85ec53 - x ^= x >> 33 - return x -} - -// BucketHeader occurs at the beginning of each bucket. 
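-// On disk this is the 16-byte record described in the README: hash_domain
-// (u32), num_entries (u32), hash_len (u8), one zero padding byte, then the
-// 48-bit file_offset at byte offset 0x0a (see Store/Load below).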
-type BucketHeader struct { - HashDomain uint32 - NumEntries uint32 - HashLen uint8 - FileOffset uint64 -} - -// bucketHdrLen is the size of the header preceding the hash table entries. -const bucketHdrLen = 16 - -func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) { - binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain) - binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries) - buf[8] = b.HashLen - buf[9] = 0 - putUintLe(buf[10:16], b.FileOffset) -} - -func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) { - b.HashDomain = binary.LittleEndian.Uint32(buf[0:4]) - b.NumEntries = binary.LittleEndian.Uint32(buf[4:8]) - b.HashLen = buf[8] - b.FileOffset = uintLe(buf[10:16]) -} - -// Hash returns the per-bucket hash of a key. -func (b *BucketHeader) Hash(key []byte) uint64 { - xsum := EntryHash64(b.HashDomain, key) - // Mask sum by hash length. - return xsum & (math.MaxUint64 >> (64 - b.HashLen*8)) -} - -type BucketDescriptor struct { - BucketHeader - Stride uint8 // size of one entry in bucket - OffsetWidth uint8 // with of offset field in bucket -} - -func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) { - e.Hash = uintLe(buf[0:b.HashLen]) - copy(e.Value[:], buf[b.HashLen:b.HashLen+b.OffsetWidth]) - return -} - -func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) { - if len(buf) < int(b.Stride) { - panic("serializeEntry: buf too small") - } - putUintLe(buf[0:b.HashLen], e.Hash) - copy(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value[:]) -} - -// SearchSortedEntries performs an in-memory binary search for a given hash. -func SearchSortedEntries(entries []Entry, hash uint64) *Entry { - i, found := sort.Find(len(entries), func(i int) int { - other := entries[i].Hash - // Note: This is safe because neither side exceeds 2^24. - return int(hash) - int(other) - }) - if !found { - return nil - } - if i >= len(entries) || entries[i].Hash != hash { - return nil - } - return &entries[i] -} - -// EntryHash64 is a xxHash-based hash function using an arbitrary prefix. -func EntryHash64(prefix uint32, key []byte) uint64 { - const blockSize = 32 - var prefixBlock [blockSize]byte - binary.LittleEndian.PutUint32(prefixBlock[:4], prefix) - - var digest xxhash.Digest - digest.Reset() - digest.Write(prefixBlock[:]) - digest.Write(key) - return digest.Sum64() -} - -// Entry is a single element in a hash table. -type Entry struct { - Hash uint64 - Value [36]byte -} - -func valueLength() uint8 { - return 36 // 36 is the length of the CIDs we use. -} - -// maxCls64 returns the max integer that has the same amount of leading zeros as n. -func maxCls64(n uint64) uint64 { - return math.MaxUint64 >> bits.LeadingZeros64(n) -} - -// uintLe decodes an unsigned little-endian integer without bounds assertions. -// out-of-bounds bits are set to zero. -func uintLe(buf []byte) uint64 { - var full [8]byte - copy(full[:], buf) - return binary.LittleEndian.Uint64(full[:]) -} - -// putUintLe encodes an unsigned little-endian integer without bounds assertions. -// Returns true if the integer fully fit in the provided buffer. 
-func putUintLe(buf []byte, x uint64) bool { - var full [8]byte - binary.LittleEndian.PutUint64(full[:], x) - copy(buf, full[:]) - return int(valueLength()) <= len(buf) -} diff --git a/compactindex36/compactindex_test.go b/compactindex36/compactindex_test.go deleted file mode 100644 index f8bdebe1..00000000 --- a/compactindex36/compactindex_test.go +++ /dev/null @@ -1,89 +0,0 @@ -package compactindex36 - -// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex -// The following changes have been made: -// - The package has been renamed to `compactindex36` to avoid conflicts with the original package -// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. - -import ( - "math" - "math/rand" - "sort" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMaxCls64(t *testing.T) { - cases := [][2]uint64{ - {0x0000_0000_0000_0000, 0x0000_0000_0000_0000}, - {0x0000_0000_0000_0001, 0x0000_0000_0000_0001}, - {0x0000_0000_0000_0003, 0x0000_0000_0000_0002}, - {0x0000_0000_0000_0003, 0x0000_0000_0000_0003}, - {0x0000_0000_0000_0007, 0x0000_0000_0000_0004}, - {0x0000_0000_FFFF_FFFF, 0x0000_0000_F000_000F}, - {0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF}, - } - for _, tc := range cases { - assert.Equal(t, tc[0], maxCls64(tc[1])) - } -} - -func TestHeader_BucketHash(t *testing.T) { - const numItems = 500000 - const numBuckets = 1000 - - header := Header{ - NumBuckets: numBuckets, - } - - keys := make([][]byte, numItems) - hits := make([]int, numBuckets) - for i := range keys { - var buf [16]byte - n, _ := rand.Read(buf[:]) - keys[i] = buf[:n] - } - - // Bounds check and count hits. - for _, key := range keys { - idx := header.BucketHash(key) - require.True(t, idx < numBuckets) - hits[idx]++ - } - - // Calculate standard deviation. - mean := float64(numItems) / float64(numBuckets) - var cumVariance float64 - for _, bucketHits := range hits { - delta := float64(bucketHits) - mean - cumVariance += (delta * delta) - } - variance := cumVariance / float64(len(hits)) - stddev := math.Sqrt(variance) - t.Logf("mean % 12.2f", mean) - normStddev := stddev / mean - t.Logf("stddev % 10.2f", stddev) - t.Logf("1σ / mean % 7.2f%%", 100*normStddev) - - const failNormStddev = 0.08 - if normStddev > failNormStddev { - t.Logf("FAIL: > %f%%", 100*failNormStddev) - t.Fail() - } else { - t.Logf(" OK: <= %f%%", 100*failNormStddev) - } - - // Print percentiles. 
- sort.Ints(hits) - t.Logf("min % 10d", hits[0]) - t.Logf("p01 % 10d", hits[int(math.Round(0.01*float64(len(hits))))]) - t.Logf("p05 % 10d", hits[int(math.Round(0.05*float64(len(hits))))]) - t.Logf("p10 % 10d", hits[int(math.Round(0.10*float64(len(hits))))]) - t.Logf("p50 % 10d", hits[int(math.Round(0.50*float64(len(hits))))]) - t.Logf("p90 % 10d", hits[int(math.Round(0.90*float64(len(hits))))]) - t.Logf("p95 % 10d", hits[int(math.Round(0.95*float64(len(hits))))]) - t.Logf("p99 % 10d", hits[int(math.Round(0.99*float64(len(hits))))]) - t.Logf("max % 10d", hits[len(hits)-1]) -} diff --git a/compactindex36/fallocate_fake.go b/compactindex36/fallocate_fake.go deleted file mode 100644 index 434ca8b8..00000000 --- a/compactindex36/fallocate_fake.go +++ /dev/null @@ -1,27 +0,0 @@ -package compactindex36 - -import ( - "fmt" - "os" -) - -func fake_fallocate(f *os.File, offset int64, size int64) error { - const blockSize = 4096 - var zero [blockSize]byte - - for size > 0 { - step := size - if step > blockSize { - step = blockSize - } - - if _, err := f.Write(zero[:step]); err != nil { - return fmt.Errorf("failure while generic fallocate: %w", err) - } - - offset += step - size -= step - } - - return nil -} diff --git a/compactindex36/fallocate_generic.go b/compactindex36/fallocate_generic.go deleted file mode 100644 index 6b4a0210..00000000 --- a/compactindex36/fallocate_generic.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !linux - -package compactindex36 - -import ( - "os" -) - -func fallocate(f *os.File, offset int64, size int64) error { - return fake_fallocate(f, offset, size) -} diff --git a/compactindex36/fallocate_linux.go b/compactindex36/fallocate_linux.go deleted file mode 100644 index eeebd9bb..00000000 --- a/compactindex36/fallocate_linux.go +++ /dev/null @@ -1,17 +0,0 @@ -//go:build linux - -package compactindex36 - -import ( - "fmt" - "os" - "syscall" -) - -func fallocate(f *os.File, offset int64, size int64) error { - err := syscall.Fallocate(int(f.Fd()), 0, offset, size) - if err != nil { - return fmt.Errorf("failure while linux fallocate: %w", err) - } - return nil -} diff --git a/compactindex36/query.go b/compactindex36/query.go deleted file mode 100644 index c8e06a5c..00000000 --- a/compactindex36/query.go +++ /dev/null @@ -1,219 +0,0 @@ -package compactindex36 - -// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex -// The following changes have been made: -// - The package has been renamed to `compactindex36` to avoid conflicts with the original package -// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. - -import ( - "errors" - "fmt" - "io" -) - -// DB is a compactindex handle. -type DB struct { - Header - Stream io.ReaderAt - prefetch bool -} - -// Open returns a handle to access a compactindex. -// -// The provided stream must start with the Magic byte sequence. -// Tip: Use io.NewSectionReader to create aligned substreams when dealing with a file that contains multiple indexes. -func Open(stream io.ReaderAt) (*DB, error) { - // Read the static 32-byte header. - // Ignore errors if the read fails after filling the buffer (e.g. EOF). - var fileHeader [headerSize]byte - n, readErr := stream.ReadAt(fileHeader[:], 0) - if n < len(fileHeader) { - // ReadAt must return non-nil error here. 
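- // (The io.ReaderAt contract guarantees a non-nil error whenever n < len(p),
- // so returning readErr directly is safe.)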
- return nil, readErr - } - db := new(DB) - if err := db.Header.Load(&fileHeader); err != nil { - return nil, err - } - db.Stream = stream - return db, nil -} - -func (db *DB) Prefetch(yes bool) { - db.prefetch = yes -} - -// Lookup queries for a key in the index and returns the value (offset), if any. -// -// Returns ErrNotFound if the key is unknown. -func (db *DB) Lookup(key []byte) ([36]byte, error) { - bucket, err := db.LookupBucket(key) - if err != nil { - return Empty, err - } - return bucket.Lookup(key) -} - -// LookupBucket returns a handle to the bucket that might contain the given key. -func (db *DB) LookupBucket(key []byte) (*Bucket, error) { - return db.GetBucket(db.Header.BucketHash(key)) -} - -// GetBucket returns a handle to the bucket at the given index. -func (db *DB) GetBucket(i uint) (*Bucket, error) { - if i >= uint(db.Header.NumBuckets) { - return nil, fmt.Errorf("out of bounds bucket index: %d >= %d", i, db.Header.NumBuckets) - } - - // Fill bucket handle. - bucket := &Bucket{ - BucketDescriptor: BucketDescriptor{ - Stride: db.entryStride(), - OffsetWidth: valueLength(), - }, - } - // Read bucket header. - readErr := bucket.BucketHeader.readFrom(db.Stream, i) - if readErr != nil { - return nil, readErr - } - bucket.Entries = io.NewSectionReader(db.Stream, int64(bucket.FileOffset), int64(bucket.NumEntries)*int64(bucket.Stride)) - if db.prefetch { - // TODO: find good value for numEntriesToPrefetch - numEntriesToPrefetch := minInt64(3_000, int64(bucket.NumEntries)) - prefetchSize := (36 + 3) * numEntriesToPrefetch - buf := make([]byte, prefetchSize) - _, err := bucket.Entries.ReadAt(buf, 0) - if err != nil && !errors.Is(err, io.EOF) { - return nil, err - } - } - return bucket, nil -} - -func minInt64(a, b int64) int64 { - if a < b { - return a - } - return b -} - -func (db *DB) entryStride() uint8 { - hashSize := 3 // TODO remove hardcoded constant - offsetSize := valueLength() - return uint8(hashSize) + offsetSize -} - -func bucketOffset(i uint) int64 { - return headerSize + int64(i)*bucketHdrLen -} - -func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { - var buf [bucketHdrLen]byte - n, err := rd.ReadAt(buf[:], bucketOffset(i)) - if n < len(buf) { - return err - } - b.Load(&buf) - return nil -} - -func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error { - var buf [bucketHdrLen]byte - b.Store(&buf) - _, err := wr.WriteAt(buf[:], bucketOffset(i)) - return err -} - -// Bucket is a database handle pointing to a subset of the index. -type Bucket struct { - BucketDescriptor - Entries *io.SectionReader -} - -// maxEntriesPerBucket is the hardcoded maximum permitted number of entries per bucket. -const maxEntriesPerBucket = 1 << 24 // (16 * stride) MiB - -// targetEntriesPerBucket is the average number of records in each hashtable bucket we aim for. -const targetEntriesPerBucket = 10000 - -// Load retrieves all entries in the hashtable. -func (b *Bucket) Load(batchSize int) ([]Entry, error) { - if batchSize <= 0 { - batchSize = 512 // default to reasonable batch size - } - // TODO bounds check - if b.NumEntries > maxEntriesPerBucket { - return nil, fmt.Errorf("refusing to load bucket with %d entries", b.NumEntries) - } - entries := make([]Entry, 0, b.NumEntries) - - stride := int(b.Stride) - buf := make([]byte, batchSize*stride) - off := int64(0) - for { - // Read another chunk. - n, err := b.Entries.ReadAt(buf, off) - // Decode all entries in it. 
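- // A trailing fragment shorter than one stride is not lost: off only
- // advances past fully decoded entries, so the next ReadAt re-reads it.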
- sub := buf[:n] - for len(sub) >= stride { - entries = append(entries, b.unmarshalEntry(sub)) - sub = sub[stride:] - off += int64(stride) - } - // Handle error. - if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { - break - } else if err != nil { - return nil, err - } - } - - return entries, nil -} - -// TODO: This binary search algo is not optimized for high-latency remotes yet. - -// Lookup queries for a key using binary search. -func (b *Bucket) Lookup(key []byte) ([36]byte, error) { - return b.binarySearch(b.Hash(key)) -} - -var Empty [36]byte - -func (b *Bucket) binarySearch(target uint64) ([36]byte, error) { - low := 0 - high := int(b.NumEntries) - return searchEytzinger(low, high, target, b.loadEntry) -} - -func (b *Bucket) loadEntry(i int) (Entry, error) { - off := int64(i) * int64(b.Stride) - buf := make([]byte, b.Stride) - n, err := b.Entries.ReadAt(buf, off) - if n != len(buf) { - return Entry{}, err - } - return b.unmarshalEntry(buf), nil -} - -// ErrNotFound marks a missing entry. -var ErrNotFound = errors.New("not found") - -func searchEytzinger(min int, max int, x uint64, getter func(int) (Entry, error)) ([36]byte, error) { - var index int - for index < max { - k, err := getter(index) - if err != nil { - return Empty, err - } - if k.Hash == x { - return k.Value, nil - } - index = index<<1 | 1 - if k.Hash < x { - index++ - } - } - return Empty, ErrNotFound -} diff --git a/compactindex36/query_test.go b/compactindex36/query_test.go deleted file mode 100644 index 64efd84d..00000000 --- a/compactindex36/query_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package compactindex36 - -import ( - "bytes" - "errors" - "math/rand" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -type failReader struct{ err error } - -func (rd failReader) ReadAt([]byte, int64) (int, error) { - return 0, rd.err -} - -func TestOpen_ReadFail(t *testing.T) { - err := errors.New("oh no!") - db, dbErr := Open(failReader{err}) - require.Nil(t, db) - require.Same(t, err, dbErr) -} - -func TestOpen_InvalidMagic(t *testing.T) { - var buf [32]byte - rand.Read(buf[:]) - buf[1] = '.' 
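Editor's note: the deleted searchEytzinger above is the interesting part of the lookup. Bucket entries are laid out in breadth-first (Eytzinger) order rather than plain sorted order, so the probe sequence 0, 1–2, 3–6, ... clusters at the front of the bucket — exactly the region the Prefetch path warms up. A standalone sketch of the same walk over an in-memory slice:

```go
// eytzingerSearch mirrors the deleted searchEytzinger: node i has its
// left child at 2i+1 and its right child at 2i+2, so no explicit bounds
// are kept — walking off the end of the slice means the hash is absent.
func eytzingerSearch(hashes []uint64, target uint64) (int, bool) {
	i := 0
	for i < len(hashes) {
		switch {
		case hashes[i] == target:
			return i, true
		case hashes[i] < target:
			i = i<<1 + 2 // go right
		default:
			i = i<<1 + 1 // go left
		}
	}
	return -1, false
}
```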
// make test deterministic - - db, dbErr := Open(bytes.NewReader(buf[:])) - require.Nil(t, db) - require.EqualError(t, dbErr, "not a radiance compactindex file") -} - -func TestOpen_HeaderOnly(t *testing.T) { - buf := [32]byte{ - // Magic - 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x', - // FileSize - 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // NumBuckets - 0x42, 0x00, 0x00, 0x00, - // Padding - 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - } - - db, dbErr := Open(bytes.NewReader(buf[:])) - require.NotNil(t, db) - require.NoError(t, dbErr) - - assert.NotNil(t, db.Stream) - assert.Equal(t, Header{ - FileSize: 0x1337, - NumBuckets: 0x42, - }, db.Header) -} From 73406bef72165c6824ffa515901b41208bbc6416 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 15:14:25 +0100 Subject: [PATCH 05/63] Use compactindexsized instead of compactindex36 --- cmd-rpc-server-car-getBlock.go | 4 ++-- cmd-rpc-server-car-getTransaction.go | 4 ++-- cmd-rpc-server-car.go | 17 +++++++------- cmd-rpc-server-filecoin.go | 6 ++--- cmd-x-index-all.go | 33 ++++++++++------------------ epoch.go | 9 ++++---- index-sig-to-cid.go | 12 +++++----- index-slot-to-cid.go | 12 +++++----- multiepoch-getBlock.go | 4 ++-- multiepoch-getBlockTime.go | 4 ++-- multiepoch-getTransaction.go | 4 ++-- 11 files changed, 49 insertions(+), 60 deletions(-) diff --git a/cmd-rpc-server-car-getBlock.go b/cmd-rpc-server-car-getBlock.go index 751c7111..0d3d6985 100644 --- a/cmd-rpc-server-car-getBlock.go +++ b/cmd-rpc-server-car-getBlock.go @@ -17,7 +17,7 @@ import ( "github.com/ipfs/go-cid" "github.com/ipld/go-car/util" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" solanablockrewards "github.com/rpcpool/yellowstone-faithful/solana-block-rewards" "github.com/sourcegraph/jsonrpc2" @@ -87,7 +87,7 @@ func (ser *deprecatedRPCServer) handleGetBlock(ctx context.Context, conn *reques block, err := ser.GetBlock(WithSubrapghPrefetch(ctx, true), slot) if err != nil { klog.Errorf("failed to get block: %v", err) - if errors.Is(err, compactindex36.ErrNotFound) { + if errors.Is(err, compactindexsized.ErrNotFound) { conn.ReplyWithError( ctx, req.ID, diff --git a/cmd-rpc-server-car-getTransaction.go b/cmd-rpc-server-car-getTransaction.go index 36f65953..795d7a83 100644 --- a/cmd-rpc-server-car-getTransaction.go +++ b/cmd-rpc-server-car-getTransaction.go @@ -4,7 +4,7 @@ import ( "context" "errors" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/sourcegraph/jsonrpc2" "k8s.io/klog/v2" ) @@ -27,7 +27,7 @@ func (ser *deprecatedRPCServer) handleGetTransaction(ctx context.Context, conn * transactionNode, err := ser.GetTransaction(WithSubrapghPrefetch(ctx, true), sig) if err != nil { - if errors.Is(err, compactindex36.ErrNotFound) { + if errors.Is(err, compactindexsized.ErrNotFound) { conn.ReplyRaw( ctx, req.ID, diff --git a/cmd-rpc-server-car.go b/cmd-rpc-server-car.go index 738d419e..61c49389 100644 --- a/cmd-rpc-server-car.go +++ b/cmd-rpc-server-car.go @@ -12,7 +12,6 @@ import ( "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" "github.com/patrickmn/go-cache" - "github.com/rpcpool/yellowstone-faithful/compactindex36" "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" 
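Editor's note: this patch is almost entirely mechanical — every compactindex36 import, *DB type and ErrNotFound check in the hunks above and below switches to its compactindexsized equivalent. The one real signature change is at build time: the value width becomes an explicit argument and Insert takes a plain []byte, as the cmd-x-index-all.go hunks that follow show. Side by side (an illustrative comparison, not compiling code):

```go
// Before: the 36-byte width was baked into the package; the third
// argument was a target file size.
idx, err := compactindex36.NewBuilder(tmpDir, uint(numItems), targetFileSize)

// After: the width is a parameter, so the same package backs the
// 8-, 36- and 48-byte indexes alike.
idx, err := compactindexsized.NewBuilderSized(tmpDir, uint(numItems), 36)
```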
"github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" @@ -90,7 +89,7 @@ func newCmd_rpcServerCar() *cli.Command { } defer slotToCidIndexFile.Close() - slotToCidIndex, err := compactindex36.Open(slotToCidIndexFile) + slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -105,7 +104,7 @@ func newCmd_rpcServerCar() *cli.Command { } defer sigToCidIndexFile.Close() - sigToCidIndex, err := compactindex36.Open(sigToCidIndexFile) + sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -159,8 +158,8 @@ func createAndStartRPCServer_withCar( carReader *carv2.Reader, remoteCarReader ReaderAtCloser, cidToOffsetIndex *compactindexsized.DB, - slotToCidIndex *compactindex36.DB, - sigToCidIndex *compactindex36.DB, + slotToCidIndex *compactindexsized.DB, + sigToCidIndex *compactindexsized.DB, gsfaReader *gsfa.GsfaReader, ) error { if options == nil { @@ -190,8 +189,8 @@ func createAndStartRPCServer_lassie( ctx context.Context, options *RpcServerOptions, lassieWr *lassieWrapper, - slotToCidIndex *compactindex36.DB, - sigToCidIndex *compactindex36.DB, + slotToCidIndex *compactindexsized.DB, + sigToCidIndex *compactindexsized.DB, gsfaReader *gsfa.GsfaReader, ) error { if options == nil { @@ -225,8 +224,8 @@ type deprecatedRPCServer struct { localCarReader *carv2.Reader remoteCarReader ReaderAtCloser cidToOffsetIndex *compactindexsized.DB - slotToCidIndex *compactindex36.DB - sigToCidIndex *compactindex36.DB + slotToCidIndex *compactindexsized.DB + sigToCidIndex *compactindexsized.DB gsfaReader *gsfa.GsfaReader cidToBlockCache *cache.Cache // TODO: prevent OOM options *RpcServerOptions diff --git a/cmd-rpc-server-filecoin.go b/cmd-rpc-server-filecoin.go index 8e32a985..44c3fb97 100644 --- a/cmd-rpc-server-filecoin.go +++ b/cmd-rpc-server-filecoin.go @@ -4,7 +4,7 @@ import ( "fmt" "github.com/davecgh/go-spew/spew" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" "github.com/urfave/cli/v2" ) @@ -61,7 +61,7 @@ func newCmd_rpcServerFilecoin() *cli.Command { } defer slotToCidIndexFile.Close() - slotToCidIndex, err := compactindex36.Open(slotToCidIndexFile) + slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) if err != nil { return fmt.Errorf("failed to open slot-to-cid index: %w", err) } @@ -76,7 +76,7 @@ func newCmd_rpcServerFilecoin() *cli.Command { } defer sigToCidIndexFile.Close() - sigToCidIndex, err := compactindex36.Open(sigToCidIndexFile) + sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) if err != nil { return fmt.Errorf("failed to open sig-to-cid index: %w", err) } diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index c724ef94..c047b32c 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -17,7 +17,6 @@ import ( "github.com/ipfs/go-cid" carv1 "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex36" "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" @@ -131,10 +130,6 @@ func createAllIndexes( rootCID := rd.header.Roots[0] klog.Infof("Getting car file size") - targetFileSize, err := getFileSize(carPath) - if err != nil { - return nil, fmt.Errorf("failed to get car file size: %w", err) - } klog.Infof("Counting 
items in car file...") numItems, err := carCountItemsByFirstByte(carPath) @@ -167,7 +162,6 @@ func createAllIndexes( tmpDir, indexDir, numItems[byte(iplddecoders.KindBlock)], - targetFileSize, ) if err != nil { return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err) @@ -178,7 +172,6 @@ func createAllIndexes( tmpDir, indexDir, numItems[byte(iplddecoders.KindTransaction)], - targetFileSize, ) if err != nil { return nil, fmt.Errorf("failed to create sig_to_cid index: %w", err) @@ -417,23 +410,22 @@ type Builder_SignatureToCid struct { tmpDir string indexDir string carPath string - index *compactindex36.Builder + index *compactindexsized.Builder } func NewBuilder_SignatureToCid( tmpDir string, indexDir string, numItems uint64, - targetFileSize uint64, ) (*Builder_SignatureToCid, error) { tmpDir = filepath.Join(tmpDir, "index-sig-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to create sig_to_cid tmp dir: %w", err) } - index, err := compactindex36.NewBuilder( + index, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), - (targetFileSize), + 36, ) if err != nil { return nil, fmt.Errorf("failed to create sig_to_cid index: %w", err) @@ -448,7 +440,7 @@ func NewBuilder_SignatureToCid( func (b *Builder_SignatureToCid) Put(signature solana.Signature, cid cid.Cid) error { var buf [36]byte copy(buf[:], cid.Bytes()[:36]) - return b.index.Insert(signature[:], buf) + return b.index.Insert(signature[:], buf[:]) } func (b *Builder_SignatureToCid) Close() error { @@ -475,23 +467,22 @@ type Builder_SlotToCid struct { tmpDir string indexDir string carPath string - index *compactindex36.Builder + index *compactindexsized.Builder } func NewBuilder_SlotToCid( tmpDir string, indexDir string, numItems uint64, - targetFileSize uint64, ) (*Builder_SlotToCid, error) { tmpDir = filepath.Join(tmpDir, "index-slot-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to create slot_to_cid tmp dir: %w", err) } - index, err := compactindex36.NewBuilder( + index, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), - (targetFileSize), + 36, ) if err != nil { return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err) @@ -506,7 +497,7 @@ func NewBuilder_SlotToCid( func (b *Builder_SlotToCid) Put(slot uint64, cid cid.Cid) error { var buf [36]byte copy(buf[:], cid.Bytes()[:36]) - return b.index.Insert(uint64ToLeBytes(slot), buf) + return b.index.Insert(uint64ToLeBytes(slot), buf[:]) } func (b *Builder_SlotToCid) Close() error { @@ -743,7 +734,7 @@ func (i *Index_CidToOffset) Close() error { type Index_SlotToCid struct { file *os.File - db *compactindex36.DB + db *compactindexsized.DB } func OpenIndex_SlotToCid( @@ -754,7 +745,7 @@ func OpenIndex_SlotToCid( return nil, fmt.Errorf("failed to open index file: %w", err) } - index, err := compactindex36.Open(indexFile) + index, err := compactindexsized.Open(indexFile) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } @@ -779,7 +770,7 @@ func (i *Index_SlotToCid) Close() error { type Index_SigToCid struct { file *os.File - db *compactindex36.DB + db *compactindexsized.DB } func OpenIndex_SigToCid( @@ -790,7 +781,7 @@ func OpenIndex_SigToCid( return nil, fmt.Errorf("failed to open index file: %w", err) } - index, err := compactindex36.Open(indexFile) + index, err := 
compactindexsized.Open(indexFile) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } diff --git a/epoch.go b/epoch.go index f1cf9e5e..53ef80ff 100644 --- a/epoch.go +++ b/epoch.go @@ -17,7 +17,6 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/patrickmn/go-cache" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex36" "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" @@ -36,8 +35,8 @@ type Epoch struct { remoteCarReader ReaderAtCloser remoteCarHeaderSize uint64 cidToOffsetIndex *compactindexsized.DB - slotToCidIndex *compactindex36.DB - sigToCidIndex *compactindex36.DB + slotToCidIndex *compactindexsized.DB + sigToCidIndex *compactindexsized.DB sigExists *bucketteer.Reader gsfaReader *gsfa.GsfaReader cidToNodeCache *cache.Cache // TODO: prevent OOM @@ -139,7 +138,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, slotToCidIndexFile.Close) - slotToCidIndex, err := compactindex36.Open(slotToCidIndexFile) + slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) if err != nil { return nil, fmt.Errorf("failed to open slot-to-cid index: %w", err) } @@ -160,7 +159,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, sigToCidIndexFile.Close) - sigToCidIndex, err := compactindex36.Open(sigToCidIndexFile) + sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) if err != nil { return nil, fmt.Errorf("failed to open sig-to-cid index: %w", err) } diff --git a/index-sig-to-cid.go b/index-sig-to-cid.go index 7f7b0623..378a3019 100644 --- a/index-sig-to-cid.go +++ b/index-sig-to-cid.go @@ -13,7 +13,7 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" ) @@ -58,10 +58,10 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind } klog.Infof("Creating builder with %d items", numItems) - c2o, err := compactindex36.NewBuilder( + c2o, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), // TODO: what if the number of real items is less than this? 
- (0), + 36, ) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) @@ -90,7 +90,7 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind var buf [36]byte copy(buf[:], c.Bytes()[:36]) - err = c2o.Insert(sig[:], buf) + err = c2o.Insert(sig[:], buf[:]) if err != nil { return fmt.Errorf("failed to put cid to offset: %w", err) } @@ -168,7 +168,7 @@ func VerifyIndex_sig2cid(ctx context.Context, carPath string, indexFilePath stri } defer indexFile.Close() - c2o, err := compactindex36.Open(indexFile) + c2o, err := compactindexsized.Open(indexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -299,7 +299,7 @@ func VerifyIndex_sigExists(ctx context.Context, carPath string, indexFilePath st return nil } -func findCidFromSignature(db *compactindex36.DB, sig solana.Signature) (cid.Cid, error) { +func findCidFromSignature(db *compactindexsized.DB, sig solana.Signature) (cid.Cid, error) { bucket, err := db.LookupBucket(sig[:]) if err != nil { return cid.Cid{}, fmt.Errorf("failed to lookup bucket for %s: %w", sig, err) diff --git a/index-slot-to-cid.go b/index-slot-to-cid.go index 13030c2c..636763be 100644 --- a/index-slot-to-cid.go +++ b/index-slot-to-cid.go @@ -11,7 +11,7 @@ import ( "github.com/dustin/go-humanize" "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" ) @@ -56,10 +56,10 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in } klog.Infof("Creating builder with %d items", numItems) - c2o, err := compactindex36.NewBuilder( + c2o, err := compactindexsized.NewBuilderSized( tmpDir, uint(numItems), // TODO: what if the number of real items is less than this? - (0), + 36, ) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) @@ -87,7 +87,7 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in var buf [36]byte copy(buf[:], c.Bytes()[:36]) - err = c2o.Insert(slotBytes, buf) + err = c2o.Insert(slotBytes, buf[:]) if err != nil { return fmt.Errorf("failed to put cid to offset: %w", err) } @@ -165,7 +165,7 @@ func VerifyIndex_slot2cid(ctx context.Context, carPath string, indexFilePath str } defer indexFile.Close() - c2o, err := compactindex36.Open(indexFile) + c2o, err := compactindexsized.Open(indexFile) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -214,7 +214,7 @@ func uint64ToLeBytes(n uint64) []byte { } // findCidFromSlot finds the CID for the given slot number in the given index. 
-func findCidFromSlot(db *compactindex36.DB, slotNum uint64) (cid.Cid, error) { +func findCidFromSlot(db *compactindexsized.DB, slotNum uint64) (cid.Cid, error) { slotBytes := uint64ToLeBytes(uint64(slotNum)) bucket, err := db.LookupBucket(slotBytes) if err != nil { diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go index d408a055..6b8e6f6f 100644 --- a/multiepoch-getBlock.go +++ b/multiepoch-getBlock.go @@ -16,7 +16,7 @@ import ( "github.com/ipfs/go-cid" "github.com/ipld/go-car/util" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" solanablockrewards "github.com/rpcpool/yellowstone-faithful/solana-block-rewards" "github.com/sourcegraph/jsonrpc2" @@ -54,7 +54,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex block, err := epochHandler.GetBlock(WithSubrapghPrefetch(ctx, true), slot) if err != nil { - if errors.Is(err, compactindex36.ErrNotFound) { + if errors.Is(err, compactindexsized.ErrNotFound) { return &jsonrpc2.Error{ Code: CodeNotFound, Message: fmt.Sprintf("Slot %d was skipped, or missing in long-term storage", slot), diff --git a/multiepoch-getBlockTime.go b/multiepoch-getBlockTime.go index a9b87b71..e6528c99 100644 --- a/multiepoch-getBlockTime.go +++ b/multiepoch-getBlockTime.go @@ -5,7 +5,7 @@ import ( "errors" "fmt" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/sourcegraph/jsonrpc2" ) @@ -30,7 +30,7 @@ func (multi *MultiEpoch) handleGetBlockTime(ctx context.Context, conn *requestCo block, err := epochHandler.GetBlock(WithSubrapghPrefetch(ctx, false), blockNum) if err != nil { - if errors.Is(err, compactindex36.ErrNotFound) { + if errors.Is(err, compactindexsized.ErrNotFound) { return &jsonrpc2.Error{ Code: CodeNotFound, Message: fmt.Sprintf("Slot %d was skipped, or missing in long-term storage", blockNum), diff --git a/multiepoch-getTransaction.go b/multiepoch-getTransaction.go index e8119c3a..5d055a23 100644 --- a/multiepoch-getTransaction.go +++ b/multiepoch-getTransaction.go @@ -9,7 +9,7 @@ import ( "github.com/gagliardetto/solana-go" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindex36" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/sourcegraph/jsonrpc2" "k8s.io/klog/v2" ) @@ -163,7 +163,7 @@ func (multi *MultiEpoch) handleGetTransaction(ctx context.Context, conn *request transactionNode, err := epochHandler.GetTransaction(WithSubrapghPrefetch(ctx, true), sig) if err != nil { - if errors.Is(err, compactindex36.ErrNotFound) { + if errors.Is(err, compactindexsized.ErrNotFound) { // NOTE: solana just returns null here in case of transaction not found return &jsonrpc2.Error{ Code: CodeNotFound, From 77e15b9c649cacc06bd83d70ab0c09e622ead1d3 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 18:34:06 +0100 Subject: [PATCH 06/63] compactindexsized: add arbitrary metadata in the header --- compactindexsized/build.go | 43 ++++++-- compactindexsized/build36_test.go | 95 ++++++++++-------- compactindexsized/build48_test.go | 88 ++++++++++------- compactindexsized/build8_test.go | 80 ++++++++------- compactindexsized/compactindex.go | 67 ++++++++----- compactindexsized/header.go | 158 ++++++++++++++++++++++++++++++ compactindexsized/header_test.go | 79 +++++++++++++++ 
compactindexsized/query.go | 45 ++++++--- compactindexsized/query_test.go | 56 +++++++++-- 9 files changed, 552 insertions(+), 159 deletions(-) create mode 100644 compactindexsized/header.go create mode 100644 compactindexsized/header_test.go diff --git a/compactindexsized/build.go b/compactindexsized/build.go index 3ece8ce2..0f095478 100644 --- a/compactindexsized/build.go +++ b/compactindexsized/build.go @@ -21,9 +21,10 @@ import ( // Builder creates new compactindex files. type Builder struct { Header - dir string - closers []io.Closer - buckets []tempBucket + dir string + headerSize int64 + closers []io.Closer + buckets []tempBucket } // NewBuilderSized creates a new index builder. @@ -85,8 +86,25 @@ func NewBuilderSized( }, nil } -func (b *Builder) SetKind(kind uint8) { - b.Header.Kind = kind +// SetKind sets the kind of the index. +// If the kind is already set, it is overwritten. +func (b *Builder) SetKind(kind []byte) error { + // check if kind is too long + if len(kind) > MaxKeySize { + return fmt.Errorf("kind is too long") + } + // check if kind is empty + if len(kind) == 0 { + return fmt.Errorf("kind is empty") + } + // check if kind is already set + if b.Header.Meta.Count(KeyKind) > 0 { + // remove kind + b.Header.Meta.Remove(KeyKind) + } + // set kind + b.Header.Meta.Add(KeyKind, kind) + return nil } func (b *Builder) getValueSize() int { @@ -109,13 +127,21 @@ func (b *Builder) Insert(key []byte, value []byte) error { func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) { // TODO support in-place writing. + defer func() { + f.Sync() + }() + // Write header. - var headerBuf [headerSize]byte - b.Header.Store(&headerBuf) - _, err = f.Write(headerBuf[:]) + headerBuf := b.Header.Bytes() + headerSize := int64(len(headerBuf)) + numWroteHeader, err := f.Write(headerBuf[:]) if err != nil { return fmt.Errorf("failed to write header: %w", err) } + if numWroteHeader != len(headerBuf) { + return fmt.Errorf("failed to write header: wrote %d bytes, expected %d", numWroteHeader, len(headerBuf)) + } + b.headerSize = headerSize // Create hole to leave space for bucket header table. bucketTableLen := int64(b.NumBuckets) * bucketHdrLen err = fallocate(f, headerSize, bucketTableLen) @@ -169,6 +195,7 @@ func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error { Stride: b.getEntryStride(), OffsetWidth: uint8(b.getValueSize()), } + desc.BucketHeader.headerSize = b.headerSize // Write entries to file. wr := bufio.NewWriter(f) entryBuf := make([]byte, b.getEntryStride()) // TODO remove hardcoded constant diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index 2d3d7394..950e7759 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -163,8 +163,8 @@ func TestBuilder36(t *testing.T) { assert.Len(t, builder.buckets, 3) defer builder.Close() - kindSomething := uint8(0x42) - builder.SetKind(kindSomething) + kindSomething := []byte("something") + require.NoError(t, builder.SetKind(kindSomething)) // Insert a few entries. 
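Editor's note: with this patch the kind tag is no longer a fixed header byte — SetKind stores it as an ordinary metadata pair under the reserved "kind" key, validating the length and replacing any previous value. A hedged sketch of the resulting behaviour (tmpDir and numItems stand in for whatever the caller has; the log and fmt imports are assumed):

```go
builder, err := NewBuilderSized(tmpDir, uint(numItems), 36)
if err != nil {
	log.Fatal(err)
}
if err := builder.SetKind([]byte("something")); err != nil {
	log.Fatal(err)
}
// Calling SetKind again replaces, rather than appends, the entry:
if err := builder.SetKind([]byte("something-else")); err != nil {
	log.Fatal(err)
}
fmt.Println(builder.Header.Meta.Count(KeyKind)) // 1
```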
keys := []string{"hello", "world", "blub", "foo"} @@ -193,80 +193,84 @@ func TestBuilder36(t *testing.T) { expected := concatBytes( // --- File header // magic - []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0 + Magic[:], + // header size + i32tob(29), // value size (36 bytes in this case) - []byte{0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 1 - // num buckets - []byte{0x03, 0x00, 0x00, 0x00}, // 2 - []byte{ - 0x01, // version - 0x42, // kind - // padding - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, // 3 + []byte{0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + + []byte{0x03, 0x00, 0x00, 0x00}, // num buckets + []byte{1}, // version + + []byte{1}, // how many kv pairs + + []byte{4}, // key size + []byte("kind"), // key + []byte{9}, // value size + []byte("something"), // value // --- Bucket header 0 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 4 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 5 + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 6 + []byte{0x03}, // padding - []byte{0x00}, // 7 + []byte{0x00}, // file offset - []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 + []byte{89, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 1 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 9 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 10 + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 11 + []byte{0x03}, // padding - []byte{0x00}, // 12 + []byte{0x00}, // file offset - []byte{0x77, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13 + []byte{128, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 2 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 14 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x02, 0x00, 0x00, 0x00}, // 15 + []byte{0x02, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 16 + []byte{0x03}, // padding - []byte{0x00}, // 17 + []byte{0x00}, // file offset - []byte{0x9e, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18 + []byte{167, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 0 // hash - []byte{0xe2, 0xdb, 0x55}, // 19 + []byte{0xe2, 0xdb, 0x55}, // value - []byte{0x1, 0x71, 0x12, 0x20, 0x20, 0xea, 0xb3, 0xf, 0x58, 0xbe, 0x69, 0x1, 0x7f, 0x2, 0x42, 0x91, 0xfa, 0xa3, 0xdc, 0xf4, 0xc7, 0xf2, 0x2f, 0x56, 0x12, 0xa7, 0xb2, 0x1, 0x6f, 0x48, 0xfa, 0x17, 0x5e, 0x53, 0xda, 0x6b}, // 20 + []byte{0x1, 0x71, 0x12, 0x20, 0x20, 0xea, 0xb3, 0xf, 0x58, 0xbe, 0x69, 0x1, 0x7f, 0x2, 0x42, 0x91, 0xfa, 0xa3, 0xdc, 0xf4, 0xc7, 0xf2, 0x2f, 0x56, 0x12, 0xa7, 0xb2, 0x1, 0x6f, 0x48, 0xfa, 0x17, 0x5e, 0x53, 0xda, 0x6b}, // --- Bucket 2 // hash - []byte{0x92, 0xcd, 0xbb}, // 21 + []byte{0x92, 0xcd, 0xbb}, // value - []byte{0x01, 0x71, 0x12, 0x20, 0x9c, 0xd0, 0x17, 0x9a, 0x19, 0x9c, 0xd9, 0x51, 0x0a, 0xfb, 0x92, 0x96, 0xcf, 0xd2, 0x9f, 0x77, 0x8a, 0x00, 0x40, 0x32, 0x8b, 0xf8, 0xff, 0x06, 0x46, 0x21, 0xb9, 0x3c, 0x57, 0xa5, 0xdd, 0x0f}, // 22 + []byte{0x01, 0x71, 0x12, 0x20, 0x9c, 0xd0, 0x17, 0x9a, 0x19, 0x9c, 0xd9, 0x51, 0x0a, 0xfb, 0x92, 0x96, 0xcf, 0xd2, 0x9f, 0x77, 0x8a, 0x00, 0x40, 0x32, 0x8b, 0xf8, 0xff, 0x06, 0x46, 0x21, 0xb9, 0x3c, 0x57, 0xa5, 0xdd, 0x0f}, // hash - []byte{0x98, 0x3d, 0xbd}, // 25 + []byte{0x98, 0x3d, 0xbd}, // value - []byte{0x01, 0x71, 0x12, 0x20, 0x1b, 0x79, 0x02, 0x6c, 0x3d, 0xdc, 0x74, 0x0c, 0x33, 0x71, 0xf0, 0x7a, 0x4b, 0x80, 0xb0, 0x43, 0x0c, 0x82, 0x0a, 0x88, 0x72, 0x13, 0xa6, 0x94, 0x72, 0xc9, 0xd1, 0x8a, 0x2d, 0xc7, 0x88, 0x13}, // 26 + []byte{0x01, 0x71, 0x12, 0x20, 0x1b, 0x79, 0x02, 0x6c, 0x3d, 0xdc, 0x74, 0x0c, 
0x33, 0x71, 0xf0, 0x7a, 0x4b, 0x80, 0xb0, 0x43, 0x0c, 0x82, 0x0a, 0x88, 0x72, 0x13, 0xa6, 0x94, 0x72, 0xc9, 0xd1, 0x8a, 0x2d, 0xc7, 0x88, 0x13}, // hash - []byte{0xe3, 0x09, 0x6b}, // 23 + []byte{0xe3, 0x09, 0x6b}, // value - []byte{0x1, 0x71, 0x12, 0x20, 0x60, 0x67, 0x54, 0xe4, 0x4c, 0x5, 0x99, 0x6f, 0xf9, 0x60, 0x66, 0x27, 0x66, 0xd, 0xa0, 0xda, 0x4f, 0x60, 0x10, 0x6, 0x2, 0x82, 0xf9, 0x46, 0x3d, 0xcc, 0xde, 0x28, 0x80, 0x72, 0x41, 0x67}, // 24 + []byte{0x1, 0x71, 0x12, 0x20, 0x60, 0x67, 0x54, 0xe4, 0x4c, 0x5, 0x99, 0x6f, 0xf9, 0x60, 0x66, 0x27, 0x66, 0xd, 0xa0, 0xda, 0x4f, 0x60, 0x10, 0x6, 0x2, 0x82, 0xf9, 0x46, 0x3d, 0xcc, 0xde, 0x28, 0x80, 0x72, 0x41, 0x67}, ) assert.Equal(t, expected, buf) { splitSizes := []int{ // --- File header - 8, 8, 4, 12, + 8, 4, 8, 4, 1, 1, 1, 4, 1, 9, // --- Bucket header 0 4, 4, 1, 1, 6, // --- Bucket header 1 @@ -299,14 +303,20 @@ func TestBuilder36(t *testing.T) { require.NoError(t, err, "Failed to open generated index") require.NotNil(t, db) - assert.Equal(t, kindSomething, db.Header.Kind) assert.Equal(t, kindSomething, db.GetKind()) // File header assertions. assert.Equal(t, Header{ ValueSize: valueSize, NumBuckets: numBuckets, - Kind: kindSomething, + Meta: Meta{ + KeyVals: []KV{ + { + Key: KeyKind, + Value: kindSomething, + }, + }, + }, }, db.Header) // Get bucket handles. @@ -326,7 +336,8 @@ func TestBuilder36(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 0x50, + FileOffset: 89, + headerSize: 41, }, Stride: 3 + valueSize, // 3 + 36 OffsetWidth: valueSize, @@ -335,13 +346,15 @@ func TestBuilder36(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 119, + FileOffset: 128, + headerSize: 41, }, buckets[1].BucketHeader) assert.Equal(t, BucketHeader{ HashDomain: 0x00, NumEntries: 2, HashLen: 3, - FileOffset: 158, + FileOffset: 167, + headerSize: 41, }, buckets[2].BucketHeader) assert.Equal(t, uint8(3+valueSize), buckets[2].Stride) diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go index 403fe597..b6e39e4d 100644 --- a/compactindexsized/build48_test.go +++ b/compactindexsized/build48_test.go @@ -136,6 +136,9 @@ func TestBuilder48(t *testing.T) { assert.Len(t, builder.buckets, 3) defer builder.Close() + kindSomething48 := []byte("something48") + require.NoError(t, builder.SetKind(kindSomething48)) + // Insert a few entries. 
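Editor's note: the expected bytes above are easiest to read with the header-length word decoded — the u32 after the magic counts everything that follows it. For the 36-byte fixture (a single "kind"/"something" pair) the arithmetic works out as below; the 48-byte fixture (payload 31, header 43) and the 8-byte fixture (14 and 26) that follow obey the same formula.

```go
// Worked size check for the 36-byte fixture above.
payload := 8 /* ValueSize */ + 4 /* NumBuckets */ + 1 /* version */ +
	1 /* kv count */ +
	(1 + len("kind")) + (1 + len("something")) // the single kv pair
fmt.Println(payload)         // 29 — the i32tob(29) header-size word
fmt.Println(8 + 4 + payload) // 41 — headerSize, where bucket headers begin
fmt.Println(41 + 3*16)       // 89 — bucket 0's FileOffset, past 3 16-byte bucket headers
```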
keys := []string{"hello", "world", "blub", "foo"} for i, key := range keys { @@ -163,67 +166,75 @@ func TestBuilder48(t *testing.T) { expected := concatBytes( // --- File header // magic - []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0 - // value size (48 bytes in this case) - []byte{0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 1 - // num buckets - []byte{0x03, 0x00, 0x00, 0x00}, // 2 - // padding - []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 3 + []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, + // header size + i32tob(31), + // value size (36 bytes in this case) + []byte{48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + + []byte{0x03, 0x00, 0x00, 0x00}, // num buckets + []byte{1}, // version + + []byte{1}, // how many kv pairs + + []byte{4}, // key size + []byte("kind"), // key + []byte{11}, // value size + []byte("something48"), // value // --- Bucket header 0 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 4 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 5 + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 6 + []byte{0x03}, // padding - []byte{0x00}, // 7 + []byte{0x00}, // file offset - []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 + []byte{91, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 1 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 9 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x01, 0x00, 0x00, 0x00}, // 10 + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 11 + []byte{0x03}, // padding - []byte{0x00}, // 12 + []byte{0x00}, // file offset - []byte{0x83, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13 + []byte{142, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 2 // hash domain - []byte{0x00, 0x00, 0x00, 0x00}, // 14 + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - []byte{0x02, 0x00, 0x00, 0x00}, // 15 + []byte{0x02, 0x00, 0x00, 0x00}, // hash len - []byte{0x03}, // 16 + []byte{0x03}, // padding - []byte{0x00}, // 17 + []byte{0x00}, // file offset - []byte{0xb6, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18 + []byte{193, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 0 // hash - []byte{0xe2, 0xdb, 0x55}, // 19 + []byte{0xe2, 0xdb, 0x55}, // value - []byte{0xcc, 0x0a, 0xd4, 0x66, 0x32, 0x50, 0xc3, 0x96, 0x8b, 0x5c, 0x77, 0x7e, 0xb8, 0xfd, 0x9c, 0x78, 0xea, 0xfb, 0xd3, 0x4f, 0x1a, 0x59, 0x4e, 0xda, 0x1d, 0x90, 0x2a, 0xcd, 0x79, 0xb6, 0x0b, 0x2d, 0xea, 0x76, 0x36, 0x54, 0x65, 0xe6, 0x53, 0x1b, 0x70, 0x38, 0x84, 0xb2, 0xbf, 0x5d, 0xf9, 0x30}, // 20 + []byte{0xcc, 0x0a, 0xd4, 0x66, 0x32, 0x50, 0xc3, 0x96, 0x8b, 0x5c, 0x77, 0x7e, 0xb8, 0xfd, 0x9c, 0x78, 0xea, 0xfb, 0xd3, 0x4f, 0x1a, 0x59, 0x4e, 0xda, 0x1d, 0x90, 0x2a, 0xcd, 0x79, 0xb6, 0x0b, 0x2d, 0xea, 0x76, 0x36, 0x54, 0x65, 0xe6, 0x53, 0x1b, 0x70, 0x38, 0x84, 0xb2, 0xbf, 0x5d, 0xf9, 0x30}, // --- Bucket 2 // hash - []byte{0x92, 0xcd, 0xbb}, // 21 + []byte{0x92, 0xcd, 0xbb}, // value - []byte{0x7c, 0x18, 0x51, 0xd7, 0x63, 0x83, 0xf9, 0xc5, 0xaa, 0x48, 0x3c, 0x8e, 0xff, 0xf0, 0xf1, 0xab, 0xee, 0xda, 0xb0, 0x2f, 0x92, 0xcc, 0xb8, 0x78, 0x11, 0x5b, 0xa0, 0xb9, 0xfa, 0xf5, 0x2e, 0xb4, 0xd7, 0x10, 0x2d, 0x7b, 0xe5, 0xb6, 0x9f, 0xd0, 0xb1, 0xff, 0xd0, 0xf2, 0xef, 0xcd, 0x72, 0x1a}, // 22 + []byte{0x7c, 0x18, 0x51, 0xd7, 0x63, 0x83, 0xf9, 0xc5, 0xaa, 0x48, 0x3c, 0x8e, 0xff, 0xf0, 0xf1, 0xab, 0xee, 0xda, 0xb0, 0x2f, 0x92, 0xcc, 0xb8, 0x78, 0x11, 0x5b, 0xa0, 0xb9, 0xfa, 0xf5, 0x2e, 0xb4, 0xd7, 0x10, 0x2d, 0x7b, 0xe5, 0xb6, 0x9f, 0xd0, 0xb1, 0xff, 0xd0, 0xf2, 0xef, 0xcd, 0x72, 0x1a}, // hash - 
[]byte{0x98, 0x3d, 0xbd}, // 23 + []byte{0x98, 0x3d, 0xbd}, // value - []byte{0xbb, 0x12, 0x08, 0x5f, 0x73, 0xee, 0x39, 0x69, 0x9f, 0x6e, 0x5a, 0xd8, 0x21, 0x2d, 0x43, 0xbe, 0x01, 0xc1, 0x3f, 0xc5, 0xfa, 0x86, 0x09, 0x7e, 0x97, 0x61, 0x59, 0xb8, 0xc9, 0x16, 0x47, 0xe3, 0x18, 0xfe, 0x52, 0x1e, 0xa2, 0x98, 0x59, 0x83, 0x16, 0x88, 0x5b, 0x46, 0x83, 0x2b, 0xa3, 0x2a}, // 24 + []byte{0xbb, 0x12, 0x08, 0x5f, 0x73, 0xee, 0x39, 0x69, 0x9f, 0x6e, 0x5a, 0xd8, 0x21, 0x2d, 0x43, 0xbe, 0x01, 0xc1, 0x3f, 0xc5, 0xfa, 0x86, 0x09, 0x7e, 0x97, 0x61, 0x59, 0xb8, 0xc9, 0x16, 0x47, 0xe3, 0x18, 0xfe, 0x52, 0x1e, 0xa2, 0x98, 0x59, 0x83, 0x16, 0x88, 0x5b, 0x46, 0x83, 0x2b, 0xa3, 0x2a}, // hash - []byte{0xe3, 0x09, 0x6b}, // 25 + []byte{0xe3, 0x09, 0x6b}, // value []byte{0x0b, 0x2f, 0xc2, 0x4d, 0xc5, 0x98, 0x8b, 0x13, 0xd9, 0x17, 0xf8, 0xc1, 0xb8, 0x59, 0xd4, 0x24, 0xad, 0xef, 0xe5, 0xb6, 0xb8, 0xb9, 0xba, 0x01, 0x9c, 0xe0, 0x7f, 0x96, 0x25, 0x83, 0xd6, 0xbf, 0xa3, 0xb2, 0xf2, 0x29, 0xb9, 0xa1, 0xa1, 0x92, 0xd0, 0xc0, 0xe5, 0x06, 0x94, 0xea, 0x6c, 0xb3}, // 26 ) @@ -232,7 +243,7 @@ func TestBuilder48(t *testing.T) { { splitSizes := []int{ // --- File header - 8, 8, 4, 12, + 8, 4, 8, 4, 1, 1, 1, 4, 1, 11, // --- Bucket header 0 4, 4, 1, 1, 6, // --- Bucket header 1 @@ -269,6 +280,14 @@ func TestBuilder48(t *testing.T) { assert.Equal(t, Header{ ValueSize: valueSize, NumBuckets: numBuckets, + Meta: Meta{ + KeyVals: []KV{ + { + Key: KeyKind, + Value: kindSomething48, + }, + }, + }, }, db.Header) // Get bucket handles. @@ -288,7 +307,8 @@ func TestBuilder48(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 0x50, + FileOffset: 91, + headerSize: 43, }, Stride: 3 + valueSize, // 3 + 36 OffsetWidth: valueSize, @@ -297,13 +317,15 @@ func TestBuilder48(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 131, + FileOffset: 142, + headerSize: 43, }, buckets[1].BucketHeader) assert.Equal(t, BucketHeader{ HashDomain: 0x00, NumEntries: 2, HashLen: 3, - FileOffset: 182, + FileOffset: 193, + headerSize: 43, }, buckets[2].BucketHeader) assert.Equal(t, uint8(3+valueSize), buckets[2].Stride) diff --git a/compactindexsized/build8_test.go b/compactindexsized/build8_test.go index b7901426..b658338d 100644 --- a/compactindexsized/build8_test.go +++ b/compactindexsized/build8_test.go @@ -26,6 +26,12 @@ func btoi(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } +func i32tob(i uint32) []byte { + b := make([]byte, 4) + binary.LittleEndian.PutUint32(b, i) + return b +} + func TestBuilder8(t *testing.T) { const numBuckets = 3 const valueSize = 8 @@ -50,75 +56,78 @@ func TestBuilder8(t *testing.T) { // Seal index. require.NoError(t, builder.Seal(context.TODO(), targetFile)) + require.NoError(t, targetFile.Sync()) // Assert binary content. 
- buf, err := os.ReadFile(targetFile.Name()) + actual, err := os.ReadFile(targetFile.Name()) require.NoError(t, err) - assert.Equal(t, []byte{ + assert.Equal(t, concatBytes( // --- File header // magic - 0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78, + Magic[:], + // header size + i32tob(14), // value size - 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{8, 0, 0, 0, 0, 0, 0, 0}, // num buckets - 0x03, 0x00, 0x00, 0x00, - // padding - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{3, 0, 0, 0}, + []byte{1}, // version + []byte{0}, // how many kv pairs // --- Bucket header 0 // hash domain - 0x00, 0x00, 0x00, 0x00, + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - 0x01, 0x00, 0x00, 0x00, + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - 0x03, + []byte{0x03}, // padding - 0x00, + []byte{0x00}, // file offset - 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{74, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 1 // hash domain - 0x00, 0x00, 0x00, 0x00, + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - 0x01, 0x00, 0x00, 0x00, + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - 0x03, + []byte{0x03}, // padding - 0x00, + []byte{0x00}, // file offset - 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{85, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 2 // hash domain - 0x00, 0x00, 0x00, 0x00, + []byte{0x00, 0x00, 0x00, 0x00}, // num entries - 0x01, 0x00, 0x00, 0x00, + []byte{0x01, 0x00, 0x00, 0x00}, // hash len - 0x03, + []byte{0x03}, // padding - 0x00, + []byte{0x00}, // file offset - 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{96, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 0 // hash - 0xe2, 0xdb, 0x55, + []byte{0xe2, 0xdb, 0x55}, // value - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 1 // hash - 0x92, 0xcd, 0xbb, + []byte{0x92, 0xcd, 0xbb}, // value - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 2 // hash - 0xe3, 0x09, 0x6b, + []byte{0xe3, 0x09, 0x6b}, // value - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }, buf) + []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + ), actual) // Reset file offset. _, seekErr := targetFile.Seek(0, io.SeekStart) @@ -152,22 +161,27 @@ func TestBuilder8(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 0x50, + FileOffset: 74, + headerSize: 26, }, Stride: 11, // 3 + 8 OffsetWidth: 8, }, buckets[0].BucketDescriptor) + assert.Equal(t, BucketHeader{ HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 0x5b, + FileOffset: 85, + headerSize: 26, }, buckets[1].BucketHeader) + assert.Equal(t, BucketHeader{ HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 0x66, + FileOffset: 96, + headerSize: 26, }, buckets[2].BucketHeader) // Test lookups. @@ -250,7 +264,7 @@ func TestBuilder8_Random(t *testing.T) { value, err := bucket.Lookup(key) require.NoError(t, err) - require.True(t, btoi(value) > 0) + require.Greater(t, btoi(value), uint64(0), "The found value must be > 0") } t.Logf("Queried %d items", queries) t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds()) diff --git a/compactindexsized/compactindex.go b/compactindexsized/compactindex.go index 18aa43ca..ee242e20 100644 --- a/compactindexsized/compactindex.go +++ b/compactindexsized/compactindex.go @@ -84,6 +84,7 @@ package compactindexsized // - The values it indexes are N-byte values instead of 8-byte values. 
This allows to index CIDs (in particular sha256+CBOR CIDs), and other values, directly. import ( + "bytes" "encoding/binary" "fmt" "math" @@ -102,46 +103,65 @@ const Version = uint8(1) type Header struct { ValueSize uint64 NumBuckets uint32 - Kind uint8 + Meta Meta } -// headerSize is the size of the header at the beginning of the file. -const headerSize = 32 - // Load checks the Magic sequence and loads the header fields. -func (h *Header) Load(buf *[headerSize]byte) error { +func (h *Header) Load(buf []byte) error { // Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream. if *(*[8]byte)(buf[:8]) != Magic { return fmt.Errorf("not a radiance compactindex file") } + // read length of the rest of the header + lenWithoutMagicAndLen := binary.LittleEndian.Uint32(buf[8:12]) + if lenWithoutMagicAndLen < 12 { + return fmt.Errorf("invalid header length") + } + if lenWithoutMagicAndLen > uint32(len(buf)) { + return fmt.Errorf("invalid header length") + } + // read the rest of the header *h = Header{ - ValueSize: binary.LittleEndian.Uint64(buf[8:16]), - NumBuckets: binary.LittleEndian.Uint32(buf[16:20]), + ValueSize: binary.LittleEndian.Uint64(buf[12:20]), + NumBuckets: binary.LittleEndian.Uint32(buf[20:24]), } // Check version. - if buf[20] != Version { + if buf[24] != Version { return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20]) } - h.Kind = buf[21] - // 10 bytes to spare for now. Might use it in the future. - // Force to zero for now. - for _, b := range buf[22:32] { - if b != 0x00 { - return fmt.Errorf("unsupported index version") - } + // read key-value pairs + if err := h.Meta.UnmarshalBinary(buf[25:]); err != nil { + return fmt.Errorf("failed to unmarshal metadata: %w", err) + } + if h.ValueSize == 0 { + return fmt.Errorf("value size not set") + } + if h.NumBuckets == 0 { + return fmt.Errorf("number of buckets not set") } return nil } -func (h *Header) Store(buf *[headerSize]byte) { - copy(buf[0:8], Magic[:]) - binary.LittleEndian.PutUint64(buf[8:16], h.ValueSize) - binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets) - buf[20] = Version - buf[21] = h.Kind - for i := 22; i < 32; i++ { - buf[i] = 0 +func (h *Header) Bytes() []byte { + buf := new(bytes.Buffer) + { + // value size + binary.Write(buf, binary.LittleEndian, h.ValueSize) + // number of buckets + binary.Write(buf, binary.LittleEndian, h.NumBuckets) + // version + buf.WriteByte(Version) + // key-value pairs + kvb := h.Meta.Bytes() + buf.Write(kvb) } + lenWithoutMagicAndLen := buf.Len() + + finalBuf := new(bytes.Buffer) + finalBuf.Write(Magic[:]) // magic + binary.Write(finalBuf, binary.LittleEndian, uint32(lenWithoutMagicAndLen)) // length of the rest of the header + finalBuf.Write(buf.Bytes()) // the rest of the header + return finalBuf.Bytes() } // BucketHash returns the bucket index for the given key. @@ -174,6 +194,7 @@ type BucketHeader struct { NumEntries uint32 HashLen uint8 FileOffset uint64 + headerSize int64 } // bucketHdrLen is the size of the header preceding the hash table entries. diff --git a/compactindexsized/header.go b/compactindexsized/header.go new file mode 100644 index 00000000..51808370 --- /dev/null +++ b/compactindexsized/header.go @@ -0,0 +1,158 @@ +package compactindexsized + +import ( + "bytes" + "fmt" + "io" +) + +type Meta struct { + KeyVals []KV +} + +// Bytes returns the serialized metadata. 
+func (m *Meta) Bytes() []byte { + b, err := m.MarshalBinary() + if err != nil { + panic(err) + } + return b +} + +func (m *Meta) MarshalBinary() ([]byte, error) { + var buf bytes.Buffer + if len(m.KeyVals) > MaxNumKVs { + return nil, fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs) + } + buf.WriteByte(byte(len(m.KeyVals))) + for _, kv := range m.KeyVals { + { + keyLen := len(kv.Key) + if keyLen > MaxKeySize { + return nil, fmt.Errorf("key size %d exceeds max %d", keyLen, MaxKeySize) + } + buf.WriteByte(byte(keyLen)) + buf.Write(kv.Key) + } + { + valueLen := len(kv.Value) + if valueLen > MaxValueSize { + return nil, fmt.Errorf("value size %d exceeds max %d", valueLen, MaxValueSize) + } + buf.WriteByte(byte(valueLen)) + buf.Write(kv.Value) + } + } + return buf.Bytes(), nil +} + +func (m *Meta) UnmarshalBinary(b []byte) error { + if len(b) == 0 { + return nil + } + numKVs := int(b[0]) + if numKVs > MaxNumKVs { + return fmt.Errorf("number of key-value pairs %d exceeds max %d", numKVs, MaxNumKVs) + } + b = b[1:] + reader := bytes.NewReader(b) + for i := 0; i < numKVs; i++ { + var kv KV + { + keyLen, err := reader.ReadByte() + if err != nil { + return err + } + kv.Key = make([]byte, keyLen) + if _, err := io.ReadFull(reader, kv.Key); err != nil { + return err + } + } + { + valueLen, err := reader.ReadByte() + if err != nil { + return err + } + kv.Value = make([]byte, valueLen) + if _, err := io.ReadFull(reader, kv.Value); err != nil { + return err + } + } + m.KeyVals = append(m.KeyVals, kv) + } + return nil +} + +const ( + MaxNumKVs = 255 + MaxKeySize = 255 + MaxValueSize = 255 +) + +// Add adds a key-value pair to the metadata. +func (m *Meta) Add(key, value []byte) error { + if len(m.KeyVals) >= MaxNumKVs { + return fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs) + } + if len(key) > MaxKeySize { + return fmt.Errorf("key size %d exceeds max %d", len(key), MaxKeySize) + } + if len(value) > MaxValueSize { + return fmt.Errorf("value size %d exceeds max %d", len(value), MaxValueSize) + } + m.KeyVals = append(m.KeyVals, KV{Key: key, Value: value}) + return nil +} + +// GetFirst returns the first value for the given key. +func (m *Meta) GetFirst(key []byte) []byte { + for _, kv := range m.KeyVals { + if bytes.Equal(kv.Key, key) { + return kv.Value + } + } + return nil +} + +func (m *Meta) Remove(key []byte) { + var newKeyVals []KV + for _, kv := range m.KeyVals { + if !bytes.Equal(kv.Key, key) { + newKeyVals = append(newKeyVals, kv) + } + } + m.KeyVals = newKeyVals +} + +// Get returns all values for the given key. +func (m *Meta) Get(key []byte) [][]byte { + var values [][]byte + for _, kv := range m.KeyVals { + if bytes.Equal(kv.Key, key) { + values = append(values, kv.Value) + } + } + return values +} + +// Count returns the number of values for the given key. 
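Editor's note: one consequence of this encoding worth calling out — every length field is a single byte, so keys and values are capped at 255 bytes and a header holds at most 255 pairs. Add enforces the caps at write time instead of truncating. A quick sketch (bytes and fmt imports assumed):

```go
var m Meta
err := m.Add(bytes.Repeat([]byte{'k'}, 300), []byte("v"))
fmt.Println(err) // key size 300 exceeds max 255
```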
+func (m *Meta) Count(key []byte) int { + var count int + for _, kv := range m.KeyVals { + if bytes.Equal(kv.Key, key) { + count++ + } + } + return count +} + +type KV struct { + Key []byte + Value []byte +} + +func NewKV(key, value []byte) KV { + return KV{Key: key, Value: value} +} + +var KeyKind = []byte{'k', 'i', 'n', 'd'} diff --git a/compactindexsized/header_test.go b/compactindexsized/header_test.go new file mode 100644 index 00000000..c1031028 --- /dev/null +++ b/compactindexsized/header_test.go @@ -0,0 +1,79 @@ +package compactindexsized + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHeaderMeta(t *testing.T) { + require.Equal(t, (255), MaxKeySize) + require.Equal(t, (255), MaxValueSize) + require.Equal(t, (255), MaxNumKVs) + + var meta Meta + require.NoError(t, meta.Add([]byte("foo"), []byte("bar"))) + require.NoError(t, meta.Add([]byte("foo"), []byte("baz"))) + + require.Equal(t, 2, meta.Count([]byte("foo"))) + + require.Equal(t, []byte("bar"), meta.GetFirst([]byte("foo"))) + + require.Equal(t, [][]byte{[]byte("bar"), []byte("baz")}, meta.Get([]byte("foo"))) + + require.Equal(t, [][]byte(nil), meta.Get([]byte("bar"))) + + require.Equal(t, []byte(nil), meta.GetFirst([]byte("bar"))) + + require.Equal(t, 0, meta.Count([]byte("bar"))) + + encoded, err := meta.MarshalBinary() + require.NoError(t, err) + { + mustBeEncoded := concatBytes( + []byte{2}, // number of key-value pairs + + []byte{3}, // length of key + []byte("foo"), // key + + []byte{3}, // length of value + []byte("bar"), // value + + []byte{3}, // length of key + []byte("foo"), // key + + []byte{3}, // length of value + []byte("baz"), // value + ) + require.Equal(t, mustBeEncoded, encoded) + } + + var decoded Meta + require.NoError(t, decoded.UnmarshalBinary(encoded)) + + require.Equal(t, meta, decoded) +} + +func TestHeader(t *testing.T) { + var header Header + + header.ValueSize = 42 + header.NumBuckets = 43 + + encoded := header.Bytes() + { + mustBeEncoded := concatBytes( + // magic + Magic[:], + // header size + i32tob(14), + // value size + []byte{42, 0, 0, 0, 0, 0, 0, 0}, + // num buckets + []byte{43, 0, 0, 0}, + []byte{1}, // version + []byte{0}, // how many kv pairs + ) + require.Equal(t, mustBeEncoded, encoded) + } +} diff --git a/compactindexsized/query.go b/compactindexsized/query.go index 54ad0102..48e2f60c 100644 --- a/compactindexsized/query.go +++ b/compactindexsized/query.go @@ -6,6 +6,8 @@ package compactindexsized // - The values it indexes are N-byte values instead of 8-byte values. This allows to index CIDs (in particular sha256+CBOR CIDs), and other values, directly. import ( + "bytes" + "encoding/binary" "errors" "fmt" "io" @@ -14,10 +16,13 @@ import ( // DB is a compactindex handle. type DB struct { Header - Stream io.ReaderAt - prefetch bool + headerSize int64 + Stream io.ReaderAt + prefetch bool } +var ErrInvalidMagic = errors.New("invalid magic") + // Open returns a handle to access a compactindex. // // The provided stream must start with the Magic byte sequence. @@ -25,16 +30,28 @@ type DB struct { func Open(stream io.ReaderAt) (*DB, error) { // Read the static 32-byte header. // Ignore errors if the read fails after filling the buffer (e.g. EOF). - var fileHeader [headerSize]byte - n, readErr := stream.ReadAt(fileHeader[:], 0) - if n < len(fileHeader) { + var magicAndSize [8 + 4]byte + n, readErr := stream.ReadAt(magicAndSize[:], 0) + if n < len(magicAndSize) { + // ReadAt must return non-nil error here. 
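Editor's note: because the header is now variable-length, Open becomes a two-phase read — first the fixed 12 bytes (magic plus length word) to validate the stream and size the buffer, then one ReadAt for the full header. Callers can then check what they opened; a hedged sketch using KindIs and GetValueSize, both defined a little further below:

```go
db, err := Open(f) // f is any io.ReaderAt positioned at the index start
if err != nil {
	log.Fatal(err) // ErrInvalidMagic if this is not a compactindex stream
}
if !db.KindIs([]byte("something")) {
	log.Fatal("unexpected index kind")
}
fmt.Println(db.GetValueSize())
```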
+ return nil, readErr + } + // check magic + if !bytes.Equal(magicAndSize[:8], Magic[:]) { + return nil, ErrInvalidMagic + } + size := binary.LittleEndian.Uint32(magicAndSize[8:]) + fileHeaderBuf := make([]byte, 8+4+size) + n, readErr = stream.ReadAt(fileHeaderBuf, 0) + if n < len(fileHeaderBuf) { // ReadAt must return non-nil error here. return nil, readErr } db := new(DB) - if err := db.Header.Load(&fileHeader); err != nil { + if err := db.Header.Load(fileHeaderBuf); err != nil { return nil, err } + db.headerSize = int64(8 + 4 + size) db.Stream = stream return db, nil } @@ -44,8 +61,13 @@ func (db *DB) Prefetch(yes bool) { } // GetKind returns the kind of the index. -func (db *DB) GetKind() uint8 { - return db.Header.Kind +func (db *DB) GetKind() []byte { + return db.Header.Meta.GetFirst(KeyKind) +} + +// KindIs returns whether the index is of the given kind. +func (db *DB) KindIs(kind []byte) bool { + return db.Header.Meta.Count(KeyKind) > 0 && bytes.Equal(db.Header.Meta.GetFirst(KeyKind), kind) } func (db *DB) GetValueSize() uint64 { @@ -85,6 +107,7 @@ func (db *DB) GetBucket(i uint) (*Bucket, error) { OffsetWidth: uint8(db.GetValueSize()), }, } + bucket.BucketHeader.headerSize = db.headerSize // Read bucket header. readErr := bucket.BucketHeader.readFrom(db.Stream, i) if readErr != nil { @@ -118,13 +141,13 @@ func (db *DB) entryStride() uint8 { return uint8(HashSize) + uint8(offsetSize) } -func bucketOffset(i uint) int64 { +func bucketOffset(headerSize int64, i uint) int64 { return headerSize + int64(i)*bucketHdrLen } func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { var buf [bucketHdrLen]byte - n, err := rd.ReadAt(buf[:], bucketOffset(i)) + n, err := rd.ReadAt(buf[:], bucketOffset(b.headerSize, i)) if n < len(buf) { return err } @@ -135,7 +158,7 @@ func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error { var buf [bucketHdrLen]byte b.Store(&buf) - _, err := wr.WriteAt(buf[:], bucketOffset(i)) + _, err := wr.WriteAt(buf[:], bucketOffset(b.headerSize, i)) return err } diff --git a/compactindexsized/query_test.go b/compactindexsized/query_test.go index d6b5cd88..f741ceec 100644 --- a/compactindexsized/query_test.go +++ b/compactindexsized/query_test.go @@ -30,29 +30,65 @@ func TestOpen_InvalidMagic(t *testing.T) { db, dbErr := Open(bytes.NewReader(buf[:])) require.Nil(t, db) - require.EqualError(t, dbErr, "not a radiance compactindex file") + require.EqualError(t, dbErr, ErrInvalidMagic.Error()) } func TestOpen_HeaderOnly(t *testing.T) { - buf := [32]byte{ + buf := concatBytes( // Magic - 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x', + []byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'}, + // header size + i32tob(30), // FileSize - 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + []byte{0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // NumBuckets - 0x42, 0x00, 0x00, 0x00, - // Padding - 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - } + []byte{0x42, 0x00, 0x00, 0x00}, + // Version + []byte{0x01}, + + // Meta: how many key-value pairs + []byte{2}, + + // First key-value pair + // Key length + []byte{3}, + // Key + []byte("foo"), + // Value length + []byte{3}, + // Value + []byte("bar"), + + // Second key-value pair + // Key length + []byte{3}, + // Key + []byte("foo"), + // Value length + []byte{3}, + // Value + []byte("baz"), + ) db, dbErr := Open(bytes.NewReader(buf[:])) - require.NotNil(t, db) require.NoError(t, dbErr) + require.NotNil(t, db) assert.NotNil(t, db.Stream) 
assert.Equal(t, Header{ ValueSize: 0x1337, NumBuckets: 0x42, + Meta: Meta{ + KeyVals: []KV{ + { + Key: []byte("foo"), + Value: []byte("bar"), + }, + { + Key: []byte("foo"), + Value: []byte("baz"), + }, + }, + }, }, db.Header) } From a90c9cee72132e5fe737277f46aca988e88cf8c7 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 19:01:16 +0100 Subject: [PATCH 07/63] Cleanup tests --- compactindexsized/build36_test.go | 13 ++++++------- compactindexsized/build48_test.go | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index 950e7759..6f53beaa 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -19,7 +19,6 @@ import ( "testing" "time" - "github.com/davecgh/go-spew/spew" "github.com/ipfs/go-cid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -171,12 +170,12 @@ func TestBuilder36(t *testing.T) { for i, key := range keys { require.NoError(t, builder.Insert([]byte(key), []byte(testCids[i].Bytes()))) } - { - // print test values - for _, tc := range testCids { - spew.Dump(FormatByteSlice(tc.Bytes())) - } - } + // { + // // print test values + // for _, tc := range testCids { + // spew.Dump(FormatByteSlice(tc.Bytes())) + // } + // } // Create index file. targetFile, err := os.CreateTemp("", "compactindex-final-") diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go index b6e39e4d..14de12d4 100644 --- a/compactindexsized/build48_test.go +++ b/compactindexsized/build48_test.go @@ -16,7 +16,6 @@ import ( "testing" "time" - "github.com/davecgh/go-spew/spew" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vbauerster/mpb/v8/decor" @@ -144,12 +143,12 @@ func TestBuilder48(t *testing.T) { for i, key := range keys { require.NoError(t, builder.Insert([]byte(key), []byte(testValues48[i]))) } - { - // print test values - for _, tc := range testValues48 { - spew.Dump(FormatByteSlice(tc)) - } - } + // { + // // print test values + // for _, tc := range testValues48 { + // spew.Dump(FormatByteSlice(tc)) + // } + // } // Create index file. targetFile, err := os.CreateTemp("", "compactindex-final-") From fa63b7a22b193b448a3e7914f231c7af805f87d1 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 14 Nov 2023 19:29:06 +0100 Subject: [PATCH 08/63] Add metadata shortcut --- compactindexsized/build.go | 4 ++++ compactindexsized/build36_test.go | 34 ++++++++++++++++++++----------- compactindexsized/build8_test.go | 3 ++- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/compactindexsized/build.go b/compactindexsized/build.go index 0f095478..8a2bd164 100644 --- a/compactindexsized/build.go +++ b/compactindexsized/build.go @@ -107,6 +107,10 @@ func (b *Builder) SetKind(kind []byte) error { return nil } +func (b *Builder) Metadata() *Meta { + return &b.Header.Meta +} + func (b *Builder) getValueSize() int { return int(b.ValueSize) } diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index 6f53beaa..a5c19498 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -164,6 +164,7 @@ func TestBuilder36(t *testing.T) { kindSomething := []byte("something") require.NoError(t, builder.SetKind(kindSomething)) + require.NoError(t, builder.Metadata().Add([]byte("hello"), []byte("world"))) // Insert a few entries. 
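Editor's note: PATCH 08's shortcut turns the header into a small general-purpose key-value store on the builder side. A hedged sketch — the "epoch" key is invented for illustration, while "hello"/"world" mirrors the test above. Note the knock-on effect visible in the updated fixtures below: one extra 12-byte pair grows the header from 41 to 53 bytes and shifts every bucket offset accordingly.

```go
builder, err := NewBuilderSized(tmpDir, uint(numItems), 36)
if err != nil {
	log.Fatal(err)
}
if err := builder.SetKind([]byte("something")); err != nil {
	log.Fatal(err)
}
if err := builder.Metadata().Add([]byte("epoch"), []byte("132")); err != nil {
	log.Fatal(err)
}
// After Open, readers recover the pairs via db.Header.Meta / db.GetKind().
```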
keys := []string{"hello", "world", "blub", "foo"} @@ -194,20 +195,25 @@ func TestBuilder36(t *testing.T) { // magic Magic[:], // header size - i32tob(29), + i32tob(41), // value size (36 bytes in this case) []byte{0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, []byte{0x03, 0x00, 0x00, 0x00}, // num buckets []byte{1}, // version - []byte{1}, // how many kv pairs + []byte{2}, // how many kv pairs []byte{4}, // key size []byte("kind"), // key []byte{9}, // value size []byte("something"), // value + []byte{5}, // key size + []byte("hello"), // key + []byte{5}, // value size + []byte("world"), // value + // --- Bucket header 0 // hash domain []byte{0x00, 0x00, 0x00, 0x00}, @@ -218,7 +224,7 @@ func TestBuilder36(t *testing.T) { // padding []byte{0x00}, // file offset - []byte{89, 0x00, 0x00, 0x00, 0x00, 0x00}, + []byte{101, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 1 // hash domain @@ -230,7 +236,7 @@ func TestBuilder36(t *testing.T) { // padding []byte{0x00}, // file offset - []byte{128, 0x00, 0x00, 0x00, 0x00, 0x00}, + []byte{140, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket header 2 // hash domain @@ -242,7 +248,7 @@ func TestBuilder36(t *testing.T) { // padding []byte{0x00}, // file offset - []byte{167, 0x00, 0x00, 0x00, 0x00, 0x00}, + []byte{179, 0x00, 0x00, 0x00, 0x00, 0x00}, // --- Bucket 0 // hash @@ -269,7 +275,7 @@ func TestBuilder36(t *testing.T) { { splitSizes := []int{ // --- File header - 8, 4, 8, 4, 1, 1, 1, 4, 1, 9, + 8, 4, 8, 4, 1, 1, 1, 4, 1, 9, 1, 5, 1, 5, // --- Bucket header 0 4, 4, 1, 1, 6, // --- Bucket header 1 @@ -314,6 +320,10 @@ func TestBuilder36(t *testing.T) { Key: KeyKind, Value: kindSomething, }, + { + Key: []byte("hello"), + Value: []byte("world"), + }, }, }, }, db.Header) @@ -335,8 +345,8 @@ func TestBuilder36(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 89, - headerSize: 41, + FileOffset: 101, + headerSize: 53, }, Stride: 3 + valueSize, // 3 + 36 OffsetWidth: valueSize, @@ -345,15 +355,15 @@ func TestBuilder36(t *testing.T) { HashDomain: 0x00, NumEntries: 1, HashLen: 3, - FileOffset: 128, - headerSize: 41, + FileOffset: 140, + headerSize: 53, }, buckets[1].BucketHeader) assert.Equal(t, BucketHeader{ HashDomain: 0x00, NumEntries: 2, HashLen: 3, - FileOffset: 167, - headerSize: 41, + FileOffset: 179, + headerSize: 53, }, buckets[2].BucketHeader) assert.Equal(t, uint8(3+valueSize), buckets[2].Stride) diff --git a/compactindexsized/build8_test.go b/compactindexsized/build8_test.go index b658338d..ffe1cb73 100644 --- a/compactindexsized/build8_test.go +++ b/compactindexsized/build8_test.go @@ -223,7 +223,8 @@ func TestBuilder8_Random(t *testing.T) { key := make([]byte, keySize) for i := uint(0); i < numKeys; i++ { binary.LittleEndian.PutUint64(key, uint64(i)) - err := builder.Insert(key, itob(uint64(rand.Int63n(int64(100000))))) + v := uint64(rand.Int63n(int64(100000))) + 1 + err := builder.Insert(key, itob(v)) require.NoError(t, err) } t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) From d68a8ef15cd66e89dabb92a832836483db4b7495 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Wed, 15 Nov 2023 17:00:05 +0100 Subject: [PATCH 09/63] Better errors; more methods --- compactindexsized/header.go | 48 +++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/compactindexsized/header.go b/compactindexsized/header.go index 51808370..18207769 100644 --- a/compactindexsized/header.go +++ b/compactindexsized/header.go @@ -25,11 +25,11 @@ func (m *Meta) MarshalBinary() ([]byte, 
error) {
 		return nil, fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs)
 	}
 	buf.WriteByte(byte(len(m.KeyVals)))
-	for _, kv := range m.KeyVals {
+	for i, kv := range m.KeyVals {
 		{
 			keyLen := len(kv.Key)
 			if keyLen > MaxKeySize {
-				return nil, fmt.Errorf("key size %d exceeds max %d", keyLen, MaxKeySize)
+				return nil, fmt.Errorf("key %d size %d exceeds max %d", i, keyLen, MaxKeySize)
 			}
 			buf.WriteByte(byte(keyLen))
 			buf.Write(kv.Key)
@@ -37,7 +37,7 @@ func (m *Meta) MarshalBinary() ([]byte, error) {
 		{
 			valueLen := len(kv.Value)
 			if valueLen > MaxValueSize {
-				return nil, fmt.Errorf("value size %d exceeds max %d", valueLen, MaxValueSize)
+				return nil, fmt.Errorf("value %d size %d exceeds max %d", i, valueLen, MaxValueSize)
 			}
 			buf.WriteByte(byte(valueLen))
 			buf.Write(kv.Value)
@@ -61,21 +61,21 @@ func (m *Meta) UnmarshalBinary(b []byte) error {
 		{
 			keyLen, err := reader.ReadByte()
 			if err != nil {
-				return err
+				return fmt.Errorf("failed to read length of key %d: %w", i, err)
 			}
 			kv.Key = make([]byte, keyLen)
 			if _, err := io.ReadFull(reader, kv.Key); err != nil {
-				return err
+				return fmt.Errorf("failed to read key %d: %w", i, err)
 			}
 		}
 		{
 			valueLen, err := reader.ReadByte()
 			if err != nil {
-				return err
+				return fmt.Errorf("failed to read length of value %d: %w", i, err)
 			}
 			kv.Value = make([]byte, valueLen)
 			if _, err := io.ReadFull(reader, kv.Value); err != nil {
-				return err
+				return fmt.Errorf("failed to read value %d: %w", i, err)
 			}
 		}
 		m.KeyVals = append(m.KeyVals, kv)
@@ -114,6 +114,40 @@ func (m *Meta) GetFirst(key []byte) []byte {
 	return nil
 }
 
+// ReadFirst copies the first value for the given key into valueDst.
+// It returns the number of bytes copied.
+func (m *Meta) ReadFirst(key []byte, valueDst []byte) int {
+	for _, kv := range m.KeyVals {
+		if bytes.Equal(kv.Key, key) {
+			return copy(valueDst, kv.Value)
+		}
+	}
+	return 0
+}
+
+// ReplaceFirst replaces the first value for the given key.
+func (m *Meta) ReplaceFirst(key, value []byte) error {
+	for i, kv := range m.KeyVals {
+		if bytes.Equal(kv.Key, key) {
+			m.KeyVals[i].Value = value
+			return nil
+		}
+	}
+	return fmt.Errorf("key %q not found", key)
+}
+
+// HasDuplicateKeys returns true if there are duplicate keys.
+func (m *Meta) HasDuplicateKeys() bool {
+	seen := make(map[string]struct{})
+	for _, kv := range m.KeyVals {
+		if _, ok := seen[string(kv.Key)]; ok {
+			return true
+		}
+		seen[string(kv.Key)] = struct{}{}
+	}
+	return false
+}
+
 func (m *Meta) Remove(key []byte) {
 	var newKeyVals []KV
 	for _, kv := range m.KeyVals {

From 1f6b6deac07bc22e7fa53d04ccb80c9e46cd75d2 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Fri, 17 Nov 2023 16:04:16 +0100
Subject: [PATCH 10/63] Cleanup

---
 compactindexsized/header.go | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/compactindexsized/header.go b/compactindexsized/header.go
index 18207769..bfcd4c27 100644
--- a/compactindexsized/header.go
+++ b/compactindexsized/header.go
@@ -104,6 +104,26 @@ func (m *Meta) Add(key, value []byte) error {
 	return nil
 }
 
+// ReplaceFirst replaces the first value for the given key.
+func (m *Meta) ReplaceFirst(key, value []byte) error { + if len(m.KeyVals) >= MaxNumKVs { + return fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs) + } + if len(key) > MaxKeySize { + return fmt.Errorf("key size %d exceeds max %d", len(key), MaxKeySize) + } + if len(value) > MaxValueSize { + return fmt.Errorf("value size %d exceeds max %d", len(value), MaxValueSize) + } + for i, kv := range m.KeyVals { + if bytes.Equal(kv.Key, key) { + m.KeyVals[i].Value = value + return nil + } + } + return fmt.Errorf("key %q not found", key) +} + // GetFirst returns the first value for the given key. func (m *Meta) GetFirst(key []byte) []byte { for _, kv := range m.KeyVals { @@ -125,17 +145,6 @@ func (m *Meta) ReadFirst(key []byte, valueDst []byte) int { return 0 } -// ReplaceFirst replaces the first value for the given key. -func (m *Meta) ReplaceFirst(key, value []byte) error { - for i, kv := range m.KeyVals { - if bytes.Equal(kv.Key, key) { - m.KeyVals[i].Value = value - return nil - } - } - return fmt.Errorf("key %q not found", key) -} - // HasDuplicateKeys returns true if there are duplicate keys. func (m *Meta) HasDuplicateKeys() bool { seen := make(map[string]struct{}) From c3e4e3a1f02610ff43cca62a394e49ebb902320c Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 17 Nov 2023 16:37:58 +0100 Subject: [PATCH 11/63] Refactor meta --- compactindexsized/build36_test.go | 4 +++- compactindexsized/header.go | 16 ++++++++-------- compactindexsized/header_test.go | 12 ++++++++---- compactindexsized/query.go | 7 ++++--- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index a5c19498..f00fb4d3 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -308,7 +308,9 @@ func TestBuilder36(t *testing.T) { require.NoError(t, err, "Failed to open generated index") require.NotNil(t, db) - assert.Equal(t, kindSomething, db.GetKind()) + got, ok := db.GetKind() + require.True(t, ok) + assert.Equal(t, kindSomething, got) // File header assertions. assert.Equal(t, Header{ diff --git a/compactindexsized/header.go b/compactindexsized/header.go index bfcd4c27..3185f24f 100644 --- a/compactindexsized/header.go +++ b/compactindexsized/header.go @@ -104,8 +104,8 @@ func (m *Meta) Add(key, value []byte) error { return nil } -// ReplaceFirst replaces the first value for the given key. -func (m *Meta) ReplaceFirst(key, value []byte) error { +// Replace replaces the first value for the given key. +func (m *Meta) Replace(key, value []byte) error { if len(m.KeyVals) >= MaxNumKVs { return fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs) } @@ -124,14 +124,14 @@ func (m *Meta) ReplaceFirst(key, value []byte) error { return fmt.Errorf("key %q not found", key) } -// GetFirst returns the first value for the given key. -func (m *Meta) GetFirst(key []byte) []byte { +// Get returns the first value for the given key. +func (m *Meta) Get(key []byte) ([]byte, bool) { for _, kv := range m.KeyVals { if bytes.Equal(kv.Key, key) { - return kv.Value + return kv.Value, true } } - return nil + return nil, false } // ReadFirst copies the first value for the given key into the given value. @@ -167,8 +167,8 @@ func (m *Meta) Remove(key []byte) { m.KeyVals = newKeyVals } -// Get returns all values for the given key. -func (m *Meta) Get(key []byte) [][]byte { +// GetAll returns all values for the given key. 
+func (m *Meta) GetAll(key []byte) [][]byte { var values [][]byte for _, kv := range m.KeyVals { if bytes.Equal(kv.Key, key) { diff --git a/compactindexsized/header_test.go b/compactindexsized/header_test.go index c1031028..c85d755e 100644 --- a/compactindexsized/header_test.go +++ b/compactindexsized/header_test.go @@ -17,13 +17,17 @@ func TestHeaderMeta(t *testing.T) { require.Equal(t, 2, meta.Count([]byte("foo"))) - require.Equal(t, []byte("bar"), meta.GetFirst([]byte("foo"))) + got, ok := meta.Get([]byte("foo")) + require.True(t, ok) + require.Equal(t, []byte("bar"), got) - require.Equal(t, [][]byte{[]byte("bar"), []byte("baz")}, meta.Get([]byte("foo"))) + require.Equal(t, [][]byte{[]byte("bar"), []byte("baz")}, meta.GetAll([]byte("foo"))) - require.Equal(t, [][]byte(nil), meta.Get([]byte("bar"))) + require.Equal(t, [][]byte(nil), meta.GetAll([]byte("bar"))) - require.Equal(t, []byte(nil), meta.GetFirst([]byte("bar"))) + got, ok = meta.Get([]byte("bar")) + require.False(t, ok) + require.Equal(t, []byte(nil), got) require.Equal(t, 0, meta.Count([]byte("bar"))) diff --git a/compactindexsized/query.go b/compactindexsized/query.go index 48e2f60c..d4433b9f 100644 --- a/compactindexsized/query.go +++ b/compactindexsized/query.go @@ -61,13 +61,14 @@ func (db *DB) Prefetch(yes bool) { } // GetKind returns the kind of the index. -func (db *DB) GetKind() []byte { - return db.Header.Meta.GetFirst(KeyKind) +func (db *DB) GetKind() ([]byte, bool) { + return db.Header.Meta.Get(KeyKind) } // KindIs returns whether the index is of the given kind. func (db *DB) KindIs(kind []byte) bool { - return db.Header.Meta.Count(KeyKind) > 0 && bytes.Equal(db.Header.Meta.GetFirst(KeyKind), kind) + got, ok := db.Header.Meta.Get(KeyKind) + return ok && bytes.Equal(got, kind) } func (db *DB) GetValueSize() uint64 { From bfd8795fbf17767aeb79c281e9b66098ff8053eb Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 22:52:43 +0100 Subject: [PATCH 12/63] Refactor bucketteer --- bucketteer/bucketteer_test.go | 2 +- bucketteer/example/main.go | 2 +- bucketteer/read.go | 73 ++++++++++++++++++++++++----------- bucketteer/write.go | 45 ++++++++++----------- 4 files changed, 72 insertions(+), 50 deletions(-) diff --git a/bucketteer/bucketteer_test.go b/bucketteer/bucketteer_test.go index 99e4e2a5..9010b910 100644 --- a/bucketteer/bucketteer_test.go +++ b/bucketteer/bucketteer_test.go @@ -153,7 +153,7 @@ func TestBucketteer(t *testing.T) { hash, err := contentReader.ReadUint64(bin.LE) require.NoError(t, err) found := false - for _, h := range wr.prefixToHashes[prefix] { + for _, h := range wr.prefixToHashes[prefixToUint16(prefix)] { if h == hash { found = true break diff --git a/bucketteer/example/main.go b/bucketteer/example/main.go index f6afffcf..4ffed250 100644 --- a/bucketteer/example/main.go +++ b/bucketteer/example/main.go @@ -97,7 +97,7 @@ func main() { } if true { // now search for random signatures that are not in the Bucketteer: - numSearches := 100_000_000 + numSearches := 10_000_000 fmt.Println( "testing search for random signatures that are not in the Bucketteer (numSearches:", humanize.Comma(int64(numSearches)), diff --git a/bucketteer/read.go b/bucketteer/read.go index 7c7d2c95..8ab65653 100644 --- a/bucketteer/read.go +++ b/bucketteer/read.go @@ -5,6 +5,7 @@ import ( "encoding/binary" "fmt" "io" + "math" bin "github.com/gagliardetto/binary" "golang.org/x/exp/mmap" @@ -13,7 +14,35 @@ import ( type Reader struct { contentReader io.ReaderAt meta map[string]string - prefixToOffset map[[2]byte]uint64 + 
prefixToOffset *bucketToOffset
+}
+
+type bucketToOffset [math.MaxUint16 + 1]uint64
+
+func newUint16Layout() bucketToOffset {
+	var layout bucketToOffset
+	for i := 0; i <= math.MaxUint16; i++ {
+		layout[i] = math.MaxUint64
+	}
+	return layout
+}
+
+func newUint16LayoutPointer() *bucketToOffset {
+	var layout bucketToOffset
+	for i := 0; i <= math.MaxUint16; i++ {
+		layout[i] = math.MaxUint64
+	}
+	return &layout
+}
+
+func prefixToUint16(prefix [2]byte) uint16 {
+	return binary.LittleEndian.Uint16(prefix[:])
+}
+
+func uint16ToPrefix(num uint16) [2]byte {
+	var prefix [2]byte
+	binary.LittleEndian.PutUint16(prefix[:], num)
+	return prefix
 }
 
 // Open opens a Bucketteer file in read-only mode,
@@ -28,7 +57,7 @@ func Open(path string) (*Reader, error) {
 
 func NewReader(reader io.ReaderAt) (*Reader, error) {
 	r := &Reader{
-		prefixToOffset: make(map[[2]byte]uint64),
+		prefixToOffset: newUint16LayoutPointer(),
 	}
 	prefixToOffset, meta, headerTotalSize, err := readHeader(reader)
 	if err != nil {
@@ -67,7 +96,7 @@ func readHeaderSize(reader io.ReaderAt) (int64, error) {
 	return headerSize, nil
 }
 
-func readHeader(reader io.ReaderAt) (map[[2]byte]uint64, map[string]string, int64, error) {
+func readHeader(reader io.ReaderAt) (*bucketToOffset, map[string]string, int64, error) {
 	// read header size:
 	headerSize, err := readHeaderSize(reader)
 	if err != nil {
@@ -102,24 +131,22 @@ func readHeader(reader io.ReaderAt) (map[[2]byte]uint64, map[string]string, int6
 			return nil, nil, 0, fmt.Errorf("expected version %d, got %d", Version, got)
 		}
 	}
-	{
-		// read meta:
-		numMeta, err := decoder.ReadUint64(bin.LE)
+	// read meta:
+	numMeta, err := decoder.ReadUint64(bin.LE)
+	if err != nil {
+		return nil, nil, 0, err
+	}
+	meta := make(map[string]string, numMeta)
+	for i := uint64(0); i < numMeta; i++ {
+		key, err := decoder.ReadString()
 		if err != nil {
 			return nil, nil, 0, err
 		}
-		meta := make(map[string]string, numMeta)
-		for i := uint64(0); i < numMeta; i++ {
-			key, err := decoder.ReadString()
-			if err != nil {
-				return nil, nil, 0, err
-			}
-			value, err := decoder.ReadString()
-			if err != nil {
-				return nil, nil, 0, err
-			}
-			meta[key] = value
+		value, err := decoder.ReadString()
+		if err != nil {
+			return nil, nil, 0, err
 		}
+		meta[key] = value
 	}
 	// numPrefixes:
 	numPrefixes, err := decoder.ReadUint64(bin.LE)
@@ -127,7 +154,7 @@ func readHeader(reader io.ReaderAt) (map[[2]byte]uint64, map[string]string, int6
 		return nil, nil, 0, err
 	}
 	// prefix -> offset:
-	prefixToOffset := make(map[[2]byte]uint64, numPrefixes)
+	prefixToOffset := newUint16Layout()
 	for i := uint64(0); i < numPrefixes; i++ {
 		var prefix [2]byte
 		_, err := decoder.Read(prefix[:])
@@ -138,19 +165,19 @@ func readHeader(reader io.ReaderAt) (map[[2]byte]uint64, map[string]string, int6
 		if err != nil {
 			return nil, nil, 0, err
 		}
-		prefixToOffset[prefix] = offset
+		prefixToOffset[prefixToUint16(prefix)] = offset
 	}
-	return prefixToOffset, nil, headerSize + 4, err
+	return &prefixToOffset, meta, headerSize + 4, err
 }
 
 func (r *Reader) Has(sig [64]byte) (bool, error) {
 	prefix := [2]byte{sig[0], sig[1]}
-	offset, ok := r.prefixToOffset[prefix]
-	if !ok {
+	offset := r.prefixToOffset[prefixToUint16(prefix)]
+	if offset == math.MaxUint64 {
 		return false, nil
 	}
 	// numHashes:
-	numHashesBuf := make([]byte, 4)
+	numHashesBuf := make([]byte, 4) // TODO: is uint32 enough? That's 4 billion hashes per bucket. Right now an epoch can have 1 billion signatures.
_, err := r.contentReader.ReadAt(numHashesBuf, int64(offset)) if err != nil { return false, err diff --git a/bucketteer/write.go b/bucketteer/write.go index 5837f1be..3400da22 100644 --- a/bucketteer/write.go +++ b/bucketteer/write.go @@ -5,6 +5,7 @@ import ( "bytes" "encoding/binary" "fmt" + "math" "os" "sort" @@ -14,7 +15,13 @@ import ( type Writer struct { destination *os.File writer *bufio.Writer - prefixToHashes map[[2]byte][]uint64 // prefix -> hashes + prefixToHashes *prefixToHashes // prefix -> hashes +} + +type prefixToHashes [math.MaxUint16 + 1][]uint64 // prefix -> hashes + +func newPrefixToHashes() *prefixToHashes { + return &prefixToHashes{} } const ( @@ -40,7 +47,7 @@ func NewWriter(path string) (*Writer, error) { return &Writer{ writer: bufio.NewWriterSize(file, writeBufSize), destination: file, - prefixToHashes: make(map[[2]byte][]uint64), + prefixToHashes: newPrefixToHashes(), }, nil } @@ -49,7 +56,7 @@ func NewWriter(path string) (*Writer, error) { func (b *Writer) Put(sig [64]byte) { var prefix [2]byte copy(prefix[:], sig[:2]) - b.prefixToHashes[prefix] = append(b.prefixToHashes[prefix], Hash(sig)) + b.prefixToHashes[prefixToUint16(prefix)] = append(b.prefixToHashes[prefixToUint16(prefix)], Hash(sig)) } // Has returns true if the Bucketteer has seen the given signature. @@ -57,7 +64,7 @@ func (b *Writer) Has(sig [64]byte) bool { var prefix [2]byte copy(prefix[:], sig[:2]) hash := Hash(sig) - for _, h := range b.prefixToHashes[prefix] { + for _, h := range b.prefixToHashes[prefixToUint16(prefix)] { if h == hash { return true } @@ -90,7 +97,7 @@ func createHeader( version uint64, headerSizeIn uint32, meta map[string]string, - prefixToOffset map[[2]byte]uint64, + prefixToOffset bucketToOffset, ) ([]byte, error) { tmpHeaderBuf := new(bytes.Buffer) headerWriter := bin.NewBorshEncoder(tmpHeaderBuf) @@ -132,13 +139,13 @@ func createHeader( return nil, err } - prefixes := getSortedPrefixes(prefixToOffset) // write prefix+offset pairs - for _, prefix := range prefixes { + for prefixAsUint16 := range prefixToOffset { + prefix := uint16ToPrefix(uint16(prefixAsUint16)) if _, err := headerWriter.Write(prefix[:]); err != nil { return nil, err } - offset := prefixToOffset[prefix] + offset := prefixToOffset[prefixAsUint16] if err := headerWriter.WriteUint64(offset, binary.LittleEndian); err != nil { return nil, err } @@ -161,27 +168,15 @@ func overwriteFileContentAt( return err } -func getSortedPrefixes[K any](prefixToHashes map[[2]byte]K) [][2]byte { - prefixes := make([][2]byte, 0, len(prefixToHashes)) - for prefix := range prefixToHashes { - prefixes = append(prefixes, prefix) - } - sort.Slice(prefixes, func(i, j int) bool { - return bytes.Compare(prefixes[i][:], prefixes[j][:]) < 0 - }) - return prefixes -} - func seal( out *bufio.Writer, - prefixToHashes map[[2]byte][]uint64, + prefixToHashes *prefixToHashes, meta map[string]string, ) ([]byte, int64, error) { - prefixes := getSortedPrefixes(prefixToHashes) - prefixToOffset := make(map[[2]byte]uint64, len(prefixes)) - for _, prefix := range prefixes { + prefixToOffset := bucketToOffset{} + for prefixAsUint16 := range prefixToHashes { // initialize all offsets to 0: - prefixToOffset[prefix] = 0 + prefixToOffset[prefixAsUint16] = 0 } totalWritten := int64(0) @@ -203,7 +198,7 @@ func seal( totalWritten += int64(headerSize) previousOffset := uint64(0) - for _, prefix := range prefixes { + for prefix := range prefixToHashes { entries := getCleanSet(prefixToHashes[prefix]) if len(entries) != len(prefixToHashes[prefix]) { 
panic(fmt.Sprintf("duplicate hashes for prefix %v", prefix)) From cf3e13037bb7f05ef2e46303edaf3e3c41149632 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 22:54:35 +0100 Subject: [PATCH 13/63] Refactor indexes --- car-dag-traverser.go | 42 +-- cmd-x-index-all.go | 333 ++++++------------- cmd-x-index-cid2offset.go | 21 ++ cmd-x-index-sig2cid.go | 21 ++ cmd-x-index-slot2cid.go | 21 ++ cmd-x-verify-index-all.go | 8 +- compactindexsized/build.go | 34 +- compactindexsized/build36_test.go | 4 +- compactindexsized/build48_test.go | 4 +- compactindexsized/build8_test.go | 2 +- compactindexsized/compactindex.go | 6 +- compactindexsized/query.go | 4 +- compactindexsized/query_test.go | 2 +- epoch.go | 25 +- go.mod | 131 ++++---- go.sum | 330 ++++++++---------- index-cid-to-offset.go | 59 ++-- index-sig-to-cid.go | 54 ++- index-slot-to-cid.go | 53 ++- indexes/errors.go | 8 + indexes/index-cid-to-offset-and-size.go | 214 ++++++++++++ indexes/index-cid-to-offset-and-size_test.go | 150 +++++++++ indexes/index-sig-to-cid.go | 194 +++++++++++ indexes/index-sig-to-cid_test.go | 131 ++++++++ indexes/index-slot-to-cid.go | 185 +++++++++++ indexes/index-slot-to-cid_test.go | 119 +++++++ indexes/metadata.go | 128 +++++++ indexes/networks.go | 18 + indexes/uints.go | 78 +++++ indexes/uints_test.go | 142 ++++++++ 30 files changed, 1870 insertions(+), 651 deletions(-) create mode 100644 indexes/errors.go create mode 100644 indexes/index-cid-to-offset-and-size.go create mode 100644 indexes/index-cid-to-offset-and-size_test.go create mode 100644 indexes/index-sig-to-cid.go create mode 100644 indexes/index-sig-to-cid_test.go create mode 100644 indexes/index-slot-to-cid.go create mode 100644 indexes/index-slot-to-cid_test.go create mode 100644 indexes/metadata.go create mode 100644 indexes/networks.go create mode 100644 indexes/uints.go create mode 100644 indexes/uints_test.go diff --git a/car-dag-traverser.go b/car-dag-traverser.go index 6c8140cd..951d0dcb 100644 --- a/car-dag-traverser.go +++ b/car-dag-traverser.go @@ -13,7 +13,7 @@ import ( "github.com/ipld/go-car" "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "k8s.io/klog/v2" @@ -47,7 +47,7 @@ func dirExists(path string) (bool, error) { return true, nil } -func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIterator, error) { +func openCarReaderWithCidToOffsetAndSizeIndex(carPath string, indexFilePath string) (*SimpleIterator, error) { // Check if the CAR file exists: exists, err := fileExists(carPath) if err != nil { @@ -82,13 +82,8 @@ func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIte return nil, fmt.Errorf("CAR file has %d roots, expected 1", len(roots)) } - indexFile, err := os.Open(indexFilePath) - if err != nil { - return nil, fmt.Errorf("failed to open index file: %w", err) - } - klog.Infof("Reading index from %s", indexFilePath) - c2o, err := compactindexsized.Open(indexFile) + c2o, err := indexes.Open_CidToOffsetAndSize(indexFilePath) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } @@ -98,9 +93,8 @@ func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIte } iter := &SimpleIterator{ - c2o: c2o, - cr: cr, - indexFile: indexFile, + c2o: c2o, + cr: cr, } // Try finding the root CID in the index; 
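
The hunks in this file swap the raw compactindexsized lookups for the typed readers of the new indexes package. Below is a minimal sketch of the resulting lookup flow, using only the API visible in this patch (Open_CidToOffsetAndSize, Get, and the Offset and Size fields); the helper name and index path are illustrative, not part of the patch:

package main

import (
	"fmt"

	"github.com/ipfs/go-cid"
	"github.com/rpcpool/yellowstone-faithful/indexes"
)

// sectionForCid resolves a CID to the byte offset and length of its
// section in the CAR file via the cid-to-offset-and-size index.
func sectionForCid(indexPath string, c cid.Cid) (uint64, uint64, error) {
	c2o, err := indexes.Open_CidToOffsetAndSize(indexPath)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to open index: %w", err)
	}
	oas, err := c2o.Get(c)
	if err != nil {
		return 0, 0, fmt.Errorf("failed to get offset and size: %w", err)
	}
	return oas.Offset, oas.Size, nil
}
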
@@ -126,18 +120,16 @@ func openCarReaderWithCidIndex(carPath string, indexFilePath string) (*SimpleIte } type SimpleIterator struct { - c2o *compactindexsized.DB // index from cid to offset in the CAR file - cr *carv2.Reader // the CAR file - indexFile *os.File // the index file + c2o *indexes.CidToOffsetAndSize_Reader // index from cid to offset in the CAR file + cr *carv2.Reader // the CAR file } func NewSimpleCarIterator(carPath string, indexFilePath string) (*SimpleIterator, error) { - return openCarReaderWithCidIndex(carPath, indexFilePath) + return openCarReaderWithCidToOffsetAndSizeIndex(carPath, indexFilePath) } // Close closes the underlying resources. func (t *SimpleIterator) Close() error { - t.indexFile.Close() return t.cr.Close() } @@ -153,23 +145,13 @@ func (t *SimpleIterator) Get(ctx context.Context, c cid.Cid) (*blocks.BasicBlock return node, err } -func newOffsetFinderFunc(c2o *compactindexsized.DB) func(ctx context.Context, c cid.Cid) (uint64, error) { +func newOffsetFinderFunc(c2o *indexes.CidToOffsetAndSize_Reader) func(ctx context.Context, c cid.Cid) (uint64, error) { return func(ctx context.Context, c cid.Cid) (uint64, error) { - bucket, err := c2o.LookupBucket(c.Bytes()) + oas, err := c2o.Get(c) if err != nil { - if err == compactindexsized.ErrNotFound { - return 0, ErrNotFound - } - return 0, fmt.Errorf("failed to lookup bucket: %w", err) - } - offset, err := bucket.Lookup(c.Bytes()) - if err != nil { - if err == compactindexsized.ErrNotFound { - return 0, ErrNotFound - } - return 0, fmt.Errorf("failed to lookup offset: %w", err) + return 0, fmt.Errorf("failed to get offset and size: %w", err) } - return btoi(offset), nil + return oas.Offset, nil } } diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index c047b32c..176dd89b 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -13,11 +13,10 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" - "github.com/gagliardetto/solana-go" "github.com/ipfs/go-cid" carv1 "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" "k8s.io/klog/v2" @@ -25,6 +24,8 @@ import ( func newCmd_Index_all() *cli.Command { var verify bool + var epoch uint64 + var network indexes.Network return &cli.Command{ Name: "all", Description: "Given a CAR file containing a Solana epoch, create all the necessary indexes and save them in the specified index dir.", @@ -43,6 +44,22 @@ func newCmd_Index_all() *cli.Command { Usage: "temporary directory to use for storing intermediate files", Value: "", }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch of the CAR file", + Destination: &epoch, + }, + &cli.StringFlag{ + Name: "network", + Usage: "the network of the CAR file", + Action: func(c *cli.Context, s string) error { + network = indexes.Network(s) + if !indexes.IsValidNetwork(network) { + return fmt.Errorf("invalid network: %s", network) + } + return nil + }, + }, }, Subcommands: []*cli.Command{}, Action: func(c *cli.Context) error { @@ -68,7 +85,14 @@ func newCmd_Index_all() *cli.Command { klog.Infof("Took %s", time.Since(startedAt)) }() klog.Infof("Creating all indexes for %s", carPath) - indexPaths, err := createAllIndexes(context.Background(), tmpDir, carPath, indexDir) + indexPaths, err := createAllIndexes( + c.Context, + epoch, + network, + tmpDir, + carPath, + indexDir, + ) if err != nil { 
return err } @@ -96,6 +120,8 @@ var veryPlainSdumpConfig = spew.ConfigState{ func createAllIndexes( ctx context.Context, + epoch uint64, + network indexes.Network, tmpDir string, carPath string, indexDir string, @@ -148,19 +174,23 @@ func createAllIndexes( } klog.Infof("Total: %s items", humanize.Comma(int64(numTotalItems))) - cid_to_offset, err := NewBuilder_CidToOffset( + cid_to_offset_and_size, err := NewBuilder_CidToOffset( + epoch, + rootCID, + network, tmpDir, - indexDir, numTotalItems, ) if err != nil { return nil, fmt.Errorf("failed to create cid_to_offset index: %w", err) } - defer cid_to_offset.Close() + defer cid_to_offset_and_size.Close() slot_to_cid, err := NewBuilder_SlotToCid( + epoch, + rootCID, + network, tmpDir, - indexDir, numItems[byte(iplddecoders.KindBlock)], ) if err != nil { @@ -169,8 +199,10 @@ func createAllIndexes( defer slot_to_cid.Close() sig_to_cid, err := NewBuilder_SignatureToCid( + epoch, + rootCID, + network, tmpDir, - indexDir, numItems[byte(iplddecoders.KindTransaction)], ) if err != nil { @@ -212,7 +244,7 @@ func createAllIndexes( // klog.Infof("key: %s, offset: %d", bin.FormatByteSlice(c.Bytes()), totalOffset) - err = cid_to_offset.Put(_cid, totalOffset) + err = cid_to_offset_and_size.Put(_cid, totalOffset, sectionLength) if err != nil { return nil, fmt.Errorf("failed to index cid to offset: %w", err) } @@ -295,29 +327,32 @@ func createAllIndexes( { // seal the indexes { - klog.Infof("Sealing cid_to_offset index...") - paths.CidToOffset, err = cid_to_offset.Seal(ctx, carPath, rootCID) + klog.Infof("Sealing cid_to_offset_and_size index...") + err = cid_to_offset_and_size.Seal(ctx, indexDir) if err != nil { return nil, fmt.Errorf("failed to seal cid_to_offset index: %w", err) } - klog.Infof("Successfully sealed cid_to_offset index: %s", paths.CidToOffset) + paths.CidToOffsetAndSize = cid_to_offset_and_size.GetFilepath() + klog.Infof("Successfully sealed cid_to_offset_and_size index: %s", paths.CidToOffsetAndSize) } { klog.Infof("Sealing slot_to_cid index...") - paths.SlotToCid, err = slot_to_cid.Seal(ctx, carPath, rootCID) + err = slot_to_cid.Seal(ctx, indexDir) if err != nil { return nil, fmt.Errorf("failed to seal slot_to_cid index: %w", err) } + paths.SlotToCid = slot_to_cid.GetFilepath() klog.Infof("Successfully sealed slot_to_cid index: %s", paths.SlotToCid) } { klog.Infof("Sealing sig_to_cid index...") - paths.SignatureToCid, err = sig_to_cid.Seal(ctx, carPath, rootCID) + err = sig_to_cid.Seal(ctx, indexDir) if err != nil { return nil, fmt.Errorf("failed to seal sig_to_cid index: %w", err) } + paths.SignatureToCid = sig_to_cid.GetFilepath() klog.Infof("Successfully sealed sig_to_cid index: %s", paths.SignatureToCid) } @@ -345,179 +380,82 @@ func blackText(s string) string { } type IndexPaths struct { - CidToOffset string - SlotToCid string - SignatureToCid string - SignatureExists string -} - -type Builder_CidToOffset struct { - tmpDir string - indexDir string - carPath string - index *compactindexsized.Builder + CidToOffsetAndSize string + SlotToCid string + SignatureToCid string + SignatureExists string } func NewBuilder_CidToOffset( + epoch uint64, + rootCid cid.Cid, + network indexes.Network, tmpDir string, - indexDir string, numItems uint64, -) (*Builder_CidToOffset, error) { +) (*indexes.CidToOffsetAndSize_Writer, error) { tmpDir = filepath.Join(tmpDir, "index-cid-to-offset-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to 
create cid_to_offset tmp dir: %w", err) } - index, err := compactindexsized.NewBuilderSized( + index, err := indexes.NewWriter_CidToOffsetAndSize( + epoch, + rootCid, + network, tmpDir, - uint(numItems), - 8, + numItems, ) if err != nil { - return nil, fmt.Errorf("failed to create cid_to_offset index: %w", err) + return nil, fmt.Errorf("failed to create cid-to-offset-and-size index: %w", err) } - return &Builder_CidToOffset{ - tmpDir: tmpDir, - indexDir: indexDir, - index: index, - }, nil -} - -func (b *Builder_CidToOffset) Put(c cid.Cid, offset uint64) error { - return b.index.Insert(c.Bytes(), itob(offset)) -} - -func (b *Builder_CidToOffset) Close() error { - return b.index.Close() -} - -func (b *Builder_CidToOffset) Seal(ctx context.Context, carPath string, rootCid cid.Cid) (string, error) { - indexFilePath := filepath.Join(b.indexDir, fmt.Sprintf("%s.%s.cid-to-offset.index", filepath.Base(carPath), rootCid.String())) - klog.Infof("Creating cid_to_offset index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create cid_to_offset index file: %w", err) - } - defer targetFile.Close() - - klog.Infof("Sealing cid_to_offset index...") - if err = b.index.Seal(ctx, targetFile); err != nil { - return "", fmt.Errorf("failed to seal cid_to_offset index: %w", err) - } - return indexFilePath, nil -} - -type Builder_SignatureToCid struct { - tmpDir string - indexDir string - carPath string - index *compactindexsized.Builder + return index, nil } func NewBuilder_SignatureToCid( + epoch uint64, + rootCid cid.Cid, + network indexes.Network, tmpDir string, - indexDir string, numItems uint64, -) (*Builder_SignatureToCid, error) { +) (*indexes.SigToCid_Writer, error) { tmpDir = filepath.Join(tmpDir, "index-sig-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to create sig_to_cid tmp dir: %w", err) } - index, err := compactindexsized.NewBuilderSized( + index, err := indexes.NewWriter_SigToCid( + epoch, + rootCid, + network, tmpDir, - uint(numItems), - 36, + numItems, ) if err != nil { return nil, fmt.Errorf("failed to create sig_to_cid index: %w", err) } - return &Builder_SignatureToCid{ - tmpDir: tmpDir, - indexDir: indexDir, - index: index, - }, nil -} - -func (b *Builder_SignatureToCid) Put(signature solana.Signature, cid cid.Cid) error { - var buf [36]byte - copy(buf[:], cid.Bytes()[:36]) - return b.index.Insert(signature[:], buf[:]) -} - -func (b *Builder_SignatureToCid) Close() error { - return b.index.Close() -} - -func (b *Builder_SignatureToCid) Seal(ctx context.Context, carPath string, rootCid cid.Cid) (string, error) { - indexFilePath := filepath.Join(b.indexDir, fmt.Sprintf("%s.%s.sig-to-cid.index", filepath.Base(carPath), rootCid.String())) - klog.Infof("Creating sig_to_cid index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create sig_to_cid index file: %w", err) - } - defer targetFile.Close() - - klog.Infof("Sealing sig_to_cid index...") - if err = b.index.Seal(ctx, targetFile); err != nil { - return "", fmt.Errorf("failed to seal sig_to_cid index: %w", err) - } - return indexFilePath, nil -} - -type Builder_SlotToCid struct { - tmpDir string - indexDir string - carPath string - index *compactindexsized.Builder + return index, nil } func NewBuilder_SlotToCid( + epoch uint64, + rootCid cid.Cid, + network 
indexes.Network, tmpDir string, - indexDir string, numItems uint64, -) (*Builder_SlotToCid, error) { +) (*indexes.SlotToCid_Writer, error) { tmpDir = filepath.Join(tmpDir, "index-slot-to-cid-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { return nil, fmt.Errorf("failed to create slot_to_cid tmp dir: %w", err) } - index, err := compactindexsized.NewBuilderSized( + index, err := indexes.NewWriter_SlotToCid( + epoch, + rootCid, + network, tmpDir, - uint(numItems), - 36, + numItems, ) if err != nil { return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err) } - return &Builder_SlotToCid{ - tmpDir: tmpDir, - indexDir: indexDir, - index: index, - }, nil -} - -func (b *Builder_SlotToCid) Put(slot uint64, cid cid.Cid) error { - var buf [36]byte - copy(buf[:], cid.Bytes()[:36]) - return b.index.Insert(uint64ToLeBytes(slot), buf[:]) -} - -func (b *Builder_SlotToCid) Close() error { - return b.index.Close() -} - -func (b *Builder_SlotToCid) Seal(ctx context.Context, carPath string, rootCid cid.Cid) (string, error) { - indexFilePath := filepath.Join(b.indexDir, fmt.Sprintf("%s.%s.slot-to-cid.index", filepath.Base(carPath), rootCid.String())) - klog.Infof("Creating slot_to_cid index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create slot_to_cid index file: %w", err) - } - defer targetFile.Close() - - klog.Infof("Sealing index...") - if err = b.index.Seal(ctx, targetFile); err != nil { - return "", fmt.Errorf("failed to seal slot_to_cid index: %w", err) - } - return indexFilePath, nil + return index, nil } func verifyAllIndexes( @@ -550,7 +488,7 @@ func verifyAllIndexes( } cid_to_offset, err := OpenIndex_CidToOffset( - indexes.CidToOffset, + indexes.CidToOffsetAndSize, ) if err != nil { return fmt.Errorf("failed to open cid_to_offset index: %w", err) @@ -613,9 +551,12 @@ func verifyAllIndexes( if err != nil { return fmt.Errorf("failed to lookup offset for %s: %w", _cid, err) } - if offset != totalOffset { + if offset.Offset != totalOffset { return fmt.Errorf("offset mismatch for %s: %d != %d", _cid, offset, totalOffset) } + if offset.Size != sectionLength { + return fmt.Errorf("length mismatch for %s: %d != %d", _cid, offset, sectionLength) + } numIndexedOffsets++ @@ -696,110 +637,32 @@ func verifyAllIndexes( return nil } -type Index_CidToOffset struct { - file *os.File - db *compactindexsized.DB -} - func OpenIndex_CidToOffset( indexFilePath string, -) (*Index_CidToOffset, error) { - indexFile, err := os.Open(indexFilePath) - if err != nil { - return nil, fmt.Errorf("failed to open index file: %w", err) - } - - index, err := compactindexsized.Open(indexFile) +) (*indexes.CidToOffsetAndSize_Reader, error) { + index, err := indexes.Open_CidToOffsetAndSize(indexFilePath) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } - - return &Index_CidToOffset{ - file: indexFile, - db: index, - }, nil -} - -func (i *Index_CidToOffset) Get(cid_ cid.Cid) (uint64, error) { - offset, err := findOffsetFromCid(i.db, cid_) - if err != nil { - return 0, fmt.Errorf("failed to lookup offset for %s: %w", cid_, err) - } - return offset, nil -} - -func (i *Index_CidToOffset) Close() error { - return i.file.Close() -} - -type Index_SlotToCid struct { - file *os.File - db *compactindexsized.DB + return index, nil } func OpenIndex_SlotToCid( indexFilePath string, -) (*Index_SlotToCid, error) { - indexFile, err := 
os.Open(indexFilePath) - if err != nil { - return nil, fmt.Errorf("failed to open index file: %w", err) - } - - index, err := compactindexsized.Open(indexFile) +) (*indexes.SlotToCid_Reader, error) { + index, err := indexes.Open_SlotToCid(indexFilePath) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } - - return &Index_SlotToCid{ - file: indexFile, - db: index, - }, nil -} - -func (i *Index_SlotToCid) Get(slot uint64) (cid.Cid, error) { - cid_, err := findCidFromSlot(i.db, slot) - if err != nil { - return cid.Undef, fmt.Errorf("failed to lookup cid for slot %d: %w", slot, err) - } - return cid_, nil -} - -func (i *Index_SlotToCid) Close() error { - return i.file.Close() -} - -type Index_SigToCid struct { - file *os.File - db *compactindexsized.DB + return index, nil } func OpenIndex_SigToCid( indexFilePath string, -) (*Index_SigToCid, error) { - indexFile, err := os.Open(indexFilePath) - if err != nil { - return nil, fmt.Errorf("failed to open index file: %w", err) - } - - index, err := compactindexsized.Open(indexFile) +) (*indexes.SigToCid_Reader, error) { + index, err := indexes.Open_SigToCid(indexFilePath) if err != nil { return nil, fmt.Errorf("failed to open index: %w", err) } - - return &Index_SigToCid{ - file: indexFile, - db: index, - }, nil -} - -func (i *Index_SigToCid) Get(sig solana.Signature) (cid.Cid, error) { - cid_, err := findCidFromSignature(i.db, sig) - if err != nil { - return cid.Undef, fmt.Errorf("failed to lookup cid for sig %x: %w", sig, err) - } - return cid_, nil -} - -func (i *Index_SigToCid) Close() error { - return i.file.Close() + return index, nil } diff --git a/cmd-x-index-cid2offset.go b/cmd-x-index-cid2offset.go index e62336c9..1ff9e616 100644 --- a/cmd-x-index-cid2offset.go +++ b/cmd-x-index-cid2offset.go @@ -5,12 +5,15 @@ import ( "fmt" "time" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/urfave/cli/v2" "k8s.io/klog/v2" ) func newCmd_Index_cid2offset() *cli.Command { var verify bool + var epoch uint64 + var network indexes.Network return &cli.Command{ Name: "cid-to-offset", Description: "Given a CAR file containing a Solana epoch, create an index of the file that maps CIDs to offsets in the CAR file.", @@ -29,6 +32,22 @@ func newCmd_Index_cid2offset() *cli.Command { Usage: "temporary directory to use for storing intermediate files", Value: "", }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch of the CAR file", + Destination: &epoch, + }, + &cli.StringFlag{ + Name: "network", + Usage: "the network of the CAR file", + Action: func(c *cli.Context, s string) error { + network = indexes.Network(s) + if !indexes.IsValidNetwork(network) { + return fmt.Errorf("invalid network: %s", network) + } + return nil + }, + }, }, Subcommands: []*cli.Command{}, Action: func(c *cli.Context) error { @@ -50,6 +69,8 @@ func newCmd_Index_cid2offset() *cli.Command { klog.Infof("Creating CID-to-offset index for %s", carPath) indexFilepath, err := CreateIndex_cid2offset( context.TODO(), + epoch, + network, tmpDir, carPath, indexDir, diff --git a/cmd-x-index-sig2cid.go b/cmd-x-index-sig2cid.go index ccde34cc..6875e315 100644 --- a/cmd-x-index-sig2cid.go +++ b/cmd-x-index-sig2cid.go @@ -5,12 +5,15 @@ import ( "fmt" "time" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/urfave/cli/v2" "k8s.io/klog/v2" ) func newCmd_Index_sig2cid() *cli.Command { var verify bool + var epoch uint64 + var network indexes.Network return &cli.Command{ Name: "sig-to-cid", Description: "Given a CAR file containing a Solana epoch, create an 
index of the file that maps transaction signatures to CIDs.", @@ -29,6 +32,22 @@ func newCmd_Index_sig2cid() *cli.Command { Usage: "temporary directory to use for storing intermediate files", Value: "", }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch of the CAR file", + Destination: &epoch, + }, + &cli.StringFlag{ + Name: "network", + Usage: "the network of the CAR file", + Action: func(c *cli.Context, s string) error { + network = indexes.Network(s) + if !indexes.IsValidNetwork(network) { + return fmt.Errorf("invalid network: %s", network) + } + return nil + }, + }, }, Subcommands: []*cli.Command{}, Action: func(c *cli.Context) error { @@ -50,6 +69,8 @@ func newCmd_Index_sig2cid() *cli.Command { klog.Infof("Creating Sig-to-CID index for %s", carPath) indexFilepath, err := CreateIndex_sig2cid( context.TODO(), + epoch, + network, tmpDir, carPath, indexDir, diff --git a/cmd-x-index-slot2cid.go b/cmd-x-index-slot2cid.go index 75fbfc24..d45c2d1e 100644 --- a/cmd-x-index-slot2cid.go +++ b/cmd-x-index-slot2cid.go @@ -5,12 +5,15 @@ import ( "fmt" "time" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/urfave/cli/v2" "k8s.io/klog/v2" ) func newCmd_Index_slot2cid() *cli.Command { var verify bool + var epoch uint64 + var network indexes.Network return &cli.Command{ Name: "slot-to-cid", Description: "Given a CAR file containing a Solana epoch, create an index of the file that maps slot numbers to CIDs.", @@ -29,6 +32,22 @@ func newCmd_Index_slot2cid() *cli.Command { Usage: "temporary directory to use for storing intermediate files", Value: "", }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch of the CAR file", + Destination: &epoch, + }, + &cli.StringFlag{ + Name: "network", + Usage: "the network of the CAR file", + Action: func(c *cli.Context, s string) error { + network = indexes.Network(s) + if !indexes.IsValidNetwork(network) { + return fmt.Errorf("invalid network: %s", network) + } + return nil + }, + }, }, Subcommands: []*cli.Command{}, Action: func(c *cli.Context) error { @@ -50,6 +69,8 @@ func newCmd_Index_slot2cid() *cli.Command { klog.Infof("Creating Slot-to-CID index for %s", carPath) indexFilepath, err := CreateIndex_slot2cid( context.TODO(), + epoch, + network, tmpDir, carPath, indexDir, diff --git a/cmd-x-verify-index-all.go b/cmd-x-verify-index-all.go index 905ebec3..47af273c 100644 --- a/cmd-x-verify-index-all.go +++ b/cmd-x-verify-index-all.go @@ -34,10 +34,10 @@ func newCmd_VerifyIndex_all() *cli.Command { context.TODO(), carPath, &IndexPaths{ - CidToOffset: indexFilePathCid2Offset, - SlotToCid: indexFilePathSlot2Cid, - SignatureToCid: indexFilePathSig2Cid, - SignatureExists: indexFilePathSigExists, + CidToOffsetAndSize: indexFilePathCid2Offset, + SlotToCid: indexFilePathSlot2Cid, + SignatureToCid: indexFilePathSig2Cid, + SignatureExists: indexFilePathSigExists, }, ) if err != nil { diff --git a/compactindexsized/build.go b/compactindexsized/build.go index 8a2bd164..7b4041be 100644 --- a/compactindexsized/build.go +++ b/compactindexsized/build.go @@ -21,7 +21,7 @@ import ( // Builder creates new compactindex files. type Builder struct { Header - dir string + tmpDir string headerSize int64 closers []io.Closer buckets []tempBucket @@ -36,13 +36,13 @@ type Builder struct { // valueSize is the size of each value in bytes. It must be > 0 and <= 256. // All values must be of the same size. 
func NewBuilderSized( - dir string, + tmpDir string, numItems uint, valueSize uint, ) (*Builder, error) { - if dir == "" { + if tmpDir == "" { var err error - dir, err = os.MkdirTemp("", "compactindex-") + tmpDir, err = os.MkdirTemp("", "compactindex-") if err != nil { return nil, fmt.Errorf("failed to create temp dir: %w", err) } @@ -61,7 +61,7 @@ func NewBuilderSized( buckets := make([]tempBucket, numBuckets) closers := make([]io.Closer, 0, numBuckets) for i := range buckets { - name := filepath.Join(dir, fmt.Sprintf("keys-%d", i)) + name := filepath.Join(tmpDir, fmt.Sprintf("keys-%d", i)) f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666) if err != nil { for _, c := range closers { @@ -82,7 +82,7 @@ func NewBuilderSized( }, closers: closers, buckets: buckets, - dir: dir, + tmpDir: tmpDir, }, nil } @@ -98,17 +98,17 @@ func (b *Builder) SetKind(kind []byte) error { return fmt.Errorf("kind is empty") } // check if kind is already set - if b.Header.Meta.Count(KeyKind) > 0 { + if b.Header.Metadata.Count(KeyKind) > 0 { // remove kind - b.Header.Meta.Remove(KeyKind) + b.Header.Metadata.Remove(KeyKind) } // set kind - b.Header.Meta.Add(KeyKind, kind) + b.Header.Metadata.Add(KeyKind, kind) return nil } func (b *Builder) Metadata() *Meta { - return &b.Header.Meta + return &b.Header.Metadata } func (b *Builder) getValueSize() int { @@ -128,17 +128,17 @@ func (b *Builder) Insert(key []byte, value []byte) error { // // The file should be opened with access mode os.O_RDWR. // Passing a non-empty file will result in a corrupted index. -func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) { +func (b *Builder) Seal(ctx context.Context, file *os.File) (err error) { // TODO support in-place writing. defer func() { - f.Sync() + file.Sync() }() // Write header. headerBuf := b.Header.Bytes() headerSize := int64(len(headerBuf)) - numWroteHeader, err := f.Write(headerBuf[:]) + numWroteHeader, err := file.Write(headerBuf[:]) if err != nil { return fmt.Errorf("failed to write header: %w", err) } @@ -148,10 +148,10 @@ func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) { b.headerSize = headerSize // Create hole to leave space for bucket header table. bucketTableLen := int64(b.NumBuckets) * bucketHdrLen - err = fallocate(f, headerSize, bucketTableLen) + err = fallocate(file, headerSize, bucketTableLen) if errors.Is(err, syscall.EOPNOTSUPP) { // The underlying file system may not support fallocate - err = fake_fallocate(f, headerSize, bucketTableLen) + err = fake_fallocate(file, headerSize, bucketTableLen) if err != nil { return fmt.Errorf("failed to fake fallocate() bucket table: %w", err) } @@ -161,7 +161,7 @@ func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) { } // Seal each bucket. 
for i := range b.buckets { - if err := b.sealBucket(ctx, i, f); err != nil { + if err := b.sealBucket(ctx, i, file); err != nil { return fmt.Errorf("failed to seal bucket %d: %w", i, err) } } @@ -228,7 +228,7 @@ func (b *Builder) Close() error { for _, c := range b.closers { c.Close() } - return os.RemoveAll(b.dir) + return os.RemoveAll(b.tmpDir) } // tempBucket represents the "temporary bucket" file, diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index f00fb4d3..d5088529 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -316,7 +316,7 @@ func TestBuilder36(t *testing.T) { assert.Equal(t, Header{ ValueSize: valueSize, NumBuckets: numBuckets, - Meta: Meta{ + Metadata: Meta{ KeyVals: []KV{ { Key: KeyKind, @@ -414,7 +414,7 @@ func TestBuilder36_Random(t *testing.T) { // Ensure we cleaned up after ourselves. defer func() { - _, statErr := os.Stat(builder.dir) + _, statErr := os.Stat(builder.tmpDir) assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) }() defer builder.Close() diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go index 14de12d4..ca10f847 100644 --- a/compactindexsized/build48_test.go +++ b/compactindexsized/build48_test.go @@ -279,7 +279,7 @@ func TestBuilder48(t *testing.T) { assert.Equal(t, Header{ ValueSize: valueSize, NumBuckets: numBuckets, - Meta: Meta{ + Metadata: Meta{ KeyVals: []KV{ { Key: KeyKind, @@ -373,7 +373,7 @@ func TestBuilder48_Random(t *testing.T) { // Ensure we cleaned up after ourselves. defer func() { - _, statErr := os.Stat(builder.dir) + _, statErr := os.Stat(builder.tmpDir) assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) }() defer builder.Close() diff --git a/compactindexsized/build8_test.go b/compactindexsized/build8_test.go index ffe1cb73..1f0dde67 100644 --- a/compactindexsized/build8_test.go +++ b/compactindexsized/build8_test.go @@ -213,7 +213,7 @@ func TestBuilder8_Random(t *testing.T) { // Ensure we cleaned up after ourselves. defer func() { - _, statErr := os.Stat(builder.dir) + _, statErr := os.Stat(builder.tmpDir) assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) }() defer builder.Close() diff --git a/compactindexsized/compactindex.go b/compactindexsized/compactindex.go index ee242e20..c9a4d709 100644 --- a/compactindexsized/compactindex.go +++ b/compactindexsized/compactindex.go @@ -103,7 +103,7 @@ const Version = uint8(1) type Header struct { ValueSize uint64 NumBuckets uint32 - Meta Meta + Metadata Meta } // Load checks the Magic sequence and loads the header fields. @@ -130,7 +130,7 @@ func (h *Header) Load(buf []byte) error { return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20]) } // read key-value pairs - if err := h.Meta.UnmarshalBinary(buf[25:]); err != nil { + if err := h.Metadata.UnmarshalBinary(buf[25:]); err != nil { return fmt.Errorf("failed to unmarshal metadata: %w", err) } if h.ValueSize == 0 { @@ -152,7 +152,7 @@ func (h *Header) Bytes() []byte { // version buf.WriteByte(Version) // key-value pairs - kvb := h.Meta.Bytes() + kvb := h.Metadata.Bytes() buf.Write(kvb) } lenWithoutMagicAndLen := buf.Len() diff --git a/compactindexsized/query.go b/compactindexsized/query.go index d4433b9f..a9a3b4ee 100644 --- a/compactindexsized/query.go +++ b/compactindexsized/query.go @@ -62,12 +62,12 @@ func (db *DB) Prefetch(yes bool) { // GetKind returns the kind of the index. 
func (db *DB) GetKind() ([]byte, bool) { - return db.Header.Meta.Get(KeyKind) + return db.Header.Metadata.Get(KeyKind) } // KindIs returns whether the index is of the given kind. func (db *DB) KindIs(kind []byte) bool { - got, ok := db.Header.Meta.Get(KeyKind) + got, ok := db.Header.Metadata.Get(KeyKind) return ok && bytes.Equal(got, kind) } diff --git a/compactindexsized/query_test.go b/compactindexsized/query_test.go index f741ceec..f39e7d20 100644 --- a/compactindexsized/query_test.go +++ b/compactindexsized/query_test.go @@ -78,7 +78,7 @@ func TestOpen_HeaderOnly(t *testing.T) { assert.Equal(t, Header{ ValueSize: 0x1337, NumBuckets: 0x42, - Meta: Meta{ + Metadata: Meta{ KeyVals: []KV{ { Key: []byte("foo"), diff --git a/epoch.go b/epoch.go index 53ef80ff..e5214dc8 100644 --- a/epoch.go +++ b/epoch.go @@ -17,8 +17,8 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/patrickmn/go-cache" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/gsfa" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" @@ -34,9 +34,9 @@ type Epoch struct { localCarReader *carv2.Reader remoteCarReader ReaderAtCloser remoteCarHeaderSize uint64 - cidToOffsetIndex *compactindexsized.DB - slotToCidIndex *compactindexsized.DB - sigToCidIndex *compactindexsized.DB + cidToOffsetIndex *indexes.CidToOffsetAndSize_Reader + slotToCidIndex *indexes.SlotToCid_Reader + sigToCidIndex *indexes.SigToCid_Reader sigExists *bucketteer.Reader gsfaReader *gsfa.GsfaReader cidToNodeCache *cache.Cache // TODO: prevent OOM @@ -117,7 +117,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, cidToOffsetIndexFile.Close) - cidToOffsetIndex, err := compactindexsized.Open(cidToOffsetIndexFile) + cidToOffsetIndex, err := indexes.OpenWithReader_CidToOffsetAndSize(cidToOffsetIndexFile) if err != nil { return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) } @@ -138,7 +138,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, slotToCidIndexFile.Close) - slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) + slotToCidIndex, err := indexes.OpenWithReader_SlotToCid(slotToCidIndexFile) if err != nil { return nil, fmt.Errorf("failed to open slot-to-cid index: %w", err) } @@ -159,7 +159,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { } ep.onClose = append(ep.onClose, sigToCidIndexFile.Close) - sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) + sigToCidIndex, err := indexes.OpenWithReader_SigToCid(sigToCidIndexFile) if err != nil { return nil, fmt.Errorf("failed to open sig-to-cid index: %w", err) } @@ -411,7 +411,7 @@ func (ser *Epoch) FindCidFromSlot(ctx context.Context, slot uint64) (o cid.Cid, } else if has { return c, nil } - found, err := findCidFromSlot(ser.slotToCidIndex, slot) + found, err := ser.slotToCidIndex.Get(slot) if err != nil { return cid.Undef, err } @@ -424,7 +424,7 @@ func (ser *Epoch) FindCidFromSignature(ctx context.Context, sig solana.Signature defer func() { klog.Infof("Found CID for signature %s in %s: %s", sig, time.Since(startedAt), o) }() - return findCidFromSignature(ser.sigToCidIndex, sig) + return ser.sigToCidIndex.Get(sig) } func (ser *Epoch) FindOffsetFromCid(ctx context.Context, 
cid cid.Cid) (o uint64, e error) { @@ -439,12 +439,13 @@ func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (o uint64, } else if has { return offset, nil } - found, err := findOffsetFromCid(ser.cidToOffsetIndex, cid) + found, err := ser.cidToOffsetIndex.Get(cid) if err != nil { return 0, err } - ser.putCidToOffsetInCache(cid, found) - return found, nil + // TODO: use also the size. + ser.putCidToOffsetInCache(cid, found.Offset) + return found.Offset, nil } func (ser *Epoch) GetBlock(ctx context.Context, slot uint64) (*ipldbindcode.Block, error) { diff --git a/go.mod b/go.mod index 35837898..bc4906c5 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/rpcpool/yellowstone-faithful go 1.20 require ( - github.com/benbjohnson/clock v1.3.0 // indirect + github.com/benbjohnson/clock v1.3.5 // indirect github.com/cespare/xxhash/v2 v2.2.0 github.com/davecgh/go-spew v1.1.1 github.com/dustin/go-humanize v1.0.1 @@ -11,48 +11,47 @@ require ( github.com/filecoin-project/go-state-types v0.10.0 // indirect github.com/gagliardetto/binary v0.7.8 github.com/gagliardetto/solana-go v1.8.4 - github.com/gin-gonic/gin v1.9.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/uuid v1.3.0 github.com/hannahhoward/go-pubsub v1.0.0 // indirect github.com/ipfs/go-blockservice v0.5.0 // indirect github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-datastore v0.6.0 // indirect - github.com/ipfs/go-graphsync v0.14.6 // indirect + github.com/ipfs/go-graphsync v0.16.0 // indirect github.com/ipfs/go-ipfs-blockstore v1.3.0 // indirect github.com/ipfs/go-ipfs-delay v0.0.1 // indirect github.com/ipfs/go-ipfs-exchange-interface v0.2.0 // indirect - github.com/ipfs/go-ipld-format v0.4.0 // indirect + github.com/ipfs/go-ipld-format v0.6.0 // indirect github.com/ipfs/go-libipfs v0.6.1 github.com/ipfs/go-log/v2 v2.5.1 - github.com/ipfs/go-unixfsnode v1.7.0 // indirect - github.com/ipld/go-car/v2 v2.9.0 + github.com/ipfs/go-unixfsnode v1.9.0 // indirect + github.com/ipld/go-car/v2 v2.13.1 github.com/ipld/go-codec-dagpb v1.6.0 // indirect - github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0 - github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff // indirect + github.com/ipld/go-ipld-prime v0.21.0 + github.com/ipni/go-libipni v0.5.3 // indirect github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.16.4 - github.com/libp2p/go-libp2p v0.27.1 - github.com/libp2p/go-libp2p-routing-helpers v0.6.1 // indirect - github.com/multiformats/go-multiaddr v0.9.0 // indirect + github.com/klauspost/compress v1.17.2 + github.com/libp2p/go-libp2p v0.32.1 + github.com/libp2p/go-libp2p-routing-helpers v0.7.1 // indirect + github.com/multiformats/go-multiaddr v0.12.0 // indirect github.com/multiformats/go-multicodec v0.9.0 - github.com/multiformats/go-multihash v0.2.1 // indirect + github.com/multiformats/go-multihash v0.2.3 // indirect github.com/sourcegraph/jsonrpc2 v0.2.0 - github.com/stretchr/testify v1.8.2 - github.com/urfave/cli/v2 v2.25.3 + github.com/stretchr/testify v1.8.4 + github.com/urfave/cli/v2 v2.25.7 github.com/vbauerster/mpb/v8 v8.2.1 - go.opentelemetry.io/otel v1.14.0 // indirect - go.opentelemetry.io/otel/trace v1.14.0 // indirect + go.opentelemetry.io/otel v1.16.0 // indirect + go.opentelemetry.io/otel/trace v1.16.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.8.0 // indirect - golang.org/x/sync v0.1.0 - google.golang.org/protobuf v1.30.0 + golang.org/x/net v0.17.0 // indirect + golang.org/x/sync v0.4.0 + 
google.golang.org/protobuf v1.31.0 k8s.io/klog/v2 v2.90.1 ) require ( - github.com/filecoin-project/lassie v0.12.1 - github.com/ipfs/go-ipld-cbor v0.0.6 + github.com/filecoin-project/lassie v0.21.0 + github.com/ipfs/go-ipld-cbor v0.1.0 github.com/ipfs/go-log v1.0.5 github.com/novifinancial/serde-reflection/serde-generate/runtime/golang v0.0.0-20220519162058-e5cd3c3b3f3a ) @@ -68,7 +67,7 @@ require ( github.com/ryanuber/go-glob v1.0.0 github.com/tejzpr/ordered-concurrently/v3 v3.0.1 github.com/valyala/fasthttp v1.47.0 - golang.org/x/exp v0.0.0-20230321023759-10a507213a29 + golang.org/x/exp v0.0.0-20231006140011-7918f672742d gopkg.in/yaml.v3 v3.0.1 k8s.io/klog v1.0.0 ) @@ -76,6 +75,7 @@ require ( require ( contrib.go.opencensus.io/exporter/stackdriver v0.13.14 // indirect filippo.io/edwards25519 v1.0.0 // indirect + github.com/Jorropo/jsync v1.0.1 // indirect github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/VividCortex/ewma v1.2.0 // indirect @@ -85,12 +85,13 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/bep/debounce v1.2.1 // indirect github.com/blendle/zapdriver v1.3.1 // indirect + github.com/cespare/xxhash v1.1.0 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/cskr/pubsub v1.0.2 // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect - github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0 // indirect + github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/dfuse-io/logging v0.0.0-20210109005628-b97a57253f70 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/elastic/gosigar v0.14.2 // indirect @@ -106,60 +107,62 @@ require ( github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect github.com/gagliardetto/treeout v0.1.4 // indirect - github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/mock v1.6.0 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/pprof v0.0.0-20230405160723-4a4c7d95572b // indirect + github.com/google/pprof v0.0.0-20231023181126-ff6d637d2a7b // indirect + github.com/gorilla/websocket v1.5.0 // indirect github.com/hannahhoward/cbor-gen-for v0.0.0-20230214144701-5d17c9d5243c // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect - github.com/huin/goupnp v1.1.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect + github.com/huin/goupnp v1.3.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect + github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect - github.com/ipfs/go-block-format v0.1.1 // indirect + github.com/ipfs/go-block-format v0.2.0 // indirect github.com/ipfs/go-ipfs-chunker v0.0.6 // indirect github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect github.com/ipfs/go-ipfs-pq v0.0.3 // indirect - github.com/ipfs/go-ipfs-util v0.0.2 // indirect - github.com/ipfs/go-ipld-legacy v0.1.1 // 
indirect - github.com/ipfs/go-merkledag v0.10.0 // indirect + github.com/ipfs/go-ipfs-util v0.0.3 // indirect + github.com/ipfs/go-ipld-legacy v0.2.1 // indirect + github.com/ipfs/go-merkledag v0.11.0 // indirect github.com/ipfs/go-metrics-interface v0.0.1 // indirect github.com/ipfs/go-peertaskqueue v0.8.1 // indirect github.com/ipfs/go-verifcid v0.0.2 // indirect + github.com/ipld/go-trustless-utils v0.4.1 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect github.com/jbenet/goprocess v0.1.4 // indirect github.com/jpillora/backoff v1.0.0 // indirect - github.com/klauspost/cpuid/v2 v2.2.4 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/koron/go-ssdp v0.0.4 // indirect github.com/libp2p/go-buffer-pool v0.1.0 // indirect github.com/libp2p/go-cidranger v1.1.0 // indirect github.com/libp2p/go-flow-metrics v0.1.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.3.0 // indirect github.com/libp2p/go-libp2p-record v0.2.0 // indirect - github.com/libp2p/go-mplex v0.7.0 // indirect github.com/libp2p/go-msgio v0.3.0 // indirect - github.com/libp2p/go-nat v0.1.0 // indirect + github.com/libp2p/go-nat v0.2.0 // indirect github.com/libp2p/go-netroute v0.2.1 // indirect - github.com/libp2p/go-reuseport v0.2.0 // indirect - github.com/libp2p/go-yamux/v4 v4.0.0 // indirect + github.com/libp2p/go-reuseport v0.4.0 // indirect + github.com/libp2p/go-yamux/v4 v4.0.1 // indirect github.com/logrusorgru/aurora v2.0.3+incompatible // indirect github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.18 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect - github.com/miekg/dns v1.1.53 // indirect + github.com/miekg/dns v1.1.56 // indirect github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect - github.com/minio/sha256-simd v1.0.0 // indirect + github.com/minio/sha256-simd v1.0.1 // indirect github.com/mitchellh/go-testing-interface v1.14.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -168,25 +171,24 @@ require ( github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect github.com/multiformats/go-multibase v0.2.0 // indirect - github.com/multiformats/go-multistream v0.4.1 // indirect + github.com/multiformats/go-multistream v0.5.0 // indirect github.com/multiformats/go-varint v0.0.7 // indirect - github.com/onsi/ginkgo/v2 v2.9.2 // indirect - github.com/opencontainers/runtime-spec v1.0.2 // indirect + github.com/onsi/ginkgo/v2 v2.13.0 // indirect + github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.89.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect - github.com/prometheus/client_model v0.3.0 // indirect - 
github.com/prometheus/common v0.42.0 // indirect - github.com/prometheus/procfs v0.9.0 // indirect + github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/common v0.44.0 // indirect + github.com/prometheus/procfs v0.11.1 // indirect github.com/quic-go/qpack v0.4.0 // indirect - github.com/quic-go/qtls-go1-19 v0.3.2 // indirect - github.com/quic-go/qtls-go1-20 v0.2.2 // indirect - github.com/quic-go/quic-go v0.33.0 // indirect - github.com/quic-go/webtransport-go v0.5.2 // indirect + github.com/quic-go/qtls-go1-20 v0.4.1 // indirect + github.com/quic-go/quic-go v0.40.0 // indirect + github.com/quic-go/webtransport-go v0.6.0 // indirect github.com/raulk/go-watchdog v1.3.0 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect @@ -195,23 +197,24 @@ require ( github.com/teris-io/shortid v0.0.0-20220617161101-71ec9f2aa569 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect - github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa // indirect + github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect go.mongodb.org/mongo-driver v1.11.2 // indirect go.opencensus.io v0.24.0 // indirect - go.uber.org/atomic v1.10.0 // indirect - go.uber.org/dig v1.16.1 // indirect - go.uber.org/fx v1.19.2 // indirect + go.opentelemetry.io/otel/metric v1.16.0 // indirect + go.uber.org/atomic v1.11.0 // indirect + go.uber.org/dig v1.17.1 // indirect + go.uber.org/fx v1.20.1 // indirect + go.uber.org/mock v0.3.0 // indirect go.uber.org/ratelimit v0.2.0 // indirect - go.uber.org/zap v1.24.0 // indirect - golang.org/x/crypto v0.7.0 // indirect - golang.org/x/mod v0.10.0 // indirect - golang.org/x/sys v0.7.0 // indirect - golang.org/x/term v0.6.0 // indirect - golang.org/x/text v0.8.0 // indirect + go.uber.org/zap v1.26.0 // indirect + golang.org/x/crypto v0.14.0 // indirect + golang.org/x/mod v0.13.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/term v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect golang.org/x/time v0.0.0-20191024005414-555d28b269f0 // indirect - golang.org/x/tools v0.7.0 // indirect + golang.org/x/tools v0.14.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - lukechampine.com/blake3 v1.1.7 // indirect - nhooyr.io/websocket v1.8.7 // indirect + lukechampine.com/blake3 v1.2.1 // indirect ) diff --git a/go.sum b/go.sum index c54c6be1..b4bbf224 100644 --- a/go.sum +++ b/go.sum @@ -43,6 +43,9 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/GeertJohan/go.incremental v1.0.0/go.mod h1:6fAjUhbVuX1KcMD3c8TEgVUqmo4seqhv0i0kdATSkM0= github.com/GeertJohan/go.rice v1.0.0/go.mod h1:eH6gbSOAUv07dQuZVnBmoDP8mgsM1rtixis4Tib9if0= +github.com/Jorropo/jsync v1.0.1 h1:6HgRolFZnsdfzRUj+ImB9og1JYOxQoReSywkHOGSaUU= +github.com/Jorropo/jsync v1.0.1/go.mod h1:jCOZj3vrBCri3bSU3ErUYvevKlnbssrXeCivybS5ABQ= +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= 
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= @@ -67,8 +70,9 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj github.com/aws/aws-sdk-go v1.22.1/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.23.20/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= -github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= +github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -82,12 +86,11 @@ github.com/blendle/zapdriver v1.3.1/go.mod h1:mdXfREi6u5MArG4j9fewC+FGnXaBR+T4Ox github.com/bradfitz/go-smtpd v0.0.0-20170404230938-deb6d6237625/go.mod h1:HYsPBTaaSFSlLx/70C2HPIMNZpVV8+vt/A+FMnYP11g= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= -github.com/bytedance/sonic v1.8.0 h1:ea0Xadu+sHlu7x5O3gKhRpQ1IKiMrSiHttPF0ybECuA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -119,9 +122,9 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR6AkioZ1ySsx5yxlDQZ8stG2b88gTPxgJU= github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U= -github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK09Y2A4Xv7EE0= -github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0 h1:HbphB4TFFXpv7MNrT52FGrrgVXF1owhMVTHFZIlnvd4= -github.com/decred/dcrd/dcrec/secp256k1/v4 v4.1.0/go.mod h1:DZGJHZMqrU4JJqFAWUS2UO1+lbSKsdiOoYi9Zzey7Fc= +github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5ilcvdfma9wOH6Y= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/dfuse-io/logging 
v0.0.0-20201110202154-26697de88c79/go.mod h1:V+ED4kT/t/lKtH99JQmKIb0v9WL3VaYkJ36CfHlVECI= github.com/dfuse-io/logging v0.0.0-20210109005628-b97a57253f70 h1:CuJS05R9jmNlUK8GOxrEELPbfXm0EuGh/30LjkjN5vo= github.com/dfuse-io/logging v0.0.0-20210109005628-b97a57253f70/go.mod h1:EoK/8RFbMEteaCaz89uessDTnCWjbbcr+DXcBh4el5o= @@ -187,16 +190,15 @@ github.com/filecoin-project/go-statemachine v1.0.3/go.mod h1:jZdXXiHa61n4NmgWFG4 github.com/filecoin-project/go-statestore v0.1.0/go.mod h1:LFc9hD+fRxPqiHiaqUEZOinUJB4WARkRfNl10O7kTnI= github.com/filecoin-project/go-statestore v0.2.0 h1:cRRO0aPLrxKQCZ2UOQbzFGn4WDNdofHZoGPjfNaAo5Q= github.com/filecoin-project/go-statestore v0.2.0/go.mod h1:8sjBYbS35HwPzct7iT4lIXjLlYyPor80aU7t7a/Kspo= -github.com/filecoin-project/lassie v0.12.1 h1:qnEwyGh2T5XXbfGNKLFaaRWi7YgametC99DLyhs3ofE= -github.com/filecoin-project/lassie v0.12.1/go.mod h1:U9BbbrrScxUNfka2XE255GRTnKBGWRKKPtT6irLOlfc= +github.com/filecoin-project/lassie v0.21.0 h1:NKToQFzrsnvU1TK0ogl3xVaUU5Fl+IVdekdba8QCBGA= +github.com/filecoin-project/lassie v0.21.0/go.mod h1:eirdGffZzgZmiY45+QmPUfSI6jsa6JDM3Nd59bR3eUM= github.com/filecoin-project/specs-actors v0.9.4/go.mod h1:BStZQzx5x7TmCkLv0Bpa07U6cPKol6fd3w9KjMPZ6Z4= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= github.com/francoispqt/gojay v1.2.13 h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJnXKk= github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY= -github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= @@ -209,11 +211,6 @@ github.com/gagliardetto/solana-go v1.8.4/go.mod h1:i+7aAyNDTHG0jK8GZIBSI4OVvDqkt github.com/gagliardetto/treeout v0.1.4 h1:ozeYerrLCmCubo1TcIjFiOWTTGteOOHND1twdFpgwaw= github.com/gagliardetto/treeout v0.1.4/go.mod h1:loUefvXTrlRG5rYmJmExNryyBRh8f89VZhmMOyCyqok= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= -github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.9.0 h1:OjyFBKICoexlu99ctXNR2gg+c5pKrKMuyjgARg9qeY8= -github.com/gin-gonic/gin v1.9.0/go.mod h1:W1Me9+hsUSyj3CePGrd1/QrKJMSJ1Tu/0hFEH89961k= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= @@ -226,28 +223,14 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.0/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= -github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= -github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= -github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= -github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.11.2 h1:q3SHpufmypg+erIExEKUmsgmhDTyhcJ38oeKGACXohU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= -github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0= -github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= -github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8= -github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= -github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= -github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= -github.com/goccy/go-json v0.10.0 h1:mXKd9Qw4NuzShiRlOXKews24ufknHO7gx30lsDyokKA= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= @@ -270,8 +253,6 @@ github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= -github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -298,13 +279,11 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ 
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gopacket v1.1.17/go.mod h1:UdDNZ1OO62aGYVnPhxT1U6aI7ukYtA/kB8vaU0diBUM= github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -313,8 +292,8 @@ github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20230405160723-4a4c7d95572b h1:Qcx5LM0fSiks9uCyFZwDBUasd3lxd1RM0GYpL+Li5o4= -github.com/google/pprof v0.0.0-20230405160723-4a4c7d95572b/go.mod h1:79YE0hCXdHag9sBkw2o+N/YnZtTkXi0UT9Nnixa5eYk= +github.com/google/pprof v0.0.0-20231023181126-ff6d637d2a7b h1:RMpPgZTSApbPf7xaVel+QkoGPRLFLrwFO89uDUHEGf0= +github.com/google/pprof v0.0.0-20231023181126-ff6d637d2a7b/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -325,12 +304,13 @@ github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE0 github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f h1:KMlcu9X58lhTA/KrfX8Bi1LQSO4pzoVjTiL3h4Jk+Zk= github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gorilla/rpc v1.2.0/go.mod h1:V4h9r+4sF5HnzqbwIez0fKSpANP0zlYd3qR7p36jkTQ= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/goware/urlx v0.3.2 
h1:gdoo4kBHlkqZNaf6XlQ12LGtQOmpKJrR04Rc3RnpJEo= github.com/goware/urlx v0.3.2/go.mod h1:h8uwbJy68o+tQXCGZNa9D73WN8n0r9OBae5bUnLcgjw= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= @@ -366,34 +346,35 @@ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.3/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/v2 v2.0.5 h1:wW7h1TG88eUIJ2i69gaE3uNVtEPIagzhGvHgwfx2Vm4= +github.com/hashicorp/golang-lru/v2 v2.0.5/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/huin/goupnp v1.0.0/go.mod h1:n9v9KO1tAxYH82qOn+UTIFQDmx5n1Zxd/ClZDMX7Bnc= -github.com/huin/goupnp v1.1.0 h1:gEe0Dp/lZmPZiDFzJJaOfUpOvv2MKUkoBX8lDrn9vKU= -github.com/huin/goupnp v1.1.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= -github.com/huin/goutil v0.0.0-20170803182201-1ca381bf3150/go.mod h1:PpLOETDnJ0o3iZrZfqZzyLl6l7F3c6L1oWn7OICBi6o= +github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= +github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= +github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf h1:toUvJ0yELWjrVmFX8AdriAfzl/EtqvYrpkfEniAJiFo= +github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf/go.mod h1:8IfDmp+FzFGcF4zjAgHMVPpwYw4AjN9ePEzDfkaYJ1w= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= -github.com/ipfs/go-block-format v0.1.1 h1:129vSO3zwbsYADcyQWcOYiuCpAqt462SFfqFHdFJhhI= -github.com/ipfs/go-block-format v0.1.1/go.mod h1:+McEIT+g52p+zz5xGAABGSOKrzmrdX97bc0USBdWPUs= +github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= +github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4bYLEY= github.com/ipfs/go-blockservice v0.5.0/go.mod h1:W6brZ5k20AehbmERplmERn8o2Ni3ZZubvAxaIUeaT6w= github.com/ipfs/go-cid v0.0.1/go.mod 
h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.2/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.4-0.20191112011718-79e75dffeb10/go.mod h1:/BYOuUoxkE+0f6tGzlzMvycuN+5l35VOR4Bpg2sCmds= -github.com/ipfs/go-cid v0.0.4/go.mod h1:4LLaPOQwmk5z9LBgQnpkivrx8BJjUyGwTXCd5Xfj6+M= github.com/ipfs/go-cid v0.0.5/go.mod h1:plgt+Y5MnOey4vO4UlUazGqdbEXuFYitED67FexhXog= github.com/ipfs/go-cid v0.0.6-0.20200501230655-7c82f3b81c00/go.mod h1:plgt+Y5MnOey4vO4UlUazGqdbEXuFYitED67FexhXog= github.com/ipfs/go-cid v0.0.6/go.mod h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= @@ -408,8 +389,8 @@ github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0M github.com/ipfs/go-datastore v0.6.0/go.mod h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps= -github.com/ipfs/go-graphsync v0.14.6 h1:NPxvuUy4Z08Mg8dwpBzwgbv/PGLIufSJ1sle6iAX8yo= -github.com/ipfs/go-graphsync v0.14.6/go.mod h1:yT0AfjFgicOoWdAlUJ96tQ5AkuGI4r1taIQX/aHbBQo= +github.com/ipfs/go-graphsync v0.16.0 h1:0BX7whXlV13Y9FZ/jRg+xaGHaGYbtGxGppKD6tncw6k= +github.com/ipfs/go-graphsync v0.16.0/go.mod h1:WfbMW3hhmX5GQEQ+KJxsFzVJVBKgC5szfrYK7Zc7xIM= github.com/ipfs/go-hamt-ipld v0.1.1/go.mod h1:1EZCr2v0jlCnhpa+aZ0JZYp8Tt2w16+JJOAVz17YcDk= github.com/ipfs/go-ipfs-blockstore v1.3.0 h1:m2EXaWgwTzAfsmt5UdJ7Is6l4gJcaM/A12XwJyvYvMM= github.com/ipfs/go-ipfs-blockstore v1.3.0/go.mod h1:KgtZyc9fq+P2xJUiCAzbRdhhqJHvsw8u2Dlqy2MyRTE= @@ -424,26 +405,28 @@ github.com/ipfs/go-ipfs-ds-help v1.1.0/go.mod h1:YR5+6EaebOhfcqVCyqemItCLthrpVNo github.com/ipfs/go-ipfs-exchange-interface v0.2.0 h1:8lMSJmKogZYNo2jjhUs0izT+dck05pqUw4mWNW9Pw6Y= github.com/ipfs/go-ipfs-exchange-interface v0.2.0/go.mod h1:z6+RhJuDQbqKguVyslSOuVDhqF9JtTrO3eptSAiW2/Y= github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uYokgWRFidfvEkuA= +github.com/ipfs/go-ipfs-files v0.3.0 h1:fallckyc5PYjuMEitPNrjRfpwl7YFt69heCOUhsbGxQ= github.com/ipfs/go-ipfs-posinfo v0.0.1 h1:Esoxj+1JgSjX0+ylc0hUmJCOv6V2vFoZiETLR6OtpRs= github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE= github.com/ipfs/go-ipfs-pq v0.0.3/go.mod h1:btNw5hsHBpRcSSgZtiNm/SLj5gYIZ18AKtv3kERkRb4= github.com/ipfs/go-ipfs-routing v0.3.0 h1:9W/W3N+g+y4ZDeffSgqhgo7BsBSJwPMcyssET9OWevc= github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= -github.com/ipfs/go-ipfs-util v0.0.2 h1:59Sswnk1MFaiq+VcaknX7aYEyGyGDAA73ilhEK2POp8= github.com/ipfs/go-ipfs-util v0.0.2/go.mod h1:CbPtkWJzjLdEcezDns2XYaehFVNXG9zrdrtMecczcsQ= +github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= +github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs= github.com/ipfs/go-ipld-cbor v0.0.3/go.mod h1:wTBtrQZA3SoFKMVkp6cn6HMRteIB1VsmHA0AQFOn7Nc= github.com/ipfs/go-ipld-cbor v0.0.4/go.mod h1:BkCduEx3XBCO6t2Sfo5BaHzuok7hbhdMm9Oh8B2Ftq4= github.com/ipfs/go-ipld-cbor v0.0.5/go.mod h1:BkCduEx3XBCO6t2Sfo5BaHzuok7hbhdMm9Oh8B2Ftq4= github.com/ipfs/go-ipld-cbor v0.0.6-0.20211211231443-5d9b9e1f6fa8/go.mod h1:ssdxxaLJPXH7OjF5V4NSjBbcfh+evoR4ukuru0oPXMA= -github.com/ipfs/go-ipld-cbor v0.0.6 h1:pYuWHyvSpIsOOLw4Jy7NbBkCyzLDcl64Bf/LZW7eBQ0= github.com/ipfs/go-ipld-cbor v0.0.6/go.mod 
h1:ssdxxaLJPXH7OjF5V4NSjBbcfh+evoR4ukuru0oPXMA= +github.com/ipfs/go-ipld-cbor v0.1.0 h1:dx0nS0kILVivGhfWuB6dUpMa/LAwElHPw1yOGYopoYs= +github.com/ipfs/go-ipld-cbor v0.1.0/go.mod h1:U2aYlmVrJr2wsUBU67K4KgepApSZddGRDWBYR0H4sCk= github.com/ipfs/go-ipld-format v0.0.1/go.mod h1:kyJtbkDALmFHv3QR6et67i35QzO3S0dCDnkOJhcZkms= github.com/ipfs/go-ipld-format v0.0.2/go.mod h1:4B6+FM2u9OJ9zCV+kSbgFAZlOrv1Hqbf0INGQgiKf9k= -github.com/ipfs/go-ipld-format v0.2.0/go.mod h1:3l3C1uKoadTPbeNfrDi+xMInYKlx2Cvg1BuydPSdzQs= -github.com/ipfs/go-ipld-format v0.4.0 h1:yqJSaJftjmjc9jEOFYlpkwOLVKv68OD27jFLlSghBlQ= -github.com/ipfs/go-ipld-format v0.4.0/go.mod h1:co/SdBE8h99968X0hViiw1MNlh6fvxxnHpvVLnH7jSM= -github.com/ipfs/go-ipld-legacy v0.1.1 h1:BvD8PEuqwBHLTKqlGFTHSwrwFOMkVESEvwIYwR2cdcc= -github.com/ipfs/go-ipld-legacy v0.1.1/go.mod h1:8AyKFCjgRPsQFf15ZQgDB8Din4DML/fOmKZkkFkrIEg= +github.com/ipfs/go-ipld-format v0.6.0 h1:VEJlA2kQ3LqFSIm5Vu6eIlSxD/Ze90xtc4Meten1F5U= +github.com/ipfs/go-ipld-format v0.6.0/go.mod h1:g4QVMTn3marU3qXchwjpKPKgJv+zF+OlaKMyhJ4LHPg= +github.com/ipfs/go-ipld-legacy v0.2.1 h1:mDFtrBpmU7b//LzLSypVrXsD8QxkEWxu5qVxN99/+tk= +github.com/ipfs/go-ipld-legacy v0.2.1/go.mod h1:782MOUghNzMO2DER0FlBR94mllfdCJCkTtDtPM51otM= github.com/ipfs/go-libipfs v0.6.1 h1:OSO9cm1H3r4OXfP0MP1Q5UhTnhd2fByGl6CVYyz/Rhk= github.com/ipfs/go-libipfs v0.6.1/go.mod h1:FmhKgxMOQA572TK5DA3MZ5GL44ZqsMHIrkgK4gLn4A8= github.com/ipfs/go-log v1.0.0/go.mod h1:JO7RzlMK6rA+CIxFMLOuB6Wf5b81GDiKElL7UPSIKjA= @@ -457,29 +440,31 @@ github.com/ipfs/go-log/v2 v2.1.2-0.20200626104915-0016c0b4b3e4/go.mod h1:2v2nsGf github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g= github.com/ipfs/go-log/v2 v2.5.1 h1:1XdUzF7048prq4aBjDQQ4SL5RxftpRGdXhNRwKSAlcY= github.com/ipfs/go-log/v2 v2.5.1/go.mod h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI= -github.com/ipfs/go-merkledag v0.10.0 h1:IUQhj/kzTZfam4e+LnaEpoiZ9vZF6ldimVlby+6OXL4= -github.com/ipfs/go-merkledag v0.10.0/go.mod h1:zkVav8KiYlmbzUzNM6kENzkdP5+qR7+2mCwxkQ6GIj8= +github.com/ipfs/go-merkledag v0.11.0 h1:DgzwK5hprESOzS4O1t/wi6JDpyVQdvm9Bs59N/jqfBY= +github.com/ipfs/go-merkledag v0.11.0/go.mod h1:Q4f/1ezvBiJV0YCIXvt51W/9/kqJGH4I1LsA7+djsM4= github.com/ipfs/go-metrics-interface v0.0.1 h1:j+cpbjYvu4R8zbleSs36gvB7jR+wsL2fGD6n0jO4kdg= github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j/b/tL7HTWtJ4VPgWY= github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= github.com/ipfs/go-peertaskqueue v0.8.1/go.mod h1:Oxxd3eaK279FxeydSPPVGHzbwVeHjatZ2GA8XD+KbPU= -github.com/ipfs/go-unixfs v0.4.4 h1:D/dLBOJgny5ZLIur2vIXVQVW0EyDHdOMBDEhgHrt6rY= -github.com/ipfs/go-unixfsnode v1.7.0 h1:OwRxGCed+WXP+esYYsRr8X7jqCMCoRlauBvDwGcK8ao= -github.com/ipfs/go-unixfsnode v1.7.0/go.mod h1:PVfoyZkX1B34qzT3vJO4nsLUpRCyhnMuHBznRcXirlk= +github.com/ipfs/go-unixfs v0.4.5 h1:wj8JhxvV1G6CD7swACwSKYa+NgtdWC1RUit+gFnymDU= +github.com/ipfs/go-unixfsnode v1.9.0 h1:ubEhQhr22sPAKO2DNsyVBW7YB/zA8Zkif25aBvz8rc8= +github.com/ipfs/go-unixfsnode v1.9.0/go.mod h1:HxRu9HYHOjK6HUqFBAi++7DVoWAHn0o4v/nZ/VA+0g8= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= github.com/ipld/go-car v0.5.0 h1:kcCEa3CvYMs0iE5BzD5sV7O2EwMiCIp3uF8tA6APQT8= github.com/ipld/go-car v0.5.0/go.mod h1:ppiN5GWpjOZU9PgpAZ9HbZd9ZgSpwPMr48fGRJOWmvE= -github.com/ipld/go-car/v2 v2.9.0 h1:mkMSfh9NpnfdFe30xBFTQiKZ6+LY+mwOPrq6r56xsPo= -github.com/ipld/go-car/v2 
v2.9.0/go.mod h1:UeIST4b5Je6LEx8GjFysgeCYwxAHKtAcsWxmF6PupNQ= +github.com/ipld/go-car/v2 v2.13.1 h1:KnlrKvEPEzr5IZHKTXLAEub+tPrzeAFQVRlSQvuxBO4= +github.com/ipld/go-car/v2 v2.13.1/go.mod h1:QkdjjFNGit2GIkpQ953KBwowuoukoM75nP/JI1iDJdo= github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYtmKZ+agnUw9s= -github.com/ipld/go-ipld-prime v0.9.1-0.20210324083106-dc342a9917db/go.mod h1:KvBLMr4PX1gWptgkzRjVZCrLmSGcZCb/jioOQwCqZN8= -github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0 h1:iJTl9tx5DEsnKpppX5PmfdoQ3ITuBmkh3yyEpHWY2SI= -github.com/ipld/go-ipld-prime v0.20.1-0.20230329011551-5056175565b0/go.mod h1:wmOtdy70ajP48iZITH8uLsGJVMqA4EJM61/bSfYYGhs= +github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH9C2E= +github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd h1:gMlw/MhNr2Wtp5RwGdsW23cs+yCuj9k2ON7i9MiJlRo= -github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff h1:xbKrIvnpQkbF8iHPk/HGcegsypCDpcXWHhzBCLyCWf8= -github.com/ipni/go-libipni v0.0.8-0.20230425184153-86a1fcb7f7ff/go.mod h1:paYP9U4N3/vOzGCuN9kU972vtvw9JUcQjOKyiCFGwRk= +github.com/ipld/go-trustless-utils v0.4.1 h1:puA14381Hg2LzH724mZ5ZFKFx+FFjjT5fPFs01vwlgM= +github.com/ipld/go-trustless-utils v0.4.1/go.mod h1:DgGuyfJ33goYwYVisjnxrlra0HVmZuHWVisVIkzVo1o= +github.com/ipld/ipld/specs v0.0.0-20231012031213-54d3b21deda4 h1:0VXv637/xpI0Pb5J8K+K8iRtTw4DOcxs0MB1HMzfwNY= +github.com/ipni/go-libipni v0.5.3 h1:OtyQsetnTjIeXFMHcuEFUmCyAlrKEiOfZrv4FpCFj5A= +github.com/ipni/go-libipni v0.5.3/go.mod h1:lwecr1Bn32BtroPW3Dnb9qzWGQ3IsB4STr1Cs+gS8TA= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52/go.mod h1:fdg+/X9Gg4AsAIzWpEHwnqd+QY3b7lajxyjE1m4hkq4= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= @@ -499,7 +484,6 @@ github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22 github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= @@ -512,17 +496,14 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress 
v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU= -github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= +github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= -github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk= github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= @@ -534,38 +515,31 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.1 h1:BqpAaACuzVSgi/VLzGZIobT2z4v53pjosyNd9Yv6n/w= -github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c= github.com/libp2p/go-cidranger v1.1.0/go.mod h1:KWZTfSr+r9qEo9OkI9/SIEeAtw+NNoU0dXIXt15Okic= github.com/libp2p/go-flow-metrics v0.1.0 h1:0iPhMI8PskQwzh57jB9WxIuIOQ0r+15PChFGkx3Q3WM= github.com/libp2p/go-flow-metrics v0.1.0/go.mod h1:4Xi8MX8wj5aWNDAZttg6UPmc0ZrnFNsMtpsYUClFtro= -github.com/libp2p/go-libp2p v0.27.1 h1:k1u6RHsX3hqKnslDjsSgLNURxJ3O1atIZCY4gpMbbus= -github.com/libp2p/go-libp2p v0.27.1/go.mod h1:FAvvfQa/YOShUYdiSS03IR9OXzkcJXwcNA2FUCh9ImE= +github.com/libp2p/go-libp2p v0.32.1 h1:wy1J4kZIZxOaej6NveTWCZmHiJ/kY7GoAqXgqNCnPps= +github.com/libp2p/go-libp2p v0.32.1/go.mod h1:hXXC3kXPlBZ1eu8Q2hptGrMB4mZ3048JUoS4EKaHW5c= github.com/libp2p/go-libp2p-asn-util v0.3.0 h1:gMDcMyYiZKkocGXDQ5nsUQyquC9+H+iLEQHwOCZ7s8s= github.com/libp2p/go-libp2p-asn-util v0.3.0/go.mod h1:B1mcOrKUE35Xq/ASTmQ4tN3LNzVVaMNmq2NACuqyB9w= github.com/libp2p/go-libp2p-record v0.2.0 h1:oiNUOCWno2BFuxt3my4i1frNrt7PerzB3queqa1NkQ0= github.com/libp2p/go-libp2p-record v0.2.0/go.mod h1:I+3zMkvvg5m2OcSdoL0KPljyJyvNDFGKX7QdlpYUcwk= -github.com/libp2p/go-libp2p-routing-helpers v0.6.1 h1:tI3rHOf/FDQsxC2pHBaOZiqPJ0MZYyzGAf4V45xla4U= -github.com/libp2p/go-libp2p-routing-helpers v0.6.1/go.mod h1:R289GUxUMzRXIbWGSuUUTPrlVJZ3Y/pPz495+qgXJX8= +github.com/libp2p/go-libp2p-routing-helpers v0.7.1 
h1:kc0kWCZecbBPAiFEHhxfGJZPqjg1g9zV+X+ovR4Tmnc= +github.com/libp2p/go-libp2p-routing-helpers v0.7.1/go.mod h1:cHStPSRC/wgbfpb5jYdMP7zaSmc2wWcb1mkzNr6AR8o= github.com/libp2p/go-libp2p-testing v0.12.0 h1:EPvBb4kKMWO29qP4mZGyhVzUyR25dvfUIK5WDu6iPUA= -github.com/libp2p/go-mplex v0.7.0 h1:BDhFZdlk5tbr0oyFq/xv/NPGfjbnrsDam1EvutpBDbY= -github.com/libp2p/go-mplex v0.7.0/go.mod h1:rW8ThnRcYWft/Jb2jeORBmPd6xuG3dGxWN/W168L9EU= github.com/libp2p/go-msgio v0.3.0 h1:mf3Z8B1xcFN314sWX+2vOTShIE0Mmn2TXn3YCUQGNj0= github.com/libp2p/go-msgio v0.3.0/go.mod h1:nyRM819GmVaF9LX3l03RMh10QdOroF++NBbxAb0mmDM= -github.com/libp2p/go-nat v0.1.0 h1:MfVsH6DLcpa04Xr+p8hmVRG4juse0s3J8HyNWYHffXg= -github.com/libp2p/go-nat v0.1.0/go.mod h1:X7teVkwRHNInVNWQiO/tAiAVRwSr5zoRz4YSTC3uRBM= -github.com/libp2p/go-netroute v0.1.2/go.mod h1:jZLDV+1PE8y5XxBySEBgbuVAXbhtuHSdmLPL2n9MKbk= +github.com/libp2p/go-nat v0.2.0 h1:Tyz+bUFAYqGyJ/ppPPymMGbIgNRH+WqC5QrT5fKrrGk= +github.com/libp2p/go-nat v0.2.0/go.mod h1:3MJr+GRpRkyT65EpVPBstXLvOlAPzUVlG6Pwg9ohLJk= github.com/libp2p/go-netroute v0.2.1 h1:V8kVrpD8GK0Riv15/7VN6RbUQ3URNZVosw7H2v9tksU= github.com/libp2p/go-netroute v0.2.1/go.mod h1:hraioZr0fhBjG0ZRXJJ6Zj2IVEVNx6tDTFQfSmcq7mQ= -github.com/libp2p/go-reuseport v0.2.0 h1:18PRvIMlpY6ZK85nIAicSBuXXvrYoSw3dsBAR7zc560= -github.com/libp2p/go-reuseport v0.2.0/go.mod h1:bvVho6eLMm6Bz5hmU0LYN3ixd3nPPvtIlaURZZgOY4k= -github.com/libp2p/go-sockaddr v0.0.2/go.mod h1:syPvOmNs24S3dFVGJA1/mrqdeijPxLV2Le3BRLKd68k= -github.com/libp2p/go-yamux/v4 v4.0.0 h1:+Y80dV2Yx/kv7Y7JKu0LECyVdMXm1VUoko+VQ9rBfZQ= -github.com/libp2p/go-yamux/v4 v4.0.0/go.mod h1:NWjl8ZTLOGlozrXSOZ/HlfG++39iKNnM5wwmtQP1YB4= +github.com/libp2p/go-reuseport v0.4.0 h1:nR5KU7hD0WxXCJbmw7r2rhRYruNRl2koHw8fQscQm2s= +github.com/libp2p/go-reuseport v0.4.0/go.mod h1:ZtI03j/wO5hZVDFo2jKywN6bYKWLOy8Se6DrI2E1cLU= +github.com/libp2p/go-yamux/v4 v4.0.1 h1:FfDR4S1wj6Bw2Pqbc8Uz7pCxeRBPbwsBbEdfwiCypkQ= +github.com/libp2p/go-yamux/v4 v4.0.1/go.mod h1:NWjl8ZTLOGlozrXSOZ/HlfG++39iKNnM5wwmtQP1YB4= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI= @@ -580,11 +554,10 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= -github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= -github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod 
h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= @@ -593,8 +566,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/microcosm-cc/bluemonday v1.0.1/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= -github.com/miekg/dns v1.1.53 h1:ZBkuHr5dxHtB1caEOlZTLPo7D3L3TWckgUUs/RHfDxw= -github.com/miekg/dns v1.1.53/go.mod h1:uInx36IzPl7FYnDcMeVWxj9byh7DutNykX4G9Sj60FY= +github.com/miekg/dns v1.1.56 h1:5imZaSeoRNvpM9SzWNhEcP9QliKiz20/dA2QabIGVnE= +github.com/miekg/dns v1.1.56/go.mod h1:cRm6Oo2C8TY9ZS/TqsSrseAcncm74lfK5G+ikN2SWWY= github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c h1:bzE/A84HN25pxAuk9Eej1Kz9OUelF97nAc82bDquQI8= github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c/go.mod h1:0SQS9kMwD2VsyFEB++InYyBJroV/FRmBgcydeSUcJms= github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b h1:z78hV3sbSMAUoyUMM0I83AUIT6Hu17AWfgjzIbtrYFc= @@ -606,8 +579,9 @@ github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8Rv github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U= github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= -github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= +github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= +github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= @@ -621,7 +595,6 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= @@ -641,8 +614,8 @@ github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9 github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4= github.com/multiformats/go-multiaddr v0.1.1/go.mod h1:aMKBKNEYmzmDmxfX88/vz+J5IU55txyt0p4aiWVohjo= github.com/multiformats/go-multiaddr v0.2.0/go.mod h1:0nO36NvPpyV4QzvTLi/lafl2y95ncPj0vFwVF6k6wJ4= -github.com/multiformats/go-multiaddr v0.9.0 h1:3h4V1LHIk5w4hJHekMKWALPXErDfz/sggzwC/NcqbDQ= -github.com/multiformats/go-multiaddr v0.9.0/go.mod 
h1:mI67Lb1EeTOYb8GQfL/7wpIZwc46ElrvzhYnoJOmTT0= +github.com/multiformats/go-multiaddr v0.12.0 h1:1QlibTFkoXJuDjjYsMHhE73TnzJQl8FSWatk/0gxGzE= +github.com/multiformats/go-multiaddr v0.12.0/go.mod h1:WmZXgObOQOYp9r3cslLlppkrz1FYSHmE834dfz/lWu8= github.com/multiformats/go-multiaddr-dns v0.3.1 h1:QgQgR+LQVt3NPTjbrLLpsaT2ufAA2y0Mkk+QRVJbW3A= github.com/multiformats/go-multiaddr-dns v0.3.1/go.mod h1:G/245BRQ6FJGmryJCrOuTdB37AMA5AMOVuO6NY3JwTk= github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/eQsuaL3/CWe167E= @@ -660,10 +633,10 @@ github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpK github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= github.com/multiformats/go-multihash v0.0.14/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg= -github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= -github.com/multiformats/go-multihash v0.2.1/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= -github.com/multiformats/go-multistream v0.4.1 h1:rFy0Iiyn3YT0asivDUIR05leAdwZq3de4741sbiSdfo= -github.com/multiformats/go-multistream v0.4.1/go.mod h1:Mz5eykRVAjJWckE2U78c6xqdtyNUEhKSM0Lwar2p77Q= +github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= +github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= +github.com/multiformats/go-multistream v0.5.0 h1:5htLSLl7lvJk3xx3qT/8Zm9J4K8vEOf/QGkvOGQAyiE= +github.com/multiformats/go-multistream v0.5.0/go.mod h1:n6tMZiwiP2wUsR8DgfDWw1dydlEqV3l6N3/GBsX6ILA= github.com/multiformats/go-varint v0.0.1/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= @@ -679,13 +652,14 @@ github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU= -github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts= +github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= +github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.27.4 h1:Z2AnStgsdSayCMDiCU42qIz+HLqEPcgiOCXjAU/w+8E= -github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= +github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= +github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= 
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= @@ -695,9 +669,7 @@ github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaR github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.6 h1:nrzqCb7j9cDFj2coyLNLaZuJTLjWjlaz6nvTvIwycIU= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -715,34 +687,32 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= -github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= -github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= -github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= +github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= +github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= -github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= +github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/quic-go/qpack v0.4.0 h1:Cr9BXA1sQS2SmDUWjSofMPNKmvF6IiIfDRmgU0w1ZCo= github.com/quic-go/qpack v0.4.0/go.mod h1:UZVnYIfi5GRk+zI9UMaCPsmZ2xKJP7XBUvVyT1Knj9A= -github.com/quic-go/qtls-go1-19 v0.3.2 h1:tFxjCFcTQzK+oMxG6Zcvp4Dq8dx4yD3dDiIiyc86Z5U= -github.com/quic-go/qtls-go1-19 v0.3.2/go.mod h1:ySOI96ew8lnoKPtSqx2BlI5wCpUVPT05RMAlajtnyOI= -github.com/quic-go/qtls-go1-20 v0.2.2 h1:WLOPx6OY/hxtTxKV1Zrq20FtXtDEkeY00CGQm8GEa3E= -github.com/quic-go/qtls-go1-20 v0.2.2/go.mod h1:JKtK6mjbAVcUTN/9jZpvLbGxvdWIKS8uT7EiStoU1SM= -github.com/quic-go/quic-go v0.33.0 h1:ItNoTDN/Fm/zBlq769lLJc8ECe9gYaW40veHCCco7y0= -github.com/quic-go/quic-go v0.33.0/go.mod h1:YMuhaAV9/jIu0XclDXwZPAsP/2Kgr5yMYhe9oxhhOFA= -github.com/quic-go/webtransport-go v0.5.2 h1:GA6Bl6oZY+g/flt00Pnu0XtivSD8vukOu3lYhJjnGEk= -github.com/quic-go/webtransport-go v0.5.2/go.mod h1:OhmmgJIzTTqXK5xvtuX0oBpLV2GkLWNDA+UeTGJXErU= +github.com/quic-go/qtls-go1-20 v0.4.1 h1:D33340mCNDAIKBqXuAvexTNMUByrYmFYVfKfDN5nfFs= +github.com/quic-go/qtls-go1-20 v0.4.1/go.mod h1:X9Nh97ZL80Z+bX/gUXMbipO6OxdiDi58b/fMC9mAL+k= +github.com/quic-go/quic-go v0.40.0 h1:GYd1iznlKm7dpHD7pOVpUvItgMPo/jrMgDWZhMCecqw= +github.com/quic-go/quic-go v0.40.0/go.mod h1:PeN7kuVJ4xZbxSv/4OX6S1USOX8MJvydwpTx31vx60c= +github.com/quic-go/webtransport-go v0.6.0 h1:CvNsKqc4W2HljHJnoT+rMmbRJybShZ0YPFDD3NxaZLY= +github.com/quic-go/webtransport-go v0.6.0/go.mod h1:9KjU4AEBqEQidGHNDkZrb8CAa1abRaosM2yGOyiikEc= github.com/raulk/go-watchdog v1.3.0 h1:oUmdlHxdkXRJlwfG0O9omj8ukerm8MEQavSiDTEtBsk= github.com/raulk/go-watchdog v1.3.0/go.mod h1:fIvOnLbF0b0ZwkB9YU4mOW9Did//4vPZtDqv66NfsMU= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -751,7 +721,7 @@ github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/ronanh/intcomp v1.1.0 h1:i54kxmpmSoOZFcWPMWryuakN0vLxLswASsGa07zkvLU= github.com/ronanh/intcomp v1.1.0/go.mod h1:7FOLy3P3Zj3er/kVrU/pl+Ql7JFZj7bwliMGketo0IU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= @@ -831,8 +801,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= github.com/tejzpr/ordered-concurrently/v3 v3.0.1 h1:TLHtzlQEDshbmGveS8S+hxLw4s5u67aoJw5LLf+X2xY= @@ -850,16 +820,11 @@ github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tj/go-spin v1.1.0/go.mod h1:Mg1mzmePZm4dva8Qz60H2lHwmJ2loum4VIrLgVnKwh4= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= -github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= -github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= -github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.9 h1:rmenucSohSTiyL09Y+l2OCk+FrMxGMzho2+tjr5ticU= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli/v2 v2.0.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= -github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY= -github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= +github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs= +github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c= @@ -869,10 +834,9 @@ github.com/vbauerster/mpb/v8 v8.2.1 h1:7V3DLM8rkK4BpgDUqu8l/ExBDfAfMbWOECW5phzVH github.com/vbauerster/mpb/v8 v8.2.1/go.mod h1:DqGePwrIYW6Bs5pXaGAuGgP0PYgu5VZKIjfLZkOsdZw= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= -github.com/warpfork/go-testmark v0.11.0 h1:J6LnV8KpceDvo7spaNU4+DauH2n1x+6RaO2rJrmpQ9U= +github.com/warpfork/go-testmark v0.12.1 h1:rMgCpJfwy1sJ50x0M0NgyphxYYPMOODIJHhsXyEHU0s= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= -github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod 
h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIfyDmc1Em5GqlNRzcdtlv4HTNmdpt7XH0= @@ -890,8 +854,8 @@ github.com/whyrusleeping/cbor-gen v0.0.0-20200812213548-958ddffe352c/go.mod h1:f github.com/whyrusleeping/cbor-gen v0.0.0-20200826160007-0b9f6c5fb163/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20210118024343-169e9d70c0c2/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/cbor-gen v0.0.0-20210303213153-67a261a1d291/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= -github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa h1:EyA027ZAkuaCLoxVX4r1TZMPy1d31fM6hbfQ4OU4I5o= -github.com/whyrusleeping/cbor-gen v0.0.0-20230126041949-52956bd4c9aa/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= +github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25 h1:yVYDLoN2gmB3OdBXFW8e1UwgVbmCvNlnAKhvHPaNARI= +github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= @@ -922,24 +886,28 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/otel v1.14.0 h1:/79Huy8wbf5DnIPhemGB+zEPVwnN6fuQybr/SRXa6hM= -go.opentelemetry.io/otel v1.14.0/go.mod h1:o4buv+dJzx8rohcUeRmWUZhqupFvzWis188WlggnNeU= -go.opentelemetry.io/otel/sdk v1.11.1 h1:F7KmQgoHljhUuJyA+9BiU+EkJfyX5nVVF4wyzWZpKxs= -go.opentelemetry.io/otel/trace v1.14.0 h1:wp2Mmvj41tDsyAJXiWDWpfNsOiIyd38fy85pyKcFq/M= -go.opentelemetry.io/otel/trace v1.14.0/go.mod h1:8avnQLK+CG77yNLUae4ea2JDQ6iT+gozhnZjy/rw9G8= +go.opentelemetry.io/otel v1.16.0 h1:Z7GVAX/UkAXPKsy94IU+i6thsQS4nb7LviLpnaNeW8s= +go.opentelemetry.io/otel v1.16.0/go.mod h1:vl0h9NUa1D5s1nv3A5vZOYWn8av4K8Ml6JDeHrT/bx4= +go.opentelemetry.io/otel/metric v1.16.0 h1:RbrpwVG1Hfv85LgnZ7+txXioPDoh6EdbZHo26Q3hqOo= +go.opentelemetry.io/otel/metric v1.16.0/go.mod h1:QE47cpOmkwipPiefDwo2wDzwJrlfxxNYodqc4xnGCo4= +go.opentelemetry.io/otel/sdk v1.14.0 h1:PDCppFRDq8A1jL9v6KMI6dYesaq+DFcDZvjsoGvxGzY= +go.opentelemetry.io/otel/trace v1.16.0 h1:8JRpaObFoW0pxuVPapkgH8UhHQj+bJW8jJsCZEu5MQs= +go.opentelemetry.io/otel/trace v1.16.0/go.mod h1:Yt9vYq1SdNz3xdjZZK7wcXv1qv2pwLkqr2QVwea0ef0= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= -go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= -go.uber.org/dig v1.16.1 h1:+alNIBsl0qfY0j6epRubp/9obgtrObRAc5aD+6jbWY8= -go.uber.org/dig v1.16.1/go.mod h1:557JTAUZT5bUK0SvCwikmLPPtdQhfvLYtO5tJgQSbnk= -go.uber.org/fx v1.19.2 h1:SyFgYQFr1Wl0AYstE8vyYIzP4bFz2URrScjwC4cwUvY= -go.uber.org/fx v1.19.2/go.mod 
h1:43G1VcqSzbIv77y00p1DRAsyZS8WdzuYdhZXmEUkMyQ= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/dig v1.17.1 h1:Tga8Lz8PcYNsWsyHMZ1Vm0OQOUaJNDyvPImgbAu9YSc= +go.uber.org/dig v1.17.1/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE= +go.uber.org/fx v1.20.1 h1:zVwVQGS8zYvhh9Xxcu4w1M6ESyeMzebzj2NbSayZ4Mk= +go.uber.org/fx v1.20.1/go.mod h1:iSYNbHf2y55acNCwCXKx7LbWb5WG1Bnue5RDXz1OREg= go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/mock v0.3.0 h1:3mUxI1No2/60yUYax92Pt8eNOEecx2D3lcXZh2NEZJo= +go.uber.org/mock v0.3.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -956,12 +924,11 @@ go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.19.1/go.mod h1:j3DNczoxDZroyBnOT1L/Q79cfUMGZxlv/9dzN7SM1rI= go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= -go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= -go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE= go4.org v0.0.0-20200411211856-f5505b9728dd h1:BNJlw5kRTzdmyfh5U8F93HA2OwkP7ZGwA51eJ/0wKOU= go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg= -golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU= golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -980,8 +947,8 @@ golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.7.0 h1:AvwMYaRytfdeVt3u6mLaxYtErKYjxA2OXjJ1HHq6t3A= -golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -992,8 +959,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= -golang.org/x/exp v0.0.0-20230321023759-10a507213a29/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -1016,12 +983,11 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= -golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= +golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181029044818-c44066c5c816/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1053,8 +1019,8 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= -golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1074,8 +1040,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= +golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1088,10 +1054,8 @@ golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190228124157-a34e9553db1e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190405154228-4b34438f7a67/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1108,7 +1072,6 @@ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1127,16 +1090,16 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw= -golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1144,8 +1107,8 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= -golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1194,10 +1157,9 @@ golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWc golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200711155855-7342f9734a7d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4= -golang.org/x/tools 
v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= +golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= +golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1277,8 +1239,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= -google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1317,15 +1279,13 @@ k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= k8s.io/klog/v2 v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= -lukechampine.com/blake3 v1.1.7 h1:GgRMhmdsuK8+ii6UZFDL8Nb+VyMwadAgcJyfYHxG6n0= -lukechampine.com/blake3 v1.1.7/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= +lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= +lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= modernc.org/cc v1.0.0/go.mod h1:1Sk4//wdnYJiUIxnW8ddKpaOJCF37yAdqYnkxUpaYxw= modernc.org/golex v1.0.0/go.mod h1:b/QX9oBD/LhixY6NDh+IdGv17hgB+51fET1i2kPSmvk= modernc.org/mathutil v1.1.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= modernc.org/strutil v1.1.0/go.mod h1:lstksw84oURvj9y3tn8lGvRxyRC1S2+g5uuIzNfIOBs= modernc.org/xc v1.0.0/go.mod h1:mRNCo0bvLjGhHO9WsyuKVU4q0ceiDDDoEeWDJHrNx8I= -nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= -nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/index-cid-to-offset.go b/index-cid-to-offset.go index b54e4c50..85d673c8 100644 --- a/index-cid-to-offset.go +++ b/index-cid-to-offset.go @@ -18,12 +18,20 @@ import ( "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "k8s.io/klog/v2" ) // CreateIndex_cid2offset creates an index file that maps CIDs to offsets in the CAR file. 
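The new signature below threads the epoch number and the network through to the index writer, so the sealed file becomes self-describing (both values are stored in the index metadata; see indexes/metadata.go later in this patch). As a minimal sketch, a call site might look like this, where the epoch value and every path are illustrative assumptions rather than values taken from this patch:

	indexPath, err := CreateIndex_cid2offset(
		ctx,
		0,                      // epoch (hypothetical)
		indexes.NetworkMainnet, // network; recorded in the index metadata
		"/tmp/faithful",        // tmpDir (hypothetical)
		"/data/epoch-0.car",    // carPath (hypothetical)
		"/data/indexes",        // indexDir (hypothetical)
	)
	if err != nil {
		return fmt.Errorf("failed to create cid-to-offset index: %w", err)
	}
	klog.Infof("index written to %s", indexPath)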
-func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, indexDir string) (string, error) { +func CreateIndex_cid2offset( + ctx context.Context, + epoch uint64, + network indexes.Network, + tmpDir string, + carPath string, + indexDir string, +) (string, error) { // Check if the CAR file exists: exists, err := fileExists(carPath) if err != nil { @@ -66,11 +74,15 @@ func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, return "", fmt.Errorf("failed to create tmp dir: %w", err) } + rootCid := rd.header.Roots[0] + klog.Infof("Creating builder with %d items and target file size %d", numItems, targetFileSize) - c2o, err := compactindexsized.NewBuilderSized( + c2o, err := indexes.NewWriter_CidToOffsetAndSize( + epoch, + rootCid, + network, tmpDir, - uint(numItems), - 8, + numItems, // TODO: what if the number of real items is less than this? ) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) @@ -97,7 +109,7 @@ func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, // klog.Infof("key: %s, offset: %d", bin.FormatByteSlice(c.Bytes()), totalOffset) - err = c2o.Insert(c.Bytes(), itob(uint64(totalOffset))) + err = c2o.Put(c, totalOffset, sectionLength) if err != nil { return "", fmt.Errorf("failed to put cid to offset: %w", err) } @@ -110,24 +122,12 @@ func CreateIndex_cid2offset(ctx context.Context, tmpDir string, carPath string, } } - rootCID := rd.header.Roots[0] - - // Use the car file name and root CID to name the index file: - indexFilePath := filepath.Join(indexDir, fmt.Sprintf("%s.%s.cid-to-offset.index", filepath.Base(carPath), rootCID.String())) - // TODO: check if the index file already exists and if so, return an error (before doing all the work above) - - klog.Infof("Creating index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create index file: %w", err) - } - defer targetFile.Close() - klog.Infof("Sealing index...") - if err = c2o.Seal(ctx, targetFile); err != nil { + if err = c2o.Seal(ctx, indexDir); err != nil { return "", fmt.Errorf("failed to seal index: %w", err) } - klog.Infof("Index created; %d items indexed", numItemsIndexed) + indexFilePath := c2o.GetFilepath() + klog.Infof("Index created at %s; %d items indexed", indexFilePath, numItemsIndexed) return indexFilePath, nil } @@ -168,20 +168,14 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath string, indexFilePath s return fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.header.Roots)) } - indexFile, err := os.Open(indexFilePath) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer indexFile.Close() - - c2o, err := compactindexsized.Open(indexFile) + c2o, err := indexes.Open_CidToOffsetAndSize(indexFilePath) if err != nil { return fmt.Errorf("failed to open index: %w", err) } { // find root cid rootCID := rd.header.Roots[0] - offset, err := findOffsetFromCid(c2o, rootCID) + offset, err := c2o.Get(rootCID) if err != nil { return fmt.Errorf("failed to get offset from index: %w", err) } @@ -195,7 +189,7 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath string, indexFilePath s if err != nil { return fmt.Errorf("failed to open CAR data reader: %w", err) } - dr.Seek(int64(offset), io.SeekStart) + dr.Seek(int64(offset.Offset), io.SeekStart) br := bufio.NewReader(dr) gotCid, data, err := util.ReadNode(br) @@ -239,13 +233,16 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath 
string, indexFilePath s
 		if numItems%100000 == 0 {
 			printToStderr(".")
 		}
-		offset, err := findOffsetFromCid(c2o, c)
+		offset, err := c2o.Get(c)
 		if err != nil {
 			return fmt.Errorf("failed to lookup offset for %s: %w", c, err)
 		}
-		if offset != totalOffset {
+		if offset.Offset != totalOffset {
 			return fmt.Errorf("offset mismatch for %s: %d != %d", c, offset, totalOffset)
 		}
+		if offset.Size != sectionLen {
+			return fmt.Errorf("length mismatch for %s: %d != %d", c, offset.Size, sectionLen)
+		}
 
 		totalOffset += sectionLen
 	}
diff --git a/index-sig-to-cid.go b/index-sig-to-cid.go
index 378a3019..f0cb49ac 100644
--- a/index-sig-to-cid.go
+++ b/index-sig-to-cid.go
@@ -14,12 +14,20 @@ import (
 	carv2 "github.com/ipld/go-car/v2"
 	"github.com/rpcpool/yellowstone-faithful/bucketteer"
 	"github.com/rpcpool/yellowstone-faithful/compactindexsized"
+	"github.com/rpcpool/yellowstone-faithful/indexes"
 	"github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
 	"k8s.io/klog/v2"
 )
 
 // CreateIndex_sig2cid creates an index file that maps transaction signatures to CIDs.
-func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, indexDir string) (string, error) {
+func CreateIndex_sig2cid(
+	ctx context.Context,
+	epoch uint64,
+	network indexes.Network,
+	tmpDir string,
+	carPath string,
+	indexDir string,
+) (string, error) {
 	// Check if the CAR file exists:
 	exists, err := fileExists(carPath)
 	if err != nil {
@@ -43,6 +51,7 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind
 	if len(roots) != 1 {
 		return "", fmt.Errorf("CAR file has %d roots, expected 1", len(roots))
 	}
+	rootCid := roots[0]
 
 	// TODO: use another way to precisely count the number of solana Blocks in the CAR file.
 	klog.Infof("Counting items in car file...")
@@ -58,15 +67,18 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind
 	}
 	klog.Infof("Creating builder with %d items", numItems)
-	c2o, err := compactindexsized.NewBuilderSized(
+
+	sig2c, err := indexes.NewWriter_SigToCid(
+		epoch,
+		rootCid,
+		network,
 		tmpDir,
-		uint(numItems), // TODO: what if the number of real items is less than this?
-		36,
+		numItems, // TODO: what if the number of real items is less than this?
) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) } - defer c2o.Close() + defer sig2c.Close() numItemsIndexed := uint64(0) klog.Infof("Indexing...") @@ -87,10 +99,7 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind return fmt.Errorf("failed to read signature: %w", err) } - var buf [36]byte - copy(buf[:], c.Bytes()[:36]) - - err = c2o.Insert(sig[:], buf[:]) + err = sig2c.Put(sig, c) if err != nil { return fmt.Errorf("failed to put cid to offset: %w", err) } @@ -105,23 +114,12 @@ func CreateIndex_sig2cid(ctx context.Context, tmpDir string, carPath string, ind return "", fmt.Errorf("failed to index; error while iterating over blocks: %w", err) } - rootCID := roots[0] - - // Use the car file name and root CID to name the index file: - indexFilePath := filepath.Join(indexDir, fmt.Sprintf("%s.%s.sig-to-cid.index", filepath.Base(carPath), rootCID.String())) - - klog.Infof("Creating index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create index file: %w", err) - } - defer targetFile.Close() - klog.Infof("Sealing index...") - if err = c2o.Seal(ctx, targetFile); err != nil { + if err = sig2c.Seal(ctx, indexDir); err != nil { return "", fmt.Errorf("failed to seal index: %w", err) } - klog.Infof("Index created; %d items indexed", numItemsIndexed) + indexFilePath := sig2c.GetFilepath() + klog.Infof("Index created at %s; %d items indexed", indexFilePath, numItemsIndexed) return indexFilePath, nil } @@ -162,13 +160,7 @@ func VerifyIndex_sig2cid(ctx context.Context, carPath string, indexFilePath stri return fmt.Errorf("CAR file has %d roots, expected 1", len(roots)) } - indexFile, err := os.Open(indexFilePath) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer indexFile.Close() - - c2o, err := compactindexsized.Open(indexFile) + c2o, err := indexes.Open_SigToCid(indexFilePath) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -188,7 +180,7 @@ func VerifyIndex_sig2cid(ctx context.Context, carPath string, indexFilePath stri return fmt.Errorf("failed to read signature: %w", err) } - got, err := findCidFromSignature(c2o, sig) + got, err := c2o.Get(sig) if err != nil { return fmt.Errorf("failed to find cid from signature: %w", err) } diff --git a/index-slot-to-cid.go b/index-slot-to-cid.go index 636763be..f5cfc63d 100644 --- a/index-slot-to-cid.go +++ b/index-slot-to-cid.go @@ -12,12 +12,20 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" ) // CreateIndex_slot2cid creates an index file that maps slot numbers to CIDs. -func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, indexDir string) (string, error) { +func CreateIndex_slot2cid( + ctx context.Context, + epoch uint64, + network indexes.Network, + tmpDir string, + carPath string, + indexDir string, +) (string, error) { // Check if the CAR file exists: exists, err := fileExists(carPath) if err != nil { @@ -41,6 +49,7 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in if len(roots) != 1 { return "", fmt.Errorf("CAR file has %d roots, expected 1", len(roots)) } + rootCid := roots[0] // TODO: use another way to precisely count the number of solana Blocks in the CAR file. 
klog.Infof("Counting items in car file...") @@ -56,15 +65,17 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in } klog.Infof("Creating builder with %d items", numItems) - c2o, err := compactindexsized.NewBuilderSized( + sl2c, err := indexes.NewWriter_SlotToCid( + epoch, + rootCid, + network, tmpDir, - uint(numItems), // TODO: what if the number of real items is less than this? - 36, + numItems, // TODO: what if the number of real items is less than this? ) if err != nil { return "", fmt.Errorf("failed to open index store: %w", err) } - defer c2o.Close() + defer sl2c.Close() numItemsIndexed := uint64(0) klog.Infof("Indexing...") @@ -82,12 +93,7 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in func(c cid.Cid, block *ipldbindcode.Block) error { slotNum := block.Slot - slotBytes := uint64ToLeBytes(uint64(slotNum)) - - var buf [36]byte - copy(buf[:], c.Bytes()[:36]) - - err = c2o.Insert(slotBytes, buf[:]) + err = sl2c.Put(uint64(slotNum), c) if err != nil { return fmt.Errorf("failed to put cid to offset: %w", err) } @@ -102,23 +108,14 @@ func CreateIndex_slot2cid(ctx context.Context, tmpDir string, carPath string, in return "", fmt.Errorf("failed to index; error while iterating over blocks: %w", err) } - rootCID := roots[0] - // Use the car file name and root CID to name the index file: - indexFilePath := filepath.Join(indexDir, fmt.Sprintf("%s.%s.slot-to-cid.index", filepath.Base(carPath), rootCID.String())) - - klog.Infof("Creating index file at %s", indexFilePath) - targetFile, err := os.Create(indexFilePath) - if err != nil { - return "", fmt.Errorf("failed to create index file: %w", err) - } - defer targetFile.Close() klog.Infof("Sealing index...") - if err = c2o.Seal(ctx, targetFile); err != nil { + if err = sl2c.Seal(ctx, indexDir); err != nil { return "", fmt.Errorf("failed to seal index: %w", err) } - klog.Infof("Index created; %d items indexed", numItemsIndexed) + indexFilePath := sl2c.GetFilepath() + klog.Infof("Index created at %s; %d items indexed", indexFilePath, numItemsIndexed) return indexFilePath, nil } @@ -159,13 +156,7 @@ func VerifyIndex_slot2cid(ctx context.Context, carPath string, indexFilePath str return fmt.Errorf("CAR file has %d roots, expected 1", len(roots)) } - indexFile, err := os.Open(indexFilePath) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer indexFile.Close() - - c2o, err := compactindexsized.Open(indexFile) + c2o, err := indexes.Open_SlotToCid(indexFilePath) if err != nil { return fmt.Errorf("failed to open index: %w", err) } @@ -184,7 +175,7 @@ func VerifyIndex_slot2cid(ctx context.Context, carPath string, indexFilePath str func(c cid.Cid, block *ipldbindcode.Block) error { slotNum := uint64(block.Slot) - got, err := findCidFromSlot(c2o, slotNum) + got, err := c2o.Get(slotNum) if err != nil { return fmt.Errorf("failed to put cid to offset: %w", err) } diff --git a/indexes/errors.go b/indexes/errors.go new file mode 100644 index 00000000..89b61aac --- /dev/null +++ b/indexes/errors.go @@ -0,0 +1,8 @@ +package indexes + +import "errors" + +var ( + ErrInvalidNetwork = errors.New("invalid network") + ErrInvalidRootCid = errors.New("invalid root cid") +) diff --git a/indexes/index-cid-to-offset-and-size.go b/indexes/index-cid-to-offset-and-size.go new file mode 100644 index 00000000..761603e6 --- /dev/null +++ b/indexes/index-cid-to-offset-and-size.go @@ -0,0 +1,214 @@ +package indexes + +import ( + "context" + "errors" + "fmt" + "io" + "os" + 
"path/filepath" + + "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" +) + +type CidToOffsetAndSize_Writer struct { + sealed bool + tmpDir string + finalPath string + meta *Metadata + index *compactindexsized.Builder +} + +const ( + // 6 bytes for offset (uint48, max 281.5 TB (terabytes)), + // 3 bytes for size (uint24, max 16.7 MB (megabytes), which is plenty considering the max object size is ~1 MB) + IndexValueSize_CidToOffsetAndSize = 6 + 3 +) + +func formatFilename_CidToOffsetAndSize(epoch uint64, rootCid cid.Cid, network Network) string { + return fmt.Sprintf( + "%d-%s-%s-%s", + epoch, + rootCid.String(), + network, + "cid-to-offset-and-size.index", + ) +} + +var Kind_CidToOffsetAndSize = []byte("cid-to-offset-and-size") + +func NewWriter_CidToOffsetAndSize( + epoch uint64, + rootCid cid.Cid, + network Network, + tmpDir string, // Where to put the temporary index files; WILL BE DELETED. + numItems uint64, +) (*CidToOffsetAndSize_Writer, error) { + if !IsValidNetwork(network) { + return nil, ErrInvalidNetwork + } + if rootCid == cid.Undef { + return nil, ErrInvalidRootCid + } + index, err := compactindexsized.NewBuilderSized( + tmpDir, + uint(numItems), + IndexValueSize_CidToOffsetAndSize, + ) + if err != nil { + return nil, err + } + meta := &Metadata{ + Epoch: epoch, + RootCid: rootCid, + Network: network, + IndexKind: Kind_CidToOffsetAndSize, + } + if err := setDefaultMetadata(index, meta); err != nil { + return nil, err + } + return &CidToOffsetAndSize_Writer{ + tmpDir: tmpDir, + meta: meta, + index: index, + }, nil +} + +func (w *CidToOffsetAndSize_Writer) Put(cid_ cid.Cid, offset uint64, size uint64) error { + if cid_ == cid.Undef { + return fmt.Errorf("cid is undefined") + } + if offset > maxUint48 { + return fmt.Errorf("offset is too large; max is %d, but got %d", maxUint48, offset) + } + if size > maxUint24 { + return fmt.Errorf("size is too large; max is %d, but got %d", maxUint24, size) + } + key := cid_.Bytes() + value := append(uint48tob(offset), uint24tob(uint32(size))...) + return w.index.Insert(key, value) +} + +func (w *CidToOffsetAndSize_Writer) Seal(ctx context.Context, dstDir string) error { + if w.sealed { + return fmt.Errorf("already sealed") + } + + filepath := filepath.Join(dstDir, formatFilename_CidToOffsetAndSize(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) + w.finalPath = filepath + + file, err := os.Create(filepath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + + if err := w.index.Seal(ctx, file); err != nil { + return fmt.Errorf("failed to seal index: %w", err) + } + w.sealed = true + + return nil +} + +func (w *CidToOffsetAndSize_Writer) Close() error { + if !w.sealed { + panic(fmt.Errorf("attempted to close a cid-to-offset-and-size index that was not sealed")) + } + return w.index.Close() +} + +// GetFilepath returns the path to the sealed index file. +func (w *CidToOffsetAndSize_Writer) GetFilepath() string { + return w.finalPath +} + +type OffsetAndSize struct { + Offset uint64 // uint48, 6 bytes, max 281.5 TB (terabytes) + Size uint64 // uint24, 3 bytes, max 16.7 MB (megabytes) +} + +// Bytes returns the offset and size as a byte slice. +func (oas *OffsetAndSize) Bytes() []byte { + return append(uint48tob(oas.Offset), uint24tob(uint32(oas.Size))...) +} + +// FromBytes parses the offset and size from a byte slice. 
+func (oas *OffsetAndSize) FromBytes(buf []byte) error { + if len(buf) != IndexValueSize_CidToOffsetAndSize { + return errors.New("invalid byte slice length") + } + _ = buf[IndexValueSize_CidToOffsetAndSize-1] // bounds check hint to compiler + oas.Offset = btoUint48(buf[:6]) + oas.Size = uint64(btoUint24(buf[6:])) + return nil +} + +type CidToOffsetAndSize_Reader struct { + file io.Closer + meta *Metadata + index *compactindexsized.DB +} + +func Open_CidToOffsetAndSize(file string) (*CidToOffsetAndSize_Reader, error) { + reader, err := os.Open(file) + if err != nil { + return nil, fmt.Errorf("failed to open index file: %w", err) + } + return OpenWithReader_CidToOffsetAndSize(reader) +} + +func OpenWithReader_CidToOffsetAndSize(reader ReaderAtCloser) (*CidToOffsetAndSize_Reader, error) { + index, err := compactindexsized.Open(reader) + if err != nil { + return nil, fmt.Errorf("failed to open index: %w", err) + } + meta, err := getDefaultMetadata(index) + if err != nil { + return nil, err + } + if !IsValidNetwork(meta.Network) { + return nil, fmt.Errorf("invalid network") + } + if meta.RootCid == cid.Undef { + return nil, fmt.Errorf("root cid is undefined") + } + if err := meta.AssertIndexKind(Kind_CidToOffsetAndSize); err != nil { + return nil, err + } + return &CidToOffsetAndSize_Reader{ + file: reader, + meta: meta, + index: index, + }, nil +} + +func (r *CidToOffsetAndSize_Reader) Get(cid_ cid.Cid) (*OffsetAndSize, error) { + if cid_ == cid.Undef { + return nil, fmt.Errorf("cid is undefined") + } + key := cid_.Bytes() + value, err := r.index.Lookup(key) + if err != nil { + return nil, err + } + oas := &OffsetAndSize{} + if err := oas.FromBytes(value); err != nil { + return nil, err + } + return oas, nil +} + +func (r *CidToOffsetAndSize_Reader) Close() error { + return r.file.Close() +} + +// Meta returns the metadata for the index. 
+func (r *CidToOffsetAndSize_Reader) Meta() *Metadata {
+	return r.meta
+}
+
+func (r *CidToOffsetAndSize_Reader) Prefetch(b bool) {
+	r.index.Prefetch(b)
+}
diff --git a/indexes/index-cid-to-offset-and-size_test.go b/indexes/index-cid-to-offset-and-size_test.go
new file mode 100644
index 00000000..74f5d451
--- /dev/null
+++ b/indexes/index-cid-to-offset-and-size_test.go
@@ -0,0 +1,150 @@
+package indexes_test
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ipfs/go-cid"
+	"github.com/rpcpool/yellowstone-faithful/indexes"
+	"github.com/stretchr/testify/require"
+)
+
+func TestOffsetAndSize(t *testing.T) {
+	v := indexes.OffsetAndSize{
+		Offset: 123,
+		Size:   456,
+	}
+	encoded := v.Bytes()
+	require.Equal(t, []byte{0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x01, 0x00}, encoded)
+
+	var decoded indexes.OffsetAndSize
+	require.NoError(t, decoded.FromBytes(encoded))
+
+	require.Equal(t, v, decoded)
+}
+
+func TestCidToOffsetAndSize(t *testing.T) {
+	// create a new index
+	// write some data to it
+	// close it
+	// open it
+	// read the data back
+	// assert that the data is correct
+	epoch := uint64(123)
+	cstr := "bafyreids2hw6eynl4vag3cdp535sxz6zp6tedhuv6xu3k3rze3fskqy4yy"
+	rootCid, err := cid.Parse(cstr)
+	require.NoError(t, err)
+	numItems := uint64(10000)
+
+	dstDir := t.TempDir()
+	writer, err := indexes.NewWriter_CidToOffsetAndSize(
+		epoch,
+		rootCid,
+		indexes.NetworkMainnet,
+		"",
+		numItems,
+	)
+	require.NoError(t, err)
+	require.NotNil(t, writer)
+
+	// write some data to the index
+	cid1_, err := cid.Parse("bafyreibwvjchy4qq6tqeqg4olawpzs3cphr7nqp5gz2ch5bnttt2ajg6p4")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(cid1_, 123, 456))
+
+	cid2_, err := cid.Parse("bafyreibqlzq4vrezlbgn7qqgz36tx5itaelyxw4v2xyjho5fqqlrslf2vq")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(cid2_, 123456, 456789))
+
+	cid3_, err := cid.Parse("bafyreiciqiiofeu74nt4drrw6pysqaethngzjtlbsyskvjmntqzx4fzv7q")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(cid3_, 123456789, 4567))
+
+	{
+		// add the remaining numItems-3 items
+		for i := uint64(0); i < numItems-3; i++ {
+			cid_ := cid.NewCidV1(cid.Raw, []byte(fmt.Sprintf("cid-%d", i)))
+			require.NoError(t, err)
+			require.NoError(t, writer.Put(cid_, i, i))
+		}
+	}
+	{
+		// closing the index before sealing it should panic
+		require.Panics(t, func() {
+			require.NoError(t, writer.Close())
+		})
+	}
+
+	// seal the index
+	require.NoError(t, writer.Seal(context.TODO(), dstDir))
+	t.Log(writer.GetFilepath())
+	{
+		files, err := os.ReadDir(dstDir)
+		require.NoError(t, err)
+		// should contain the index file
+		has := false
+		for _, file := range files {
+			// check if file exists
+			completePath := filepath.Join(dstDir, file.Name())
+			file, err := os.Stat(completePath)
+			require.NoError(t, err)
+			// check if file is not empty
+			require.NotZero(t, file.Size())
+
+			if completePath == writer.GetFilepath() {
+				has = true
+			}
+		}
+		require.True(t, has)
+	}
+
+	finalFilepath := writer.GetFilepath()
+	require.NotEmpty(t, finalFilepath)
+
+	// close the index
+	require.NoError(t, writer.Close())
+
+	// open the index
+	reader, err := indexes.Open_CidToOffsetAndSize(finalFilepath)
+	require.NoError(t, err)
+	require.NotNil(t, reader)
+
+	// read the data back
+	{
+		offsetAndSize, err := reader.Get(cid1_)
+		require.NoError(t, err)
+		require.NotNil(t, offsetAndSize)
+
+		require.Equal(t, uint64(123), offsetAndSize.Offset)
+		require.Equal(t, uint64(456), offsetAndSize.Size)
+	}
+	{
+		offsetAndSize, err :=
reader.Get(cid2_) + require.NoError(t, err) + require.NotNil(t, offsetAndSize) + + require.Equal(t, uint64(123456), offsetAndSize.Offset) + require.Equal(t, uint64(456789), offsetAndSize.Size) + } + { + offsetAndSize, err := reader.Get(cid3_) + require.NoError(t, err) + require.NotNil(t, offsetAndSize) + + require.Equal(t, uint64(123456789), offsetAndSize.Offset) + require.Equal(t, uint64(4567), offsetAndSize.Size) + } + // check metadata + { + metadata := reader.Meta() + require.NotNil(t, metadata) + + require.Equal(t, epoch, metadata.Epoch) + require.Equal(t, rootCid, metadata.RootCid) + require.Equal(t, indexes.NetworkMainnet, metadata.Network) + require.Equal(t, indexes.Kind_CidToOffsetAndSize, metadata.IndexKind) + } +} diff --git a/indexes/index-sig-to-cid.go b/indexes/index-sig-to-cid.go new file mode 100644 index 00000000..461c2eb4 --- /dev/null +++ b/indexes/index-sig-to-cid.go @@ -0,0 +1,194 @@ +package indexes + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/gagliardetto/solana-go" + "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" +) + +type SigToCid_Writer struct { + sealed bool + tmpDir string + finalPath string + meta *Metadata + index *compactindexsized.Builder +} + +const ( + // 36 bytes for cid + IndexValueSize_SigToCid = 36 +) + +func formatFilename_SigToCid(epoch uint64, rootCid cid.Cid, network Network) string { + return fmt.Sprintf( + "%d-%s-%s-%s", + epoch, + rootCid.String(), + network, + "sig-to-cid.index", + ) +} + +var Kind_SigToCid = []byte("sig-to-cid") + +func NewWriter_SigToCid( + epoch uint64, + rootCid cid.Cid, + network Network, + tmpDir string, // Where to put the temporary index files; WILL BE DELETED. + numItems uint64, +) (*SigToCid_Writer, error) { + if !IsValidNetwork(network) { + return nil, ErrInvalidNetwork + } + if rootCid == cid.Undef { + return nil, ErrInvalidRootCid + } + index, err := compactindexsized.NewBuilderSized( + tmpDir, + uint(numItems), + IndexValueSize_SigToCid, + ) + if err != nil { + return nil, err + } + meta := &Metadata{ + Epoch: epoch, + RootCid: rootCid, + Network: network, + IndexKind: Kind_SigToCid, + } + return &SigToCid_Writer{ + tmpDir: tmpDir, + meta: meta, + index: index, + }, nil +} + +func (w *SigToCid_Writer) Put(sig solana.Signature, cid_ cid.Cid) error { + if w.sealed { + return fmt.Errorf("cannot put to sealed writer") + } + if cid_ == cid.Undef { + return fmt.Errorf("cid is undefined") + } + key := sig[:] + value := cid_.Bytes() + return w.index.Insert(key, value) +} + +func (w *SigToCid_Writer) Seal(ctx context.Context, dstDir string) error { + if w.sealed { + return fmt.Errorf("already sealed") + } + if err := setDefaultMetadata(w.index, w.meta); err != nil { + return fmt.Errorf("failed to set metadata: %w", err) + } + + filepath := filepath.Join(dstDir, formatFilename_SigToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) + w.finalPath = filepath + + file, err := os.Create(filepath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + + if err := w.index.Seal(ctx, file); err != nil { + return fmt.Errorf("failed to seal index: %w", err) + } + w.sealed = true + + return nil +} + +func (w *SigToCid_Writer) Close() error { + if !w.sealed { + panic(fmt.Errorf("attempted to close a sig-to-cid index that was not sealed")) + } + return w.index.Close() +} + +// GetFilepath returns the path to the sealed index file. 
+func (w *SigToCid_Writer) GetFilepath() string {
+	return w.finalPath
+}
+
+type SigToCid_Reader struct {
+	file  io.Closer
+	meta  *Metadata
+	index *compactindexsized.DB
+}
+
+func Open_SigToCid(filepath string) (*SigToCid_Reader, error) {
+	file, err := os.Open(filepath)
+	if err != nil {
+		return nil, err
+	}
+	return OpenWithReader_SigToCid(file)
+}
+
+type ReaderAtCloser interface {
+	io.ReaderAt
+	io.Closer
+}
+
+func OpenWithReader_SigToCid(reader ReaderAtCloser) (*SigToCid_Reader, error) {
+	index, err := compactindexsized.Open(reader)
+	if err != nil {
+		return nil, err
+	}
+	meta, err := getDefaultMetadata(index)
+	if err != nil {
+		return nil, err
+	}
+	if !IsValidNetwork(meta.Network) {
+		return nil, fmt.Errorf("invalid network")
+	}
+	if meta.RootCid == cid.Undef {
+		return nil, fmt.Errorf("root cid is undefined")
+	}
+	if err := meta.AssertIndexKind(Kind_SigToCid); err != nil {
+		return nil, err
+	}
+	return &SigToCid_Reader{
+		file:  reader,
+		meta:  meta,
+		index: index,
+	}, nil
+}
+
+func (r *SigToCid_Reader) Get(sig solana.Signature) (cid.Cid, error) {
+	if sig.IsZero() {
+		return cid.Undef, fmt.Errorf("sig is undefined")
+	}
+	key := sig[:]
+	value, err := r.index.Lookup(key)
+	if err != nil {
+		return cid.Undef, err
+	}
+	_, c, err := cid.CidFromBytes(value[:])
+	if err != nil {
+		return cid.Undef, err
+	}
+	return c, nil
+}
+
+func (r *SigToCid_Reader) Close() error {
+	return r.file.Close()
+}
+
+// Meta returns the metadata for the index.
+func (r *SigToCid_Reader) Meta() *Metadata {
+	return r.meta
+}
+
+func (r *SigToCid_Reader) Prefetch(b bool) {
+	r.index.Prefetch(b)
+}
diff --git a/indexes/index-sig-to-cid_test.go b/indexes/index-sig-to-cid_test.go
new file mode 100644
index 00000000..69d36424
--- /dev/null
+++ b/indexes/index-sig-to-cid_test.go
@@ -0,0 +1,131 @@
+package indexes_test
+
+import (
+	"context"
+	"crypto/rand"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gagliardetto/solana-go"
+	"github.com/ipfs/go-cid"
+	"github.com/rpcpool/yellowstone-faithful/indexes"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSigToCid(t *testing.T) {
+	epoch := uint64(123)
+	cstr := "bafyreids2hw6eynl4vag3cdp535sxz6zp6tedhuv6xu3k3rze3fskqy4yy"
+	rootCid, err := cid.Parse(cstr)
+	require.NoError(t, err)
+	numItems := uint64(10000)
+
+	dstDir := t.TempDir()
+	writer, err := indexes.NewWriter_SigToCid(
+		epoch,
+		rootCid,
+		indexes.NetworkDevnet,
+		"",
+		numItems,
+	)
+	require.NoError(t, err)
+	require.NotNil(t, writer)
+
+	// write some data to the index
+	cid1_, err := cid.Parse("bafyreibwvjchy4qq6tqeqg4olawpzs3cphr7nqp5gz2ch5bnttt2ajg6p4")
+	require.NoError(t, err)
+	sig1 := newRandomSignature()
+	require.NoError(t, writer.Put(sig1, cid1_))
+
+	cid2_, err := cid.Parse("bafyreibqlzq4vrezlbgn7qqgz36tx5itaelyxw4v2xyjho5fqqlrslf2vq")
+	require.NoError(t, err)
+	sig2 := newRandomSignature()
+	require.NoError(t, writer.Put(sig2, cid2_))
+
+	cid3_, err := cid.Parse("bafyreiciqiiofeu74nt4drrw6pysqaethngzjtlbsyskvjmntqzx4fzv7q")
+	require.NoError(t, err)
+	sig3 := newRandomSignature()
+	require.NoError(t, writer.Put(sig3, cid3_))
+
+	{
+		// add the remaining numItems-3 items
+		for i := uint64(0); i < numItems-3; i++ {
+			cid_ := cid.NewCidV1(cid.Raw, []byte(fmt.Sprintf("cid-%d", i)))
+			sig_ := newRandomSignature()
+			require.NoError(t, writer.Put(sig_, cid_))
+		}
+	}
+	{
+		// closing the index before sealing it should panic
+		require.Panics(t, func() {
+			require.NoError(t, writer.Close())
+		})
+	}
+
+	// seal the index
+	require.NoError(t,
writer.Seal(context.TODO(), dstDir)) + t.Log(writer.GetFilepath()) + { + files, err := os.ReadDir(dstDir) + require.NoError(t, err) + // should contain the index file + has := false + for _, file := range files { + // check if file exists + completePath := filepath.Join(dstDir, file.Name()) + file, err := os.Stat(completePath) + require.NoError(t, err) + // check if file is not empty + require.NotZero(t, file.Size()) + + if completePath == writer.GetFilepath() { + has = true + } + } + require.True(t, has) + } + + finalFilepath := writer.GetFilepath() + require.NotEmpty(t, finalFilepath) + + // close the index + require.NoError(t, writer.Close()) + + // open the index + reader, err := indexes.Open_SigToCid(finalFilepath) + require.NoError(t, err) + require.NotNil(t, reader) + + // read the data back + { + cid_, err := reader.Get(sig1) + require.NoError(t, err) + require.Equal(t, cid1_, cid_) + + cid_, err = reader.Get(sig2) + require.NoError(t, err) + require.Equal(t, cid2_, cid_) + + cid_, err = reader.Get(sig3) + require.NoError(t, err) + require.Equal(t, cid3_, cid_) + } + + // check metadata + { + metadata := reader.Meta() + require.NotNil(t, metadata) + + require.Equal(t, epoch, metadata.Epoch) + require.Equal(t, rootCid, metadata.RootCid) + require.Equal(t, indexes.NetworkDevnet, metadata.Network) + require.Equal(t, indexes.Kind_SigToCid, metadata.IndexKind) + } +} + +func newRandomSignature() solana.Signature { + var sig solana.Signature + rand.Read(sig[:]) + return sig +} diff --git a/indexes/index-slot-to-cid.go b/indexes/index-slot-to-cid.go new file mode 100644 index 00000000..32401855 --- /dev/null +++ b/indexes/index-slot-to-cid.go @@ -0,0 +1,185 @@ +package indexes + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/compactindexsized" +) + +type SlotToCid_Writer struct { + sealed bool + tmpDir string + finalPath string + meta *Metadata + index *compactindexsized.Builder +} + +const ( + // 36 bytes for cid + IndexValueSize_SlotToCid = 36 +) + +func formatFilename_SlotToCid(epoch uint64, rootCid cid.Cid, network Network) string { + return fmt.Sprintf( + "%d-%s-%s-%s", + epoch, + rootCid.String(), + network, + "slot-to-cid.index", + ) +} + +var Kind_SlotToCid = []byte("slot-to-cid") + +func NewWriter_SlotToCid( + epoch uint64, + rootCid cid.Cid, + network Network, + tmpDir string, // Where to put the temporary index files; WILL BE DELETED. 
+ numItems uint64, +) (*SlotToCid_Writer, error) { + if !IsValidNetwork(network) { + return nil, ErrInvalidNetwork + } + if rootCid == cid.Undef { + return nil, ErrInvalidRootCid + } + index, err := compactindexsized.NewBuilderSized( + tmpDir, + uint(numItems), + IndexValueSize_SlotToCid, + ) + if err != nil { + return nil, err + } + meta := &Metadata{ + Epoch: epoch, + RootCid: rootCid, + Network: network, + IndexKind: Kind_SlotToCid, + } + if err := setDefaultMetadata(index, meta); err != nil { + return nil, err + } + return &SlotToCid_Writer{ + tmpDir: tmpDir, + meta: meta, + index: index, + }, nil +} + +func (w *SlotToCid_Writer) Put(slot uint64, cid_ cid.Cid) error { + if w.sealed { + return fmt.Errorf("cannot put to sealed writer") + } + if cid_ == cid.Undef { + return fmt.Errorf("cid is undefined") + } + key := uint64tob(slot) + value := cid_.Bytes() + return w.index.Insert(key, value) +} + +func (w *SlotToCid_Writer) Seal(ctx context.Context, dstDir string) error { + if w.sealed { + return fmt.Errorf("already sealed") + } + + filepath := filepath.Join(dstDir, formatFilename_SlotToCid(w.meta.Epoch, w.meta.RootCid, w.meta.Network)) + w.finalPath = filepath + + file, err := os.Create(filepath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer file.Close() + + if err := w.index.Seal(ctx, file); err != nil { + return fmt.Errorf("failed to seal index: %w", err) + } + w.sealed = true + + return nil +} + +func (w *SlotToCid_Writer) Close() error { + if !w.sealed { + panic(fmt.Errorf("attempted to close a slot-to-cid index that was not sealed")) + } + return w.index.Close() +} + +// GetFilepath returns the path to the sealed index file. +func (w *SlotToCid_Writer) GetFilepath() string { + return w.finalPath +} + +type SlotToCid_Reader struct { + file io.Closer + meta *Metadata + index *compactindexsized.DB +} + +func Open_SlotToCid(filepath string) (*SlotToCid_Reader, error) { + file, err := os.Open(filepath) + if err != nil { + return nil, err + } + return OpenWithReader_SlotToCid(file) +} + +func OpenWithReader_SlotToCid(reader ReaderAtCloser) (*SlotToCid_Reader, error) { + index, err := compactindexsized.Open(reader) + if err != nil { + return nil, err + } + meta, err := getDefaultMetadata(index) + if err != nil { + return nil, err + } + if !IsValidNetwork(meta.Network) { + return nil, fmt.Errorf("invalid network") + } + if meta.RootCid == cid.Undef { + return nil, fmt.Errorf("root cid is undefined") + } + if err := meta.AssertIndexKind(Kind_SlotToCid); err != nil { + return nil, err + } + return &SlotToCid_Reader{ + file: reader, + meta: meta, + index: index, + }, nil +} + +func (r *SlotToCid_Reader) Get(slot uint64) (cid.Cid, error) { + key := uint64tob(slot) + value, err := r.index.Lookup(key) + if err != nil { + return cid.Undef, err + } + _, c, err := cid.CidFromBytes(value[:]) + if err != nil { + return cid.Undef, err + } + return c, nil +} + +func (r *SlotToCid_Reader) Close() error { + return r.file.Close() +} + +// Meta returns the metadata for the index. 
+func (r *SlotToCid_Reader) Meta() *Metadata {
+	return r.meta
+}
+
+func (r *SlotToCid_Reader) Prefetch(b bool) {
+	r.index.Prefetch(b)
+}
diff --git a/indexes/index-slot-to-cid_test.go b/indexes/index-slot-to-cid_test.go
new file mode 100644
index 00000000..007a67eb
--- /dev/null
+++ b/indexes/index-slot-to-cid_test.go
@@ -0,0 +1,119 @@
+package indexes_test
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ipfs/go-cid"
+	"github.com/rpcpool/yellowstone-faithful/indexes"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSlotToCid(t *testing.T) {
+	epoch := uint64(123)
+	cstr := "bafyreids2hw6eynl4vag3cdp535sxz6zp6tedhuv6xu3k3rze3fskqy4yy"
+	rootCid, err := cid.Parse(cstr)
+	require.NoError(t, err)
+	numItems := uint64(10000)
+
+	dstDir := t.TempDir()
+	writer, err := indexes.NewWriter_SlotToCid(
+		epoch,
+		rootCid,
+		indexes.NetworkMainnet,
+		"",
+		numItems,
+	)
+	require.NoError(t, err)
+	require.NotNil(t, writer)
+
+	// write some data to the index
+	cid1_, err := cid.Parse("bafyreibwvjchy4qq6tqeqg4olawpzs3cphr7nqp5gz2ch5bnttt2ajg6p4")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(123, cid1_))
+
+	cid2_, err := cid.Parse("bafyreibqlzq4vrezlbgn7qqgz36tx5itaelyxw4v2xyjho5fqqlrslf2vq")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(123456, cid2_))
+
+	cid3_, err := cid.Parse("bafyreiciqiiofeu74nt4drrw6pysqaethngzjtlbsyskvjmntqzx4fzv7q")
+	require.NoError(t, err)
+	require.NoError(t, writer.Put(123456789, cid3_))
+
+	{
+		// add the other 9,997 items so the index holds numItems entries
+		for i := uint64(0); i < numItems-3; i++ {
+			cid_ := cid.NewCidV1(cid.Raw, []byte(fmt.Sprintf("cid-%d", i)))
+			require.NoError(t, writer.Put(i*33, cid_))
+		}
+	}
+	{
+		// trying to close the index before sealing it should panic
+		require.Panics(t, func() {
+			require.NoError(t, writer.Close())
+		})
+	}
+
+	// seal the index
+	require.NoError(t, writer.Seal(context.TODO(), dstDir))
+	t.Log(writer.GetFilepath())
+	{
+		files, err := os.ReadDir(dstDir)
+		require.NoError(t, err)
+		// should contain the index file
+		has := false
+		for _, file := range files {
+			// check if file exists
+			completePath := filepath.Join(dstDir, file.Name())
+			file, err := os.Stat(completePath)
+			require.NoError(t, err)
+			// check if file is not empty
+			require.NotZero(t, file.Size())
+
+			if completePath == writer.GetFilepath() {
+				has = true
+			}
+		}
+		require.True(t, has)
+	}
+
+	finalFilepath := writer.GetFilepath()
+	require.NotEmpty(t, finalFilepath)
+
+	// close the index
+	require.NoError(t, writer.Close())
+
+	// open the index
+	reader, err := indexes.Open_SlotToCid(finalFilepath)
+	require.NoError(t, err)
+	require.NotNil(t, reader)
+
+	// read the data back
+	{
+		cid_, err := reader.Get(123)
+		require.NoError(t, err)
+		require.Equal(t, cid1_, cid_)
+
+		cid_, err = reader.Get(123456)
+		require.NoError(t, err)
+		require.Equal(t, cid2_, cid_)
+
+		cid_, err = reader.Get(123456789)
+		require.NoError(t, err)
+		require.Equal(t, cid3_, cid_)
+	}
+
+	// check metadata
+	{
+		metadata := reader.Meta()
+		require.NotNil(t, metadata)
+
+		require.Equal(t, epoch, metadata.Epoch)
+		require.Equal(t, rootCid, metadata.RootCid)
+		require.Equal(t, indexes.NetworkMainnet, metadata.Network)
+		require.Equal(t, indexes.Kind_SlotToCid, metadata.IndexKind)
+	}
+}
diff --git a/indexes/metadata.go b/indexes/metadata.go
new file mode 100644
index 00000000..f5cb1a44
--- /dev/null
+++ b/indexes/metadata.go
@@ -0,0 +1,128 @@
+package indexes
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/ipfs/go-cid"
+	
"github.com/rpcpool/yellowstone-faithful/compactindexsized" +) + +type Metadata struct { + Epoch uint64 + RootCid cid.Cid + Network Network + IndexKind []byte +} + +// Assert Epoch is x. +func (m *Metadata) AssertEpoch(x uint64) error { + if m.Epoch != x { + return fmt.Errorf("expected epoch %d, got %d", x, m.Epoch) + } + return nil +} + +// Assert RootCid is x. +func (m *Metadata) AssertRootCid(x cid.Cid) error { + if !m.RootCid.Equals(x) { + return fmt.Errorf("expected root cid %s, got %s", x, m.RootCid) + } + return nil +} + +// Assert Network is x. +func (m *Metadata) AssertNetwork(x Network) error { + if m.Network != x { + return fmt.Errorf("expected network %q, got %q", x, m.Network) + } + return nil +} + +// Assert IndexKind is x. +func (m *Metadata) AssertIndexKind(x []byte) error { + if !bytes.Equal(m.IndexKind, x) { + return fmt.Errorf("expected index kind %q, got %q", x, m.IndexKind) + } + return nil +} + +var ( + MetadataKey_Epoch = []byte("epoch") + MetadataKey_RootCid = []byte("rootCid") + MetadataKey_Network = []byte("network") +) + +func setDefaultMetadata(index *compactindexsized.Builder, metadata *Metadata) error { + if index == nil { + return fmt.Errorf("index is nil") + } + if metadata == nil { + return fmt.Errorf("metadata is nil") + } + setter := index.Metadata() + + if err := setter.Add(MetadataKey_Epoch, uint64tob(metadata.Epoch)); err != nil { + return err + } + + if metadata.RootCid == cid.Undef { + return fmt.Errorf("root cid is undefined") + } + if err := setter.Add(MetadataKey_RootCid, metadata.RootCid.Bytes()); err != nil { + return err + } + + if !IsValidNetwork(metadata.Network) { + return fmt.Errorf("invalid network") + } + if err := setter.Add(MetadataKey_Network, []byte(metadata.Network)); err != nil { + return err + } + + if len(metadata.IndexKind) == 0 { + return fmt.Errorf("index kind is empty") + } + return setter.Add(compactindexsized.KeyKind, metadata.IndexKind) +} + +// getDefaultMetadata gets and validates the metadata from the index. +// Will return an error if some of the metadata is missing. 
+func getDefaultMetadata(index *compactindexsized.DB) (*Metadata, error) { + out := &Metadata{} + meta := index.Metadata + + indexKind, ok := meta.Get(compactindexsized.KeyKind) + if ok { + out.IndexKind = indexKind + } else { + return nil, fmt.Errorf("metadata.kind is empty (index kind)") + } + + epochBytes, ok := meta.Get(MetadataKey_Epoch) + if ok { + out.Epoch = btoUint64(epochBytes) + } else { + return nil, fmt.Errorf("metadata.epoch is empty") + } + + rootCidBytes, ok := meta.Get(MetadataKey_RootCid) + if ok { + var err error + out.RootCid, err = cid.Cast(rootCidBytes) + if err != nil { + return nil, err + } + } else { + return nil, fmt.Errorf("metadata.rootCid is empty") + } + + networkBytes, ok := meta.Get(MetadataKey_Network) + if ok { + out.Network = Network(networkBytes) + } else { + return nil, fmt.Errorf("metadata.network is empty") + } + + return out, nil +} diff --git a/indexes/networks.go b/indexes/networks.go new file mode 100644 index 00000000..694ee2c5 --- /dev/null +++ b/indexes/networks.go @@ -0,0 +1,18 @@ +package indexes + +type Network string + +const ( + NetworkMainnet Network = "mainnet" + NetworkTestnet Network = "testnet" + NetworkDevnet Network = "devnet" +) + +func IsValidNetwork(network Network) bool { + switch network { + case NetworkMainnet, NetworkTestnet, NetworkDevnet: + return true + default: + return false + } +} diff --git a/indexes/uints.go b/indexes/uints.go new file mode 100644 index 00000000..f98682c5 --- /dev/null +++ b/indexes/uints.go @@ -0,0 +1,78 @@ +package indexes + +import "encoding/binary" + +const ( + maxUint24 = 1<<24 - 1 + maxUint40 = 1<<40 - 1 + maxUint48 = 1<<48 - 1 + maxUint64 = 1<<64 - 1 +) + +// uint24tob converts a uint32 to a 3-byte slice; panics if v > maxUint24. +func uint24tob(v uint32) []byte { + if v > maxUint24 { + panic("uint24tob: value out of range") + } + buf := make([]byte, 4) + binary.LittleEndian.PutUint32(buf, v) + return buf[:3] +} + +// btoUint24 converts a 3-byte slice to a uint32. +func btoUint24(buf []byte) uint32 { + _ = buf[2] // bounds check hint to compiler + return binary.LittleEndian.Uint32(cloneAndPad(buf, 1)) +} + +// uint40tob converts a uint64 to a 5-byte slice; panics if v > maxUint40. +func uint40tob(v uint64) []byte { + if v > maxUint40 { + panic("uint40tob: value out of range") + } + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, v) + return buf[:5] +} + +// btoUint40 converts a 5-byte slice to a uint64. +func btoUint40(buf []byte) uint64 { + _ = buf[4] // bounds check hint to compiler + return binary.LittleEndian.Uint64(cloneAndPad(buf, 3)) +} + +// uint48tob converts a uint64 to a 6-byte slice; panics if v > maxUint48. +func uint48tob(v uint64) []byte { + if v > maxUint48 { + panic("uint48tob: value out of range") + } + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, v) + return buf[:6] +} + +// btoUint48 converts a 6-byte slice to a uint64. +func btoUint48(buf []byte) uint64 { + _ = buf[5] // bounds check hint to compiler + return binary.LittleEndian.Uint64(cloneAndPad(buf, 2)) +} + +// uint64tob converts a uint64 to an 8-byte slice. +func uint64tob(v uint64) []byte { + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, v) + return buf +} + +// btoUint64 converts an 8-byte slice to a uint64. +func btoUint64(buf []byte) uint64 { + _ = buf[7] // bounds check hint to compiler + return binary.LittleEndian.Uint64(buf) +} + +// cloneAndPad clones a byte slice and pads it with zeros. 
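+// For example, cloneAndPad([]byte{0x7b, 0x00, 0x00}, 1) returns
+// []byte{0x7b, 0x00, 0x00, 0x00}, which binary.LittleEndian.Uint32 decodes
+// as 123; copying also keeps the caller's backing array untouched.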
+func cloneAndPad(buf []byte, pad int) []byte { + out := make([]byte, len(buf)+pad) + copy(out, buf) + return out +} diff --git a/indexes/uints_test.go b/indexes/uints_test.go new file mode 100644 index 00000000..e6dfb5d2 --- /dev/null +++ b/indexes/uints_test.go @@ -0,0 +1,142 @@ +package indexes + +import ( + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUints(t *testing.T) { + { + require.Equal(t, int(16_777_215), maxUint24) + require.Equal(t, int(1_099_511_627_775), maxUint40) + require.Equal(t, int(281_474_976_710_655), maxUint48) + require.Equal(t, uint(math.MaxUint64), uint(maxUint64)) + } + { + v := uint24tob(0) + require.Equal(t, []byte{0, 0, 0}, v) + require.Equal(t, uint32(0), btoUint24(v)) + + v = uint24tob(1) + require.Equal(t, []byte{1, 0, 0}, v) + require.Equal(t, uint32(1), btoUint24(v)) + + v = uint24tob(maxUint24) + require.Equal(t, []byte{255, 255, 255}, v) + require.Equal(t, uint32(maxUint24), btoUint24(v)) + + v = uint24tob(123) + require.Equal(t, []byte{0x7b, 0x0, 0x0}, v) + require.Equal(t, uint32(123), btoUint24(v)) + + require.Panics(t, func() { + v = uint24tob(maxUint24 + 1) + require.Equal(t, []byte{0, 0, 0}, v) + require.Equal(t, uint32(0), btoUint24(v)) + }) + } + { + v := uint40tob(0) + require.Equal(t, []byte{0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(0), btoUint40(v)) + + v = uint40tob(1) + require.Equal(t, []byte{1, 0, 0, 0, 0}, v) + require.Equal(t, uint64(1), btoUint40(v)) + + v = uint40tob(123) + require.Equal(t, []byte{0x7b, 0x0, 0x0, 0x0, 0x0}, v) + require.Equal(t, uint64(123), btoUint40(v)) + + v = uint40tob(maxUint40) + require.Equal(t, []byte{255, 255, 255, 255, 255}, v) + require.Equal(t, uint64(maxUint40), btoUint40(v)) + + require.Panics(t, func() { + v = uint40tob(maxUint40 + 1) + require.Equal(t, []byte{0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(0), btoUint40(v)) + }) + } + { + v := uint48tob(0) + require.Equal(t, []byte{0, 0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(0), btoUint48(v)) + + v = uint48tob(1) + require.Equal(t, []byte{1, 0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(1), btoUint48(v)) + + v = uint48tob(123) + require.Equal(t, []byte{0x7b, 0x0, 0x0, 0x0, 0x0, 0x0}, v) + require.Equal(t, uint64(123), btoUint48(v)) + + v = uint48tob(maxUint48) + require.Equal(t, []byte{255, 255, 255, 255, 255, 255}, v) + require.Equal(t, uint64(maxUint48), btoUint48(v)) + + require.Panics(t, func() { + v = uint48tob(maxUint48 + 1) + require.Equal(t, []byte{0, 0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(0), btoUint48(v)) + }) + } + { + v := uint64tob(0) + require.Equal(t, []byte{0, 0, 0, 0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(0), btoUint64(v)) + + v = uint64tob(1) + require.Equal(t, []byte{1, 0, 0, 0, 0, 0, 0, 0}, v) + require.Equal(t, uint64(1), btoUint64(v)) + + v = uint64tob(123) + require.Equal(t, []byte{0x7b, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v) + require.Equal(t, uint64(123), btoUint64(v)) + + v = uint64tob(math.MaxUint64) + require.Equal(t, []byte{255, 255, 255, 255, 255, 255, 255, 255}, v) + require.Equal(t, uint64(math.MaxUint64), btoUint64(v)) + + v = uint64tob(math.MaxUint64 - 1) + require.Equal(t, []byte{254, 255, 255, 255, 255, 255, 255, 255}, v) + require.Equal(t, uint64(math.MaxUint64-1), btoUint64(v)) + } + { + buf := make([]byte, 9) + copy(buf[:6], uint48tob(123)) + copy(buf[6:], uint24tob(uint32(456))) + { + require.Equal(t, buf[:6], uint48tob(123)) + require.Equal(t, buf[6:], uint24tob(uint32(456))) + } + { + v := btoUint48(buf[:6]) + require.Equal(t, uint64(123), v) + require.Equal(t, 
uint32(123), uint32(v)) + } + { + v := btoUint24(buf[6:]) + require.Equal(t, uint32(456), v) + require.Equal(t, uint64(uint32(456)), uint64(v)) + } + + { + v := OffsetAndSize{ + Offset: 123, + Size: 456, + } + encoded := v.Bytes() + require.Equal(t, []byte{0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x01, 0x00}, encoded) + require.Equal(t, buf, encoded) + } + require.Equal(t, uint48tob(123), buf[:6]) + require.Equal(t, uint24tob(uint32(456)), buf[6:]) + require.Equal(t, uint64(123), btoUint48(buf[:6])) + require.Equal(t, uint32(456), btoUint24(uint24tob(uint32(456)))) + require.Equal(t, uint32(456), btoUint24(buf[6:])) + require.Equal(t, uint64(uint32(456)), uint64(btoUint24(buf[6:]))) + } +} From 1fa3195c8bd0c1baef7b9026c091fc2323cd31d2 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 22:54:45 +0100 Subject: [PATCH 14/63] Refactor lassie fetch --- cmd-fetch.go | 534 +++++++++++++++++++++++++++++++--------------- fetch-util.go | 65 ------ flags.go | 188 ++++++++++++---- lassie-wrapper.go | 19 +- 4 files changed, 513 insertions(+), 293 deletions(-) diff --git a/cmd-fetch.go b/cmd-fetch.go index d206c696..8d8b7d16 100644 --- a/cmd-fetch.go +++ b/cmd-fetch.go @@ -21,14 +21,16 @@ package main // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. import ( + "context" "fmt" "io" "net/url" - "os" "strings" - "time" + + golog "github.com/ipfs/go-log/v2" "github.com/dustin/go-humanize" + "github.com/filecoin-project/lassie/pkg/aggregateeventrecorder" "github.com/filecoin-project/lassie/pkg/events" "github.com/filecoin-project/lassie/pkg/indexerlookup" "github.com/filecoin-project/lassie/pkg/lassie" @@ -36,187 +38,334 @@ import ( "github.com/filecoin-project/lassie/pkg/retriever" "github.com/filecoin-project/lassie/pkg/storage" "github.com/filecoin-project/lassie/pkg/types" + "github.com/google/uuid" "github.com/ipfs/go-cid" + "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage/deferred" + "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/libp2p/go-libp2p" + trustlessutils "github.com/ipld/go-trustless-utils" + trustlesshttp "github.com/ipld/go-trustless-utils/http" + "github.com/libp2p/go-libp2p/config" "github.com/libp2p/go-libp2p/core/peer" "github.com/urfave/cli/v2" - "k8s.io/klog/v2" ) var globalFetchProviderAddrInfos []peer.AddrInfo var lassieFetchFlags = []cli.Flag{ &cli.StringFlag{ - Name: "output", - Aliases: []string{"o"}, - Usage: "the CAR file to write to, may be an existing or a new CAR, or use '-' to write to stdout", + Name: "output", + Aliases: []string{"o"}, + Usage: "the CAR file to write to, may be an existing or a new CAR, " + + "or use '-' to write to stdout", TakesFile: true, }, - &cli.DurationFlag{ - Name: "provider-timeout", - Aliases: []string{"pt"}, - Usage: "consider it an error after not receiving a response from a storage provider after this amount of time", - Value: 20 * time.Second, - }, - &cli.DurationFlag{ - Name: "global-timeout", - Aliases: []string{"gt"}, - Usage: "consider it an error after not completing the retrieval after this amount of time", - Value: 0, - }, &cli.BoolFlag{ Name: "progress", Aliases: []string{"p"}, Usage: "print progress output", }, &cli.StringFlag{ - Name: "dag-scope", - Usage: "describes the fetch behavior at the end of the traversal path. 
Valid values include [all, entity, block].", - DefaultText: "defaults to all, the entire DAG at the end of the path will be fetched", - Value: "all", + Name: "dag-scope", + Usage: "describes the fetch behavior at the end of the traversal " + + "path. Valid values include [all, entity, block].", + DefaultText: "defaults to all, the entire DAG at the end of the path will " + + "be fetched", + Value: "all", Action: func(cctx *cli.Context, v string) error { switch v { - case string(types.DagScopeAll): - case string(types.DagScopeEntity): - case string(types.DagScopeBlock): + case string(trustlessutils.DagScopeAll): + case string(trustlessutils.DagScopeEntity): + case string(trustlessutils.DagScopeBlock): default: - return fmt.Errorf("invalid dag-scope parameter, must be of value [all, entity, block]") + return fmt.Errorf("invalid dag-scope parameter, must be of value " + + "[all, entity, block]") } - return nil }, }, &cli.StringFlag{ - Name: "providers", - Aliases: []string{"provider"}, - DefaultText: "Providers will be discovered automatically", - Usage: "Addresses of providers, including peer IDs, to use instead of automatic discovery, seperated by a comma. All protocols will be attempted when connecting to these providers. Example: /ip4/1.2.3.4/tcp/1234/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", + Name: "entity-bytes", + Usage: "describes the byte range to consider when selecting the blocks " + + "from a sharded file. Valid values should be of the form from:to, where " + + "from and to are byte offsets and to may be '*'", + DefaultText: "defaults to the entire file, 0:*", Action: func(cctx *cli.Context, v string) error { - // Do nothing if given an empty string - if v == "" { - return nil + if _, err := trustlessutils.ParseByteRange(v); err != nil { + return fmt.Errorf("invalid entity-bytes parameter, must be of the " + + "form from:to, where from and to are byte offsets and to may be '*'") } - - var err error - globalFetchProviderAddrInfos, err = types.ParseProviderStrings(v) - return err + return nil }, }, - &cli.StringFlag{ - Name: "ipni-endpoint", - Aliases: []string{"ipni"}, - DefaultText: "Defaults to https://cid.contact", - Usage: "HTTP endpoint of the IPNI instance used to discover providers.", + &cli.BoolFlag{ + Name: "duplicates", + Usage: "allow duplicate blocks to be written to the output CAR, which " + + "may be useful for streaming.", + Aliases: []string{"dups"}, }, + FlagIPNIEndpoint, FlagEventRecorderAuth, FlagEventRecorderInstanceId, FlagEventRecorderUrl, FlagVerbose, FlagVeryVerbose, FlagProtocols, + FlagAllowProviders, FlagExcludeProviders, FlagTempDir, FlagBitswapConcurrency, + FlagGlobalTimeout, + FlagProviderTimeout, } var fetchCmd = &cli.Command{ Name: "fetch", Usage: "Fetches content from the IPFS and Filecoin network", - Before: before, - Action: Fetch, + After: after, + Action: fetchAction, Flags: lassieFetchFlags, } -func Fetch(cctx *cli.Context) error { +func after(cctx *cli.Context) error { + ResetGlobalFlags() + return nil +} + +func fetchAction(cctx *cli.Context) error { if cctx.Args().Len() != 1 { - return fmt.Errorf("usage: lassie fetch [-o ] [-t ] [/path/to/content]") + // "help" becomes a subcommand, clear it to deal with a urfave/cli bug + // Ref: https://github.com/urfave/cli/blob/v2.25.7/help.go#L253-L255 + cctx.Command.Subcommands = nil + cli.ShowCommandHelpAndExit(cctx, "fetch", 0) + return nil } - ctx := cctx.Context msgWriter := cctx.App.ErrWriter dataWriter := cctx.App.Writer - progress := cctx.Bool("progress") - providerTimeout := 
cctx.Duration("provider-timeout") - globalTimeout := cctx.Duration("global-timeout") - dagScope := cctx.String("dag-scope") - tempDir := cctx.String("tempdir") - bitswapConcurrency := cctx.Int("bitswap-concurrency") - eventRecorderURL := cctx.String("event-recorder-url") - authToken := cctx.String("event-recorder-auth") - instanceID := cctx.String("event-recorder-instance-id") - - rootCid, path, err := parseCidPath(cctx.Args().Get(0)) + root, path, scope, byteRange, duplicates, err := parseCidPath(cctx.Args().Get(0)) if err != nil { return err } - providerTimeoutOpt := lassie.WithProviderTimeout(providerTimeout) - - host, err := host.InitHost(ctx, []libp2p.Option{}) - if err != nil { - return err + if cctx.IsSet("dag-scope") { + if scope, err = trustlessutils.ParseDagScope(cctx.String("dag-scope")); err != nil { + return err + } } - hostOpt := lassie.WithHost(host) - lassieOpts := []lassie.LassieOption{providerTimeoutOpt, hostOpt} - if len(globalFetchProviderAddrInfos) > 0 { - finderOpt := lassie.WithFinder(retriever.NewDirectCandidateFinder(host, globalFetchProviderAddrInfos)) - if cctx.IsSet("ipni-endpoint") { - klog.Warning("Ignoring ipni-endpoint flag since direct provider is specified") - } - lassieOpts = append(lassieOpts, finderOpt) - } else if cctx.IsSet("ipni-endpoint") { - endpoint := cctx.String("ipni-endpoint") - endpointUrl, err := url.Parse(endpoint) - if err != nil { - klog.Error("Failed to parse IPNI endpoint as URL", "err", err) - return fmt.Errorf("cannot parse given IPNI endpoint %s as valid URL: %w", endpoint, err) - } - finder, err := indexerlookup.NewCandidateFinder(indexerlookup.WithHttpEndpoint(endpointUrl)) - if err != nil { - klog.Error("Failed to instantiate IPNI candidate finder", "err", err) + if cctx.IsSet("entity-bytes") { + if entityBytes, err := trustlessutils.ParseByteRange(cctx.String("entity-bytes")); err != nil { return err + } else if entityBytes.IsDefault() { + byteRange = nil + } else { + byteRange = &entityBytes } - lassieOpts = append(lassieOpts, lassie.WithFinder(finder)) - klog.Info("Using explicit IPNI endpoint to find candidates", "endpoint", endpoint) } - if len(providerBlockList) > 0 { - lassieOpts = append(lassieOpts, lassie.WithProviderBlockList(providerBlockList)) + if cctx.IsSet("duplicates") { + duplicates = cctx.Bool("duplicates") } - if len(protocols) > 0 { - lassieOpts = append(lassieOpts, lassie.WithProtocols(protocols)) + tempDir := cctx.String("tempdir") + progress := cctx.Bool("progress") + + output := cctx.String("output") + outfile := fmt.Sprintf("%s.car", root.String()) + if output != "" { + outfile = output } - if globalTimeout > 0 { - lassieOpts = append(lassieOpts, lassie.WithGlobalTimeout(globalTimeout)) + lassieCfg, err := buildLassieConfigFromCLIContext(cctx, nil, nil) + if err != nil { + return err + } + + eventRecorderURL := cctx.String("event-recorder-url") + authToken := cctx.String("event-recorder-auth") + instanceID := cctx.String("event-recorder-instance-id") + eventRecorderCfg := getEventRecorderConfig(eventRecorderURL, authToken, instanceID) + + err = fetchRun( + cctx.Context, + lassieCfg, + eventRecorderCfg, + msgWriter, + dataWriter, + root, + path, + scope, + byteRange, + duplicates, + tempDir, + progress, + outfile, + ) + if err != nil { + return cli.Exit(err, 1) } - if tempDir != "" { - lassieOpts = append(lassieOpts, lassie.WithTempDir(tempDir)) + return nil +} + +func parseCidPath(spec string) ( + root cid.Cid, + path datamodel.Path, + scope trustlessutils.DagScope, + byteRange *trustlessutils.ByteRange, + 
duplicates bool, + err error, +) { + scope = trustlessutils.DagScopeAll // default + + if !strings.HasPrefix(spec, "/ipfs/") { + cstr := strings.Split(spec, "/")[0] + path = datamodel.ParsePath(strings.TrimPrefix(spec, cstr)) + if root, err = cid.Parse(cstr); err != nil { + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, err + } + return root, path, scope, byteRange, duplicates, err } else { - tempDir = os.TempDir() + specParts := strings.Split(spec, "?") + spec = specParts[0] + + if root, path, err = trustlesshttp.ParseUrlPath(spec); err != nil { + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, err + } + + switch len(specParts) { + case 1: + case 2: + query, err := url.ParseQuery(specParts[1]) + if err != nil { + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, err + } + scope, err = trustlessutils.ParseDagScope(query.Get("dag-scope")) + if err != nil { + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, err + } + if query.Get("entity-bytes") != "" { + br, err := trustlessutils.ParseByteRange(query.Get("entity-bytes")) + if err != nil { + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, err + } + byteRange = &br + } + duplicates = query.Get("dups") == "y" + default: + return cid.Undef, datamodel.Path{}, trustlessutils.DagScopeAll, nil, false, fmt.Errorf("invalid query: %s", spec) + } + + return root, path, scope, byteRange, duplicates, nil } +} - if bitswapConcurrency > 0 { - lassieOpts = append(lassieOpts, lassie.WithBitswapConcurrency(bitswapConcurrency)) +type progressPrinter struct { + candidatesFound int + writer io.Writer +} + +func (pp *progressPrinter) subscriber(event types.RetrievalEvent) { + switch ret := event.(type) { + case events.StartedFindingCandidatesEvent: + fmt.Fprintf(pp.writer, "\rQuerying indexer for %s...\n", ret.RootCid()) + case events.StartedRetrievalEvent: + fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", events.Identifier(ret), ret.Code()) + case events.ConnectedToProviderEvent: + fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", events.Identifier(ret), ret.Code()) + case events.GraphsyncProposedEvent: + fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", events.Identifier(ret), ret.Code()) + case events.GraphsyncAcceptedEvent: + fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", events.Identifier(ret), ret.Code()) + case events.FirstByteEvent: + fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", events.Identifier(ret), ret.Code()) + case events.CandidatesFoundEvent: + pp.candidatesFound = len(ret.Candidates()) + case events.CandidatesFilteredEvent: + if len(fetchProviderAddrInfos) == 0 { + fmt.Fprintf(pp.writer, "Found %d storage provider candidate(s) in the indexer:\n", pp.candidatesFound) + } else { + fmt.Fprintf(pp.writer, "Using the specified storage provider(s):\n") + } + for _, candidate := range ret.Candidates() { + fmt.Fprintf(pp.writer, "\r\t%s, Protocols: %v\n", candidate.MinerPeer.ID, candidate.Metadata.Protocols()) + } + case events.FailedEvent: + fmt.Fprintf(pp.writer, "\rRetrieval failure from indexer: %s\n", ret.ErrorMessage()) + case events.FailedRetrievalEvent: + fmt.Fprintf(pp.writer, "\rRetrieval failure for [%s]: %s\n", events.Identifier(ret), ret.ErrorMessage()) + case events.SucceededEvent: + // noop, handled at return from Retrieve() } +} + +type onlyWriter struct { + w io.Writer +} + +func (ow *onlyWriter) Write(p []byte) (n int, err error) { + return 
ow.w.Write(p) +} - lassie, err := lassie.NewLassie(ctx, lassieOpts...) +type fetchRunFunc func( + ctx context.Context, + lassieCfg *lassie.LassieConfig, + eventRecorderCfg *aggregateeventrecorder.EventRecorderConfig, + msgWriter io.Writer, + dataWriter io.Writer, + rootCid cid.Cid, + path datamodel.Path, + dagScope trustlessutils.DagScope, + entityBytes *trustlessutils.ByteRange, + duplicates bool, + tempDir string, + progress bool, + outfile string, +) error + +var fetchRun fetchRunFunc = defaultFetchRun + +const stdoutFileString string = "-" // a string representing stdout + +// defaultFetchRun is the handler for the fetch command. +// This abstraction allows the fetch command to be invoked +// programmatically for testing. +func defaultFetchRun( + ctx context.Context, + lassieCfg *lassie.LassieConfig, + eventRecorderCfg *aggregateeventrecorder.EventRecorderConfig, + msgWriter io.Writer, + dataWriter io.Writer, + rootCid cid.Cid, + path datamodel.Path, + dagScope trustlessutils.DagScope, + entityBytes *trustlessutils.ByteRange, + duplicates bool, + tempDir string, + progress bool, + outfile string, +) error { + lassie, err := lassie.NewLassieWithConfig(ctx, lassieCfg) if err != nil { return err } - // create and subscribe an event recorder API if configured - setupLassieEventRecorder(ctx, eventRecorderURL, authToken, instanceID, lassie) + // create and subscribe an event recorder API if an endpoint URL is set + if eventRecorderCfg.EndpointURL != "" { + setupLassieEventRecorder(ctx, eventRecorderCfg, lassie) + } - if len(globalFetchProviderAddrInfos) == 0 { - fmt.Fprintf(msgWriter, "Fetching %s", rootCid.String()+path) + printPath := path.String() + if printPath != "" { + printPath = "/" + printPath + } + if len(fetchProviderAddrInfos) == 0 { + fmt.Fprintf(msgWriter, "Fetching %s", rootCid.String()+printPath) } else { - fmt.Fprintf(msgWriter, "Fetching %s from %v", rootCid.String()+path, globalFetchProviderAddrInfos) + fmt.Fprintf(msgWriter, "Fetching %s from specified provider(s)", rootCid.String()+printPath) } if progress { fmt.Fprintln(msgWriter) @@ -224,21 +373,34 @@ func Fetch(cctx *cli.Context) error { lassie.RegisterSubscriber(pp.subscriber) } - outfile := fmt.Sprintf("%s.car", rootCid) - if cctx.IsSet("output") { - outfile = cctx.String("output") + var carWriter storage.DeferredWriter + carOpts := []car.Option{ + car.WriteAsCarV1(true), + car.StoreIdentityCIDs(false), + car.UseWholeCIDs(false), } - var carWriter *storage.DeferredCarWriter - if outfile == "-" { // stdout + tempStore := storage.NewDeferredStorageCar(tempDir, rootCid) + + if outfile == stdoutFileString { // we need the onlyWriter because stdout is presented as an os.File, and // therefore pretend to support seeks, so feature-checking in go-car // will make bad assumptions about capabilities unless we hide it - carWriter = storage.NewDeferredCarWriterForStream(rootCid, &onlyWriter{dataWriter}) + w := &onlyWriter{dataWriter} + if duplicates { + carWriter = storage.NewDuplicateAdderCarForStream(ctx, w, rootCid, path.String(), dagScope, entityBytes, tempStore) + } else { + carWriter = deferred.NewDeferredCarWriterForStream(w, []cid.Cid{rootCid}, carOpts...) + } } else { - carWriter = storage.NewDeferredCarWriterForPath(rootCid, outfile) + if duplicates { + carWriter = storage.NewDuplicateAdderCarForPath(ctx, outfile, rootCid, path.String(), dagScope, entityBytes, tempStore) + } else { + carWriter = deferred.NewDeferredCarWriterForPath(outfile, []cid.Cid{rootCid}, carOpts...) 
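+			// Note: the deferred writer is expected to delay creating the
+			// output file until the first block is written, so a retrieval
+			// that fails early should not leave an empty CAR behind.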
+ } } - tempStore := storage.NewDeferredStorageCar(tempDir) + defer carWriter.Close() + carStore := storage.NewCachingTempStore(carWriter.BlockWriteOpener(), tempStore) defer carStore.Close() @@ -254,7 +416,7 @@ func Fetch(cctx *cli.Context) error { } }, false) - request, err := types.NewRequestForPath(carStore, rootCid, path, types.DagScope(dagScope)) + request, err := types.NewRequestForPath(carStore, rootCid, path.String(), dagScope, entityBytes) if err != nil { return err } @@ -265,18 +427,23 @@ func Fetch(cctx *cli.Context) error { request.PreloadLinkSystem.SetReadStorage(preloadStore) request.PreloadLinkSystem.SetWriteStorage(preloadStore) request.PreloadLinkSystem.TrustedStorage = true + request.Duplicates = duplicates - stats, err := lassie.Fetch(ctx, request, func(types.RetrievalEvent) {}) + stats, err := lassie.Fetch(ctx, request) if err != nil { fmt.Fprintln(msgWriter) return err } + spid := stats.StorageProviderId.String() + if spid == "" { + spid = types.BitswapIndentifier + } fmt.Fprintf(msgWriter, "\nFetched [%s] from [%s]:\n"+ "\tDuration: %s\n"+ "\t Blocks: %d\n"+ "\t Bytes: %s\n", rootCid, - stats.StorageProviderId, + spid, stats.Duration, blockCount, humanize.IBytes(stats.Size), @@ -285,77 +452,94 @@ func Fetch(cctx *cli.Context) error { return nil } -func parseCidPath(cpath string) (cid.Cid, string, error) { - cstr := strings.Split(cpath, "/")[0] - path := strings.TrimPrefix(cpath, cstr) - rootCid, err := cid.Parse(cstr) - if err != nil { - return cid.Undef, "", err +func buildLassieConfigFromCLIContext(cctx *cli.Context, lassieOpts []lassie.LassieOption, libp2pOpts []config.Option) (*lassie.LassieConfig, error) { + providerTimeout := cctx.Duration("provider-timeout") + globalTimeout := cctx.Duration("global-timeout") + bitswapConcurrency := cctx.Int("bitswap-concurrency") + bitswapConcurrencyPerRetrieval := cctx.Int("bitswap-concurrency-per-retrieval") + + lassieOpts = append(lassieOpts, lassie.WithProviderTimeout(providerTimeout)) + + if globalTimeout > 0 { + lassieOpts = append(lassieOpts, lassie.WithGlobalTimeout(globalTimeout)) } - return rootCid, path, nil -} -type progressPrinter struct { - candidatesFound int - writer io.Writer -} + if len(protocols) > 0 { + lassieOpts = append(lassieOpts, lassie.WithProtocols(protocols)) + } -func (pp *progressPrinter) subscriber(event types.RetrievalEvent) { - switch ret := event.(type) { - case events.RetrievalEventStarted: - switch ret.Phase() { - case types.IndexerPhase: - fmt.Fprintf(pp.writer, "\rQuerying indexer for %s...\n", ret.PayloadCid()) - case types.QueryPhase: - fmt.Fprintf(pp.writer, "\rQuerying [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - case types.RetrievalPhase: - fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - } - case events.RetrievalEventConnected: - switch ret.Phase() { - case types.QueryPhase: - fmt.Fprintf(pp.writer, "\rQuerying [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - case types.RetrievalPhase: - fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - } - case events.RetrievalEventProposed: - fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - case events.RetrievalEventAccepted: - fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - case events.RetrievalEventFirstByte: - fmt.Fprintf(pp.writer, "\rRetrieving from [%s] (%s)...\n", types.Identifier(ret), ret.Code()) - case events.RetrievalEventCandidatesFound: 
- pp.candidatesFound = len(ret.Candidates()) - case events.RetrievalEventCandidatesFiltered: - num := "all of them" - if pp.candidatesFound != len(ret.Candidates()) { - num = fmt.Sprintf("%d of them", len(ret.Candidates())) - } else if pp.candidatesFound == 1 { - num = "it" - } - if len(globalFetchProviderAddrInfos) > 0 { - fmt.Fprintf(pp.writer, "Found %d storage providers candidates from the indexer, querying %s:\n", pp.candidatesFound, num) - } else { - fmt.Fprintf(pp.writer, "Using the explicitly specified storage provider(s), querying %s:\n", num) + host, err := host.InitHost(cctx.Context, libp2pOpts) + if err != nil { + return nil, err + } + lassieOpts = append(lassieOpts, lassie.WithHost(host)) + + if len(fetchProviderAddrInfos) > 0 { + finderOpt := lassie.WithFinder(retriever.NewDirectCandidateFinder(host, fetchProviderAddrInfos)) + if cctx.IsSet("ipni-endpoint") { + logger.Warn("Ignoring ipni-endpoint flag since direct provider is specified") } - for _, candidate := range ret.Candidates() { - fmt.Fprintf(pp.writer, "\r\t%s, Protocols: %v\n", candidate.MinerPeer.ID, candidate.Metadata.Protocols()) + lassieOpts = append(lassieOpts, finderOpt) + } else if cctx.IsSet("ipni-endpoint") { + endpoint := cctx.String("ipni-endpoint") + endpointUrl, err := url.ParseRequestURI(endpoint) + if err != nil { + logger.Errorw("Failed to parse IPNI endpoint as URL", "err", err) + return nil, fmt.Errorf("cannot parse given IPNI endpoint %s as valid URL: %w", endpoint, err) } - case events.RetrievalEventFailed: - if ret.Phase() == types.IndexerPhase { - fmt.Fprintf(pp.writer, "\rRetrieval failure from indexer: %s\n", ret.ErrorMessage()) - } else { - fmt.Fprintf(pp.writer, "\rRetrieval failure for [%s]: %s\n", types.Identifier(ret), ret.ErrorMessage()) + finder, err := indexerlookup.NewCandidateFinder(indexerlookup.WithHttpEndpoint(endpointUrl)) + if err != nil { + logger.Errorw("Failed to instantiate IPNI candidate finder", "err", err) + return nil, err } - case events.RetrievalEventSuccess: - // noop, handled at return from Retrieve() + lassieOpts = append(lassieOpts, lassie.WithFinder(finder)) + logger.Debug("Using explicit IPNI endpoint to find candidates", "endpoint", endpoint) + } + + if len(providerBlockList) > 0 { + lassieOpts = append(lassieOpts, lassie.WithProviderBlockList(providerBlockList)) + } + + if bitswapConcurrency > 0 { + lassieOpts = append(lassieOpts, lassie.WithBitswapConcurrency(bitswapConcurrency)) } + + if bitswapConcurrencyPerRetrieval > 0 { + lassieOpts = append(lassieOpts, lassie.WithBitswapConcurrencyPerRetrieval(bitswapConcurrencyPerRetrieval)) + } else if bitswapConcurrency > 0 { + lassieOpts = append(lassieOpts, lassie.WithBitswapConcurrencyPerRetrieval(bitswapConcurrency)) + } + + return lassie.NewLassieConfig(lassieOpts...), nil } -type onlyWriter struct { - w io.Writer +func getEventRecorderConfig(endpointURL string, authToken string, instanceID string) *aggregateeventrecorder.EventRecorderConfig { + return &aggregateeventrecorder.EventRecorderConfig{ + InstanceID: instanceID, + EndpointURL: endpointURL, + EndpointAuthorization: authToken, + } } -func (ow *onlyWriter) Write(p []byte) (n int, err error) { - return ow.w.Write(p) +// setupLassieEventRecorder creates and subscribes an EventRecorder if an event recorder URL is given +func setupLassieEventRecorder( + ctx context.Context, + cfg *aggregateeventrecorder.EventRecorderConfig, + lassie *lassie.Lassie, +) { + if cfg.EndpointURL != "" { + if cfg.InstanceID == "" { + uuid, err := uuid.NewRandom() + if err != nil { + 
logger.Warnw("failed to generate default event recorder instance ID UUID, no instance ID will be provided", "err", err) + } + cfg.InstanceID = uuid.String() // returns "" if uuid is invalid + } + + eventRecorder := aggregateeventrecorder.NewAggregateEventRecorder(ctx, *cfg) + lassie.RegisterSubscriber(eventRecorder.RetrievalEventSubscriber()) + logger.Infow("Reporting retrieval events to event recorder API", "url", cfg.EndpointURL, "instance_id", cfg.InstanceID) + } } + +var logger = golog.Logger("lassie/main") diff --git a/fetch-util.go b/fetch-util.go index 224154ae..32414551 100644 --- a/fetch-util.go +++ b/fetch-util.go @@ -20,68 +20,3 @@ package main // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -import ( - "context" - "os" - - "github.com/filecoin-project/lassie/pkg/aggregateeventrecorder" - "github.com/filecoin-project/lassie/pkg/lassie" - "github.com/google/uuid" - "github.com/ipfs/go-log" - "github.com/urfave/cli/v2" - "k8s.io/klog/v2" -) - -func before(cctx *cli.Context) error { - // Determine logging level - subsystems := []string{ - "lassie", - "lassie/httpserver", - "indexerlookup", - "lassie/bitswap", - } - - level := "WARN" - if IsVerbose { - level = "INFO" - } - if IsVeryVerbose { - level = "DEBUG" - } - - // don't over-ride logging if set in the environment. - if os.Getenv("GOLOG_LOG_LEVEL") == "" { - for _, name := range subsystems { - _ = log.SetLogLevel(name, level) - } - } - - return nil -} - -// setupLassieEventRecorder creates and subscribes an EventRecorder if an event recorder URL is given -func setupLassieEventRecorder( - ctx context.Context, - eventRecorderURL string, - authToken string, - instanceID string, - lassie *lassie.Lassie, -) { - if eventRecorderURL != "" { - if instanceID == "" { - uuid, err := uuid.NewRandom() - if err != nil { - klog.Warning("failed to generate default event recorder instance ID UUID, no instance ID will be provided", "err", err) - } - instanceID = uuid.String() // returns "" if uuid is invalid - } - - eventRecorder := aggregateeventrecorder.NewAggregateEventRecorder(ctx, aggregateeventrecorder.EventRecorderConfig{ - InstanceID: instanceID, - EndpointURL: eventRecorderURL, - EndpointAuthorization: authToken, - }) - lassie.RegisterSubscriber(eventRecorder.RetrievalEventSubscriber()) - klog.Warningln("Reporting retrieval events to event recorder API", "url", eventRecorderURL, "instance_id", instanceID) - } -} diff --git a/flags.go b/flags.go index 62b118aa..b1a50355 100644 --- a/flags.go +++ b/flags.go @@ -2,9 +2,14 @@ package main // CREDIT: from https://github.com/filecoin-project/lassie/blob/main/cmd/lassie/flags.go import ( + "os" "strings" + "time" + "github.com/filecoin-project/lassie/pkg/heyfil" + "github.com/filecoin-project/lassie/pkg/lassie" "github.com/filecoin-project/lassie/pkg/types" + "github.com/ipfs/go-log" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multicodec" "github.com/urfave/cli/v2" @@ -14,37 +19,64 @@ import ( // verbose mode or not (default: false). var IsVerbose bool -// FlagVerbose enables verbose mode, which shows verbose information about -// operations invoked in the CLI. It should be included as a flag on the -// top-level command (e.g. lassie -v). 
+var ( + defaultTempDirectory string = os.TempDir() // use the system default temp dir + verboseLoggingSubsystems []string = []string{ // verbose logging is enabled for these subsystems when using the verbose or very-verbose flags + "lassie", + "lassie/retriever", + "lassie/httpserver", + "lassie/indexerlookup", + "lassie/bitswap", + } +) + +const ( + defaultProviderTimeout time.Duration = 20 * time.Second // 20 seconds +) + +// FlagVerbose enables verbose mode, which shows info information about +// operations invoked in the CLI. var FlagVerbose = &cli.BoolFlag{ - Name: "verbose", - Aliases: []string{"v"}, - Usage: "enable verbose mode for logging", - Destination: &IsVerbose, + Name: "verbose", + Aliases: []string{"v"}, + Usage: "enable verbose mode for logging", + Action: setLogLevel("INFO"), } -// IsVeryVerbose is a global var signaling if the CLI is running in -// very verbose mode or not (default: false). -var IsVeryVerbose bool - -// FlagVerbose enables verbose mode, which shows verbose information about -// operations invoked in the CLI. It should be included as a flag on the -// top-level command (e.g. lassie -v). +// FlagVeryVerbose enables very verbose mode, which shows debug information about +// operations invoked in the CLI. var FlagVeryVerbose = &cli.BoolFlag{ - Name: "very-verbose", - Aliases: []string{"vv"}, - Usage: "enable very verbose mode for debugging", - Destination: &IsVeryVerbose, + Name: "very-verbose", + Aliases: []string{"vv"}, + Usage: "enable very verbose mode for debugging", + Action: setLogLevel("DEBUG"), +} + +// setLogLevel returns a CLI Action function that sets the +// logging level for the given subsystems to the given level. +// It is used as an action for the verbose and very-verbose flags. +func setLogLevel(level string) func(*cli.Context, bool) error { + return func(cctx *cli.Context, _ bool) error { + // don't override logging if set in the environment. + if os.Getenv("GOLOG_LOG_LEVEL") != "" { + return nil + } + // set the logging level for the given subsystems + for _, name := range verboseLoggingSubsystems { + _ = log.SetLogLevel(name, level) + } + return nil + } } // FlagEventRecorderAuth asks for and provides the authorization token for // sending metrics to an event recorder API via a Basic auth Authorization // HTTP header. Value will formatted as "Basic " if provided. var FlagEventRecorderAuth = &cli.StringFlag{ - Name: "event-recorder-auth", - Usage: "the authorization token for an event recorder API", - EnvVars: []string{"LASSIE_EVENT_RECORDER_AUTH"}, + Name: "event-recorder-auth", + Usage: "the authorization token for an event recorder API", + DefaultText: "no authorization token will be used", + EnvVars: []string{"LASSIE_EVENT_RECORDER_AUTH"}, } // FlagEventRecorderUrl asks for and provides the URL for an event recorder API @@ -59,34 +91,67 @@ var FlagEventRecorderInstanceId = &cli.StringFlag{ // FlagEventRecorderUrl asks for and provides the URL for an event recorder API // to send metrics to. 
var FlagEventRecorderUrl = &cli.StringFlag{ - Name: "event-recorder-url", - Usage: "the url of an event recorder API", - EnvVars: []string{"LASSIE_EVENT_RECORDER_URL"}, + Name: "event-recorder-url", + Usage: "the url of an event recorder API", + DefaultText: "no event recorder API will be used", + EnvVars: []string{"LASSIE_EVENT_RECORDER_URL"}, } -var providerBlockList map[peer.ID]bool +var ( + providerBlockList map[peer.ID]bool + FlagExcludeProviders = &cli.StringFlag{ + Name: "exclude-providers", + DefaultText: "All providers allowed", + Usage: "Provider peer IDs, separated by a comma. Example: 12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", + EnvVars: []string{"LASSIE_EXCLUDE_PROVIDERS"}, + Action: func(cctx *cli.Context, v string) error { + // Do nothing if given an empty string + if v == "" { + return nil + } -var FlagExcludeProviders = &cli.StringFlag{ - Name: "exclude-providers", - DefaultText: "All providers allowed", - Usage: "Provider peer IDs, seperated by a comma. Example: 12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4", - EnvVars: []string{"LASSIE_EXCLUDE_PROVIDERS"}, + providerBlockList = make(map[peer.ID]bool) + vs := strings.Split(v, ",") + for _, v := range vs { + peerID, err := peer.Decode(v) + if err != nil { + return err + } + providerBlockList[peerID] = true + } + return nil + }, + } +) + +var fetchProviderAddrInfos []peer.AddrInfo + +var FlagAllowProviders = &cli.StringFlag{ + Name: "providers", + Aliases: []string{"provider"}, + DefaultText: "Providers will be discovered automatically", + Usage: "Comma-separated addresses of providers, to use instead of " + + "automatic discovery. Accepts full multiaddrs including peer ID, " + + "multiaddrs without peer ID and url-style addresses for HTTP and " + + "Filecoin SP f0 actor addresses. Lassie will attempt to connect to the " + + "peer(s). 
Example: " + + "/ip4/1.2.3.4/tcp/1234/p2p/12D3KooWBSTEYMLSu5FnQjshEVah9LFGEZoQt26eacCEVYfedWA4,http://ipfs.io,f01234", + EnvVars: []string{"LASSIE_ALLOW_PROVIDERS"}, Action: func(cctx *cli.Context, v string) error { // Do nothing if given an empty string if v == "" { return nil } - providerBlockList = make(map[peer.ID]bool) - vs := strings.Split(v, ",") - for _, v := range vs { - peerID, err := peer.Decode(v) - if err != nil { - return err - } - providerBlockList[peerID] = true + // in case we have been given filecoin actor addresses we can look them up + // with heyfil and translate to full multiaddrs, otherwise this is a + // pass-through + trans, err := heyfil.Heyfil{TranslateFaddr: true}.TranslateAll(strings.Split(v, ",")) + if err != nil { + return err } - return nil + fetchProviderAddrInfos, err = types.ParseProviderStrings(strings.Join(trans, ",")) + return err }, } @@ -95,7 +160,7 @@ var ( FlagProtocols = &cli.StringFlag{ Name: "protocols", DefaultText: "bitswap,graphsync,http", - Usage: "List of retrieval protocols to use, seperated by a comma", + Usage: "List of retrieval protocols to use, separated by a comma", EnvVars: []string{"LASSIE_SUPPORTED_PROTOCOLS"}, Action: func(cctx *cli.Context, v string) error { // Do nothing if given an empty string @@ -114,14 +179,51 @@ var FlagTempDir = &cli.StringFlag{ Name: "tempdir", Aliases: []string{"td"}, Usage: "directory to store temporary files while downloading", - Value: "", + Value: defaultTempDirectory, DefaultText: "os temp directory", EnvVars: []string{"LASSIE_TEMP_DIRECTORY"}, } var FlagBitswapConcurrency = &cli.IntFlag{ Name: "bitswap-concurrency", - Usage: "maximum number of concurrent bitswap requests per retrieval", - Value: 6, + Usage: "maximum number of concurrent bitswap requests", + Value: lassie.DefaultBitswapConcurrency, EnvVars: []string{"LASSIE_BITSWAP_CONCURRENCY"}, } + +var FlagBitswapConcurrencyPerRetrieval = &cli.IntFlag{ + Name: "bitswap-concurrency-per-retrieval", + Usage: "maximum number of concurrent bitswap requests per retrieval", + Value: lassie.DefaultBitswapConcurrencyPerRetrieval, + EnvVars: []string{"LASSIE_BITSWAP_CONCURRENCY_PER_RETRIEVAL"}, +} + +var FlagGlobalTimeout = &cli.DurationFlag{ + Name: "global-timeout", + Aliases: []string{"gt"}, + Usage: "consider it an error after not completing a retrieval after this amount of time", + EnvVars: []string{"LASSIE_GLOBAL_TIMEOUT"}, +} + +var FlagProviderTimeout = &cli.DurationFlag{ + Name: "provider-timeout", + Aliases: []string{"pt"}, + Usage: "consider it an error after not receiving a response from a storage provider after this amount of time", + Value: defaultProviderTimeout, + EnvVars: []string{"LASSIE_PROVIDER_TIMEOUT"}, +} + +var FlagIPNIEndpoint = &cli.StringFlag{ + Name: "ipni-endpoint", + Aliases: []string{"ipni"}, + DefaultText: "Defaults to https://cid.contact", + Usage: "HTTP endpoint of the IPNI instance used to discover providers.", +} + +func ResetGlobalFlags() { + // Reset global variables here so that they are not used + // in subsequent calls to commands during testing. 
+ fetchProviderAddrInfos = make([]peer.AddrInfo, 0) + protocols = make([]multicodec.Code, 0) + providerBlockList = make(map[peer.ID]bool) +} diff --git a/lassie-wrapper.go b/lassie-wrapper.go index f1fa38ef..65c94222 100644 --- a/lassie-wrapper.go +++ b/lassie-wrapper.go @@ -15,6 +15,7 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/storage" "github.com/ipld/go-ipld-prime/storage/memstore" + trustlessutils "github.com/ipld/go-trustless-utils" "github.com/libp2p/go-libp2p" "github.com/libp2p/go-libp2p/core/peer" "github.com/urfave/cli/v2" @@ -38,7 +39,7 @@ func (l *lassieWrapper) GetNodeByCid(ctx context.Context, wantedCid cid.Cid) ([] ctx, wantedCid, "", - types.DagScopeBlock, + trustlessutils.DagScopeBlock, store, ) if err != nil { @@ -60,7 +61,7 @@ func (l *lassieWrapper) GetSubgraph(ctx context.Context, wantedCid cid.Cid) (*Wr ctx, wantedCid, "", - types.DagScopeAll, + trustlessutils.DagScopeAll, store, ) if err != nil { @@ -74,10 +75,10 @@ func (l *lassieWrapper) Fetch( ctx context.Context, rootCid cid.Cid, path string, - dagScope types.DagScope, + dagScope trustlessutils.DagScope, store RwStorage, ) (*types.RetrievalStats, error) { - request, err := types.NewRequestForPath(store, rootCid, path, types.DagScope(dagScope)) + request, err := types.NewRequestForPath(store, rootCid, path, trustlessutils.DagScope(dagScope), nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } @@ -86,7 +87,7 @@ func (l *lassieWrapper) Fetch( request.PreloadLinkSystem.SetWriteStorage(store) request.PreloadLinkSystem.TrustedStorage = true - stats, err := l.lassie.Fetch(ctx, request, func(types.RetrievalEvent) {}) + stats, err := l.lassie.Fetch(ctx, request) if err != nil { return stats, fmt.Errorf("failed to fetch: %w", err) } @@ -102,9 +103,6 @@ func newLassieWrapper( providerTimeout := cctx.Duration("provider-timeout") globalTimeout := cctx.Duration("global-timeout") bitswapConcurrency := cctx.Int("bitswap-concurrency") - eventRecorderURL := cctx.String("event-recorder-url") - authToken := cctx.String("event-recorder-auth") - instanceID := cctx.String("event-recorder-instance-id") providerTimeoutOpt := lassie.WithProviderTimeout(providerTimeout) @@ -158,8 +156,9 @@ func newLassieWrapper( return nil, err } - // create and subscribe an event recorder API if configured - setupLassieEventRecorder(ctx, eventRecorderURL, authToken, instanceID, lassie) + // if eventRecorderCfg.EndpointURL != "" { + // setupLassieEventRecorder(ctx, eventRecorderCfg, lassie) + // } return &lassieWrapper{ lassie: lassie, From 659f45532e7a7a7585caf54dde652fd32930a651 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 22:57:25 +0100 Subject: [PATCH 15/63] Cleanup flags --- cmd-x-index-all.go | 4 +++- cmd-x-index-cid2offset.go | 4 +++- cmd-x-index-sig2cid.go | 4 +++- cmd-x-index-slot2cid.go | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index 176dd89b..a3928369 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -48,10 +48,11 @@ func newCmd_Index_all() *cli.Command { Name: "epoch", Usage: "the epoch of the CAR file", Destination: &epoch, + Required: true, }, &cli.StringFlag{ Name: "network", - Usage: "the network of the CAR file", + Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { @@ -59,6 +60,7 @@ func newCmd_Index_all() *cli.Command { } return 
nil }, + Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/cmd-x-index-cid2offset.go b/cmd-x-index-cid2offset.go index 1ff9e616..f8a59fc3 100644 --- a/cmd-x-index-cid2offset.go +++ b/cmd-x-index-cid2offset.go @@ -36,10 +36,11 @@ func newCmd_Index_cid2offset() *cli.Command { Name: "epoch", Usage: "the epoch of the CAR file", Destination: &epoch, + Required: true, }, &cli.StringFlag{ Name: "network", - Usage: "the network of the CAR file", + Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { @@ -47,6 +48,7 @@ func newCmd_Index_cid2offset() *cli.Command { } return nil }, + Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/cmd-x-index-sig2cid.go b/cmd-x-index-sig2cid.go index 6875e315..9a3a94e7 100644 --- a/cmd-x-index-sig2cid.go +++ b/cmd-x-index-sig2cid.go @@ -36,10 +36,11 @@ func newCmd_Index_sig2cid() *cli.Command { Name: "epoch", Usage: "the epoch of the CAR file", Destination: &epoch, + Required: true, }, &cli.StringFlag{ Name: "network", - Usage: "the network of the CAR file", + Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { @@ -47,6 +48,7 @@ func newCmd_Index_sig2cid() *cli.Command { } return nil }, + Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/cmd-x-index-slot2cid.go b/cmd-x-index-slot2cid.go index d45c2d1e..450c126c 100644 --- a/cmd-x-index-slot2cid.go +++ b/cmd-x-index-slot2cid.go @@ -36,10 +36,11 @@ func newCmd_Index_slot2cid() *cli.Command { Name: "epoch", Usage: "the epoch of the CAR file", Destination: &epoch, + Required: true, }, &cli.StringFlag{ Name: "network", - Usage: "the network of the CAR file", + Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { @@ -47,6 +48,7 @@ func newCmd_Index_slot2cid() *cli.Command { } return nil }, + Required: true, }, }, Subcommands: []*cli.Command{}, From dbddacc4577c0343f852fb631639c93467211d36 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 23:06:37 +0100 Subject: [PATCH 16/63] Remove deprecated commands --- cmd-rpc-server-car-getBlock.go | 479 ----------------- cmd-rpc-server-car-getSignaturesForAddress.go | 160 ------ cmd-rpc-server-car-getTransaction.go | 123 ----- cmd-rpc-server-car.go | 488 ------------------ cmd-rpc-server-filecoin.go | 106 ---- http-handler.go | 59 --- main.go | 2 - 7 files changed, 1417 deletions(-) diff --git a/cmd-rpc-server-car-getBlock.go b/cmd-rpc-server-car-getBlock.go index 0d3d6985..21e88d7b 100644 --- a/cmd-rpc-server-car-getBlock.go +++ b/cmd-rpc-server-car-getBlock.go @@ -1,54 +1,11 @@ package main import ( - "bufio" - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "runtime" - "sort" - "sync" "time" - "github.com/gagliardetto/solana-go" - "github.com/ipfs/go-cid" - "github.com/ipld/go-car/util" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" - "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" - solanablockrewards "github.com/rpcpool/yellowstone-faithful/solana-block-rewards" - "github.com/sourcegraph/jsonrpc2" - "golang.org/x/sync/errgroup" "k8s.io/klog/v2" ) -type InternalError struct { - Err error -} - -func (e *InternalError) Error() string { - 
return fmt.Sprintf("internal error: %s", e.Err) -} - -func (e *InternalError) Unwrap() error { - return e.Err -} - -func (e *InternalError) IsPublic() bool { - return false -} - -func (e *InternalError) Is(err error) bool { - return errors.Is(e.Err, err) -} - -func (e *InternalError) As(target interface{}) bool { - return errors.As(e.Err, target) -} - type timer struct { start time.Time prev time.Time @@ -67,442 +24,6 @@ func (t *timer) time(name string) { t.prev = time.Now() } -func (ser *deprecatedRPCServer) handleGetBlock(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) { - tim := newTimer() - params, err := parseGetBlockRequest(req.Params) - if err != nil { - klog.Errorf("failed to parse params: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInvalidParams, - Message: "Invalid params", - }) - return - } - tim.time("parseGetBlockRequest") - slot := params.Slot - - block, err := ser.GetBlock(WithSubrapghPrefetch(ctx, true), slot) - if err != nil { - klog.Errorf("failed to get block: %v", err) - if errors.Is(err, compactindexsized.ErrNotFound) { - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: CodeNotFound, - Message: fmt.Sprintf("Slot %d was skipped, or missing in long-term storage", slot), - }) - } else { - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Failed to get block", - }) - } - return - } - tim.time("GetBlock") - { - prefetcherFromCar := func() error { - var blockCid, parentCid cid.Cid - wg := new(errgroup.Group) - wg.Go(func() (err error) { - blockCid, err = ser.FindCidFromSlot(ctx, slot) - if err != nil { - return err - } - return nil - }) - wg.Go(func() (err error) { - parentCid, err = ser.FindCidFromSlot(ctx, uint64(block.Meta.Parent_slot)) - if err != nil { - return err - } - return nil - }) - err = wg.Wait() - if err != nil { - return err - } - { - var blockOffset, parentOffset uint64 - wg := new(errgroup.Group) - wg.Go(func() (err error) { - blockOffset, err = ser.FindOffsetFromCid(ctx, blockCid) - if err != nil { - return err - } - return nil - }) - wg.Go(func() (err error) { - parentOffset, err = ser.FindOffsetFromCid(ctx, parentCid) - if err != nil { - // If the parent is not found, it (probably) means that it's outside of the car file. 
- parentOffset = 0 - } - return nil - }) - err = wg.Wait() - if err != nil { - return err - } - - parentIsInPreviousEpoch := CalcEpochForSlot(uint64(block.Meta.Parent_slot)) != CalcEpochForSlot(slot) - - length := blockOffset - parentOffset - // cap the length to 1GB - GiB := uint64(1024 * 1024 * 1024) - if length > GiB { - length = GiB - } - carSection, err := ser.ReadAtFromCar(ctx, parentOffset, length) - if err != nil { - return err - } - dr := bytes.NewReader(carSection) - - br := bufio.NewReader(dr) - - gotCid, data, err := util.ReadNode(br) - if err != nil { - return err - } - if !parentIsInPreviousEpoch && !gotCid.Equals(parentCid) { - return fmt.Errorf("CID mismatch: expected %s, got %s", parentCid, gotCid) - } - ser.putNodeInCache(gotCid, data) - - for { - gotCid, data, err = util.ReadNode(br) - if err != nil { - if errors.Is(err, io.EOF) { - break - } - return err - } - if gotCid.Equals(blockCid) { - break - } - ser.putNodeInCache(gotCid, data) - } - } - return nil - } - if ser.lassieFetcher == nil { - err := prefetcherFromCar() - if err != nil { - klog.Errorf("failed to prefetch from car: %v", err) - } - } - } - blocktime := uint64(block.Meta.Blocktime) - - allTransactionNodes := make([]*ipldbindcode.Transaction, 0) - mu := &sync.Mutex{} - var lastEntryHash solana.Hash - { - wg := new(errgroup.Group) - wg.SetLimit(runtime.NumCPU() * 2) - // get entries from the block - for entryIndex, entry := range block.Entries { - entryIndex := entryIndex - entryCid := entry.(cidlink.Link).Cid - wg.Go(func() error { - // get the entry by CID - entryNode, err := ser.GetEntryByCid(ctx, entryCid) - if err != nil { - klog.Errorf("failed to decode Entry: %v", err) - return err - } - - if entryIndex == len(block.Entries)-1 { - lastEntryHash = solana.HashFromBytes(entryNode.Hash) - } - - twg := new(errgroup.Group) - twg.SetLimit(runtime.NumCPU()) - // get the transactions from the entry - for txI := range entryNode.Transactions { - txI := txI - tx := entryNode.Transactions[txI] - twg.Go(func() error { - // get the transaction by CID - tcid := tx.(cidlink.Link).Cid - txNode, err := ser.GetTransactionByCid(ctx, tcid) - if err != nil { - klog.Errorf("failed to decode Transaction %s: %v", tcid, err) - return nil - } - // NOTE: this messes up the order of transactions, - // but we sort them later anyway. 
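// The append-under-mutex pattern above loses ordering, which is why the code
// sorts by Position afterwards. A hedged alternative sketch: pre-size the
// result slice and let each goroutine write only its own index, preserving
// order with no lock and no final sort (illustrative names; runtime and
// errgroup are already imported in this file):

func fetchAllOrdered(n int, fetch func(i int) (string, error)) ([]string, error) {
	out := make([]string, n)
	wg := new(errgroup.Group)
	wg.SetLimit(runtime.NumCPU() * 2)
	for i := 0; i < n; i++ {
		i := i // per-iteration copy (required before Go 1.22 loop semantics)
		wg.Go(func() error {
			v, err := fetch(i)
			if err != nil {
				return err
			}
			out[i] = v // each goroutine owns exactly one slot, so no data race
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		return nil, err
	}
	return out, nil
}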
- mu.Lock() - allTransactionNodes = append(allTransactionNodes, txNode) - mu.Unlock() - return nil - }) - } - return twg.Wait() - }) - } - err = wg.Wait() - if err != nil { - klog.Errorf("failed to get entries: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - } - tim.time("get entries") - - var allTransactions []GetTransactionResponse - var rewards any - hasRewards := !block.Rewards.(cidlink.Link).Cid.Equals(DummyCID) - if hasRewards { - rewardsNode, err := ser.GetRewardsByCid(ctx, block.Rewards.(cidlink.Link).Cid) - if err != nil { - klog.Errorf("failed to decode Rewards: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - rewardsBuf, err := loadDataFromDataFrames(&rewardsNode.Data, ser.GetDataFrameByCid) - if err != nil { - klog.Errorf("failed to load Rewards dataFrames: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - uncompressedRewards, err := decompressZstd(rewardsBuf) - if err != nil { - klog.Errorf("failed to decompress Rewards: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - // try decoding as protobuf - actualRewards, err := solanablockrewards.ParseRewards(uncompressedRewards) - if err != nil { - // TODO: add support for legacy rewards format - fmt.Println("Rewards are not protobuf: " + err.Error()) - } else { - { - // encode rewards as JSON, then decode it as a map - buf, err := json.Marshal(actualRewards) - if err != nil { - klog.Errorf("failed to encode rewards: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - var m map[string]any - err = json.Unmarshal(buf, &m) - if err != nil { - klog.Errorf("failed to decode rewards: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - if _, ok := m["rewards"]; ok { - // iter over rewards as an array of maps, and add a "commission" field to each = nil - rewardsAsArray := m["rewards"].([]any) - for _, reward := range rewardsAsArray { - rewardAsMap := reward.(map[string]any) - rewardAsMap["commission"] = nil - - // if it has a post_balance field, convert it to postBalance - if _, ok := rewardAsMap["post_balance"]; ok { - rewardAsMap["postBalance"] = rewardAsMap["post_balance"] - delete(rewardAsMap, "post_balance") - } - // if it has a reward_type field, convert it to rewardType - if _, ok := rewardAsMap["reward_type"]; ok { - rewardAsMap["rewardType"] = rewardAsMap["reward_type"] - delete(rewardAsMap, "reward_type") - - // if it's a float, convert to int and use rentTypeToString - if asFloat, ok := rewardAsMap["rewardType"].(float64); ok { - rewardAsMap["rewardType"] = rewardTypeToString(int(asFloat)) - } - } - } - rewards = m["rewards"] - } else { - klog.Errorf("did not find rewards field in rewards") - } - } - } - } - tim.time("get rewards") - { - for _, transactionNode := range allTransactionNodes { - var txResp GetTransactionResponse - - // response.Slot = uint64(transactionNode.Slot) - // if blocktime != 0 { - // response.Blocktime = &blocktime - // } - - { - pos, ok := transactionNode.GetPositionIndex() - if ok { - 
txResp.Position = uint64(pos) - } - tx, meta, err := parseTransactionAndMetaFromNode(transactionNode, ser.GetDataFrameByCid) - if err != nil { - klog.Errorf("failed to decode transaction: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - txResp.Signatures = tx.Signatures - if tx.Message.IsVersioned() { - txResp.Version = tx.Message.GetVersion() - 1 - } else { - txResp.Version = "legacy" - } - txResp.Meta = meta - - b64Tx, err := tx.ToBase64() - if err != nil { - klog.Errorf("failed to encode transaction: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - txResp.Transaction = []any{b64Tx, "base64"} - } - - allTransactions = append(allTransactions, txResp) - } - } - sort.Slice(allTransactions, func(i, j int) bool { - return allTransactions[i].Position < allTransactions[j].Position - }) - tim.time("get transactions") - var blockResp GetBlockResponse - blockResp.Transactions = allTransactions - blockResp.BlockTime = &blocktime - blockResp.Blockhash = lastEntryHash.String() - blockResp.ParentSlot = uint64(block.Meta.Parent_slot) - blockResp.Rewards = rewards - - { - blockHeight, ok := block.GetBlockHeight() - if ok { - blockResp.BlockHeight = &blockHeight - } - } - { - // get parent slot - parentSlot := uint64(block.Meta.Parent_slot) - if parentSlot != 0 { - parentBlock, err := ser.GetBlock(WithSubrapghPrefetch(ctx, false), parentSlot) - if err != nil { - klog.Errorf("failed to decode block: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - if len(parentBlock.Entries) > 0 { - lastEntryCidOfParent := parentBlock.Entries[len(parentBlock.Entries)-1] - parentEntryNode, err := ser.GetEntryByCid(ctx, lastEntryCidOfParent.(cidlink.Link).Cid) - if err != nil { - klog.Errorf("failed to decode Entry: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - parentEntryHash := solana.HashFromBytes(parentEntryNode.Hash).String() - blockResp.PreviousBlockhash = &parentEntryHash - } - } - } - tim.time("get parent block") - - err = conn.Reply( - ctx, - req.ID, - blockResp, - func(m map[string]any) map[string]any { - transactions, ok := m["transactions"].([]any) - if !ok { - return m - } - for i := range transactions { - transaction, ok := transactions[i].(map[string]any) - if !ok { - continue - } - transactions[i] = adaptTransactionMetaToExpectedOutput(transaction) - } - - return m - }, - ) - tim.time("reply") - if err != nil { - klog.Errorf("failed to reply: %v", err) - } -} - // pub enum RewardType { // Fee, // Rent, diff --git a/cmd-rpc-server-car-getSignaturesForAddress.go b/cmd-rpc-server-car-getSignaturesForAddress.go index 0d1f2e74..b999c633 100644 --- a/cmd-rpc-server-car-getSignaturesForAddress.go +++ b/cmd-rpc-server-car-getSignaturesForAddress.go @@ -1,22 +1,12 @@ package main import ( - "context" "encoding/base64" "encoding/json" "fmt" - "runtime" - "sync" bin "github.com/gagliardetto/binary" "github.com/gagliardetto/solana-go" - "github.com/rpcpool/yellowstone-faithful/gsfa/offsetstore" - metalatest "github.com/rpcpool/yellowstone-faithful/parse_legacy_transaction_status_meta/v-latest" - metaoldest "github.com/rpcpool/yellowstone-faithful/parse_legacy_transaction_status_meta/v-oldest" - 
"github.com/rpcpool/yellowstone-faithful/third_party/solana_proto/confirmed_block" - "github.com/sourcegraph/jsonrpc2" - "golang.org/x/sync/errgroup" - "k8s.io/klog/v2" ) type GetSignaturesForAddressParams struct { @@ -83,156 +73,6 @@ func parseGetSignaturesForAddressParams(raw *json.RawMessage) (*GetSignaturesFor return out, nil } -func (ser *deprecatedRPCServer) handleGetSignaturesForAddress(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) { - if ser.gsfaReader == nil { - klog.Errorf("gsfaReader is nil") - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "getSignaturesForAddress method is not enabled", - }) - return - } - signaturesOnly := ser.options.GsfaOnlySignatures - - params, err := parseGetSignaturesForAddressParams(req.Params) - if err != nil { - klog.Errorf("failed to parse params: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInvalidParams, - Message: "Invalid params", - }) - return - } - pk := params.Address - limit := params.Limit - - sigs, err := ser.gsfaReader.GetBeforeUntil( - ctx, - pk, - limit, - params.Before, - params.Until, - ) - if err != nil { - if offsetstore.IsNotFound(err) { - klog.Infof("No signatures found for address: %s", pk) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Not found", - }) - return - } - } - - var blockTimeCache struct { - m map[uint64]uint64 - mu sync.Mutex - } - blockTimeCache.m = make(map[uint64]uint64) - getBlockTime := func(slot uint64) uint64 { - blockTimeCache.mu.Lock() - defer blockTimeCache.mu.Unlock() - if blockTime, ok := blockTimeCache.m[slot]; ok { - return blockTime - } - block, err := ser.GetBlock(ctx, slot) - if err != nil { - klog.Errorf("failed to get block time for slot %d: %v", slot, err) - return 0 - } - blockTimeCache.m[slot] = uint64(block.Meta.Blocktime) - return uint64(block.Meta.Blocktime) - } - - wg := new(errgroup.Group) - wg.SetLimit(runtime.NumCPU() * 2) - // The response is an array of objects: [{signature: string}] - response := make([]map[string]any, len(sigs)) - for i := range sigs { - ii := i - sig := sigs[ii] - wg.Go(func() error { - response[ii] = map[string]any{ - "signature": sig.String(), - } - if signaturesOnly { - return nil - } - transactionNode, err := ser.GetTransaction(ctx, sig) - if err != nil { - klog.Errorf("failed to get tx %s: %v", sig, err) - return nil - } - if transactionNode != nil { - { - tx, meta, err := parseTransactionAndMetaFromNode(transactionNode, ser.GetDataFrameByCid) - if err == nil { - switch metaValue := meta.(type) { - case *confirmed_block.TransactionStatusMeta: - response[ii]["err"] = metaValue.Err - case *metalatest.TransactionStatusMeta: - response[ii]["err"] = metaValue.Status - case *metaoldest.TransactionStatusMeta: - response[ii]["err"] = metaValue.Status - } - - if _, ok := response[ii]["err"]; ok { - response[ii]["err"], _ = parseTransactionError(response[ii]["err"]) - } - - memoData := getMemoInstructionDataFromTransaction(&tx) - if memoData != nil { - response[ii]["memo"] = string(memoData) - } - } - - if _, ok := response[ii]["memo"]; !ok { - response[ii]["memo"] = nil - } - if _, ok := response[ii]["err"]; !ok { - response[ii]["err"] = nil - } - } - slot := uint64(transactionNode.Slot) - response[ii]["slot"] = slot - response[ii]["blockTime"] = getBlockTime(slot) - response[ii]["confirmationStatus"] = "finalized" - } - return nil - }) - } - if err := wg.Wait(); err != nil { - 
klog.Errorf("failed to get txs: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - // reply with the data - err = conn.ReplyRaw( - ctx, - req.ID, - response, - ) - if err != nil { - klog.Errorf("failed to reply: %v", err) - } -} - func getMemoInstructionDataFromTransaction(tx *solana.Transaction) []byte { for _, instruction := range tx.Message.Instructions { prog, err := tx.ResolveProgramIDIndex(instruction.ProgramIDIndex) diff --git a/cmd-rpc-server-car-getTransaction.go b/cmd-rpc-server-car-getTransaction.go index 795d7a83..06ab7d0f 100644 --- a/cmd-rpc-server-car-getTransaction.go +++ b/cmd-rpc-server-car-getTransaction.go @@ -1,124 +1 @@ package main - -import ( - "context" - "errors" - - "github.com/rpcpool/yellowstone-faithful/compactindexsized" - "github.com/sourcegraph/jsonrpc2" - "k8s.io/klog/v2" -) - -func (ser *deprecatedRPCServer) handleGetTransaction(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) { - params, err := parseGetTransactionRequest(req.Params) - if err != nil { - klog.Errorf("failed to parse params: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInvalidParams, - Message: "Invalid params", - }) - return - } - - sig := params.Signature - - transactionNode, err := ser.GetTransaction(WithSubrapghPrefetch(ctx, true), sig) - if err != nil { - if errors.Is(err, compactindexsized.ErrNotFound) { - conn.ReplyRaw( - ctx, - req.ID, - nil, // NOTE: solana just returns null here in case of transaction not found - ) - return - } - klog.Errorf("failed to get Transaction: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - var response GetTransactionResponse - - response.Slot = ptrToUint64(uint64(transactionNode.Slot)) - { - block, err := ser.GetBlock(ctx, uint64(transactionNode.Slot)) - if err != nil { - klog.Errorf("failed to decode block: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - blocktime := uint64(block.Meta.Blocktime) - if blocktime != 0 { - response.Blocktime = &blocktime - } - } - - { - pos, ok := transactionNode.GetPositionIndex() - if ok { - response.Position = uint64(pos) - } - tx, meta, err := parseTransactionAndMetaFromNode(transactionNode, ser.GetDataFrameByCid) - if err != nil { - klog.Errorf("failed to decode transaction: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - response.Signatures = tx.Signatures - if tx.Message.IsVersioned() { - response.Version = tx.Message.GetVersion() - 1 - } else { - response.Version = "legacy" - } - response.Meta = meta - - b64Tx, err := tx.ToBase64() - if err != nil { - klog.Errorf("failed to encode transaction: %v", err) - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Internal error", - }) - return - } - - response.Transaction = []any{b64Tx, "base64"} - } - - // reply with the data - err = conn.Reply( - ctx, - req.ID, - response, - func(m map[string]any) map[string]any { - return adaptTransactionMetaToExpectedOutput(m) - }, - ) - if err != nil { - klog.Errorf("failed to reply: %v", err) - } -} diff --git a/cmd-rpc-server-car.go b/cmd-rpc-server-car.go index 61c49389..32752ace 100644 --- 
a/cmd-rpc-server-car.go +++ b/cmd-rpc-server-car.go @@ -1,502 +1,14 @@ package main import ( - "bufio" - "context" "fmt" - "io" - "time" - - "github.com/gagliardetto/solana-go" - "github.com/ipfs/go-cid" - "github.com/ipld/go-car/util" - carv2 "github.com/ipld/go-car/v2" - "github.com/patrickmn/go-cache" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" - "github.com/rpcpool/yellowstone-faithful/gsfa" - "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" - "github.com/rpcpool/yellowstone-faithful/iplddecoders" - "github.com/sourcegraph/jsonrpc2" - "github.com/urfave/cli/v2" - "github.com/valyala/fasthttp" - "k8s.io/klog/v2" ) -func newCmd_rpcServerCar() *cli.Command { - var listenOn string - var gsfaOnlySignatures bool - return &cli.Command{ - Name: "rpc-server-car", - Description: "Start a Solana JSON RPC that exposes getTransaction and getBlock", - ArgsUsage: " ", - Before: func(c *cli.Context) error { - return nil - }, - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "listen", - Usage: "Listen address", - Value: ":8899", - Destination: &listenOn, - }, - &cli.BoolFlag{ - Name: "gsfa-only-signatures", - Usage: "gSFA: only return signatures", - Value: false, - Destination: &gsfaOnlySignatures, - }, - }, - Action: func(c *cli.Context) error { - carFilepath := c.Args().Get(0) - if carFilepath == "" { - return cli.Exit("Must provide a CAR filepath", 1) - } - cidToOffsetIndexFilepath := c.Args().Get(1) - if cidToOffsetIndexFilepath == "" { - return cli.Exit("Must provide a CID-to-offset index filepath/url", 1) - } - slotToCidIndexFilepath := c.Args().Get(2) - if slotToCidIndexFilepath == "" { - return cli.Exit("Must provide a slot-to-CID index filepath/url", 1) - } - sigToCidIndexFilepath := c.Args().Get(3) - if sigToCidIndexFilepath == "" { - return cli.Exit("Must provide a signature-to-CID index filepath/url", 1) - } - - cidToOffsetIndexFile, err := openIndexStorage( - c.Context, - cidToOffsetIndexFilepath, - DebugMode, - ) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer cidToOffsetIndexFile.Close() - - cidToOffsetIndex, err := compactindexsized.Open(cidToOffsetIndexFile) - if err != nil { - return fmt.Errorf("failed to open index: %w", err) - } - - slotToCidIndexFile, err := openIndexStorage( - c.Context, - slotToCidIndexFilepath, - DebugMode, - ) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer slotToCidIndexFile.Close() - - slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) - if err != nil { - return fmt.Errorf("failed to open index: %w", err) - } - - sigToCidIndexFile, err := openIndexStorage( - c.Context, - sigToCidIndexFilepath, - DebugMode, - ) - if err != nil { - return fmt.Errorf("failed to open index file: %w", err) - } - defer sigToCidIndexFile.Close() - - sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) - if err != nil { - return fmt.Errorf("failed to open index: %w", err) - } - - localCarReader, remoteCarReader, err := openCarStorage(c.Context, carFilepath) - if err != nil { - return fmt.Errorf("failed to open CAR file: %w", err) - } - - var gsfaIndex *gsfa.GsfaReader - gsfaIndexDir := c.Args().Get(4) - if gsfaIndexDir != "" { - gsfaIndex, err = gsfa.NewGsfaReader(gsfaIndexDir) - if err != nil { - return fmt.Errorf("failed to open gsfa index: %w", err) - } - defer gsfaIndex.Close() - } - - options := &RpcServerOptions{ - ListenOn: listenOn, - GsfaOnlySignatures: gsfaOnlySignatures, - } - - return createAndStartRPCServer_withCar( - c.Context, - options, 
- localCarReader, - remoteCarReader, - cidToOffsetIndex, - slotToCidIndex, - sigToCidIndex, - gsfaIndex, - ) - }, - } -} - -// createAndStartRPCServer_withCar creates and starts a JSON RPC server. -// Data: -// - Nodes: the node data is read from a CAR file (which can be a local file or a remote URL). -// - Indexes: the indexes are read from files (which can be a local file or a remote URL). -// -// The server is backed by a CAR file (meaning that it can only serve the content of the CAR file). -// It blocks until the server is stopped. -// It returns an error if the server fails to start or stops unexpectedly. -// It returns nil if the server is stopped gracefully. -func createAndStartRPCServer_withCar( - ctx context.Context, - options *RpcServerOptions, - carReader *carv2.Reader, - remoteCarReader ReaderAtCloser, - cidToOffsetIndex *compactindexsized.DB, - slotToCidIndex *compactindexsized.DB, - sigToCidIndex *compactindexsized.DB, - gsfaReader *gsfa.GsfaReader, -) error { - if options == nil { - panic("options cannot be nil") - } - listenOn := options.ListenOn - ca := cache.New(30*time.Second, 1*time.Minute) - handler := &deprecatedRPCServer{ - localCarReader: carReader, - remoteCarReader: remoteCarReader, - cidToOffsetIndex: cidToOffsetIndex, - slotToCidIndex: slotToCidIndex, - sigToCidIndex: sigToCidIndex, - gsfaReader: gsfaReader, - cidToBlockCache: ca, - options: options, - } - - h := newRPCHandler_fast(handler) - h = fasthttp.CompressHandler(h) - - klog.Infof("RPC server listening on %s", listenOn) - return fasthttp.ListenAndServe(listenOn, h) -} - -func createAndStartRPCServer_lassie( - ctx context.Context, - options *RpcServerOptions, - lassieWr *lassieWrapper, - slotToCidIndex *compactindexsized.DB, - sigToCidIndex *compactindexsized.DB, - gsfaReader *gsfa.GsfaReader, -) error { - if options == nil { - panic("options cannot be nil") - } - listenOn := options.ListenOn - ca := cache.New(30*time.Second, 1*time.Minute) - handler := &deprecatedRPCServer{ - lassieFetcher: lassieWr, - slotToCidIndex: slotToCidIndex, - sigToCidIndex: sigToCidIndex, - gsfaReader: gsfaReader, - cidToBlockCache: ca, - options: options, - } - - h := newRPCHandler_fast(handler) - h = fasthttp.CompressHandler(h) - - klog.Infof("RPC server listening on %s", listenOn) - return fasthttp.ListenAndServe(listenOn, h) -} - type RpcServerOptions struct { ListenOn string GsfaOnlySignatures bool } -type deprecatedRPCServer struct { - lassieFetcher *lassieWrapper - localCarReader *carv2.Reader - remoteCarReader ReaderAtCloser - cidToOffsetIndex *compactindexsized.DB - slotToCidIndex *compactindexsized.DB - sigToCidIndex *compactindexsized.DB - gsfaReader *gsfa.GsfaReader - cidToBlockCache *cache.Cache // TODO: prevent OOM - options *RpcServerOptions -} - func getCidCacheKey(off int64, p []byte) string { return fmt.Sprintf("%d-%d", off, len(p)) } - -func (r *deprecatedRPCServer) getNodeFromCache(c cid.Cid) (v []byte, err error, has bool) { - if v, ok := r.cidToBlockCache.Get(c.String()); ok { - return v.([]byte), nil, true - } - return nil, nil, false -} - -func (r *deprecatedRPCServer) putNodeInCache(c cid.Cid, data []byte) { - r.cidToBlockCache.Set(c.String(), data, cache.DefaultExpiration) -} - -func (s *deprecatedRPCServer) prefetchSubgraph(ctx context.Context, wantedCid cid.Cid) error { - if s.lassieFetcher != nil { - // Fetch the subgraph from lassie - sub, err := s.lassieFetcher.GetSubgraph(ctx, wantedCid) - if err == nil { - // put in cache - return sub.Each(ctx, func(c cid.Cid, data []byte) error { - 
s.putNodeInCache(c, data) - return nil - }) - } - klog.Errorf("failed to get subgraph from lassie: %v", err) - return err - } - return nil -} - -func (s *deprecatedRPCServer) GetNodeByCid(ctx context.Context, wantedCid cid.Cid) ([]byte, error) { - { - // try from cache - data, err, has := s.getNodeFromCache(wantedCid) - if err != nil { - return nil, err - } - if has { - return data, nil - } - } - if s.lassieFetcher != nil { - // Fetch the node from lassie. - data, err := s.lassieFetcher.GetNodeByCid(ctx, wantedCid) - if err == nil { - // put in cache - s.putNodeInCache(wantedCid, data) - return data, nil - } - klog.Errorf("failed to get node from lassie: %v", err) - return nil, err - } - // Find CAR file offset for CID in index. - offset, err := s.FindOffsetFromCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find offset for CID %s: %v", wantedCid, err) - // not found or error - return nil, err - } - return s.GetNodeByOffset(ctx, wantedCid, offset) -} - -func (s *deprecatedRPCServer) ReadAtFromCar(ctx context.Context, offset uint64, length uint64) ([]byte, error) { - if s.localCarReader == nil { - // try remote reader - if s.remoteCarReader == nil { - return nil, fmt.Errorf("no CAR reader available") - } - return readSectionFromReaderAt(s.remoteCarReader, offset, length) - } - // Get reader and seek to offset, then read node. - dr, err := s.localCarReader.DataReader() - if err != nil { - klog.Errorf("failed to get data reader: %v", err) - return nil, err - } - dr.Seek(int64(offset), io.SeekStart) - data := make([]byte, length) - _, err = io.ReadFull(dr, data) - if err != nil { - klog.Errorf("failed to read node: %v", err) - return nil, err - } - return data, nil -} - -func (s *deprecatedRPCServer) GetNodeByOffset(ctx context.Context, wantedCid cid.Cid, offset uint64) ([]byte, error) { - if s.localCarReader == nil { - // try remote reader - if s.remoteCarReader == nil { - return nil, fmt.Errorf("no CAR reader available") - } - return readNodeFromReaderAt(s.remoteCarReader, wantedCid, offset) - } - // Get reader and seek to offset, then read node. - dr, err := s.localCarReader.DataReader() - if err != nil { - klog.Errorf("failed to get data reader: %v", err) - return nil, err - } - dr.Seek(int64(offset), io.SeekStart) - br := bufio.NewReader(dr) - - gotCid, data, err := util.ReadNode(br) - if err != nil { - klog.Errorf("failed to read node: %v", err) - return nil, err - } - // verify that the CID we read matches the one we expected. 
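// Two notes on the read path above. First, the CID comparison that follows
// guards against a stale or corrupted index: the offset came from a lookup, so
// the node actually found there must carry the CID we asked for. Second, both
// ReadAtFromCar and GetNodeByOffset fall back to a remote reader; the minimal
// interface that a local *os.File and an HTTP range-request client can both
// satisfy is io.ReaderAt. A hedged sketch of that fallback (illustrative
// helper, not the repo's readSectionFromReaderAt):

func readSectionAt(r io.ReaderAt, offset, length uint64) ([]byte, error) {
	buf := make([]byte, length)
	if _, err := r.ReadAt(buf, int64(offset)); err != nil {
		return nil, err
	}
	return buf, nil
}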
- if !gotCid.Equals(wantedCid) { - klog.Errorf("CID mismatch: expected %s, got %s", wantedCid, gotCid) - return nil, fmt.Errorf("CID mismatch: expected %s, got %s", wantedCid, gotCid) - } - return data, nil -} - -func (ser *deprecatedRPCServer) FindCidFromSlot(ctx context.Context, slot uint64) (cid.Cid, error) { - return findCidFromSlot(ser.slotToCidIndex, slot) -} - -func (ser *deprecatedRPCServer) FindCidFromSignature(ctx context.Context, sig solana.Signature) (cid.Cid, error) { - return findCidFromSignature(ser.sigToCidIndex, sig) -} - -func (ser *deprecatedRPCServer) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (uint64, error) { - return findOffsetFromCid(ser.cidToOffsetIndex, cid) -} - -func (ser *deprecatedRPCServer) GetBlock(ctx context.Context, slot uint64) (*ipldbindcode.Block, error) { - // get the slot by slot number - wantedCid, err := ser.FindCidFromSlot(ctx, slot) - if err != nil { - klog.Errorf("failed to find CID for slot %d: %v", slot, err) - return nil, err - } - klog.Infof("found CID for slot %d: %s", slot, wantedCid) - { - doPrefetch := getValueFromContext(ctx, "prefetch") - if doPrefetch != nil && doPrefetch.(bool) { - // prefetch the block - ser.prefetchSubgraph(ctx, wantedCid) - } - } - // get the block by CID - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find node by cid: %v", err) - return nil, err - } - // try parsing the data as a Block node. - decoded, err := iplddecoders.DecodeBlock(data) - if err != nil { - klog.Errorf("failed to decode block: %v", err) - return nil, err - } - return decoded, nil -} - -func (ser *deprecatedRPCServer) GetEntryByCid(ctx context.Context, wantedCid cid.Cid) (*ipldbindcode.Entry, error) { - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find node by cid: %v", err) - return nil, err - } - // try parsing the data as an Entry node. - decoded, err := iplddecoders.DecodeEntry(data) - if err != nil { - klog.Errorf("failed to decode entry: %v", err) - return nil, err - } - return decoded, nil -} - -func (ser *deprecatedRPCServer) GetTransactionByCid(ctx context.Context, wantedCid cid.Cid) (*ipldbindcode.Transaction, error) { - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find node by cid: %v", err) - return nil, err - } - // try parsing the data as a Transaction node. - decoded, err := iplddecoders.DecodeTransaction(data) - if err != nil { - klog.Errorf("failed to decode transaction: %v", err) - return nil, err - } - return decoded, nil -} - -func (ser *deprecatedRPCServer) GetDataFrameByCid(ctx context.Context, wantedCid cid.Cid) (*ipldbindcode.DataFrame, error) { - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find node by cid: %v", err) - return nil, err - } - // try parsing the data as a DataFrame node. - decoded, err := iplddecoders.DecodeDataFrame(data) - if err != nil { - klog.Errorf("failed to decode data frame: %v", err) - return nil, err - } - return decoded, nil -} - -func (ser *deprecatedRPCServer) GetRewardsByCid(ctx context.Context, wantedCid cid.Cid) (*ipldbindcode.Rewards, error) { - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to find node by cid: %v", err) - return nil, err - } - // try parsing the data as a Rewards node. 
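// GetEntryByCid, GetTransactionByCid, GetDataFrameByCid, and GetRewardsByCid
// (completed just below) all share one shape: fetch raw bytes by CID, then
// decode them into a typed IPLD node. A hedged generic sketch of that shape
// (hypothetical helper; the repo deliberately spells each getter out):

func getDecoded[T any](
	ctx context.Context,
	getNode func(context.Context, cid.Cid) ([]byte, error),
	decode func([]byte) (T, error),
	c cid.Cid,
) (T, error) {
	data, err := getNode(ctx, c)
	if err != nil {
		var zero T
		return zero, err
	}
	return decode(data)
}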
- decoded, err := iplddecoders.DecodeRewards(data) - if err != nil { - klog.Errorf("failed to decode rewards: %v", err) - return nil, err - } - return decoded, nil -} - -func (ser *deprecatedRPCServer) GetTransaction(ctx context.Context, sig solana.Signature) (*ipldbindcode.Transaction, error) { - // get the CID by signature - wantedCid, err := ser.FindCidFromSignature(ctx, sig) - if err != nil { - klog.Errorf("failed to find CID for signature %s: %v", sig, err) - return nil, err - } - klog.Infof("found CID for signature %s: %s", sig, wantedCid) - { - doPrefetch := getValueFromContext(ctx, "prefetch") - if doPrefetch != nil && doPrefetch.(bool) { - // prefetch the block - ser.prefetchSubgraph(ctx, wantedCid) - } - } - // get the transaction by CID - data, err := ser.GetNodeByCid(ctx, wantedCid) - if err != nil { - klog.Errorf("failed to get node by cid: %v", err) - return nil, err - } - // try parsing the data as a Transaction node. - decoded, err := iplddecoders.DecodeTransaction(data) - if err != nil { - klog.Errorf("failed to decode transaction: %v", err) - return nil, err - } - return decoded, nil -} - -// jsonrpc2.RequestHandler interface -func (ser *deprecatedRPCServer) Handle(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) { - switch req.Method { - case "getBlock": - ser.handleGetBlock(ctx, conn, req) - case "getTransaction": - ser.handleGetTransaction(ctx, conn, req) - case "getSignaturesForAddress": - ser.handleGetSignaturesForAddress(ctx, conn, req) - default: - conn.ReplyWithError( - ctx, - req.ID, - &jsonrpc2.Error{ - Code: jsonrpc2.CodeMethodNotFound, - Message: "Method not found", - }) - } -} diff --git a/cmd-rpc-server-filecoin.go b/cmd-rpc-server-filecoin.go index 44c3fb97..6d421534 100644 --- a/cmd-rpc-server-filecoin.go +++ b/cmd-rpc-server-filecoin.go @@ -3,115 +3,9 @@ package main import ( "fmt" - "github.com/davecgh/go-spew/spew" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" - "github.com/rpcpool/yellowstone-faithful/gsfa" "github.com/urfave/cli/v2" ) -func newCmd_rpcServerFilecoin() *cli.Command { - var listenOn string - var gsfaOnlySignatures bool - return &cli.Command{ - Name: "rpc-server-filecoin", - Description: "Start a Solana JSON RPC that exposes getTransaction and getBlock", - ArgsUsage: " ", - Before: func(c *cli.Context) error { - return nil - }, - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "listen", - Usage: "Listen address", - Value: ":8899", - Destination: &listenOn, - }, - &cli.StringFlag{ - Name: "config", - Usage: "Load config from file instead of arguments", - Value: "", - }, - &cli.BoolFlag{ - Name: "gsfa-only-signatures", - Usage: "gSFA: only return signatures", - Value: false, - Destination: &gsfaOnlySignatures, - }, - }, - Action: func(c *cli.Context) error { - config, err := rpcServerFilecoinLoadConfig(c) - if err != nil { - return fmt.Errorf("failed to load config: %w", err) - } - spew.Dump(config) - if config.Indexes.SlotToCid == "" { - return cli.Exit("Must provide a slot-to-CID index filepath/url", 1) - } - if config.Indexes.SigToCid == "" { - return cli.Exit("Must provide a signature-to-CID index filepath/url", 1) - } - - slotToCidIndexFile, err := openIndexStorage( - c.Context, - config.Indexes.SlotToCid, - DebugMode, - ) - if err != nil { - return fmt.Errorf("failed to open slot-to-cid index file: %w", err) - } - defer slotToCidIndexFile.Close() - - slotToCidIndex, err := compactindexsized.Open(slotToCidIndexFile) - if err != nil { - return fmt.Errorf("failed to open slot-to-cid index: %w", err) - } - - 
sigToCidIndexFile, err := openIndexStorage( - c.Context, - config.Indexes.SigToCid, - DebugMode, - ) - if err != nil { - return fmt.Errorf("failed to open sig-to-cid index file: %w", err) - } - defer sigToCidIndexFile.Close() - - sigToCidIndex, err := compactindexsized.Open(sigToCidIndexFile) - if err != nil { - return fmt.Errorf("failed to open sig-to-cid index: %w", err) - } - - ls, err := newLassieWrapper(c, globalFetchProviderAddrInfos) - if err != nil { - return fmt.Errorf("newLassieWrapper: %w", err) - } - - var gsfaIndex *gsfa.GsfaReader - if config.Indexes.Gsfa != "" { - gsfaIndex, err = gsfa.NewGsfaReader(config.Indexes.Gsfa) - if err != nil { - return fmt.Errorf("failed to open gsfa index: %w", err) - } - defer gsfaIndex.Close() - } - - options := &RpcServerOptions{ - ListenOn: listenOn, - GsfaOnlySignatures: gsfaOnlySignatures, - } - - return createAndStartRPCServer_lassie( - c.Context, - options, - ls, - slotToCidIndex, - sigToCidIndex, - gsfaIndex, - ) - }, - } -} - func rpcServerFilecoinLoadConfig(c *cli.Context) (*RpcServerFilecoinConfig, error) { // Either load from config file or from args: cfg := &RpcServerFilecoinConfig{} diff --git a/http-handler.go b/http-handler.go index 81bf5feb..aa7719fa 100644 --- a/http-handler.go +++ b/http-handler.go @@ -1,70 +1,11 @@ package main import ( - "encoding/json" - "net/http" - "strings" - "time" - jsoniter "github.com/json-iterator/go" - "github.com/sourcegraph/jsonrpc2" "github.com/valyala/fasthttp" "k8s.io/klog/v2" ) -func newRPCHandler_fast(handler *deprecatedRPCServer) func(ctx *fasthttp.RequestCtx) { - return func(c *fasthttp.RequestCtx) { - startedAt := time.Now() - defer func() { - klog.Infof("request took %s", time.Since(startedAt)) - }() - { - // make sure the method is POST - if !c.IsPost() { - replyJSON(c, http.StatusMethodNotAllowed, jsonrpc2.Response{ - Error: &jsonrpc2.Error{ - Code: jsonrpc2.CodeMethodNotFound, - Message: "Method not allowed", - }, - }) - return - } - - // limit request body size - if c.Request.Header.ContentLength() > 1024 { - replyJSON(c, http.StatusRequestEntityTooLarge, jsonrpc2.Response{ - Error: &jsonrpc2.Error{ - Code: jsonrpc2.CodeInvalidRequest, - Message: "Request entity too large", - }, - }) - return - } - } - // read request body - body := c.Request.Body() - - // parse request - var rpcRequest jsonrpc2.Request - if err := json.Unmarshal(body, &rpcRequest); err != nil { - klog.Errorf("failed to unmarshal request: %v", err) - replyJSON(c, http.StatusBadRequest, jsonrpc2.Response{ - Error: &jsonrpc2.Error{ - Code: jsonrpc2.CodeParseError, - Message: "Parse error", - }, - }) - return - } - - klog.Infof("Received request: %q", strings.TrimSpace(string(body))) - - rqCtx := &requestContext{ctx: c} - - handler.Handle(c, rqCtx, &rpcRequest) - } -} - func replyJSON(ctx *fasthttp.RequestCtx, code int, v interface{}) { ctx.SetContentType("application/json") ctx.SetStatusCode(code) diff --git a/main.go b/main.go index fc16471e..1925f19a 100644 --- a/main.go +++ b/main.go @@ -55,8 +55,6 @@ func main() { newCmd_Index(), newCmd_VerifyIndex(), newCmd_XTraverse(), - newCmd_rpcServerCar(), - newCmd_rpcServerFilecoin(), newCmd_Version(), newCmd_rpc(), }, From 9ad7951a1c382187d8672128cd7c4f262e56582c Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 4 Dec 2023 23:11:49 +0100 Subject: [PATCH 17/63] Remove deprecated --- cmd-rpc-server-car-getBlock.go | 63 ------------------- cmd-rpc-server-car-getTransaction.go | 1 - cmd-rpc-server-car.go | 13 ---- ...orAddress.go => getSignaturesForAddress.go | 0 go.mod | 
2 +- index-cid-to-offset.go | 14 ----- index-sig-to-cid.go | 20 ------ index-slot-to-cid.go | 30 --------- multiepoch-getTransaction.go | 2 +- tools.go | 63 ++++++++++++++++--- 10 files changed, 56 insertions(+), 152 deletions(-) delete mode 100644 cmd-rpc-server-car-getBlock.go delete mode 100644 cmd-rpc-server-car-getTransaction.go rename cmd-rpc-server-car-getSignaturesForAddress.go => getSignaturesForAddress.go (100%) diff --git a/cmd-rpc-server-car-getBlock.go b/cmd-rpc-server-car-getBlock.go deleted file mode 100644 index 21e88d7b..00000000 --- a/cmd-rpc-server-car-getBlock.go +++ /dev/null @@ -1,63 +0,0 @@ -package main - -import ( - "time" - - "k8s.io/klog/v2" -) - -type timer struct { - start time.Time - prev time.Time -} - -func newTimer() *timer { - now := time.Now() - return &timer{ - start: now, - prev: now, - } -} - -func (t *timer) time(name string) { - klog.V(2).Infof("TIMED: %s: %s (overall %s)", name, time.Since(t.prev), time.Since(t.start)) - t.prev = time.Now() -} - -// pub enum RewardType { -// Fee, -// Rent, -// Staking, -// Voting, -// } -func rewardTypeToString(typ int) string { - switch typ { - case 1: - return "Fee" - case 2: - return "Rent" - case 3: - return "Staking" - case 4: - return "Voting" - default: - return "Unknown" - } -} - -func rewardTypeStringToInt(typ string) int { - switch typ { - case "Fee": - return 1 - case "Rent": - return 2 - case "Staking": - return 3 - case "Voting": - return 4 - default: - return 0 - } -} - -const CodeNotFound = -32009 diff --git a/cmd-rpc-server-car-getTransaction.go b/cmd-rpc-server-car-getTransaction.go deleted file mode 100644 index 06ab7d0f..00000000 --- a/cmd-rpc-server-car-getTransaction.go +++ /dev/null @@ -1 +0,0 @@ -package main diff --git a/cmd-rpc-server-car.go b/cmd-rpc-server-car.go index 32752ace..06ab7d0f 100644 --- a/cmd-rpc-server-car.go +++ b/cmd-rpc-server-car.go @@ -1,14 +1 @@ package main - -import ( - "fmt" -) - -type RpcServerOptions struct { - ListenOn string - GsfaOnlySignatures bool -} - -func getCidCacheKey(off int64, p []byte) string { - return fmt.Sprintf("%d-%d", off, len(p)) -} diff --git a/cmd-rpc-server-car-getSignaturesForAddress.go b/getSignaturesForAddress.go similarity index 100% rename from cmd-rpc-server-car-getSignaturesForAddress.go rename to getSignaturesForAddress.go diff --git a/go.mod b/go.mod index bc4906c5..f4f126d0 100644 --- a/go.mod +++ b/go.mod @@ -60,6 +60,7 @@ require ( github.com/fsnotify/fsnotify v1.5.4 github.com/goware/urlx v0.3.2 github.com/ipld/go-car v0.5.0 + github.com/ipld/go-trustless-utils v0.4.1 github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 github.com/mr-tron/base58 v1.2.0 github.com/patrickmn/go-cache v2.1.0+incompatible @@ -135,7 +136,6 @@ require ( github.com/ipfs/go-metrics-interface v0.0.1 // indirect github.com/ipfs/go-peertaskqueue v0.8.1 // indirect github.com/ipfs/go-verifcid v0.0.2 // indirect - github.com/ipld/go-trustless-utils v0.4.1 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect github.com/jbenet/goprocess v0.1.4 // indirect diff --git a/index-cid-to-offset.go b/index-cid-to-offset.go index 85d673c8..776eca95 100644 --- a/index-cid-to-offset.go +++ b/index-cid-to-offset.go @@ -13,11 +13,9 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" - "github.com/ipfs/go-cid" carv1 "github.com/ipld/go-car" "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" 
"github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "k8s.io/klog/v2" @@ -248,15 +246,3 @@ func VerifyIndex_cid2offset(ctx context.Context, carPath string, indexFilePath s } return nil } - -func findOffsetFromCid(db *compactindexsized.DB, c cid.Cid) (uint64, error) { - bucket, err := db.LookupBucket(c.Bytes()) - if err != nil { - return 0, fmt.Errorf("failed to lookup bucket for %s: %w", c, err) - } - offset, err := bucket.Lookup(c.Bytes()) - if err != nil { - return 0, fmt.Errorf("failed to lookup offset for %s: %w", c, err) - } - return btoi(offset), nil -} diff --git a/index-sig-to-cid.go b/index-sig-to-cid.go index f0cb49ac..7bf6eb50 100644 --- a/index-sig-to-cid.go +++ b/index-sig-to-cid.go @@ -13,7 +13,6 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/rpcpool/yellowstone-faithful/bucketteer" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" @@ -291,25 +290,6 @@ func VerifyIndex_sigExists(ctx context.Context, carPath string, indexFilePath st return nil } -func findCidFromSignature(db *compactindexsized.DB, sig solana.Signature) (cid.Cid, error) { - bucket, err := db.LookupBucket(sig[:]) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to lookup bucket for %s: %w", sig, err) - } - got, err := bucket.Lookup(sig[:]) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to lookup value for %s: %w", sig, err) - } - l, c, err := cid.CidFromBytes(got[:]) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to parse cid from bytes: %w", err) - } - if l != 36 { - return cid.Cid{}, fmt.Errorf("unexpected cid length %d", l) - } - return c, nil -} - func readFirstSignature(buf []byte) (solana.Signature, error) { decoder := bin.NewCompactU16Decoder(buf) numSigs, err := decoder.ReadCompactU16() diff --git a/index-slot-to-cid.go b/index-slot-to-cid.go index f5cfc63d..51bb2fc3 100644 --- a/index-slot-to-cid.go +++ b/index-slot-to-cid.go @@ -2,7 +2,6 @@ package main import ( "context" - "encoding/binary" "fmt" "os" "path/filepath" @@ -11,7 +10,6 @@ import ( "github.com/dustin/go-humanize" "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" - "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" @@ -196,31 +194,3 @@ func VerifyIndex_slot2cid(ctx context.Context, carPath string, indexFilePath str } return nil } - -// uint64ToLeBytes converts a uint64 to a little-endian byte slice. -func uint64ToLeBytes(n uint64) []byte { - b := make([]byte, 8) - binary.LittleEndian.PutUint64(b, n) - return b -} - -// findCidFromSlot finds the CID for the given slot number in the given index. 
-func findCidFromSlot(db *compactindexsized.DB, slotNum uint64) (cid.Cid, error) { - slotBytes := uint64ToLeBytes(uint64(slotNum)) - bucket, err := db.LookupBucket(slotBytes) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to lookup bucket for %d: %w", slotNum, err) - } - got, err := bucket.Lookup(slotBytes) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to lookup value for %d: %w", slotNum, err) - } - l, c, err := cid.CidFromBytes(got[:]) - if err != nil { - return cid.Cid{}, fmt.Errorf("failed to parse cid from bytes: %w", err) - } - if l != 36 { - return cid.Cid{}, fmt.Errorf("unexpected cid length %d", l) - } - return c, nil -} diff --git a/multiepoch-getTransaction.go b/multiepoch-getTransaction.go index 5d055a23..64ffd8f0 100644 --- a/multiepoch-getTransaction.go +++ b/multiepoch-getTransaction.go @@ -82,7 +82,7 @@ func (multi *MultiEpoch) findEpochNumberFromSignature(ctx context.Context, sig s } return 0, ErrNotFound - // Search all epochs in parallel: + // TODO: Search all epochs in parallel: wg := NewFirstResponse(ctx, multi.options.EpochSearchConcurrency) for i := range numbers { epochNumber := numbers[i] diff --git a/tools.go b/tools.go index 8bd6c8d4..2ad5d100 100644 --- a/tools.go +++ b/tools.go @@ -1,12 +1,13 @@ package main import ( - "encoding/binary" "encoding/json" "fmt" "os" + "time" "gopkg.in/yaml.v3" + "k8s.io/klog/v2" ) func isDirectory(path string) (bool, error) { @@ -71,14 +72,58 @@ func loadFromYAML(configFilepath string, dst any) error { return yaml.NewDecoder(file).Decode(dst) } -// btoi converts a byte slice of length 8 to a uint64. -func btoi(b []byte) uint64 { - return binary.LittleEndian.Uint64(b) +type timer struct { + start time.Time + prev time.Time } -// itob converts a uint64 to a byte slice of length 8. 
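// The btoi helper above and itob just below (both removed here, alongside
// uint64ToLeBytes in index-slot-to-cid.go) round-trip a uint64 through an
// 8-byte little-endian key. For reference, a hedged standard-library
// equivalent (illustrative names; requires "encoding/binary"):

func slotKey(slot uint64) []byte {
	var b [8]byte
	binary.LittleEndian.PutUint64(b[:], slot)
	return b[:]
}

func slotFromKey(b []byte) uint64 {
	return binary.LittleEndian.Uint64(b) // panics if len(b) < 8
}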
-func itob(v uint64) []byte { - var buf [8]byte - binary.LittleEndian.PutUint64(buf[:], v) - return buf[:] +func newTimer() *timer { + now := time.Now() + return &timer{ + start: now, + prev: now, + } +} + +func (t *timer) time(name string) { + klog.V(2).Infof("TIMED: %s: %s (overall %s)", name, time.Since(t.prev), time.Since(t.start)) + t.prev = time.Now() +} + +// pub enum RewardType { +// Fee, +// Rent, +// Staking, +// Voting, +// } +func rewardTypeToString(typ int) string { + switch typ { + case 1: + return "Fee" + case 2: + return "Rent" + case 3: + return "Staking" + case 4: + return "Voting" + default: + return "Unknown" + } } + +func rewardTypeStringToInt(typ string) int { + switch typ { + case "Fee": + return 1 + case "Rent": + return 2 + case "Staking": + return 3 + case "Voting": + return 4 + default: + return 0 + } +} + +const CodeNotFound = -32009 From 5838ffebd3b35892969329bebb7b2095dd536fde Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 00:10:47 +0100 Subject: [PATCH 18/63] Refactor cache --- cmd-rpc.go | 22 ++++- epoch.go | 126 +++++++++++------------- go.mod | 3 +- go.sum | 6 +- huge-cache/cache.go | 100 +++++++++++++++++++ indexes/index-cid-to-offset-and-size.go | 12 +++ multiepoch-getBlock.go | 10 +- storage.go | 31 +----- 8 files changed, 206 insertions(+), 104 deletions(-) create mode 100644 huge-cache/cache.go diff --git a/cmd-rpc.go b/cmd-rpc.go index 6e46bb54..d15c62a6 100644 --- a/cmd-rpc.go +++ b/cmd-rpc.go @@ -11,8 +11,10 @@ import ( "sync" "time" + "github.com/allegro/bigcache/v3" "github.com/davecgh/go-spew/spew" "github.com/fsnotify/fsnotify" + hugecache "github.com/rpcpool/yellowstone-faithful/huge-cache" "github.com/ryanuber/go-glob" "github.com/urfave/cli/v2" "golang.org/x/sync/errgroup" @@ -28,6 +30,7 @@ func newCmd_rpc() *cli.Command { var pathForProxyForUnknownRpcMethods string var epochSearchConcurrency int var epochLoadConcurrency int + var maxCacheSizeMB int return &cli.Command{ Name: "rpc", Description: "Provide multiple epoch config files, and start a Solana JSON RPC that exposes getTransaction, getBlock, and (optionally) getSignaturesForAddress", @@ -90,6 +93,12 @@ func newCmd_rpc() *cli.Command { Value: runtime.NumCPU(), Destination: &epochLoadConcurrency, }, + &cli.IntFlag{ + Name: "max-cache-size-mb", + Usage: "Maximum size of the cache in MB", + Value: 1024, + Destination: &maxCacheSizeMB, + }, ), Action: func(c *cli.Context) error { src := c.Args().Slice() @@ -106,6 +115,13 @@ func newCmd_rpc() *cli.Command { fmt.Printf(" - %s\n", configFile) } + conf := bigcache.DefaultConfig(2 * time.Minute) + conf.HardMaxCacheSize = maxCacheSizeMB + allCache, err := hugecache.NewWithConfig(c.Context, conf) + if err != nil { + return fmt.Errorf("failed to create cache: %w", err) + } + // Load configs: configs := make(ConfigSlice, 0) for _, configFile := range configFiles { @@ -130,7 +146,7 @@ func newCmd_rpc() *cli.Command { for confIndex := range configs { config := configs[confIndex] wg.Go(func() error { - epoch, err := NewEpochFromConfig(config, c) + epoch, err := NewEpochFromConfig(config, c, allCache) if err != nil { return fmt.Errorf("failed to create epoch from config %q: %s", config.ConfigFilepath(), err.Error()) } @@ -219,7 +235,7 @@ func newCmd_rpc() *cli.Command { klog.Errorf("error loading config file %q: %s", event.Name, err.Error()) return } - epoch, err := NewEpochFromConfig(config, c) + epoch, err := NewEpochFromConfig(config, c, allCache) if err != nil { klog.Errorf("error creating epoch from config file %q: %s", 
event.Name, err.Error()) return @@ -241,7 +257,7 @@ func newCmd_rpc() *cli.Command { klog.Errorf("error loading config file %q: %s", event.Name, err.Error()) return } - epoch, err := NewEpochFromConfig(config, c) + epoch, err := NewEpochFromConfig(config, c, allCache) if err != nil { klog.Errorf("error creating epoch from config file %q: %s", event.Name, err.Error()) return diff --git a/epoch.go b/epoch.go index e5214dc8..d79003c5 100644 --- a/epoch.go +++ b/epoch.go @@ -2,6 +2,7 @@ package main import ( "bufio" + "bytes" "context" "crypto/rand" "encoding/binary" @@ -15,9 +16,9 @@ import ( "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" "github.com/libp2p/go-libp2p/core/peer" - "github.com/patrickmn/go-cache" "github.com/rpcpool/yellowstone-faithful/bucketteer" "github.com/rpcpool/yellowstone-faithful/gsfa" + hugecache "github.com/rpcpool/yellowstone-faithful/huge-cache" "github.com/rpcpool/yellowstone-faithful/indexes" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" @@ -39,32 +40,12 @@ type Epoch struct { sigToCidIndex *indexes.SigToCid_Reader sigExists *bucketteer.Reader gsfaReader *gsfa.GsfaReader - cidToNodeCache *cache.Cache // TODO: prevent OOM onClose []func() error - slotToCidCache *cache.Cache - cidToOffsetCache *cache.Cache + allCache *hugecache.Cache } -func (r *Epoch) getSlotToCidFromCache(slot uint64) (cid.Cid, error, bool) { - if v, ok := r.slotToCidCache.Get(fmt.Sprint(slot)); ok { - return v.(cid.Cid), nil, true - } - return cid.Undef, nil, false -} - -func (r *Epoch) putSlotToCidInCache(slot uint64, c cid.Cid) { - r.slotToCidCache.Set(fmt.Sprint(slot), c, cache.DefaultExpiration) -} - -func (r *Epoch) getCidToOffsetFromCache(c cid.Cid) (uint64, error, bool) { - if v, ok := r.cidToOffsetCache.Get(c.String()); ok { - return v.(uint64), nil, true - } - return 0, nil, false -} - -func (r *Epoch) putCidToOffsetInCache(c cid.Cid, offset uint64) { - r.cidToOffsetCache.Set(c.String(), offset, cache.DefaultExpiration) +func (r *Epoch) GetCache() *hugecache.Cache { + return r.allCache } func (e *Epoch) Epoch() uint64 { @@ -91,7 +72,11 @@ func (e *Epoch) Close() error { return errors.Join(multiErr...) 
} -func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { +func NewEpochFromConfig( + config *Config, + c *cli.Context, + allCache *hugecache.Cache, +) (*Epoch, error) { if config == nil { return nil, fmt.Errorf("config must not be nil") } @@ -103,6 +88,7 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { isFilecoinMode: isLassieMode, config: config, onClose: make([]func() error, 0), + allCache: allCache, } if isCarMode { @@ -246,19 +232,6 @@ func NewEpochFromConfig(config *Config, c *cli.Context) (*Epoch, error) { ep.sigExists = sigExists } - { - ca := cache.New(30*time.Second, 1*time.Minute) - ep.cidToNodeCache = ca - } - { - ca := cache.New(30*time.Second, 1*time.Minute) - ep.slotToCidCache = ca - } - { - ca := cache.New(30*time.Second, 1*time.Minute) - ep.cidToOffsetCache = ca - } - return ep, nil } @@ -281,17 +254,6 @@ func newRandomSignature() [64]byte { return sig } -func (r *Epoch) getNodeFromCache(c cid.Cid) (v []byte, err error, has bool) { - if v, ok := r.cidToNodeCache.Get(c.String()); ok { - return v.([]byte), nil, true - } - return nil, nil, false -} - -func (r *Epoch) putNodeInCache(c cid.Cid, data []byte) { - r.cidToNodeCache.Set(c.String(), data, cache.DefaultExpiration) -} - func (r *Epoch) Config() *Config { return r.config } @@ -303,7 +265,7 @@ func (s *Epoch) prefetchSubgraph(ctx context.Context, wantedCid cid.Cid) error { if err == nil { // put in cache return sub.Each(ctx, func(c cid.Cid, data []byte) error { - s.putNodeInCache(c, data) + s.GetCache().PutRawCarObject(c, data) return nil }) } @@ -316,7 +278,7 @@ func (s *Epoch) prefetchSubgraph(ctx context.Context, wantedCid cid.Cid) error { func (s *Epoch) GetNodeByCid(ctx context.Context, wantedCid cid.Cid) ([]byte, error) { { // try from cache - data, err, has := s.getNodeFromCache(wantedCid) + data, err, has := s.GetCache().GetRawCarObject(wantedCid) if err != nil { return nil, err } @@ -329,7 +291,7 @@ func (s *Epoch) GetNodeByCid(ctx context.Context, wantedCid cid.Cid) ([]byte, er data, err := s.lassieFetcher.GetNodeByCid(ctx, wantedCid) if err == nil { // put in cache - s.putNodeInCache(wantedCid, data) + s.GetCache().PutRawCarObject(wantedCid, data) return data, nil } klog.Errorf("failed to get node from lassie: %v", err) @@ -369,13 +331,21 @@ func (s *Epoch) ReadAtFromCar(ctx context.Context, offset uint64, length uint64) return data, nil } -func (s *Epoch) GetNodeByOffset(ctx context.Context, wantedCid cid.Cid, offset uint64) ([]byte, error) { +func (s *Epoch) GetNodeByOffset(ctx context.Context, wantedCid cid.Cid, offsetAndSize *indexes.OffsetAndSize) ([]byte, error) { + if offsetAndSize == nil { + return nil, fmt.Errorf("offsetAndSize must not be nil") + } + if offsetAndSize.Size == 0 { + return nil, fmt.Errorf("offsetAndSize.Size must not be 0") + } + offset := offsetAndSize.Offset + length := offsetAndSize.Size if s.localCarReader == nil { // try remote reader if s.remoteCarReader == nil { return nil, fmt.Errorf("no CAR reader available") } - return readNodeFromReaderAt(s.remoteCarReader, wantedCid, offset) + return readNodeFromReaderAt(s.remoteCarReader, wantedCid, offset, length) } // Get reader and seek to offset, then read node. 
dr, err := s.localCarReader.DataReader() @@ -386,17 +356,39 @@ func (s *Epoch) GetNodeByOffset(ctx context.Context, wantedCid cid.Cid, offset u dr.Seek(int64(offset), io.SeekStart) br := bufio.NewReader(dr) - gotCid, data, err := util.ReadNode(br) + return readNodeWithKnownSize(br, wantedCid, length) +} + +func readNodeWithKnownSize(br *bufio.Reader, wantedCid cid.Cid, length uint64) ([]byte, error) { + section := make([]byte, length) + _, err := io.ReadFull(br, section) if err != nil { - klog.Errorf("failed to read node: %v", err) + klog.Errorf("failed to read section: %v", err) return nil, err } + return parseNodeFromSection(section, wantedCid) +} + +func parseNodeFromSection(section []byte, wantedCid cid.Cid) ([]byte, error) { + // read an uvarint from the buffer + gotLen, usize := binary.Uvarint(section) + if usize <= 0 { + return nil, fmt.Errorf("failed to decode uvarint") + } + if gotLen > uint64(util.MaxAllowedSectionSize) { // Don't OOM + return nil, errors.New("malformed car; header is bigger than util.MaxAllowedSectionSize") + } + data := section[usize:] + cidLen, gotCid, err := cid.CidFromReader(bytes.NewReader(data)) + if err != nil { + return nil, fmt.Errorf("failed to read cid: %w", err) + } // verify that the CID we read matches the one we expected. if !gotCid.Equals(wantedCid) { klog.Errorf("CID mismatch: expected %s, got %s", wantedCid, gotCid) return nil, fmt.Errorf("CID mismatch: expected %s, got %s", wantedCid, gotCid) } - return data, nil + return data[cidLen:], nil } func (ser *Epoch) FindCidFromSlot(ctx context.Context, slot uint64) (o cid.Cid, e error) { @@ -406,7 +398,7 @@ func (ser *Epoch) FindCidFromSlot(ctx context.Context, slot uint64) (o cid.Cid, }() // try from cache - if c, err, has := ser.getSlotToCidFromCache(slot); err != nil { + if c, err, has := ser.GetCache().GetSlotToCid(slot); err != nil { return cid.Undef, err } else if has { return c, nil @@ -415,7 +407,7 @@ func (ser *Epoch) FindCidFromSlot(ctx context.Context, slot uint64) (o cid.Cid, if err != nil { return cid.Undef, err } - ser.putSlotToCidInCache(slot, found) + ser.GetCache().PutSlotToCid(slot, found) return found, nil } @@ -427,25 +419,25 @@ func (ser *Epoch) FindCidFromSignature(ctx context.Context, sig solana.Signature return ser.sigToCidIndex.Get(sig) } -func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (o uint64, e error) { +func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (os *indexes.OffsetAndSize, e error) { startedAt := time.Now() defer func() { - klog.Infof("Found offset for CID %s in %s: %d", cid, time.Since(startedAt), o) + klog.Infof("Found offset and size for CID %s in %s: o=%d s=%d", cid, time.Since(startedAt), os.Offset, os.Size) }() // try from cache - if offset, err, has := ser.getCidToOffsetFromCache(cid); err != nil { - return 0, err + if osi, err, has := ser.GetCache().GetCidToOffsetAndSize(cid); err != nil { + return nil, err } else if has { - return offset, nil + return osi, nil } found, err := ser.cidToOffsetIndex.Get(cid) if err != nil { - return 0, err + return nil, err } // TODO: use also the size. 
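// FindCidFromSlot and FindOffsetFromCid now share the same cache-aside shape:
// consult the cache, fall back to the on-disk index, then write the result
// back into the cache (the write-back completes just below). A hedged generic
// sketch of the pattern (hypothetical helper, not part of the repo):

func cacheAside[K comparable, V any](
	key K,
	fromCache func(K) (V, bool),
	toCache func(K, V),
	load func(K) (V, error),
) (V, error) {
	if v, ok := fromCache(key); ok {
		return v, nil // cache hit: the index is never touched
	}
	v, err := load(key)
	if err != nil {
		var zero V
		return zero, err
	}
	toCache(key, v)
	return v, nil
}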
- ser.putCidToOffsetInCache(cid, found.Offset) - return found.Offset, nil + ser.GetCache().PutCidToOffsetAndSize(cid, found) + return found, nil } func (ser *Epoch) GetBlock(ctx context.Context, slot uint64) (*ipldbindcode.Block, error) { diff --git a/go.mod b/go.mod index f4f126d0..d072323e 100644 --- a/go.mod +++ b/go.mod @@ -57,13 +57,14 @@ require ( ) require ( + github.com/allegro/bigcache v1.2.1 + github.com/allegro/bigcache/v3 v3.1.0 github.com/fsnotify/fsnotify v1.5.4 github.com/goware/urlx v0.3.2 github.com/ipld/go-car v0.5.0 github.com/ipld/go-trustless-utils v0.4.1 github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 github.com/mr-tron/base58 v1.2.0 - github.com/patrickmn/go-cache v2.1.0+incompatible github.com/ronanh/intcomp v1.1.0 github.com/ryanuber/go-glob v1.0.0 github.com/tejzpr/ordered-concurrently/v3 v3.0.1 diff --git a/go.sum b/go.sum index b4bbf224..9892ce2b 100644 --- a/go.sum +++ b/go.sum @@ -59,6 +59,10 @@ github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkK github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= +github.com/allegro/bigcache v1.2.1 h1:hg1sY1raCwic3Vnsvje6TT7/pnZba83LeFck5NrFKSc= +github.com/allegro/bigcache v1.2.1/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= +github.com/allegro/bigcache/v3 v3.1.0 h1:H2Vp8VOvxcrB91o86fUSVJFqeuz8kpyyB02eH3bSzwk= +github.com/allegro/bigcache/v3 v3.1.0/go.mod h1:aPyh7jEvrog9zAwx5N7+JUQX5dZTSGpxF1LAR4dr35I= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129/go.mod h1:rFgpPQZYZ8vdbc+48xibu8ALc3yeyd64IhHS+PU6Yyg= github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= @@ -665,8 +669,6 @@ github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= -github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= diff --git a/huge-cache/cache.go b/huge-cache/cache.go new file mode 100644 index 00000000..900894f4 --- /dev/null +++ b/huge-cache/cache.go @@ -0,0 +1,100 @@ +package hugecache + +import ( + "context" + "errors" + "strconv" + + "github.com/allegro/bigcache/v3" + "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/indexes" +) + +type Cache struct { + cache *bigcache.BigCache +} + +func NewWithConfig(ctx context.Context, config bigcache.Config) (*Cache, error) { + cache, err := bigcache.New(ctx, config) + if err != nil { + return 
nil, err + } + return &Cache{ + cache: cache, + }, nil +} + +func formatRawCarObjectKey(c cid.Cid) string { + return "rco-" + c.String() +} + +func formatSlotToCidKey(slot uint64) string { + return "s2c-" + strconv.FormatUint(slot, 10) +} + +func formatOffsetAndSizeKey(c cid.Cid) string { + return "o&s-" + c.String() +} + +// PutRawCarObject stores the raw CAR object data. +func (r *Cache) PutRawCarObject(c cid.Cid, data []byte) error { + return r.cache.Set(formatRawCarObjectKey(c), data) +} + +// GetRawCarObject returns the raw CAR object data from the cache if it exists. +func (r *Cache) GetRawCarObject(c cid.Cid) (v []byte, err error, has bool) { + if v, err := r.cache.Get(formatRawCarObjectKey(c)); err == nil { + return v, nil, true + } else { + if errors.Is(err, bigcache.ErrEntryNotFound) { + return nil, nil, false + } + return nil, err, false + } +} + +// PutSlotToCid stores the CID for the given slot. +func (r *Cache) PutSlotToCid(slot uint64, c cid.Cid) error { + return r.cache.Set(formatSlotToCidKey(slot), c.Bytes()) +} + +// GetSlotToCid returns the CID for the given slot if it exists in the cache. +func (r *Cache) GetSlotToCid(slot uint64) (cid.Cid, error, bool) { + if v, err := r.cache.Get(formatSlotToCidKey(slot)); err == nil { + _, parsed, err := cid.CidFromBytes(v) + if err != nil { + return cid.Undef, err, false + } + return parsed, nil, true + } else { + if errors.Is(err, bigcache.ErrEntryNotFound) { + return cid.Undef, nil, false + } + return cid.Undef, err, false + } +} + +func (r *Cache) PutCidToOffsetAndSize(c cid.Cid, oas *indexes.OffsetAndSize) error { + if oas == nil { + return errors.New("offset and size is nil") + } + if !oas.IsValid() { + return errors.New("offset and size is invalid") + } + return r.cache.Set(formatOffsetAndSizeKey(c), oas.Bytes()) +} + +func (r *Cache) GetCidToOffsetAndSize(c cid.Cid) (*indexes.OffsetAndSize, error, bool) { + if v, err := r.cache.Get(formatOffsetAndSizeKey(c)); err == nil { + var oas indexes.OffsetAndSize + if err := oas.FromBytes(v); err != nil { + return nil, err, false + } + return &oas, nil, true + } else { + if errors.Is(err, bigcache.ErrEntryNotFound) { + return nil, nil, false + } + return nil, err, false + } +} diff --git a/indexes/index-cid-to-offset-and-size.go b/indexes/index-cid-to-offset-and-size.go index 761603e6..85c0e77a 100644 --- a/indexes/index-cid-to-offset-and-size.go +++ b/indexes/index-cid-to-offset-and-size.go @@ -124,6 +124,18 @@ func (w *CidToOffsetAndSize_Writer) GetFilepath() string { return w.finalPath } +func NewOffsetAndSize(offset uint64, size uint64) *OffsetAndSize { + return &OffsetAndSize{ + Offset: offset, + Size: size, + } +} + +// IsValid returns true if the offset and size are valid. 
+func (oas *OffsetAndSize) IsValid() bool {
+	return oas.Offset <= maxUint48 && oas.Size <= maxUint24
+}
+
 type OffsetAndSize struct {
 	Offset uint64 // uint48, 6 bytes, max 281.5 TB (terabytes)
 	Size   uint64 // uint24, 3 bytes, max 16.7 MB (megabytes)
diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go
index 6b8e6f6f..cafd479c 100644
--- a/multiepoch-getBlock.go
+++ b/multiepoch-getBlock.go
@@ -99,10 +99,11 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex
 	var blockOffset, parentOffset uint64
 	wg := new(errgroup.Group)
 	wg.Go(func() (err error) {
-		blockOffset, err = epochHandler.FindOffsetFromCid(ctx, blockCid)
+		offsetAndSize, err := epochHandler.FindOffsetFromCid(ctx, blockCid)
 		if err != nil {
 			return err
 		}
+		blockOffset = offsetAndSize.Offset
 		return nil
 	})
 	wg.Go(func() (err error) {
@@ -111,11 +112,13 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex
 			parentOffset = epochHandler.remoteCarHeaderSize
 			return nil
 		}
-		parentOffset, err = epochHandler.FindOffsetFromCid(ctx, parentCid)
+		offsetAndSize, err := epochHandler.FindOffsetFromCid(ctx, parentCid)
 		if err != nil {
 			// If the parent is not found, it (probably) means that it's outside of the car file.
 			parentOffset = epochHandler.remoteCarHeaderSize
+			return nil // offsetAndSize is nil here; keep the fallback offset instead of dereferencing it below
 		}
+		parentOffset = offsetAndSize.Offset
 		return nil
 	})
 	err = wg.Wait()
@@ -161,7 +164,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex
 		if !parentIsInPreviousEpoch && !gotCid.Equals(parentCid) {
 			return fmt.Errorf("CID mismatch: expected %s, got %s", parentCid, gotCid)
 		}
-		epochHandler.putNodeInCache(gotCid, data)
+		epochHandler.GetCache().PutRawCarObject(gotCid, data)
 
 		for {
 			gotCid, data, err = util.ReadNode(br)
@@ -174,7 +177,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex
 			if gotCid.Equals(blockCid) {
 				break
 			}
-			epochHandler.putNodeInCache(gotCid, data)
+			epochHandler.GetCache().PutRawCarObject(gotCid, data)
 		}
 	}
 	return nil
diff --git a/storage.go b/storage.go
index 4474d3c4..dfd2bd25 100644
--- a/storage.go
+++ b/storage.go
@@ -3,15 +3,12 @@ package main
 import (
 	"bytes"
 	"context"
-	"encoding/binary"
-	"errors"
 	"fmt"
 	"strings"
 
 	bin "github.com/gagliardetto/binary"
 	"github.com/gagliardetto/solana-go"
 	"github.com/ipfs/go-cid"
-	"github.com/ipld/go-car/util"
 	carv2 "github.com/ipld/go-car/v2"
 	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
 	"github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
@@ -93,34 +90,14 @@ func readSectionFromReaderAt(reader ReaderAtCloser, offset uint64, length uint64
 	return data, nil
 }
 
-func readNodeFromReaderAt(reader ReaderAtCloser, wantedCid cid.Cid, offset uint64) ([]byte, error) {
-	// read MaxVarintLen64 bytes
-	lenBuf := make([]byte, binary.MaxVarintLen64)
-	_, err := reader.ReadAt(lenBuf, int64(offset))
+func readNodeFromReaderAt(reader ReaderAtCloser, wantedCid cid.Cid, offset uint64, length uint64) ([]byte, error) {
+	// read the whole section at once; its length is known from the index
+	section := make([]byte, length)
+	_, err := reader.ReadAt(section, int64(offset))
 	if err != nil {
 		return nil, err
 	}
-	// read uvarint
-	dataLen, n := binary.Uvarint(lenBuf)
-	offset += uint64(n)
-	if dataLen > uint64(util.MaxAllowedSectionSize) { // Don't OOM
-		return nil, errors.New("malformed car; header is bigger than util.MaxAllowedSectionSize")
-	}
-	data := make([]byte, dataLen)
-	_, err = reader.ReadAt(data, int64(offset))
-	if err != nil {
-		return nil, err
-	}
-
-	n, gotCid, err := cid.CidFromReader(bytes.NewReader(data))
-	if err != nil {
-		return nil, err
-	}
-	// verify that the CID we read
matches the one we expected. - if !gotCid.Equals(wantedCid) { - return nil, fmt.Errorf("CID mismatch: expected %s, got %s", wantedCid, gotCid) - } - return data[n:], nil + return parseNodeFromSection(section, wantedCid) } type GetBlockResponse struct { From 072b6e13c8b954930f6a376e716affc4b72e2fd7 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 00:14:00 +0100 Subject: [PATCH 19/63] Cleanup max cache --- cmd-rpc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd-rpc.go b/cmd-rpc.go index d15c62a6..1dbcfed3 100644 --- a/cmd-rpc.go +++ b/cmd-rpc.go @@ -94,9 +94,9 @@ func newCmd_rpc() *cli.Command { Destination: &epochLoadConcurrency, }, &cli.IntFlag{ - Name: "max-cache-size-mb", + Name: "max-cache", Usage: "Maximum size of the cache in MB", - Value: 1024, + Value: 0, Destination: &maxCacheSizeMB, }, ), @@ -115,7 +115,7 @@ func newCmd_rpc() *cli.Command { fmt.Printf(" - %s\n", configFile) } - conf := bigcache.DefaultConfig(2 * time.Minute) + conf := bigcache.DefaultConfig(5 * time.Minute) conf.HardMaxCacheSize = maxCacheSizeMB allCache, err := hugecache.NewWithConfig(c.Context, conf) if err != nil { From e36a709e1d17ef2aea7adbdadf41fb8993c5a415 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 00:21:44 +0100 Subject: [PATCH 20/63] cid_to_offset => cid_to_offset_and_size --- cmd-x-index-all.go | 20 ++++++++++---------- config.go | 14 +++++++------- epoch.go | 36 ++++++++++++++++++------------------ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index a3928369..f5aa33a7 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -184,7 +184,7 @@ func createAllIndexes( numTotalItems, ) if err != nil { - return nil, fmt.Errorf("failed to create cid_to_offset index: %w", err) + return nil, fmt.Errorf("failed to create cid_to_offset_and_size index: %w", err) } defer cid_to_offset_and_size.Close() @@ -332,7 +332,7 @@ func createAllIndexes( klog.Infof("Sealing cid_to_offset_and_size index...") err = cid_to_offset_and_size.Seal(ctx, indexDir) if err != nil { - return nil, fmt.Errorf("failed to seal cid_to_offset index: %w", err) + return nil, fmt.Errorf("failed to seal cid_to_offset_and_size index: %w", err) } paths.CidToOffsetAndSize = cid_to_offset_and_size.GetFilepath() klog.Infof("Successfully sealed cid_to_offset_and_size index: %s", paths.CidToOffsetAndSize) @@ -397,7 +397,7 @@ func NewBuilder_CidToOffset( ) (*indexes.CidToOffsetAndSize_Writer, error) { tmpDir = filepath.Join(tmpDir, "index-cid-to-offset-"+time.Now().Format("20060102-150405.000000000")+fmt.Sprintf("-%d", rand.Int63())) if err := os.MkdirAll(tmpDir, 0o755); err != nil { - return nil, fmt.Errorf("failed to create cid_to_offset tmp dir: %w", err) + return nil, fmt.Errorf("failed to create cid_to_offset_and_size tmp dir: %w", err) } index, err := indexes.NewWriter_CidToOffsetAndSize( epoch, @@ -489,13 +489,13 @@ func verifyAllIndexes( return fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.header.Roots)) } - cid_to_offset, err := OpenIndex_CidToOffset( + cid_to_offset_and_size, err := OpenIndex_CidToOffset( indexes.CidToOffsetAndSize, ) if err != nil { - return fmt.Errorf("failed to open cid_to_offset index: %w", err) + return fmt.Errorf("failed to open cid_to_offset_and_size index: %w", err) } - defer cid_to_offset.Close() + defer cid_to_offset_and_size.Close() slot_to_cid, err := OpenIndex_SlotToCid( indexes.SlotToCid, @@ -549,7 +549,7 @@ func verifyAllIndexes( // klog.Infof("key: %s, offset: %d", 
bin.FormatByteSlice(c.Bytes()), totalOffset) - offset, err := cid_to_offset.Get(_cid) + offset, err := cid_to_offset_and_size.Get(_cid) if err != nil { return fmt.Errorf("failed to lookup offset for %s: %w", _cid, err) } @@ -644,7 +644,7 @@ func OpenIndex_CidToOffset( ) (*indexes.CidToOffsetAndSize_Reader, error) { index, err := indexes.Open_CidToOffsetAndSize(indexFilePath) if err != nil { - return nil, fmt.Errorf("failed to open index: %w", err) + return nil, fmt.Errorf("failed to open cid_to_offset_and_size index: %w", err) } return index, nil } @@ -654,7 +654,7 @@ func OpenIndex_SlotToCid( ) (*indexes.SlotToCid_Reader, error) { index, err := indexes.Open_SlotToCid(indexFilePath) if err != nil { - return nil, fmt.Errorf("failed to open index: %w", err) + return nil, fmt.Errorf("failed to open slot_to_cid index: %w", err) } return index, nil } @@ -664,7 +664,7 @@ func OpenIndex_SigToCid( ) (*indexes.SigToCid_Reader, error) { index, err := indexes.Open_SigToCid(indexFilePath) if err != nil { - return nil, fmt.Errorf("failed to open index: %w", err) + return nil, fmt.Errorf("failed to open sig_to_cid index: %w", err) } return index, nil } diff --git a/config.go b/config.go index a7699c4a..e7df24df 100644 --- a/config.go +++ b/config.go @@ -109,9 +109,9 @@ type Config struct { } `json:"filecoin" yaml:"filecoin"` } `json:"data" yaml:"data"` Indexes struct { - CidToOffset struct { + CidToOffsetAndSize struct { URI URI `json:"uri" yaml:"uri"` - } `json:"cid_to_offset" yaml:"cid_to_offset"` + } `json:"cid_to_offset_and_size" yaml:"cid_to_offset_and_size"` SlotToCid struct { URI URI `json:"uri" yaml:"uri"` } `json:"slot_to_cid" yaml:"slot_to_cid"` @@ -214,10 +214,10 @@ func (c *Config) Validate() error { if err := isSupportedURI(c.Data.Car.URI, "data.car.uri"); err != nil { return err } - if c.Indexes.CidToOffset.URI.IsZero() { - return fmt.Errorf("indexes.cid_to_offset.uri must be set") + if c.Indexes.CidToOffsetAndSize.URI.IsZero() { + return fmt.Errorf("indexes.cid_to_offset_and_size.uri must be set") } - if err := isSupportedURI(c.Indexes.CidToOffset.URI, "indexes.cid_to_offset.uri"); err != nil { + if err := isSupportedURI(c.Indexes.CidToOffsetAndSize.URI, "indexes.cid_to_offset_and_size.uri"); err != nil { return err } } else { @@ -273,8 +273,8 @@ func (c *Config) Validate() error { if !c.Data.Car.URI.IsValid() { return fmt.Errorf("data.car.uri is invalid") } - if !c.Indexes.CidToOffset.URI.IsValid() { - return fmt.Errorf("indexes.cid_to_offset.uri is invalid") + if !c.Indexes.CidToOffsetAndSize.URI.IsValid() { + return fmt.Errorf("indexes.cid_to_offset_and_size.uri is invalid") } } if !c.Indexes.SlotToCid.URI.IsValid() { diff --git a/epoch.go b/epoch.go index d79003c5..cc3fd832 100644 --- a/epoch.go +++ b/epoch.go @@ -31,17 +31,17 @@ type Epoch struct { isFilecoinMode bool // true if the epoch is in Filecoin mode (i.e. 
Lassie mode) config *Config // contains indexes and block data for the epoch - lassieFetcher *lassieWrapper - localCarReader *carv2.Reader - remoteCarReader ReaderAtCloser - remoteCarHeaderSize uint64 - cidToOffsetIndex *indexes.CidToOffsetAndSize_Reader - slotToCidIndex *indexes.SlotToCid_Reader - sigToCidIndex *indexes.SigToCid_Reader - sigExists *bucketteer.Reader - gsfaReader *gsfa.GsfaReader - onClose []func() error - allCache *hugecache.Cache + lassieFetcher *lassieWrapper + localCarReader *carv2.Reader + remoteCarReader ReaderAtCloser + remoteCarHeaderSize uint64 + cidToOffsetAndSizeIndex *indexes.CidToOffsetAndSize_Reader + slotToCidIndex *indexes.SlotToCid_Reader + sigToCidIndex *indexes.SigToCid_Reader + sigExists *bucketteer.Reader + gsfaReader *gsfa.GsfaReader + onClose []func() error + allCache *hugecache.Cache } func (r *Epoch) GetCache() *hugecache.Cache { @@ -93,24 +93,24 @@ func NewEpochFromConfig( if isCarMode { // The CAR-mode requires a cid-to-offset index. - cidToOffsetIndexFile, err := openIndexStorage( + cidToOffsetAndSizeIndexFile, err := openIndexStorage( c.Context, - string(config.Indexes.CidToOffset.URI), + string(config.Indexes.CidToOffsetAndSize.URI), DebugMode, ) if err != nil { return nil, fmt.Errorf("failed to open cid-to-offset index file: %w", err) } - ep.onClose = append(ep.onClose, cidToOffsetIndexFile.Close) + ep.onClose = append(ep.onClose, cidToOffsetAndSizeIndexFile.Close) - cidToOffsetIndex, err := indexes.OpenWithReader_CidToOffsetAndSize(cidToOffsetIndexFile) + cidToOffsetIndex, err := indexes.OpenWithReader_CidToOffsetAndSize(cidToOffsetAndSizeIndexFile) if err != nil { return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) } - if config.Indexes.CidToOffset.URI.IsRemoteWeb() { + if config.Indexes.CidToOffsetAndSize.URI.IsRemoteWeb() { cidToOffsetIndex.Prefetch(true) } - ep.cidToOffsetIndex = cidToOffsetIndex + ep.cidToOffsetAndSizeIndex = cidToOffsetIndex } { @@ -431,7 +431,7 @@ func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (os *index } else if has { return osi, nil } - found, err := ser.cidToOffsetIndex.Get(cid) + found, err := ser.cidToOffsetAndSizeIndex.Get(cid) if err != nil { return nil, err } From 6ee3412c251643e0d7d79dcde12d52f8fbfa43d5 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 12:02:48 +0100 Subject: [PATCH 21/63] Use github.com/valyala/fasthttp/reuseport --- go.mod | 3 +-- go.sum | 2 -- multiepoch.go | 8 +++++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index d072323e..b010a27c 100644 --- a/go.mod +++ b/go.mod @@ -57,12 +57,12 @@ require ( ) require ( - github.com/allegro/bigcache v1.2.1 github.com/allegro/bigcache/v3 v3.1.0 github.com/fsnotify/fsnotify v1.5.4 github.com/goware/urlx v0.3.2 github.com/ipld/go-car v0.5.0 github.com/ipld/go-trustless-utils v0.4.1 + github.com/libp2p/go-reuseport v0.4.0 github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 github.com/mr-tron/base58 v1.2.0 github.com/ronanh/intcomp v1.1.0 @@ -151,7 +151,6 @@ require ( github.com/libp2p/go-msgio v0.3.0 // indirect github.com/libp2p/go-nat v0.2.0 // indirect github.com/libp2p/go-netroute v0.2.1 // indirect - github.com/libp2p/go-reuseport v0.4.0 // indirect github.com/libp2p/go-yamux/v4 v4.0.1 // indirect github.com/logrusorgru/aurora v2.0.3+incompatible // indirect github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect diff --git a/go.sum b/go.sum index 9892ce2b..3ccd1f56 100644 --- a/go.sum +++ b/go.sum @@ -59,8 +59,6 @@ 
github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkK
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc=
-github.com/allegro/bigcache v1.2.1 h1:hg1sY1raCwic3Vnsvje6TT7/pnZba83LeFck5NrFKSc=
-github.com/allegro/bigcache v1.2.1/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
 github.com/allegro/bigcache/v3 v3.1.0 h1:H2Vp8VOvxcrB91o86fUSVJFqeuz8kpyyB02eH3bSzwk=
 github.com/allegro/bigcache/v3 v3.1.0/go.mod h1:aPyh7jEvrog9zAwx5N7+JUQX5dZTSGpxF1LAR4dr35I=
 github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI=
diff --git a/multiepoch.go b/multiepoch.go
index 79628ae4..f4510e71 100644
--- a/multiepoch.go
+++ b/multiepoch.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	"github.com/goware/urlx"
+	"github.com/libp2p/go-reuseport"
 	"github.com/mr-tron/base58"
 	"github.com/sourcegraph/jsonrpc2"
 	"github.com/valyala/fasthttp"
@@ -202,7 +203,12 @@ func (m *MultiEpoch) ListenAndServe(ctx context.Context, listenOn string, lsConf
 			klog.Errorf("Error while shutting down RPC server: %s", err)
 		}
 	}()
-	return s.ListenAndServe(listenOn)
+	ln, err := reuseport.Listen("tcp4", listenOn)
+	if err != nil {
+		klog.Errorf("error in reuseport listener: %v", err)
+		return err
+	}
+	return s.Serve(ln)
 }
 
 func randomRequestID() string {

From f11c35fc168d7120bf297bf9cbdd503bec22ae95 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Tue, 5 Dec 2023 12:05:01 +0100
Subject: [PATCH 22/63] Cleanup readahead

---
 readahead/readahead.go | 41 +++++------------------------------------
 1 file changed, 5 insertions(+), 36 deletions(-)

diff --git a/readahead/readahead.go b/readahead/readahead.go
index 124514b0..ca777b0a 100644
--- a/readahead/readahead.go
+++ b/readahead/readahead.go
@@ -1,8 +1,7 @@
 package readahead
 
 import (
-	"bytes"
-	"errors"
+	"bufio"
 	"fmt"
 	"io"
 	"os"
@@ -17,7 +16,7 @@ const DefaultChunkSize = 12 * MiB
 
 type CachingReader struct {
 	file      io.ReadCloser
-	buffer    *bytes.Buffer
+	buffer    *bufio.Reader
 	chunkSize int
 }
@@ -34,7 +33,7 @@ func NewCachingReader(filePath string, chunkSize int) (*CachingReader, error) {
 	if err != nil {
 		return nil, err
 	}
-	return &CachingReader{file: file, buffer: new(bytes.Buffer), chunkSize: chunkSize}, nil
+	return &CachingReader{file: file, buffer: bufio.NewReaderSize(file, chunkSize), chunkSize: chunkSize}, nil
 }
 
 func NewCachingReaderFromReader(file io.ReadCloser, chunkSize int) (*CachingReader, error) {
@@ -42,7 +41,7 @@ func NewCachingReaderFromReader(file io.ReadCloser, chunkSize int) (*CachingRead
 		chunkSize = DefaultChunkSize
 	}
 	chunkSize = alignValueToPageSize(chunkSize)
-	return &CachingReader{file: file, buffer: new(bytes.Buffer), chunkSize: chunkSize}, nil
+	return &CachingReader{file: file, buffer: bufio.NewReaderSize(file, chunkSize), chunkSize: chunkSize}, nil
 }
 
 func alignValueToPageSize(value int) int {
@@ -57,37 +56,7 @@ func (cr *CachingReader) Read(p []byte) (int, error) {
 	if len(p) == 0 {
 		return 0, nil
 	}
-
-	if len(p) > cr.chunkSize {
-		// read what we can from the buffer
-		n := copy(p, cr.buffer.Next(cr.chunkSize))
-		// read the rest directly from the file
-		n2, err := cr.file.Read(p[n:])
-		if err != nil && err != io.EOF {
-			return 0, fmt.Errorf("failed to read from file: %w", err)
-		}
-		return n + n2, nil
-	}
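The branches being removed here re-implemented what bufio.Reader already provides (fill a fixed-size buffer, serve copies from it, fall back to the file), which is why Read collapses to a single delegation. A usage sketch of the simplified reader, with a hypothetical file path:

func readCarHeader(path string) ([]byte, error) {
	// path is hypothetical; DefaultChunkSize is the 12 MiB buffer above.
	cr, err := readahead.NewCachingReader(path, readahead.DefaultChunkSize)
	if err != nil {
		return nil, err
	}
	defer cr.Close()
	// Small sequential reads are served from the bufio buffer; the
	// underlying file is only touched when the buffer runs dry.
	hdr := make([]byte, 1024)
	if _, err := io.ReadFull(cr, hdr); err != nil {
		return nil, err
	}
	return hdr, nil
}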
- - // Refill the buffer if needed - if cr.buffer.Len() < len(p) { - tmp := make([]byte, cr.chunkSize) - n, err := cr.file.Read(tmp) - if err != nil && err != io.EOF { - return 0, fmt.Errorf("failed to read from file: %w", err) - } - if n > 0 { - cr.buffer.Write(tmp[:n]) - } - if errors.Is(err, io.EOF) && cr.buffer.Len() == 0 { - // If EOF is reached and buffer is empty, return EOF - return 0, io.EOF - } - } - - // Read and discard bytes from the buffer - n := copy(p, cr.buffer.Next(len(p))) - return n, nil + return cr.buffer.Read(p) } func (cr *CachingReader) Close() error { From 4e9d6b7118c878a32ed5752fb26a8d479e0ac2ed Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 13:35:30 +0100 Subject: [PATCH 23/63] Equalize usage of index metadata --- bucketteer/bucketteer_test.go | 84 +++++++++------ bucketteer/example/main.go | 3 +- bucketteer/read.go | 34 ++---- bucketteer/write.go | 23 ++-- cmd-x-index-all.go | 12 ++- cmd-x-index-sig-exists.go | 38 ++++++- compactindexsized/build.go | 21 ++-- compactindexsized/build36_test.go | 11 +- compactindexsized/build48_test.go | 9 +- compactindexsized/build8_test.go | 4 +- compactindexsized/compactindex.go | 7 +- compactindexsized/header_test.go | 52 --------- compactindexsized/query.go | 10 +- compactindexsized/query_test.go | 7 +- epoch.go | 31 ++++++ index-sig-to-cid.go | 11 +- indexes/metadata.go | 25 ++--- .../header.go => indexmeta/indexmeta.go | 100 ++++++++++++++---- indexmeta/indexmeta_test.go | 68 ++++++++++++ indexmeta/keys.go | 8 ++ 20 files changed, 359 insertions(+), 199 deletions(-) rename compactindexsized/header.go => indexmeta/indexmeta.go (71%) create mode 100644 indexmeta/indexmeta_test.go create mode 100644 indexmeta/keys.go diff --git a/bucketteer/bucketteer_test.go b/bucketteer/bucketteer_test.go index 9010b910..21cd2406 100644 --- a/bucketteer/bucketteer_test.go +++ b/bucketteer/bucketteer_test.go @@ -1,11 +1,13 @@ package bucketteer import ( + "math" "os" "path/filepath" "testing" bin "github.com/gagliardetto/binary" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/require" "golang.org/x/exp/mmap" ) @@ -44,11 +46,11 @@ func TestBucketteer(t *testing.T) { wr.Put(sig) require.True(t, wr.Has(sig)) } - require.Equal(t, 3, len(wr.prefixToHashes)) + require.Equal(t, 65536, len(wr.prefixToHashes)) { - gotSize, err := wr.Seal(map[string]string{ - "epoch": "test", - }) + meta := indexmeta.Meta{} + meta.Add([]byte("epoch"), []byte("test")) + gotSize, err := wr.Seal(meta) require.NoError(t, err) require.NoError(t, wr.Close()) realSize, err := getFizeSize(path) @@ -63,7 +65,7 @@ func TestBucketteer(t *testing.T) { // read header size: headerSize, err := reader.ReadUint32(bin.LE) require.NoError(t, err) - require.Equal(t, uint32(8+8+8+(8+(4+5)+(4+4))+(3*(2+8))), headerSize) + require.Equal(t, uint32(8+8+8+(1+(1+5)+(1+4))+(65536*(2+8))), headerSize) // magic: { @@ -80,24 +82,34 @@ func TestBucketteer(t *testing.T) { } { // read meta: - numMeta, err := reader.ReadUint64(bin.LE) + numMeta, err := reader.ReadUint8() require.NoError(t, err) - require.Equal(t, uint64(1), numMeta) + require.Equal(t, uint8(1), numMeta) - key, err := reader.ReadString() + keyLen, err := reader.ReadUint8() require.NoError(t, err) - require.Equal(t, "epoch", key) + require.Equal(t, uint8(5), keyLen) - value, err := reader.ReadString() + key := make([]byte, keyLen) + _, err = reader.Read(key) require.NoError(t, err) - require.Equal(t, "test", value) + require.Equal(t, "epoch", string(key)) + + valueLen, err := 
reader.ReadUint8() + require.NoError(t, err) + require.Equal(t, uint8(4), valueLen) + + value := make([]byte, valueLen) + _, err = reader.Read(value) + require.NoError(t, err) + require.Equal(t, "test", string(value)) } // numPrefixes: numPrefixes, err := reader.ReadUint64(bin.LE) require.NoError(t, err) - require.Equal(t, uint64(3), numPrefixes) + require.Equal(t, uint64(65536), numPrefixes) // prefix -> offset: - prefixToOffset := make(map[[2]byte]uint64) + prefixToOffset := [math.MaxUint16 + 1]uint64{} { for i := 0; i < int(numPrefixes); i++ { var prefix [2]byte @@ -105,35 +117,35 @@ func TestBucketteer(t *testing.T) { require.NoError(t, err) offset, err := reader.ReadUint64(bin.LE) require.NoError(t, err) - prefixToOffset[prefix] = offset + prefixToOffset[prefixToUint16(prefix)] = offset } } { - require.Equal(t, - map[[2]uint8]uint64{ - {0x1, 0x2}: 0x0, - {0x16, 0x2}: 0x1c, - {0x63, 0x2}: 0x28, - }, prefixToOffset) + require.Equal(t, 65536, len(prefixToOffset)) + + require.Equal(t, uint64(0x804), prefixToOffset[prefixToUint16([2]byte{0x1, 0x2})]) + require.Equal(t, uint64(0x870), prefixToOffset[prefixToUint16([2]byte{0x16, 0x2})]) + require.Equal(t, uint64(0x9ac), prefixToOffset[prefixToUint16([2]byte{0x63, 0x2})]) } contentBuf, err := reader.ReadNBytes(reader.Remaining()) require.NoError(t, err) - require.Equal(t, - []byte{ - 0x3, 0x0, 0x0, 0x0, // num entries - 0x49, 0xd7, 0xaf, 0x9e, 0x94, 0x4d, 0x9a, 0x6f, - 0x2f, 0x12, 0xdb, 0x5b, 0x1, 0x62, 0xae, 0x1a, - 0x3b, 0xb6, 0x71, 0x5f, 0x4, 0x4f, 0x36, 0xf2, - 0x1, 0x0, 0x0, 0x0, // num entries - 0x58, 0xe1, 0x9d, 0xde, 0x7c, 0xfb, 0xeb, 0x5a, - 0x1, 0x0, 0x0, 0x0, // num entries - 0x4c, 0xbd, 0xa3, 0xed, 0xd3, 0x8b, 0xa8, 0x44, - }, - contentBuf, - ) + // require.Equal(t, + // []byte{ + // 0x3, 0x0, 0x0, 0x0, // num entries + // 0x49, 0xd7, 0xaf, 0x9e, 0x94, 0x4d, 0x9a, 0x6f, + // 0x2f, 0x12, 0xdb, 0x5b, 0x1, 0x62, 0xae, 0x1a, + // 0x3b, 0xb6, 0x71, 0x5f, 0x4, 0x4f, 0x36, 0xf2, + // 0x1, 0x0, 0x0, 0x0, // num entries + // 0x58, 0xe1, 0x9d, 0xde, 0x7c, 0xfb, 0xeb, 0x5a, + // 0x1, 0x0, 0x0, 0x0, // num entries + // 0x4c, 0xbd, 0xa3, 0xed, 0xd3, 0x8b, 0xa8, 0x44, + // }, + // contentBuf, + // ) contentReader := bin.NewBorshDecoder(contentBuf) { - for prefix, offset := range prefixToOffset { + for prefixAsNumber, offset := range prefixToOffset { + prefix := uint16ToPrefix(uint16(prefixAsNumber)) // Now read the bucket: { err := contentReader.SetPosition(uint(offset)) @@ -175,6 +187,10 @@ func TestBucketteer(t *testing.T) { ok, err := reader.Has(firstSig) require.NoError(t, err) require.True(t, ok) + + got, ok := reader.Meta().Get(indexmeta.MetadataKey_Epoch) + require.True(t, ok) + require.Equal(t, []byte("test"), got) } } } diff --git a/bucketteer/example/main.go b/bucketteer/example/main.go index 4ffed250..95994d33 100644 --- a/bucketteer/example/main.go +++ b/bucketteer/example/main.go @@ -10,6 +10,7 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" "github.com/rpcpool/yellowstone-faithful/bucketteer" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "golang.org/x/exp/mmap" ) @@ -60,7 +61,7 @@ func main() { fmt.Println("writing to file...") writeStartedAt := time.Now() - _, err = buWr.Seal(nil) + _, err = buWr.Seal(indexmeta.Meta{}) if err != nil { panic(err) } diff --git a/bucketteer/read.go b/bucketteer/read.go index 8ab65653..d9b678d1 100644 --- a/bucketteer/read.go +++ b/bucketteer/read.go @@ -8,12 +8,13 @@ import ( "math" bin "github.com/gagliardetto/binary" + 
"github.com/rpcpool/yellowstone-faithful/indexmeta" "golang.org/x/exp/mmap" ) type Reader struct { contentReader io.ReaderAt - meta map[string]string + meta *indexmeta.Meta prefixToOffset *bucketToOffset } @@ -76,16 +77,10 @@ func (r *Reader) Close() error { return nil } -func (r *Reader) Meta() map[string]string { +func (r *Reader) Meta() *indexmeta.Meta { return r.meta } -// GetMeta returns the value of the given key. -// Returns an empty string if the key does not exist. -func (r *Reader) GetMeta(key string) string { - return r.meta[key] -} - func readHeaderSize(reader io.ReaderAt) (int64, error) { // read header size: headerSizeBuf := make([]byte, 4) @@ -96,7 +91,7 @@ func readHeaderSize(reader io.ReaderAt) (int64, error) { return headerSize, nil } -func readHeader(reader io.ReaderAt) (*bucketToOffset, map[string]string, int64, error) { +func readHeader(reader io.ReaderAt) (*bucketToOffset, *indexmeta.Meta, int64, error) { // read header size: headerSize, err := readHeaderSize(reader) if err != nil { @@ -132,21 +127,10 @@ func readHeader(reader io.ReaderAt) (*bucketToOffset, map[string]string, int64, } } // read meta: - numMeta, err := decoder.ReadUint64(bin.LE) - if err != nil { - return nil, nil, 0, err - } - meta := make(map[string]string, numMeta) - for i := uint64(0); i < numMeta; i++ { - key, err := decoder.ReadString() - if err != nil { - return nil, nil, 0, err - } - value, err := decoder.ReadString() - if err != nil { - return nil, nil, 0, err - } - meta[key] = value + var meta indexmeta.Meta + // read key-value pairs + if err := meta.UnmarshalWithDecoder(decoder); err != nil { + return nil, nil, 0, fmt.Errorf("failed to unmarshal metadata: %w", err) } // numPrefixes: numPrefixes, err := decoder.ReadUint64(bin.LE) @@ -167,7 +151,7 @@ func readHeader(reader io.ReaderAt) (*bucketToOffset, map[string]string, int64, } prefixToOffset[prefixToUint16(prefix)] = offset } - return &prefixToOffset, meta, headerSize + 4, err + return &prefixToOffset, &meta, headerSize + 4, err } func (r *Reader) Has(sig [64]byte) (bool, error) { diff --git a/bucketteer/write.go b/bucketteer/write.go index 3400da22..20bd94fe 100644 --- a/bucketteer/write.go +++ b/bucketteer/write.go @@ -10,6 +10,7 @@ import ( "sort" bin "github.com/gagliardetto/binary" + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) type Writer struct { @@ -77,7 +78,7 @@ func (b *Writer) Close() error { } // Seal writes the Bucketteer's state to the given writer. 
-func (b *Writer) Seal(meta map[string]string) (int64, error) { +func (b *Writer) Seal(meta indexmeta.Meta) (int64, error) { // truncate file and seek to beginning: if err := b.destination.Truncate(0); err != nil { return 0, err @@ -96,7 +97,7 @@ func createHeader( magic [8]byte, version uint64, headerSizeIn uint32, - meta map[string]string, + meta indexmeta.Meta, prefixToOffset bucketToOffset, ) ([]byte, error) { tmpHeaderBuf := new(bytes.Buffer) @@ -120,18 +121,12 @@ func createHeader( } // write meta { - // write num meta entries - if err := headerWriter.WriteUint64(uint64(len(meta)), binary.LittleEndian); err != nil { - return nil, err + metaBuf, err := meta.MarshalBinary() + if err != nil { + return nil, fmt.Errorf("failed to marshal metadata: %w", err) } - // write meta entries - for k, v := range meta { - if err := headerWriter.WriteString(k); err != nil { - return nil, err - } - if err := headerWriter.WriteString(v); err != nil { - return nil, err - } + if _, err := headerWriter.Write(metaBuf); err != nil { + return nil, fmt.Errorf("failed to write metadata: %w", err) } } // write num buckets @@ -171,7 +166,7 @@ func overwriteFileContentAt( func seal( out *bufio.Writer, prefixToHashes *prefixToHashes, - meta map[string]string, + meta indexmeta.Meta, ) ([]byte, int64, error) { prefixToOffset := bucketToOffset{} for prefixAsUint16 := range prefixToHashes { diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index f5aa33a7..a6d00967 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -17,6 +17,7 @@ import ( carv1 "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" "github.com/rpcpool/yellowstone-faithful/indexes" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" "k8s.io/klog/v2" @@ -360,8 +361,15 @@ func createAllIndexes( { klog.Infof("Sealing sig_exists index...") - meta := map[string]string{ - "root_cid": rootCID.String(), + meta := indexmeta.Meta{} + if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, epoch); err != nil { + return nil, fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) + } + if err := meta.AddCid(indexmeta.MetadataKey_RootCid, rootCID); err != nil { + return nil, fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) + } + if err := meta.AddString(indexmeta.MetadataKey_Network, string(network)); err != nil { + return nil, fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) } if _, err = sig_exists.Seal(meta); err != nil { return nil, fmt.Errorf("failed to seal sig_exists index: %w", err) diff --git a/cmd-x-index-sig-exists.go b/cmd-x-index-sig-exists.go index 7faf87eb..acc92358 100644 --- a/cmd-x-index-sig-exists.go +++ b/cmd-x-index-sig-exists.go @@ -19,6 +19,8 @@ import ( "github.com/ipfs/go-libipfs/blocks" "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" + "github.com/rpcpool/yellowstone-faithful/indexes" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/rpcpool/yellowstone-faithful/readahead" concurrently "github.com/tejzpr/ordered-concurrently/v3" @@ -28,6 +30,8 @@ import ( func newCmd_Index_sigExists() *cli.Command { var verify bool + var epoch uint64 + var network indexes.Network return &cli.Command{ Name: "sig-exists", Description: "Create sig-exists index from a CAR file", @@ -53,6 +57,24 @@ func newCmd_Index_sigExists() *cli.Command { Usage: "verify the index after creating 
it", Destination: &verify, }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "epoch", + Destination: &epoch, + Required: true, + }, + &cli.StringFlag{ + Name: "network", + Usage: "network", + Destination: (*string)(&network), + Required: true, + Action: func(c *cli.Context, v string) error { + if !indexes.IsValidNetwork(indexes.Network(v)) { + return fmt.Errorf("invalid network: %s", v) + } + return nil + }, + }, }, Action: func(c *cli.Context) error { carPath := c.Args().First() @@ -204,11 +226,17 @@ func newCmd_Index_sigExists() *cli.Command { klog.Info("Sealing index...") sealingStartedAt := time.Now() - _, err = index.Seal( - map[string]string{ - "root_cid": rootCID.String(), - }, - ) + meta := indexmeta.Meta{} + if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, epoch); err != nil { + return fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) + } + if err := meta.AddCid(indexmeta.MetadataKey_RootCid, rootCID); err != nil { + return fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) + } + if err := meta.AddString(indexmeta.MetadataKey_Network, string(network)); err != nil { + return fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) + } + _, err = index.Seal(meta) if err != nil { return fmt.Errorf("error while sealing index: %w", err) } diff --git a/compactindexsized/build.go b/compactindexsized/build.go index 7b4041be..4dfda8f4 100644 --- a/compactindexsized/build.go +++ b/compactindexsized/build.go @@ -16,11 +16,13 @@ import ( "path/filepath" "sort" "syscall" + + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) // Builder creates new compactindex files. type Builder struct { - Header + Header Header tmpDir string headerSize int64 closers []io.Closer @@ -79,6 +81,7 @@ func NewBuilderSized( Header: Header{ ValueSize: uint64(valueSize), NumBuckets: uint32(numBuckets), + Metadata: &indexmeta.Meta{}, }, closers: closers, buckets: buckets, @@ -90,7 +93,7 @@ func NewBuilderSized( // If the kind is already set, it is overwritten. func (b *Builder) SetKind(kind []byte) error { // check if kind is too long - if len(kind) > MaxKeySize { + if len(kind) > indexmeta.MaxKeySize { return fmt.Errorf("kind is too long") } // check if kind is empty @@ -98,21 +101,21 @@ func (b *Builder) SetKind(kind []byte) error { return fmt.Errorf("kind is empty") } // check if kind is already set - if b.Header.Metadata.Count(KeyKind) > 0 { + if b.Header.Metadata.Count(indexmeta.MetadataKey_Kind) > 0 { // remove kind - b.Header.Metadata.Remove(KeyKind) + b.Header.Metadata.Remove(indexmeta.MetadataKey_Kind) } // set kind - b.Header.Metadata.Add(KeyKind, kind) + b.Header.Metadata.Add(indexmeta.MetadataKey_Kind, kind) return nil } -func (b *Builder) Metadata() *Meta { - return &b.Header.Metadata +func (b *Builder) Metadata() *indexmeta.Meta { + return b.Header.Metadata } func (b *Builder) getValueSize() int { - return int(b.ValueSize) + return int(b.Header.ValueSize) } // Insert writes a key-value mapping to the index. @@ -147,7 +150,7 @@ func (b *Builder) Seal(ctx context.Context, file *os.File) (err error) { } b.headerSize = headerSize // Create hole to leave space for bucket header table. 
- bucketTableLen := int64(b.NumBuckets) * bucketHdrLen + bucketTableLen := int64(b.Header.NumBuckets) * bucketHdrLen err = fallocate(file, headerSize, bucketTableLen) if errors.Is(err, syscall.EOPNOTSUPP) { // The underlying file system may not support fallocate diff --git a/compactindexsized/build36_test.go b/compactindexsized/build36_test.go index d5088529..56e43257 100644 --- a/compactindexsized/build36_test.go +++ b/compactindexsized/build36_test.go @@ -20,6 +20,7 @@ import ( "time" "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vbauerster/mpb/v8/decor" @@ -307,19 +308,21 @@ func TestBuilder36(t *testing.T) { db, err := Open(targetFile) require.NoError(t, err, "Failed to open generated index") require.NotNil(t, db) + require.NotNil(t, db.Header) + require.NotNil(t, db.Header.Metadata) got, ok := db.GetKind() require.True(t, ok) assert.Equal(t, kindSomething, got) // File header assertions. - assert.Equal(t, Header{ + assert.Equal(t, &Header{ ValueSize: valueSize, NumBuckets: numBuckets, - Metadata: Meta{ - KeyVals: []KV{ + Metadata: &indexmeta.Meta{ + KeyVals: []indexmeta.KV{ { - Key: KeyKind, + Key: indexmeta.MetadataKey_Kind, Value: kindSomething, }, { diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go index ca10f847..c5b6a042 100644 --- a/compactindexsized/build48_test.go +++ b/compactindexsized/build48_test.go @@ -16,6 +16,7 @@ import ( "testing" "time" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vbauerster/mpb/v8/decor" @@ -276,13 +277,13 @@ func TestBuilder48(t *testing.T) { require.NotNil(t, db) // File header assertions. - assert.Equal(t, Header{ + assert.Equal(t, &Header{ ValueSize: valueSize, NumBuckets: numBuckets, - Metadata: Meta{ - KeyVals: []KV{ + Metadata: &indexmeta.Meta{ + KeyVals: []indexmeta.KV{ { - Key: KeyKind, + Key: indexmeta.MetadataKey_Kind, Value: kindSomething48, }, }, diff --git a/compactindexsized/build8_test.go b/compactindexsized/build8_test.go index 1f0dde67..12c87e3d 100644 --- a/compactindexsized/build8_test.go +++ b/compactindexsized/build8_test.go @@ -11,6 +11,7 @@ import ( "testing" "time" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/vbauerster/mpb/v8/decor" @@ -139,9 +140,10 @@ func TestBuilder8(t *testing.T) { require.NotNil(t, db) // File header assertions. - assert.Equal(t, Header{ + assert.Equal(t, &Header{ ValueSize: valueSize, NumBuckets: numBuckets, + Metadata: &indexmeta.Meta{}, }, db.Header) // Get bucket handles. diff --git a/compactindexsized/compactindex.go b/compactindexsized/compactindex.go index c9a4d709..22be1581 100644 --- a/compactindexsized/compactindex.go +++ b/compactindexsized/compactindex.go @@ -92,6 +92,7 @@ import ( "sort" "github.com/cespare/xxhash/v2" + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) // Magic are the first eight bytes of an index. @@ -103,7 +104,7 @@ const Version = uint8(1) type Header struct { ValueSize uint64 NumBuckets uint32 - Metadata Meta + Metadata *indexmeta.Meta } // Load checks the Magic sequence and loads the header fields. 
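Because the tags now live in the file header, a reader can validate them right after opening the index and before touching any bucket. An illustrative read-side check (stream and wantEpoch are assumed to exist in the surrounding code):

db, err := compactindexsized.Open(stream)
if err != nil {
	return err
}
// The epoch tag was written by the builder via Metadata().Add.
gotEpoch, ok := db.Header.Metadata.GetUint64(indexmeta.MetadataKey_Epoch)
if !ok || gotEpoch != wantEpoch {
	return fmt.Errorf("index epoch mismatch: got %d, want %d", gotEpoch, wantEpoch)
}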
@@ -124,6 +125,7 @@ func (h *Header) Load(buf []byte) error { *h = Header{ ValueSize: binary.LittleEndian.Uint64(buf[12:20]), NumBuckets: binary.LittleEndian.Uint32(buf[20:24]), + Metadata: new(indexmeta.Meta), } // Check version. if buf[24] != Version { @@ -152,6 +154,9 @@ func (h *Header) Bytes() []byte { // version buf.WriteByte(Version) // key-value pairs + if h.Metadata == nil { + h.Metadata = new(indexmeta.Meta) + } kvb := h.Metadata.Bytes() buf.Write(kvb) } diff --git a/compactindexsized/header_test.go b/compactindexsized/header_test.go index c85d755e..6a65ca61 100644 --- a/compactindexsized/header_test.go +++ b/compactindexsized/header_test.go @@ -6,58 +6,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestHeaderMeta(t *testing.T) { - require.Equal(t, (255), MaxKeySize) - require.Equal(t, (255), MaxValueSize) - require.Equal(t, (255), MaxNumKVs) - - var meta Meta - require.NoError(t, meta.Add([]byte("foo"), []byte("bar"))) - require.NoError(t, meta.Add([]byte("foo"), []byte("baz"))) - - require.Equal(t, 2, meta.Count([]byte("foo"))) - - got, ok := meta.Get([]byte("foo")) - require.True(t, ok) - require.Equal(t, []byte("bar"), got) - - require.Equal(t, [][]byte{[]byte("bar"), []byte("baz")}, meta.GetAll([]byte("foo"))) - - require.Equal(t, [][]byte(nil), meta.GetAll([]byte("bar"))) - - got, ok = meta.Get([]byte("bar")) - require.False(t, ok) - require.Equal(t, []byte(nil), got) - - require.Equal(t, 0, meta.Count([]byte("bar"))) - - encoded, err := meta.MarshalBinary() - require.NoError(t, err) - { - mustBeEncoded := concatBytes( - []byte{2}, // number of key-value pairs - - []byte{3}, // length of key - []byte("foo"), // key - - []byte{3}, // length of value - []byte("bar"), // value - - []byte{3}, // length of key - []byte("foo"), // key - - []byte{3}, // length of value - []byte("baz"), // value - ) - require.Equal(t, mustBeEncoded, encoded) - } - - var decoded Meta - require.NoError(t, decoded.UnmarshalBinary(encoded)) - - require.Equal(t, meta, decoded) -} - func TestHeader(t *testing.T) { var header Header diff --git a/compactindexsized/query.go b/compactindexsized/query.go index a9a3b4ee..4130f9fb 100644 --- a/compactindexsized/query.go +++ b/compactindexsized/query.go @@ -11,11 +11,13 @@ import ( "errors" "fmt" "io" + + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) // DB is a compactindex handle. type DB struct { - Header + Header *Header headerSize int64 Stream io.ReaderAt prefetch bool @@ -48,6 +50,8 @@ func Open(stream io.ReaderAt) (*DB, error) { return nil, readErr } db := new(DB) + db.Header = new(Header) + db.Header.Metadata = new(indexmeta.Meta) if err := db.Header.Load(fileHeaderBuf); err != nil { return nil, err } @@ -62,12 +66,12 @@ func (db *DB) Prefetch(yes bool) { // GetKind returns the kind of the index. func (db *DB) GetKind() ([]byte, bool) { - return db.Header.Metadata.Get(KeyKind) + return db.Header.Metadata.Get(indexmeta.MetadataKey_Kind) } // KindIs returns whether the index is of the given kind. 
func (db *DB) KindIs(kind []byte) bool { - got, ok := db.Header.Metadata.Get(KeyKind) + got, ok := db.Header.Metadata.Get(indexmeta.MetadataKey_Kind) return ok && bytes.Equal(got, kind) } diff --git a/compactindexsized/query_test.go b/compactindexsized/query_test.go index f39e7d20..b5efdf7b 100644 --- a/compactindexsized/query_test.go +++ b/compactindexsized/query_test.go @@ -6,6 +6,7 @@ import ( "math/rand" "testing" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -75,11 +76,11 @@ func TestOpen_HeaderOnly(t *testing.T) { require.NotNil(t, db) assert.NotNil(t, db.Stream) - assert.Equal(t, Header{ + assert.Equal(t, &Header{ ValueSize: 0x1337, NumBuckets: 0x42, - Metadata: Meta{ - KeyVals: []KV{ + Metadata: &indexmeta.Meta{ + KeyVals: []indexmeta.KV{ { Key: []byte("foo"), Value: []byte("bar"), diff --git a/epoch.go b/epoch.go index cc3fd832..0639e290 100644 --- a/epoch.go +++ b/epoch.go @@ -90,6 +90,7 @@ func NewEpochFromConfig( onClose: make([]func() error, 0), allCache: allCache, } + var lastRootCid cid.Cid if isCarMode { // The CAR-mode requires a cid-to-offset index. @@ -111,6 +112,11 @@ func NewEpochFromConfig( cidToOffsetIndex.Prefetch(true) } ep.cidToOffsetAndSizeIndex = cidToOffsetIndex + + if ep.Epoch() != cidToOffsetIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in cid-to-offset-and-size index: expected %d, got %d", ep.Epoch(), cidToOffsetIndex.Meta().Epoch) + } + lastRootCid = cidToOffsetIndex.Meta().RootCid } { @@ -132,6 +138,14 @@ func NewEpochFromConfig( slotToCidIndex.Prefetch(true) } ep.slotToCidIndex = slotToCidIndex + + if ep.Epoch() != slotToCidIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in slot-to-cid index: expected %d, got %d", ep.Epoch(), slotToCidIndex.Meta().Epoch) + } + if lastRootCid != cid.Undef && !lastRootCid.Equals(slotToCidIndex.Meta().RootCid) { + return nil, fmt.Errorf("root CID mismatch in slot-to-cid index: expected %s, got %s", lastRootCid, slotToCidIndex.Meta().RootCid) + } + lastRootCid = slotToCidIndex.Meta().RootCid } { @@ -153,6 +167,14 @@ func NewEpochFromConfig( sigToCidIndex.Prefetch(true) } ep.sigToCidIndex = sigToCidIndex + + if ep.Epoch() != sigToCidIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in sig-to-cid index: expected %d, got %d", ep.Epoch(), sigToCidIndex.Meta().Epoch) + } + + if !lastRootCid.Equals(sigToCidIndex.Meta().RootCid) { + return nil, fmt.Errorf("root CID mismatch in sig-to-cid index: expected %s, got %s", lastRootCid, sigToCidIndex.Meta().RootCid) + } } { @@ -163,6 +185,8 @@ func NewEpochFromConfig( } ep.onClose = append(ep.onClose, gsfaIndex.Close) ep.gsfaReader = gsfaIndex + + // TODO: check epoch and root CID } } @@ -176,6 +200,11 @@ func NewEpochFromConfig( return nil, fmt.Errorf("newLassieWrapper: %w", err) } ep.lassieFetcher = ls + + if !lastRootCid.Equals(config.Data.Filecoin.RootCID) { + return nil, fmt.Errorf("root CID mismatch in lassie: expected %s, got %s", lastRootCid, config.Data.Filecoin.RootCID) + } + // TODO: check epoch. 
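Each index now carries its epoch and root CID, and the constructor threads lastRootCid through so that every opened index must agree with the ones before it. The repeated comparison could be factored into a small helper like this (hypothetical, not part of the patch):

func checkIndexMeta(name string, wantEpoch uint64, lastRoot cid.Cid, meta *indexes.Metadata) error {
	if meta.Epoch != wantEpoch {
		return fmt.Errorf("epoch mismatch in %s index: expected %d, got %d", name, wantEpoch, meta.Epoch)
	}
	// cid.Undef means no previous index has pinned the root yet.
	if lastRoot != cid.Undef && !lastRoot.Equals(meta.RootCid) {
		return fmt.Errorf("root CID mismatch in %s index: expected %s, got %s", name, lastRoot, meta.RootCid)
	}
	return nil
}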
} if isCarMode { @@ -204,6 +233,7 @@ func NewEpochFromConfig( } ep.remoteCarHeaderSize = uint64(n) + headerSize } + } { sigExistsFile, err := openIndexStorage( @@ -230,6 +260,7 @@ func NewEpochFromConfig( } ep.sigExists = sigExists + // TODO: check epoch and root CID } return ep, nil diff --git a/index-sig-to-cid.go b/index-sig-to-cid.go index 7bf6eb50..32fa36a6 100644 --- a/index-sig-to-cid.go +++ b/index-sig-to-cid.go @@ -14,6 +14,7 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/rpcpool/yellowstone-faithful/bucketteer" "github.com/rpcpool/yellowstone-faithful/indexes" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "k8s.io/klog/v2" ) @@ -242,13 +243,9 @@ func VerifyIndex_sigExists(ctx context.Context, carPath string, indexFilePath st // check root_cid matches rootCID := roots[0] - storedRootCidString := sigExists.GetMeta("root_cid") - if storedRootCidString == "" { - return fmt.Errorf("index file does not have a root_cid meta") - } - storedRootCid, err := cid.Parse(storedRootCidString) - if err != nil { - return fmt.Errorf("failed to parse stored root cid: %w", err) + storedRootCid, ok := sigExists.Meta().GetCid(indexmeta.MetadataKey_RootCid) + if !ok { + return fmt.Errorf("index file does not have a root cid meta") } if !rootCID.Equals(storedRootCid) { return fmt.Errorf("root CID mismatch: expected %s, got %s", rootCID, storedRootCid) diff --git a/indexes/metadata.go b/indexes/metadata.go index f5cb1a44..16fd4459 100644 --- a/indexes/metadata.go +++ b/indexes/metadata.go @@ -6,6 +6,7 @@ import ( "github.com/ipfs/go-cid" "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) type Metadata struct { @@ -47,12 +48,6 @@ func (m *Metadata) AssertIndexKind(x []byte) error { return nil } -var ( - MetadataKey_Epoch = []byte("epoch") - MetadataKey_RootCid = []byte("rootCid") - MetadataKey_Network = []byte("network") -) - func setDefaultMetadata(index *compactindexsized.Builder, metadata *Metadata) error { if index == nil { return fmt.Errorf("index is nil") @@ -62,51 +57,51 @@ func setDefaultMetadata(index *compactindexsized.Builder, metadata *Metadata) er } setter := index.Metadata() - if err := setter.Add(MetadataKey_Epoch, uint64tob(metadata.Epoch)); err != nil { + if err := setter.Add(indexmeta.MetadataKey_Epoch, uint64tob(metadata.Epoch)); err != nil { return err } if metadata.RootCid == cid.Undef { return fmt.Errorf("root cid is undefined") } - if err := setter.Add(MetadataKey_RootCid, metadata.RootCid.Bytes()); err != nil { + if err := setter.Add(indexmeta.MetadataKey_RootCid, metadata.RootCid.Bytes()); err != nil { return err } if !IsValidNetwork(metadata.Network) { return fmt.Errorf("invalid network") } - if err := setter.Add(MetadataKey_Network, []byte(metadata.Network)); err != nil { + if err := setter.Add(indexmeta.MetadataKey_Network, []byte(metadata.Network)); err != nil { return err } if len(metadata.IndexKind) == 0 { return fmt.Errorf("index kind is empty") } - return setter.Add(compactindexsized.KeyKind, metadata.IndexKind) + return setter.Add(indexmeta.MetadataKey_Kind, metadata.IndexKind) } // getDefaultMetadata gets and validates the metadata from the index. // Will return an error if some of the metadata is missing. 
func getDefaultMetadata(index *compactindexsized.DB) (*Metadata, error) { out := &Metadata{} - meta := index.Metadata + meta := index.Header.Metadata - indexKind, ok := meta.Get(compactindexsized.KeyKind) + indexKind, ok := meta.Get(indexmeta.MetadataKey_Kind) if ok { out.IndexKind = indexKind } else { return nil, fmt.Errorf("metadata.kind is empty (index kind)") } - epochBytes, ok := meta.Get(MetadataKey_Epoch) + epochBytes, ok := meta.Get(indexmeta.MetadataKey_Epoch) if ok { out.Epoch = btoUint64(epochBytes) } else { return nil, fmt.Errorf("metadata.epoch is empty") } - rootCidBytes, ok := meta.Get(MetadataKey_RootCid) + rootCidBytes, ok := meta.Get(indexmeta.MetadataKey_RootCid) if ok { var err error out.RootCid, err = cid.Cast(rootCidBytes) @@ -117,7 +112,7 @@ func getDefaultMetadata(index *compactindexsized.DB) (*Metadata, error) { return nil, fmt.Errorf("metadata.rootCid is empty") } - networkBytes, ok := meta.Get(MetadataKey_Network) + networkBytes, ok := meta.Get(indexmeta.MetadataKey_Network) if ok { out.Network = Network(networkBytes) } else { diff --git a/compactindexsized/header.go b/indexmeta/indexmeta.go similarity index 71% rename from compactindexsized/header.go rename to indexmeta/indexmeta.go index 3185f24f..11add2af 100644 --- a/compactindexsized/header.go +++ b/indexmeta/indexmeta.go @@ -1,9 +1,19 @@ -package compactindexsized +package indexmeta import ( "bytes" + "encoding/binary" "fmt" "io" + + bin "github.com/gagliardetto/binary" + "github.com/ipfs/go-cid" +) + +const ( + MaxNumKVs = 255 + MaxKeySize = 255 + MaxValueSize = 255 ) type Meta struct { @@ -19,7 +29,7 @@ func (m *Meta) Bytes() []byte { return b } -func (m *Meta) MarshalBinary() ([]byte, error) { +func (m Meta) MarshalBinary() ([]byte, error) { var buf bytes.Buffer if len(m.KeyVals) > MaxNumKVs { return nil, fmt.Errorf("number of key-value pairs %d exceeds max %d", len(m.KeyVals), MaxNumKVs) @@ -46,17 +56,19 @@ func (m *Meta) MarshalBinary() ([]byte, error) { return buf.Bytes(), nil } -func (m *Meta) UnmarshalBinary(b []byte) error { - if len(b) == 0 { +func (m *Meta) UnmarshalWithDecoder(decoder *bin.Decoder) error { + if !decoder.HasRemaining() { return nil } - numKVs := int(b[0]) + numKVs, err := decoder.ReadByte() + if err != nil { + return fmt.Errorf("failed to read number of key-value pairs: %w", err) + } if numKVs > MaxNumKVs { return fmt.Errorf("number of key-value pairs %d exceeds max %d", numKVs, MaxNumKVs) } - b = b[1:] - reader := bytes.NewReader(b) - for i := 0; i < numKVs; i++ { + reader := decoder + for i := 0; i < int(numKVs); i++ { var kv KV { keyLen, err := reader.ReadByte() @@ -83,11 +95,13 @@ func (m *Meta) UnmarshalBinary(b []byte) error { return nil } -const ( - MaxNumKVs = 255 - MaxKeySize = 255 - MaxValueSize = 255 -) +func (m *Meta) UnmarshalBinary(b []byte) error { + if len(b) == 0 { + return nil + } + decoder := bin.NewBorshDecoder(b) + return m.UnmarshalWithDecoder(decoder) +} // Add adds a key-value pair to the metadata. 
func (m *Meta) Add(key, value []byte) error { @@ -104,6 +118,56 @@ func (m *Meta) Add(key, value []byte) error { return nil } +func (m *Meta) AddCid(key []byte, value cid.Cid) error { + return m.Add(key, value.Bytes()) +} + +func (m Meta) GetCid(key []byte) (cid.Cid, bool) { + value, ok := m.Get(key) + if !ok { + return cid.Undef, false + } + _, _cid, err := cid.CidFromBytes(value) + if err != nil { + return cid.Undef, false + } + return _cid, true +} + +func (m *Meta) AddString(key []byte, value string) error { + return m.Add(key, []byte(value)) +} + +func (m Meta) GetString(key []byte) (string, bool) { + value, ok := m.Get(key) + if !ok { + return "", false + } + return string(value), true +} + +func (m *Meta) AddUint64(key []byte, value uint64) error { + return m.Add(key, encodeUint64(value)) +} + +func (m Meta) GetUint64(key []byte) (uint64, bool) { + value, ok := m.Get(key) + if !ok { + return 0, false + } + return decodeUint64(value), true +} + +func encodeUint64(value uint64) []byte { + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, value) + return buf +} + +func decodeUint64(buf []byte) uint64 { + return binary.LittleEndian.Uint64(buf) +} + // Replace replaces the first value for the given key. func (m *Meta) Replace(key, value []byte) error { if len(m.KeyVals) >= MaxNumKVs { @@ -125,7 +189,7 @@ func (m *Meta) Replace(key, value []byte) error { } // Get returns the first value for the given key. -func (m *Meta) Get(key []byte) ([]byte, bool) { +func (m Meta) Get(key []byte) ([]byte, bool) { for _, kv := range m.KeyVals { if bytes.Equal(kv.Key, key) { return kv.Value, true @@ -136,7 +200,7 @@ func (m *Meta) Get(key []byte) ([]byte, bool) { // ReadFirst copies the first value for the given key into the given value. // It returns the number of bytes copied. -func (m *Meta) ReadFirst(key []byte, valueDst []byte) int { +func (m Meta) ReadFirst(key []byte, valueDst []byte) int { for _, kv := range m.KeyVals { if bytes.Equal(kv.Key, key) { return copy(valueDst, kv.Value) @@ -146,7 +210,7 @@ func (m *Meta) ReadFirst(key []byte, valueDst []byte) int { } // HasDuplicateKeys returns true if there are duplicate keys. -func (m *Meta) HasDuplicateKeys() bool { +func (m Meta) HasDuplicateKeys() bool { seen := make(map[string]struct{}) for _, kv := range m.KeyVals { if _, ok := seen[string(kv.Key)]; ok { @@ -168,7 +232,7 @@ func (m *Meta) Remove(key []byte) { } // GetAll returns all values for the given key. 
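The typed helpers above wrap Add/Get with a fixed encoding per value kind (little-endian for uint64, raw bytes for CIDs and strings), so callers never handle the byte layout themselves. A round-trip sketch using only functions from this file (the epoch number is arbitrary):

var meta indexmeta.Meta
if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, 455); err != nil {
	panic(err)
}
if epoch, ok := meta.GetUint64(indexmeta.MetadataKey_Epoch); ok {
	fmt.Println("epoch:", epoch) // prints 455, decoded from 8 little-endian bytes
}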
-func (m *Meta) GetAll(key []byte) [][]byte { +func (m Meta) GetAll(key []byte) [][]byte { var values [][]byte for _, kv := range m.KeyVals { if bytes.Equal(kv.Key, key) { @@ -197,5 +261,3 @@ type KV struct { func NewKV(key, value []byte) KV { return KV{Key: key, Value: value} } - -var KeyKind = []byte{'k', 'i', 'n', 'd'} diff --git a/indexmeta/indexmeta_test.go b/indexmeta/indexmeta_test.go new file mode 100644 index 00000000..5353c004 --- /dev/null +++ b/indexmeta/indexmeta_test.go @@ -0,0 +1,68 @@ +package indexmeta_test + +import ( + "testing" + + "github.com/rpcpool/yellowstone-faithful/indexmeta" + "github.com/stretchr/testify/require" +) + +func TestHeaderMeta(t *testing.T) { + require.Equal(t, (255), indexmeta.MaxKeySize) + require.Equal(t, (255), indexmeta.MaxValueSize) + require.Equal(t, (255), indexmeta.MaxNumKVs) + + var meta indexmeta.Meta + require.NoError(t, meta.Add([]byte("foo"), []byte("bar"))) + require.NoError(t, meta.Add([]byte("foo"), []byte("baz"))) + + require.Equal(t, 2, meta.Count([]byte("foo"))) + + got, ok := meta.Get([]byte("foo")) + require.True(t, ok) + require.Equal(t, []byte("bar"), got) + + require.Equal(t, [][]byte{[]byte("bar"), []byte("baz")}, meta.GetAll([]byte("foo"))) + + require.Equal(t, [][]byte(nil), meta.GetAll([]byte("bar"))) + + got, ok = meta.Get([]byte("bar")) + require.False(t, ok) + require.Equal(t, []byte(nil), got) + + require.Equal(t, 0, meta.Count([]byte("bar"))) + + encoded, err := meta.MarshalBinary() + require.NoError(t, err) + { + mustBeEncoded := concatBytes( + []byte{2}, // number of key-value pairs + + []byte{3}, // length of key + []byte("foo"), // key + + []byte{3}, // length of value + []byte("bar"), // value + + []byte{3}, // length of key + []byte("foo"), // key + + []byte{3}, // length of value + []byte("baz"), // value + ) + require.Equal(t, mustBeEncoded, encoded) + } + + var decoded indexmeta.Meta + require.NoError(t, decoded.UnmarshalBinary(encoded)) + + require.Equal(t, meta, decoded) +} + +func concatBytes(bs ...[]byte) []byte { + var out []byte + for _, b := range bs { + out = append(out, b...) 
+ }
+ return out
+}
diff --git a/indexmeta/keys.go b/indexmeta/keys.go
new file mode 100644
index 00000000..a0116773
--- /dev/null
+++ b/indexmeta/keys.go
@@ -0,0 +1,8 @@
+package indexmeta
+
+var MetadataKey_Kind = []byte{'k', 'i', 'n', 'd'}
+var (
+ MetadataKey_Epoch = []byte("epoch")
+ MetadataKey_RootCid = []byte("rootCid")
+ MetadataKey_Network = []byte("network")
+)

From 93ac5dc91e594a1541400d95d50c8cd3dbadeb16 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Tue, 5 Dec 2023 14:01:28 +0100
Subject: [PATCH 24/63] Add index meta to gsfa and sigexists

---
 cmd-x-index-gsfa.go | 33 ++++++++++++++++++++
 epoch.go | 36 +++++++++++++++++++--
 gsfa/gsfa-read.go | 7 ++++-
 gsfa/gsfa-write.go | 4 ++-
 gsfa/manifest/manifest.go | 57 +++++++++++++++++++++++++++-------
 gsfa/manifest/manifest_test.go | 11 +++++--
 gsfa/worker.go | 3 ++
 indexmeta/indexmeta.go | 10 +++---
 8 files changed, 139 insertions(+), 22 deletions(-)

diff --git a/cmd-x-index-gsfa.go b/cmd-x-index-gsfa.go
index e538c2f3..cf6afb0f 100644
--- a/cmd-x-index-gsfa.go
+++ b/cmd-x-index-gsfa.go
@@ -19,6 +19,8 @@ import (
 "github.com/ipfs/go-libipfs/blocks"
 "github.com/ipld/go-car"
 "github.com/rpcpool/yellowstone-faithful/gsfa"
+ "github.com/rpcpool/yellowstone-faithful/indexes"
+ "github.com/rpcpool/yellowstone-faithful/indexmeta"
 "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
 "github.com/rpcpool/yellowstone-faithful/iplddecoders"
 "github.com/rpcpool/yellowstone-faithful/readahead"
@@ -28,6 +30,8 @@ import (
 )

 func newCmd_Index_gsfa() *cli.Command {
+ var epoch uint64
+ var network indexes.Network
 return &cli.Command{
 Name: "gsfa",
 Description: "Create GSFA index from a CAR file",
@@ -53,6 +57,24 @@ func newCmd_Index_gsfa() *cli.Command {
 Usage: "number of workers",
 Value: uint(runtime.NumCPU()) * 3,
 },
+ &cli.Uint64Flag{
+ Name: "epoch",
+ Usage: "epoch",
+ Destination: &epoch,
+ Required: true,
+ },
+ &cli.StringFlag{
+ Name: "network",
+ Usage: "network",
+ Destination: (*string)(&network),
+ Required: true,
+ Action: func(c *cli.Context, v string) error {
+ if !indexes.IsValidNetwork(indexes.Network(v)) {
+ return fmt.Errorf("invalid network: %s", v)
+ }
+ return nil
+ },
+ },
 },
 Action: func(c *cli.Context) error {
 carPath := c.Args().First()
@@ -112,9 +134,20 @@ func newCmd_Index_gsfa() *cli.Command {
 }
 klog.Infof("Will flush to index every %s transactions", humanize.Comma(int64(flushEvery)))
+ meta := indexmeta.Meta{}
+ if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, epoch); err != nil {
+ return fmt.Errorf("failed to add epoch to gsfa index metadata: %w", err)
+ }
+ if err := meta.AddCid(indexmeta.MetadataKey_RootCid, rootCID); err != nil {
+ return fmt.Errorf("failed to add root cid to gsfa index metadata: %w", err)
+ }
+ if err := meta.AddString(indexmeta.MetadataKey_Network, string(network)); err != nil {
+ return fmt.Errorf("failed to add network to gsfa index metadata: %w", err)
+ }
 accu, err := gsfa.NewGsfaWriter(
 gsfaIndexDir,
 flushEvery,
+ meta,
 )
 if err != nil {
 return fmt.Errorf("error while opening gsfa index writer: %w", err)
diff --git a/epoch.go b/epoch.go
index 0639e290..50dc4c43 100644
--- a/epoch.go
+++ b/epoch.go
@@ -20,6 +20,7 @@ import (
 "github.com/rpcpool/yellowstone-faithful/gsfa"
 hugecache "github.com/rpcpool/yellowstone-faithful/huge-cache"
 "github.com/rpcpool/yellowstone-faithful/indexes"
+ "github.com/rpcpool/yellowstone-faithful/indexmeta"
 "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode"
 "github.com/rpcpool/yellowstone-faithful/iplddecoders"
"github.com/urfave/cli/v2" @@ -186,7 +187,21 @@ func NewEpochFromConfig( ep.onClose = append(ep.onClose, gsfaIndex.Close) ep.gsfaReader = gsfaIndex - // TODO: check epoch and root CID + gotIndexEpoch, ok := gsfaIndex.Meta().GetUint64(indexmeta.MetadataKey_Epoch) + if !ok { + return nil, fmt.Errorf("the gsfa index does not have the epoch metadata") + } + if ep.Epoch() != gotIndexEpoch { + return nil, fmt.Errorf("epoch mismatch in gsfa index: expected %d, got %d", ep.Epoch(), gotIndexEpoch) + } + + gotRootCid, ok := gsfaIndex.Meta().GetCid(indexmeta.MetadataKey_RootCid) + if !ok { + return nil, fmt.Errorf("the gsfa index does not have the root CID metadata") + } + if !lastRootCid.Equals(gotRootCid) { + return nil, fmt.Errorf("root CID mismatch in gsfa index: expected %s, got %s", lastRootCid, gotRootCid) + } } } @@ -233,7 +248,6 @@ func NewEpochFromConfig( } ep.remoteCarHeaderSize = uint64(n) + headerSize } - } { sigExistsFile, err := openIndexStorage( @@ -260,7 +274,23 @@ func NewEpochFromConfig( } ep.sigExists = sigExists - // TODO: check epoch and root CID + + gotEpoch, ok := sigExists.Meta().GetUint64(indexmeta.MetadataKey_Epoch) + if !ok { + return nil, fmt.Errorf("the sig-exists index does not have the epoch metadata") + } + if ep.Epoch() != gotEpoch { + return nil, fmt.Errorf("epoch mismatch in sig-exists index: expected %d, got %d", ep.Epoch(), gotEpoch) + } + + gotRootCid, ok := sigExists.Meta().GetCid(indexmeta.MetadataKey_RootCid) + if !ok { + return nil, fmt.Errorf("the sig-exists index does not have the root CID metadata") + } + + if !lastRootCid.Equals(gotRootCid) { + return nil, fmt.Errorf("root CID mismatch in sig-exists index: expected %s, got %s", lastRootCid, gotRootCid) + } } return ep, nil diff --git a/gsfa/gsfa-read.go b/gsfa/gsfa-read.go index 61c18dfa..ebf009cd 100644 --- a/gsfa/gsfa-read.go +++ b/gsfa/gsfa-read.go @@ -12,6 +12,7 @@ import ( "github.com/rpcpool/yellowstone-faithful/gsfa/manifest" "github.com/rpcpool/yellowstone-faithful/gsfa/offsetstore" "github.com/rpcpool/yellowstone-faithful/gsfa/sff" + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) type GsfaReader struct { @@ -67,7 +68,7 @@ func NewGsfaReader(indexRootDir string) (*GsfaReader, error) { index.sff = sff } { - man, err := manifest.NewManifest(filepath.Join(indexRootDir, "manifest")) + man, err := manifest.NewManifest(filepath.Join(indexRootDir, "manifest"), indexmeta.Meta{}) if err != nil { return nil, err } @@ -95,6 +96,10 @@ func (index *GsfaReader) Close() error { ) } +func (index *GsfaReader) Meta() indexmeta.Meta { + return index.man.Meta() +} + func (index *GsfaReader) Get( ctx context.Context, pk solana.PublicKey, diff --git a/gsfa/gsfa-write.go b/gsfa/gsfa-write.go index d5928a6b..e0d477fe 100644 --- a/gsfa/gsfa-write.go +++ b/gsfa/gsfa-write.go @@ -14,6 +14,7 @@ import ( "github.com/rpcpool/yellowstone-faithful/gsfa/manifest" "github.com/rpcpool/yellowstone-faithful/gsfa/offsetstore" "github.com/rpcpool/yellowstone-faithful/gsfa/sff" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/store" "k8s.io/klog" ) @@ -40,6 +41,7 @@ var offsetstoreOptions = []store.Option{ func NewGsfaWriter( indexRootDir string, flushEveryXSigs uint64, + meta indexmeta.Meta, ) (*GsfaWriter, error) { // if exists and is dir, open. // if exists and is not dir, error. 
@@ -94,7 +96,7 @@ func NewGsfaWriter( index.sff = sff } { - man, err := manifest.NewManifest(filepath.Join(indexRootDir, "manifest")) + man, err := manifest.NewManifest(filepath.Join(indexRootDir, "manifest"), meta) if err != nil { return nil, err } diff --git a/gsfa/manifest/manifest.go b/gsfa/manifest/manifest.go index 4f6a7799..91689ccb 100644 --- a/gsfa/manifest/manifest.go +++ b/gsfa/manifest/manifest.go @@ -1,12 +1,15 @@ package manifest import ( + "bufio" "encoding/binary" "errors" "fmt" "io" "os" "sync" + + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) type Manifest struct { @@ -20,10 +23,12 @@ var ( _Version = uint64(1) ) -var headerLen = len(_MAGIC) + 8 // 8 bytes for the version +var headerLenWithoutMeta = len(_MAGIC) + 8 // 8 bytes for the version type Header struct { - version uint64 + version uint64 + metaByteSize int64 + meta indexmeta.Meta } // Version returns the version of the manifest. @@ -31,6 +36,11 @@ func (h *Header) Version() uint64 { return h.version } +// Meta returns the metadata of the manifest. +func (h *Header) Meta() indexmeta.Meta { + return h.meta +} + func readHeader(file *os.File) (*Header, error) { // seek to the beginning of the file _, err := file.Seek(0, io.SeekStart) @@ -50,12 +60,20 @@ func readHeader(file *os.File) (*Header, error) { if err != nil { return nil, err } + var meta indexmeta.Meta + err = meta.UnmarshalWithDecoder(bufio.NewReader(file)) + if err != nil { + return nil, err + } + metaByteSize := len(meta.Bytes()) return &Header{ - version: version, + version: version, + metaByteSize: int64(metaByteSize), + meta: meta, }, nil } -func writeHeader(file *os.File) error { +func writeHeader(file *os.File, meta indexmeta.Meta, version uint64) error { _, err := file.Seek(0, io.SeekStart) if err != nil { return err @@ -64,7 +82,12 @@ func writeHeader(file *os.File) error { if err != nil { return err } - err = binary.Write(file, binary.LittleEndian, _Version) + err = binary.Write(file, binary.LittleEndian, version) + if err != nil { + return err + } + metaBytes := meta.Bytes() + _, err = file.Write(metaBytes) if err != nil { return err } @@ -72,7 +95,7 @@ func writeHeader(file *os.File) error { } // NewManifest creates a new manifest or opens an existing one. -func NewManifest(filename string) (*Manifest, error) { +func NewManifest(filename string, meta indexmeta.Meta) (*Manifest, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR, 0o644) if err != nil { return nil, err @@ -85,10 +108,15 @@ func NewManifest(filename string) (*Manifest, error) { return nil, err } if currentFileSize == 0 { - err = writeHeader(file) + err = writeHeader(file, meta, _Version) if err != nil { return nil, err } + man.header = &Header{ + version: _Version, + metaByteSize: int64(len(meta.Bytes())), + meta: meta, + } } else { header, err := readHeader(file) if err != nil { @@ -104,7 +132,8 @@ func NewManifest(filename string) (*Manifest, error) { if err != nil { return nil, err } - if currentFileSize > 0 && (currentFileSize-int64(headerLen))%16 != 0 { + dataSizeWithoutHeaderAndMeta := currentFileSize - int64(headerLenWithoutMeta) - man.header.metaByteSize + if currentFileSize > 0 && (dataSizeWithoutHeaderAndMeta)%16 != 0 { return nil, fmt.Errorf("manifest is corrupt: size=%d", currentFileSize) } return man, nil @@ -121,9 +150,15 @@ func (m *Manifest) close() (err error) { if err != nil { return err } + m.file = nil + m.header = nil return } +func (m *Manifest) Meta() indexmeta.Meta { + return m.header.meta +} + // Flush flushes the cache to disk. 
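// It acquires the manifest mutex for the duration of the call.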
func (m *Manifest) Flush() error { m.mu.Lock() @@ -154,7 +189,7 @@ func (m *Manifest) getContentLength() (int64, error) { if err != nil { return 0, err } - return currentFileSize - int64(headerLen), nil + return currentFileSize - int64(headerLenWithoutMeta) - m.header.metaByteSize, nil } // Put appends the given uint64 tuple to the file. @@ -162,7 +197,7 @@ func (m *Manifest) Put(key, value uint64) error { m.mu.Lock() defer m.mu.Unlock() if m.header == nil { - err := writeHeader(m.file) + err := writeHeader(m.file, indexmeta.Meta{}, _Version) if err != nil { return err } @@ -201,7 +236,7 @@ func (m *Manifest) getContentReader() (io.Reader, int64, error) { if err != nil { return nil, -1, err } - return io.NewSectionReader(m.file, int64(headerLen), currentContentSize), currentContentSize, nil + return io.NewSectionReader(m.file, int64(headerLenWithoutMeta)+m.header.metaByteSize, currentContentSize), currentContentSize, nil } // readAllContent reads all the uint64 tuples from the file. diff --git a/gsfa/manifest/manifest_test.go b/gsfa/manifest/manifest_test.go index 624290c4..e0b866c8 100644 --- a/gsfa/manifest/manifest_test.go +++ b/gsfa/manifest/manifest_test.go @@ -4,12 +4,15 @@ import ( "path/filepath" "testing" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/stretchr/testify/require" ) func TestManifest(t *testing.T) { fp := filepath.Join(t.TempDir(), "test_manifest") - m, err := NewManifest(fp) + meta := indexmeta.Meta{} + meta.Add([]byte("epoch"), []byte("test")) + m, err := NewManifest(fp, meta) require.NoError(t, err) defer m.Close() require.NotNil(t, m) @@ -42,10 +45,14 @@ func TestManifest(t *testing.T) { { // now close and reopen m.Close() - m, err = NewManifest(fp) + m, err = NewManifest(fp, indexmeta.Meta{}) require.NoError(t, err) defer m.Close() require.NotNil(t, m) + + epoch, ok := m.header.meta.Get([]byte("epoch")) + require.True(t, ok) + require.Equal(t, []byte("test"), epoch) } { all, err := m.ReadAll() diff --git a/gsfa/worker.go b/gsfa/worker.go index a3a06e49..d72c4746 100644 --- a/gsfa/worker.go +++ b/gsfa/worker.go @@ -10,6 +10,7 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" "github.com/gagliardetto/solana-go" + "github.com/rpcpool/yellowstone-faithful/indexmeta" ) func workerRead(indexRoot string, pubkey string, limit int) error { @@ -75,6 +76,7 @@ func workerDemoLoad(root string, numGlobalAccounts uint64, numSigs int) error { accu, err := NewGsfaWriter( root, 500_000, + indexmeta.Meta{}, ) if err != nil { return fmt.Errorf("error while opening accumulator: %w", err) @@ -122,6 +124,7 @@ func worker(root string) error { accu, err := NewGsfaWriter( root, 1000000, + indexmeta.Meta{}, ) if err != nil { return fmt.Errorf("error while opening accumulator: %w", err) diff --git a/indexmeta/indexmeta.go b/indexmeta/indexmeta.go index 11add2af..18526a0e 100644 --- a/indexmeta/indexmeta.go +++ b/indexmeta/indexmeta.go @@ -56,10 +56,12 @@ func (m Meta) MarshalBinary() ([]byte, error) { return buf.Bytes(), nil } -func (m *Meta) UnmarshalWithDecoder(decoder *bin.Decoder) error { - if !decoder.HasRemaining() { - return nil - } +type Decoder interface { + io.ByteReader + io.Reader +} + +func (m *Meta) UnmarshalWithDecoder(decoder Decoder) error { numKVs, err := decoder.ReadByte() if err != nil { return fmt.Errorf("failed to read number of key-value pairs: %w", err) From 5c31f840a0a7d60a1d81530b937d74b40f8d3ea2 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 15:44:55 +0100 Subject: [PATCH 25/63] Cleanup indexing --- 
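Progress output for indexing and verification now shows item counts, percentages, and an ETA instead of printing dots; createAllIndexes returns the total item count so verification can report progress against it. Index writers now return an error instead of panicking when closed before sealing, and the bucketteer and gsfa manifest format versions are bumped to 2.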
bucketteer/bucketteer.go | 2 +- bucketteer/write.go | 2 +- cmd-x-index-all.go | 189 ++++++++++++++++-------- cmd-x-verify-index-all.go | 1 + gsfa/manifest/manifest.go | 2 +- indexes/index-cid-to-offset-and-size.go | 2 +- indexes/index-sig-to-cid.go | 2 +- indexes/index-slot-to-cid.go | 2 +- indexmeta/indexmeta.go | 8 +- readers.go | 11 +- 10 files changed, 148 insertions(+), 73 deletions(-) diff --git a/bucketteer/bucketteer.go b/bucketteer/bucketteer.go index 43bf18ca..38c7f9cc 100644 --- a/bucketteer/bucketteer.go +++ b/bucketteer/bucketteer.go @@ -12,7 +12,7 @@ func Magic() [8]byte { return _Magic } -const Version = uint64(1) +const Version = uint64(2) func sortWithCompare[T any](a []T, compare func(i, j int) int) { sort.Slice(a, func(i, j int) bool { diff --git a/bucketteer/write.go b/bucketteer/write.go index 20bd94fe..e6691f6f 100644 --- a/bucketteer/write.go +++ b/bucketteer/write.go @@ -39,7 +39,7 @@ func NewWriter(path string) (*Writer, error) { if ok, err := fileIsBlank(path); err != nil { return nil, err } else if !ok { - return nil, fmt.Errorf("file is not blank") + return nil, fmt.Errorf("file already exists and is not empty: %s", path) } file, err := os.Create(path) if err != nil { diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index a6d00967..df065dac 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -88,7 +88,9 @@ func newCmd_Index_all() *cli.Command { klog.Infof("Took %s", time.Since(startedAt)) }() klog.Infof("Creating all indexes for %s", carPath) - indexPaths, err := createAllIndexes( + klog.Infof("Indexes will be saved in %s", indexDir) + klog.Infof("This CAR file is for epoch %d and cluster %s", epoch, network) + indexPaths, numTotalItems, err := createAllIndexes( c.Context, epoch, network, @@ -100,9 +102,14 @@ func newCmd_Index_all() *cli.Command { return err } klog.Info("Indexes created:") - veryPlainSdumpConfig.Dump(indexPaths) + fmt.Println(indexPaths.String()) if verify { - return verifyAllIndexes(context.Background(), carPath, indexPaths) + return verifyAllIndexes( + context.Background(), + carPath, + indexPaths, + numTotalItems, + ) } klog.Info("Skipping verification.") } @@ -128,29 +135,29 @@ func createAllIndexes( tmpDir string, carPath string, indexDir string, -) (*IndexPaths, error) { +) (*IndexPaths, uint64, error) { // Check if the CAR file exists: exists, err := fileExists(carPath) if err != nil { - return nil, fmt.Errorf("failed to check if CAR file exists: %w", err) + return nil, 0, fmt.Errorf("failed to check if CAR file exists: %w", err) } if !exists { - return nil, fmt.Errorf("CAR file %q does not exist", carPath) + return nil, 0, fmt.Errorf("CAR file %q does not exist", carPath) } carFile, err := os.Open(carPath) if err != nil { - return nil, fmt.Errorf("failed to open car file: %w", err) + return nil, 0, fmt.Errorf("failed to open car file: %w", err) } defer carFile.Close() rd, err := newCarReader(carFile) if err != nil { - return nil, fmt.Errorf("failed to create car reader: %w", err) + return nil, 0, fmt.Errorf("failed to create car reader: %w", err) } // check it has 1 root if len(rd.header.Roots) != 1 { - return nil, fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.header.Roots)) + return nil, 0, fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.header.Roots)) } // print roots: for _, root := range rd.header.Roots { @@ -163,8 +170,9 @@ func createAllIndexes( klog.Infof("Counting items in car file...") numItems, err := carCountItemsByFirstByte(carPath) if err != nil { - return nil, 
fmt.Errorf("failed to count items in car file: %w", err) + return nil, 0, fmt.Errorf("failed to count items in car file: %w", err) } + fmt.Println() klog.Infof("Found items in car file:") numTotalItems := uint64(0) var kinds []byte @@ -185,7 +193,7 @@ func createAllIndexes( numTotalItems, ) if err != nil { - return nil, fmt.Errorf("failed to create cid_to_offset_and_size index: %w", err) + return nil, 0, fmt.Errorf("failed to create cid_to_offset_and_size index: %w", err) } defer cid_to_offset_and_size.Close() @@ -197,7 +205,7 @@ func createAllIndexes( numItems[byte(iplddecoders.KindBlock)], ) if err != nil { - return nil, fmt.Errorf("failed to create slot_to_cid index: %w", err) + return nil, 0, fmt.Errorf("failed to create slot_to_cid index: %w", err) } defer slot_to_cid.Close() @@ -209,7 +217,7 @@ func createAllIndexes( numItems[byte(iplddecoders.KindTransaction)], ) if err != nil { - return nil, fmt.Errorf("failed to create sig_to_cid index: %w", err) + return nil, 0, fmt.Errorf("failed to create sig_to_cid index: %w", err) } defer sig_to_cid.Close() @@ -218,7 +226,7 @@ func createAllIndexes( sigExistsFilepath, ) if err != nil { - return nil, fmt.Errorf("failed to create sig_exists index: %w", err) + return nil, 0, fmt.Errorf("failed to create sig_exists index: %w", err) } defer sig_exists.Close() @@ -226,7 +234,7 @@ func createAllIndexes( { var buf bytes.Buffer if err = carv1.WriteHeader(rd.header, &buf); err != nil { - return nil, err + return nil, 0, err } totalOffset = uint64(buf.Len()) } @@ -236,20 +244,22 @@ func createAllIndexes( numIndexedTransactions := uint64(0) lastCheckpoint := time.Now() klog.Infof("Indexing...") + var eta time.Duration + startedAt := time.Now() for { _cid, sectionLength, block, err := rd.NextNode() if err != nil { if errors.Is(err, io.EOF) { break } - return nil, err + return nil, 0, err } // klog.Infof("key: %s, offset: %d", bin.FormatByteSlice(c.Bytes()), totalOffset) err = cid_to_offset_and_size.Put(_cid, totalOffset, sectionLength) if err != nil { - return nil, fmt.Errorf("failed to index cid to offset: %w", err) + return nil, 0, fmt.Errorf("failed to index cid to offset: %w", err) } numIndexedOffsets++ @@ -259,12 +269,12 @@ func createAllIndexes( { block, err := iplddecoders.DecodeBlock(block.RawData()) if err != nil { - return nil, fmt.Errorf("failed to decode block: %w", err) + return nil, 0, fmt.Errorf("failed to decode block: %w", err) } err = slot_to_cid.Put(uint64(block.Slot), _cid) if err != nil { - return nil, fmt.Errorf("failed to index slot to cid: %w", err) + return nil, 0, fmt.Errorf("failed to index slot to cid: %w", err) } numIndexedBlocks++ } @@ -272,17 +282,17 @@ func createAllIndexes( { txNode, err := iplddecoders.DecodeTransaction(block.RawData()) if err != nil { - return nil, fmt.Errorf("failed to decode transaction: %w", err) + return nil, 0, fmt.Errorf("failed to decode transaction: %w", err) } sig, err := readFirstSignature(txNode.Data.Bytes()) if err != nil { - return nil, fmt.Errorf("failed to read signature: %w", err) + return nil, 0, fmt.Errorf("failed to read signature: %w", err) } err = sig_to_cid.Put(sig, _cid) if err != nil { - return nil, fmt.Errorf("failed to index signature to cid: %w", err) + return nil, 0, fmt.Errorf("failed to index signature to cid: %w", err) } sig_exists.Put(sig) @@ -293,25 +303,35 @@ func createAllIndexes( totalOffset += sectionLength - if numIndexedOffsets%100_000 == 0 { - printToStderr(".") + if numIndexedOffsets%1_000_000 == 0 && numIndexedOffsets > 0 { + timeForChunk := 
time.Since(lastCheckpoint) + numChunksLeft := ((numTotalItems - numIndexedOffsets) / 1_000_000) + 1 + eta = timeForChunk * time.Duration(numChunksLeft) + lastCheckpoint = time.Now() } - if numIndexedOffsets%10_000_000 == 0 { - timeFor10_000_000 := time.Since(lastCheckpoint) - howMany10_000_000 := ((numTotalItems - numIndexedOffsets) / 10_000_000) + 1 - eta := timeFor10_000_000 * time.Duration(howMany10_000_000) - + if numIndexedOffsets%100_000 == 0 { + var etaString string + if eta > 0 { + etaString = fmt.Sprintf(" ETA: %s ", eta.Truncate(time.Second).String()) + } else { + etaString = ", ETA: --- " + } printToStderr( - "\n" + greenBackground( - fmt.Sprintf(" %s (%s) ", - humanize.Comma(int64(numIndexedOffsets)), - time.Since(lastCheckpoint), - ), - ) + "ETA: " + eta.String() + "\n", + fmt.Sprintf("\rIndexing: %s/%s items [%s%%] %s", + humanize.Comma(int64(numIndexedOffsets)), + humanize.Comma(int64(numTotalItems)), + humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(numTotalItems)*100, 2), + etaString, + ), ) - lastCheckpoint = time.Now() } } + printToStderr( + fmt.Sprintf("\rIndexed %s items in %s \n", + humanize.Comma(int64(numIndexedOffsets)), + time.Since(startedAt).Truncate(time.Second), + ), + ) printToStderr("\n") klog.Infof( "Indexed %s offsets, %s blocks, %s transactions", @@ -320,20 +340,18 @@ func createAllIndexes( humanize.Comma(int64(numIndexedTransactions)), ) - klog.Infof("Preparing to seal indexes...") + klog.Infof("Preparing to seal indexes (DO NOT EXIT)...") paths := &IndexPaths{} paths.SignatureExists = sigExistsFilepath - klog.Infof("Root CID: %s", rootCID) - { // seal the indexes { klog.Infof("Sealing cid_to_offset_and_size index...") err = cid_to_offset_and_size.Seal(ctx, indexDir) if err != nil { - return nil, fmt.Errorf("failed to seal cid_to_offset_and_size index: %w", err) + return nil, 0, fmt.Errorf("failed to seal cid_to_offset_and_size index: %w", err) } paths.CidToOffsetAndSize = cid_to_offset_and_size.GetFilepath() klog.Infof("Successfully sealed cid_to_offset_and_size index: %s", paths.CidToOffsetAndSize) @@ -343,7 +361,7 @@ func createAllIndexes( klog.Infof("Sealing slot_to_cid index...") err = slot_to_cid.Seal(ctx, indexDir) if err != nil { - return nil, fmt.Errorf("failed to seal slot_to_cid index: %w", err) + return nil, 0, fmt.Errorf("failed to seal slot_to_cid index: %w", err) } paths.SlotToCid = slot_to_cid.GetFilepath() klog.Infof("Successfully sealed slot_to_cid index: %s", paths.SlotToCid) @@ -353,7 +371,7 @@ func createAllIndexes( klog.Infof("Sealing sig_to_cid index...") err = sig_to_cid.Seal(ctx, indexDir) if err != nil { - return nil, fmt.Errorf("failed to seal sig_to_cid index: %w", err) + return nil, 0, fmt.Errorf("failed to seal sig_to_cid index: %w", err) } paths.SignatureToCid = sig_to_cid.GetFilepath() klog.Infof("Successfully sealed sig_to_cid index: %s", paths.SignatureToCid) @@ -363,22 +381,22 @@ func createAllIndexes( klog.Infof("Sealing sig_exists index...") meta := indexmeta.Meta{} if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, epoch); err != nil { - return nil, fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) + return nil, 0, fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) } if err := meta.AddCid(indexmeta.MetadataKey_RootCid, rootCID); err != nil { - return nil, fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) + return nil, 0, fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) } if err := 
meta.AddString(indexmeta.MetadataKey_Network, string(network)); err != nil { - return nil, fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) + return nil, 0, fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) } if _, err = sig_exists.Seal(meta); err != nil { - return nil, fmt.Errorf("failed to seal sig_exists index: %w", err) + return nil, 0, fmt.Errorf("failed to seal sig_exists index: %w", err) } klog.Infof("Successfully sealed sig_exists index: %s", paths.SignatureExists) } } - return paths, nil + return paths, numTotalItems, nil } func greenBackground(s string) string { @@ -396,6 +414,28 @@ type IndexPaths struct { SignatureExists string } +// IndexPaths.String +func (p *IndexPaths) String() string { + var builder bytes.Buffer + builder.WriteString(" cid_to_offset_and_size:\n uri: ") + builder.WriteString(quoteSingle(p.CidToOffsetAndSize)) + builder.WriteString("\n") + builder.WriteString(" slot_to_cid:\n uri: ") + builder.WriteString(quoteSingle(p.SlotToCid)) + builder.WriteString("\n") + builder.WriteString(" sig_to_cid:\n uri: ") + builder.WriteString(quoteSingle(p.SignatureToCid)) + builder.WriteString("\n") + builder.WriteString(" sig_exists:\n uri: ") + builder.WriteString(quoteSingle(p.SignatureExists)) + builder.WriteString("\n") + return builder.String() +} + +func quoteSingle(s string) string { + return fmt.Sprintf("'%s'", s) +} + func NewBuilder_CidToOffset( epoch uint64, rootCid cid.Cid, @@ -472,6 +512,7 @@ func verifyAllIndexes( ctx context.Context, carPath string, indexes *IndexPaths, + numTotalItems uint64, ) error { // Check if the CAR file exists: exists, err := fileExists(carPath) @@ -546,6 +587,8 @@ func verifyAllIndexes( numIndexedTransactions := uint64(0) klog.Infof("Verifying indexes...") lastCheckpoint := time.Now() + var eta time.Duration + startedAt := time.Now() for { _cid, sectionLength, block, err := rd.NextNode() if err != nil { @@ -621,28 +664,46 @@ func verifyAllIndexes( totalOffset += sectionLength - if numIndexedOffsets%100_000 == 0 { - printToStderr(".") + if numIndexedOffsets%1_000_000 == 0 && numIndexedOffsets > 0 && numTotalItems > 0 { + timeForChunk := time.Since(lastCheckpoint) + numChunksLeft := ((numTotalItems - numIndexedOffsets) / 1_000_000) + 1 + eta = timeForChunk * time.Duration(numChunksLeft) + lastCheckpoint = time.Now() } - if numIndexedOffsets%1_000_000 == 0 { - printToStderr( - "\n" + greenBackground( - fmt.Sprintf(" %s (%s) ", + if numIndexedOffsets%100_000 == 0 { + if numTotalItems > 0 { + var etaString string + if eta > 0 { + etaString = fmt.Sprintf(", ETA: %s ", eta.Truncate(time.Second).String()) + } else { + etaString = ", ETA: --- " + } + printToStderr( + fmt.Sprintf("\rVerifying index: %s/%s items [%s%%] %s", humanize.Comma(int64(numIndexedOffsets)), - time.Since(lastCheckpoint), + humanize.Comma(int64(numTotalItems)), + humanize.CommafWithDigits(float64(numIndexedOffsets)/float64(numTotalItems)*100, 2), + etaString, ), - ) + "\n", - ) - lastCheckpoint = time.Now() + ) + } else { + printToStderr( + fmt.Sprintf("\rVerifying index: %s items", + humanize.Comma(int64(numIndexedOffsets)), + ), + ) + } } } - printToStderr("\n") - klog.Infof( - "Verified %s offsets, %s blocks, %s transactions", - humanize.Comma(int64(numIndexedOffsets)), - humanize.Comma(int64(numIndexedBlocks)), - humanize.Comma(int64(numIndexedTransactions)), - ) + + printToStderr( + fmt.Sprintf( + "\rVerified %s offsets, %s blocks, %s transactions in %s\n", + humanize.Comma(int64(numIndexedOffsets)), + 
humanize.Comma(int64(numIndexedBlocks)), + humanize.Comma(int64(numIndexedTransactions)), + time.Since(startedAt).Truncate(time.Second), + )) return nil } diff --git a/cmd-x-verify-index-all.go b/cmd-x-verify-index-all.go index 47af273c..b5107441 100644 --- a/cmd-x-verify-index-all.go +++ b/cmd-x-verify-index-all.go @@ -39,6 +39,7 @@ func newCmd_VerifyIndex_all() *cli.Command { SignatureToCid: indexFilePathSig2Cid, SignatureExists: indexFilePathSigExists, }, + 0, ) if err != nil { return err diff --git a/gsfa/manifest/manifest.go b/gsfa/manifest/manifest.go index 91689ccb..f7ded229 100644 --- a/gsfa/manifest/manifest.go +++ b/gsfa/manifest/manifest.go @@ -20,7 +20,7 @@ type Manifest struct { var ( _MAGIC = [...]byte{'g', 's', 'f', 'a', 'm', 'n', 'f', 's'} - _Version = uint64(1) + _Version = uint64(2) ) var headerLenWithoutMeta = len(_MAGIC) + 8 // 8 bytes for the version diff --git a/indexes/index-cid-to-offset-and-size.go b/indexes/index-cid-to-offset-and-size.go index 85c0e77a..2203c081 100644 --- a/indexes/index-cid-to-offset-and-size.go +++ b/indexes/index-cid-to-offset-and-size.go @@ -114,7 +114,7 @@ func (w *CidToOffsetAndSize_Writer) Seal(ctx context.Context, dstDir string) err func (w *CidToOffsetAndSize_Writer) Close() error { if !w.sealed { - panic(fmt.Errorf("attempted to close a cid-to-offset-and-size index that was not sealed")) + return fmt.Errorf("attempted to close a cid-to-offset-and-size index that was not sealed") } return w.index.Close() } diff --git a/indexes/index-sig-to-cid.go b/indexes/index-sig-to-cid.go index 461c2eb4..272070f8 100644 --- a/indexes/index-sig-to-cid.go +++ b/indexes/index-sig-to-cid.go @@ -110,7 +110,7 @@ func (w *SigToCid_Writer) Seal(ctx context.Context, dstDir string) error { func (w *SigToCid_Writer) Close() error { if !w.sealed { - panic(fmt.Errorf("attempted to close a sig-to-cid index that was not sealed")) + return fmt.Errorf("attempted to close a sig-to-cid index that was not sealed") } return w.index.Close() } diff --git a/indexes/index-slot-to-cid.go b/indexes/index-slot-to-cid.go index 32401855..d310315d 100644 --- a/indexes/index-slot-to-cid.go +++ b/indexes/index-slot-to-cid.go @@ -109,7 +109,7 @@ func (w *SlotToCid_Writer) Seal(ctx context.Context, dstDir string) error { func (w *SlotToCid_Writer) Close() error { if !w.sealed { - panic(fmt.Errorf("attempted to close a slot-to-cid index that was not sealed")) + return fmt.Errorf("attempted to close a slot-to-cid index that was not sealed") } return w.index.Close() } diff --git a/indexmeta/indexmeta.go b/indexmeta/indexmeta.go index 18526a0e..d923f4c5 100644 --- a/indexmeta/indexmeta.go +++ b/indexmeta/indexmeta.go @@ -116,10 +116,14 @@ func (m *Meta) Add(key, value []byte) error { if len(value) > MaxValueSize { return fmt.Errorf("value size %d exceeds max %d", len(value), MaxValueSize) } - m.KeyVals = append(m.KeyVals, KV{Key: key, Value: value}) + m.KeyVals = append(m.KeyVals, KV{Key: cloneBytes(key), Value: cloneBytes(value)}) return nil } +func cloneBytes(b []byte) []byte { + return append([]byte(nil), b...) 
+}
+
 func (m *Meta) AddCid(key []byte, value cid.Cid) error {
 return m.Add(key, value.Bytes())
 }
@@ -183,7 +187,7 @@ func (m *Meta) Replace(key, value []byte) error {
 }
 for i, kv := range m.KeyVals {
 if bytes.Equal(kv.Key, key) {
- m.KeyVals[i].Value = value
+ m.KeyVals[i].Value = cloneBytes(value)
 return nil
 }
 }
diff --git a/readers.go b/readers.go
index f5770033..54eac2fa 100644
--- a/readers.go
+++ b/readers.go
@@ -7,7 +7,9 @@ import (
 "fmt"
 "io"
 "os"
+ "time"

+ "github.com/dustin/go-humanize"
 "github.com/ipfs/go-cid"
 cbor "github.com/ipfs/go-ipld-cbor"
 "github.com/ipfs/go-libipfs/blocks"
@@ -227,6 +229,7 @@ func carCountItemsByFirstByte(carPath string) (map[byte]uint64, error) {

 numTotalItems := uint64(0)
 counts := make(map[byte]uint64)
+ startedCountAt := time.Now()
 for {
 _, _, block, err := rd.NextNode()
 if err != nil {
@@ -241,10 +244,16 @@
 numTotalItems++

 if numTotalItems%1_000_000 == 0 {
- printToStderr(".")
+ printToStderr(
+ fmt.Sprintf("\rCounted %s items", humanize.Comma(int64(numTotalItems))),
+ )
 }
 }

+ printToStderr(
+ fmt.Sprintf("\rCounted %s items in %s\n", humanize.Comma(int64(numTotalItems)), time.Since(startedCountAt).Truncate(time.Second)),
+ )
+
 return counts, nil
 }

From 696dfdf65fee19c0b2a0c7a6bd27a1b892770e43 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Tue, 5 Dec 2023 16:09:30 +0100
Subject: [PATCH 26/63] Cleanup gsfa

---
 multiepoch-getSignaturesForAddress.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/multiepoch-getSignaturesForAddress.go b/multiepoch-getSignaturesForAddress.go
index d4126a15..4d9af0d0 100644
--- a/multiepoch-getSignaturesForAddress.go
+++ b/multiepoch-getSignaturesForAddress.go
@@ -191,7 +191,11 @@ func (multi *MultiEpoch) handleGetSignaturesForAddress(ctx context.Context, conn
 }
 slot := uint64(transactionNode.Slot)
 response[ii]["slot"] = slot
- response[ii]["blockTime"] = getBlockTime(slot, ser)
+ if blockTime := getBlockTime(slot, ser); blockTime != 0 {
+ response[ii]["blockTime"] = blockTime
+ } else {
+ response[ii]["blockTime"] = nil
+ }
 response[ii]["confirmationStatus"] = "finalized"
 }
 return nil

From 2c845bdef3404e1382ca7ce6db0746de6ca2db85 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Tue, 5 Dec 2023 16:12:22 +0100
Subject: [PATCH 27/63] GSFA: If no signatures are found, return empty result instead of error.
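
Previously, an address with no indexed signatures made the handler reply with a JSON-RPC internal error ("Not found"). Standard Solana RPC nodes return an empty array from getSignaturesForAddress in that case, so reply with an empty result instead.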
--- multiepoch-getSignaturesForAddress.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/multiepoch-getSignaturesForAddress.go b/multiepoch-getSignaturesForAddress.go index 4d9af0d0..655d7b38 100644 --- a/multiepoch-getSignaturesForAddress.go +++ b/multiepoch-getSignaturesForAddress.go @@ -102,10 +102,15 @@ func (multi *MultiEpoch) handleGetSignaturesForAddress(ctx context.Context, conn } if len(foundSignatures) == 0 { - return &jsonrpc2.Error{ - Code: jsonrpc2.CodeInternalError, - Message: "Not found", - }, fmt.Errorf("no signatures found for address: %s", pk) + err = conn.ReplyRaw( + ctx, + req.ID, + []map[string]any{}, + ) + if err != nil { + return nil, fmt.Errorf("failed to reply: %w", err) + } + return nil, nil } var blockTimeCache struct { From 091519736b61895c973cc29217626c71d1d0edfc Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 17:12:13 +0100 Subject: [PATCH 28/63] Improve index filenames --- cmd-x-index-gsfa.go | 17 ++++++++++++++++- indexes/index-cid-to-offset-and-size.go | 2 +- indexes/index-sig-to-cid.go | 2 +- indexes/index-slot-to-cid.go | 2 +- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/cmd-x-index-gsfa.go b/cmd-x-index-gsfa.go index cf6afb0f..2018af2f 100644 --- a/cmd-x-index-gsfa.go +++ b/cmd-x-index-gsfa.go @@ -16,6 +16,7 @@ import ( "github.com/dustin/go-humanize" bin "github.com/gagliardetto/binary" "github.com/gagliardetto/solana-go" + "github.com/ipfs/go-cid" "github.com/ipfs/go-libipfs/blocks" "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/gsfa" @@ -121,7 +122,11 @@ func newCmd_Index_gsfa() *cli.Command { rootCID := rd.Header.Roots[0] // Use the car file name and root CID to name the gsfa index dir: - gsfaIndexDir := filepath.Join(indexDir, fmt.Sprintf("%s-%s-gsfa-index", filepath.Base(carPath), rootCID.String())) + gsfaIndexDir := filepath.Join(indexDir, formatIndexDirname_gsfa( + epoch, + rootCID, + network, + )) klog.Infof("Creating gsfa index dir at %s", gsfaIndexDir) err = os.Mkdir(gsfaIndexDir, 0o755) if err != nil { @@ -257,6 +262,16 @@ func newCmd_Index_gsfa() *cli.Command { } } +func formatIndexDirname_gsfa(epoch uint64, rootCid cid.Cid, network indexes.Network) string { + return fmt.Sprintf( + "epoch-%d-%s-%s-%s", + epoch, + rootCid.String(), + network, + "gsfa.indexdir", + ) +} + type TransactionWithSlot struct { Slot uint64 Transaction solana.Transaction diff --git a/indexes/index-cid-to-offset-and-size.go b/indexes/index-cid-to-offset-and-size.go index 2203c081..1c18cb58 100644 --- a/indexes/index-cid-to-offset-and-size.go +++ b/indexes/index-cid-to-offset-and-size.go @@ -28,7 +28,7 @@ const ( func formatFilename_CidToOffsetAndSize(epoch uint64, rootCid cid.Cid, network Network) string { return fmt.Sprintf( - "%d-%s-%s-%s", + "epoch-%d-%s-%s-%s", epoch, rootCid.String(), network, diff --git a/indexes/index-sig-to-cid.go b/indexes/index-sig-to-cid.go index 272070f8..226aaccb 100644 --- a/indexes/index-sig-to-cid.go +++ b/indexes/index-sig-to-cid.go @@ -27,7 +27,7 @@ const ( func formatFilename_SigToCid(epoch uint64, rootCid cid.Cid, network Network) string { return fmt.Sprintf( - "%d-%s-%s-%s", + "epoch-%d-%s-%s-%s", epoch, rootCid.String(), network, diff --git a/indexes/index-slot-to-cid.go b/indexes/index-slot-to-cid.go index d310315d..14088a8b 100644 --- a/indexes/index-slot-to-cid.go +++ b/indexes/index-slot-to-cid.go @@ -26,7 +26,7 @@ const ( func formatFilename_SlotToCid(epoch uint64, rootCid cid.Cid, network Network) string { return fmt.Sprintf( - 
"%d-%s-%s-%s", + "epoch-%d-%s-%s-%s", epoch, rootCid.String(), network, From 9df2aa28951a5cef0999ce99397c5acd795bf168 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 19:48:48 +0100 Subject: [PATCH 29/63] Add libraries to manage split car files --- config.go | 45 +++- epoch.go | 3 + go.mod | 8 +- go.sum | 9 +- split-car-fetcher/fetcher.go | 354 ++++++++++++++++++++++++++++++ split-car-fetcher/fetcher_test.go | 128 +++++++++++ split-car-fetcher/miner-info.go | 85 +++++++ 7 files changed, 625 insertions(+), 7 deletions(-) create mode 100644 split-car-fetcher/fetcher.go create mode 100644 split-car-fetcher/fetcher_test.go create mode 100644 split-car-fetcher/miner-info.go diff --git a/config.go b/config.go index 5afdf810..ad43c908 100644 --- a/config.go +++ b/config.go @@ -99,7 +99,14 @@ type Config struct { Epoch *uint64 `json:"epoch" yaml:"epoch"` Data struct { Car *struct { - URI URI `json:"uri" yaml:"uri"` + URI URI `json:"uri" yaml:"uri"` + SplitMetadata *struct { + URI URI `json:"uri" yaml:"uri"` // Local path to the split metadata file. + Miners []struct { + MinerID string `json:"miner_id" yaml:"miner_id"` + // If the miner is a Filecoin miner, then the provider is the miner's peer ID. + } `json:"miners" yaml:"miners"` + } `json:"split_metadata" yaml:"split_metadata"` } `json:"car" yaml:"car"` Filecoin *struct { // Enable enables Filecoin mode. If false, or if this section is not present, CAR mode is used. @@ -156,6 +163,10 @@ func (c *Config) IsFilecoinMode() bool { return c.Data.Filecoin != nil && c.Data.Filecoin.Enable } +func (c *Config) IsSplitCarMode() bool { + return c.Data.Car != nil && c.Data.Car.SplitMetadata != nil && len(c.Data.Car.SplitMetadata.Miners) > 0 +} + type ConfigSlice []*Config func (c ConfigSlice) Validate() error { @@ -211,11 +222,29 @@ func (c *Config) Validate() error { if c.Data.Car == nil { return fmt.Errorf("car-mode=true; data.car must be set") } - if c.Data.Car.URI.IsZero() { - return fmt.Errorf("data.car.uri must be set") + if c.Data.Car.URI.IsZero() && c.Data.Car.SplitMetadata == nil { + return fmt.Errorf("data.car.uri or data.car.split_metadata must be set") } - if err := isSupportedURI(c.Data.Car.URI, "data.car.uri"); err != nil { - return err + if !c.Data.Car.URI.IsZero() { + if err := isSupportedURI(c.Data.Car.URI, "data.car.uri"); err != nil { + return err + } + } + if c.Data.Car.SplitMetadata != nil { + if c.Data.Car.SplitMetadata.URI.IsZero() { + return fmt.Errorf("data.car.split_metadata.uri must be set") + } + if !c.Data.Car.SplitMetadata.URI.IsLocal() { + return fmt.Errorf("data.car.split_metadata.uri must be a local file") + } + if len(c.Data.Car.SplitMetadata.Miners) == 0 { + return fmt.Errorf("data.car.split_metadata.miners must not be empty") + } + for minerIndex, miner := range c.Data.Car.SplitMetadata.Miners { + if miner.MinerID == "" { + return fmt.Errorf("data.car.split_metadata.miners[%d].miner_id must not be empty", minerIndex) + } + } } if c.Indexes.CidToOffsetAndSize.URI.IsZero() { return fmt.Errorf("indexes.cid_to_offset_and_size.uri must be set") @@ -279,6 +308,12 @@ func (c *Config) Validate() error { if !c.Indexes.CidToOffsetAndSize.URI.IsValid() { return fmt.Errorf("indexes.cid_to_offset_and_size.uri is invalid") } + + if c.Data.Car.SplitMetadata != nil { + if !c.Data.Car.SplitMetadata.URI.IsValid() { + return fmt.Errorf("data.car.split_metadata.uri is invalid") + } + } } if !c.Indexes.SlotToCid.URI.IsValid() { return fmt.Errorf("indexes.slot_to_cid.uri is invalid") diff --git a/epoch.go b/epoch.go index 
d5bec468..461cb26d 100644 --- a/epoch.go +++ b/epoch.go @@ -247,6 +247,9 @@ func NewEpochFromConfig( if err != nil { return nil, fmt.Errorf("failed to open CAR file: %w", err) } + if config.IsSplitCarMode() { + // TODO: load the remote split CAR files. + } if localCarReader != nil { ep.onClose = append(ep.onClose, localCarReader.Close) } diff --git a/go.mod b/go.mod index b010a27c..d3ed9f7f 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,8 @@ module github.com/rpcpool/yellowstone-faithful go 1.20 +replace github.com/anjor/carlet => github.com/rpcpool/carlet v0.0.4 + require ( github.com/benbjohnson/clock v1.3.5 // indirect github.com/cespare/xxhash/v2 v2.2.0 @@ -33,7 +35,7 @@ require ( github.com/klauspost/compress v1.17.2 github.com/libp2p/go-libp2p v0.32.1 github.com/libp2p/go-libp2p-routing-helpers v0.7.1 // indirect - github.com/multiformats/go-multiaddr v0.12.0 // indirect + github.com/multiformats/go-multiaddr v0.12.0 github.com/multiformats/go-multicodec v0.9.0 github.com/multiformats/go-multihash v0.2.3 // indirect github.com/sourcegraph/jsonrpc2 v0.2.0 @@ -58,10 +60,12 @@ require ( require ( github.com/allegro/bigcache/v3 v3.1.0 + github.com/anjor/carlet v0.0.0-00010101000000-000000000000 github.com/fsnotify/fsnotify v1.5.4 github.com/goware/urlx v0.3.2 github.com/ipld/go-car v0.5.0 github.com/ipld/go-trustless-utils v0.4.1 + github.com/jellydator/ttlcache/v3 v3.1.0 github.com/libp2p/go-reuseport v0.4.0 github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 github.com/mr-tron/base58 v1.2.0 @@ -102,6 +106,8 @@ require ( github.com/filecoin-project/go-amt-ipld/v4 v4.1.0 // indirect github.com/filecoin-project/go-cbor-util v0.0.1 // indirect github.com/filecoin-project/go-ds-versioning v0.1.2 // indirect + github.com/filecoin-project/go-fil-commcid v0.1.0 // indirect + github.com/filecoin-project/go-fil-commp-hashhash v0.2.0 // indirect github.com/filecoin-project/go-hamt-ipld/v3 v3.2.0 // indirect github.com/filecoin-project/go-retrieval-types v1.2.0 // indirect github.com/filecoin-project/go-statemachine v1.0.3 // indirect diff --git a/go.sum b/go.sum index 3ccd1f56..44078685 100644 --- a/go.sum +++ b/go.sum @@ -173,7 +173,10 @@ github.com/filecoin-project/go-ds-versioning v0.1.2 h1:to4pTadv3IeV1wvgbCbN6Vqd+ github.com/filecoin-project/go-ds-versioning v0.1.2/go.mod h1:C9/l9PnB1+mwPa26BBVpCjG/XQCB0yj/q5CK2J8X1I4= github.com/filecoin-project/go-fil-commcid v0.0.0-20200716160307-8f644712406f/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ= github.com/filecoin-project/go-fil-commcid v0.0.0-20201016201715-d41df56b4f6a/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ= +github.com/filecoin-project/go-fil-commcid v0.1.0 h1:3R4ds1A9r6cr8mvZBfMYxTS88OqLYEo6roi+GiIeOh8= github.com/filecoin-project/go-fil-commcid v0.1.0/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ= +github.com/filecoin-project/go-fil-commp-hashhash v0.2.0 h1:HYIUugzjq78YvV3vC6rL95+SfC/aSTVSnZSZiDV5pCk= +github.com/filecoin-project/go-fil-commp-hashhash v0.2.0/go.mod h1:VH3fAFOru4yyWar4626IoS5+VGE8SfZiBODJLUigEo4= github.com/filecoin-project/go-hamt-ipld/v3 v3.1.0/go.mod h1:bxmzgT8tmeVQA1/gvBwFmYdT8SOFUwB3ovSUfG1Ux0g= github.com/filecoin-project/go-hamt-ipld/v3 v3.2.0 h1:McvVkfSvpreP8zA5hplCUdzEZgqToSFdZzIEegm1/8Y= github.com/filecoin-project/go-hamt-ipld/v3 v3.2.0/go.mod h1:T6p2jInnwr6aML/731EEwBg3dEbzlGS8a5SgKXBHcJs= @@ -480,6 +483,8 @@ github.com/jbenet/goprocess v0.1.3/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZl github.com/jbenet/goprocess v0.1.4 
h1:DRGOFReOMqqDNXwW70QkacFW0YN9QnwLV0Vqk+3oU0o= github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4= github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU= +github.com/jellydator/ttlcache/v3 v3.1.0 h1:0gPFG0IHHP6xyUyXq+JaD8fwkDCqgqwohXNJBcYE71g= +github.com/jellydator/ttlcache/v3 v3.1.0/go.mod h1:hi7MGFdMAwZna5n2tuvh63DvFLzVKySzCVW6+0gA2n4= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -724,6 +729,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/ronanh/intcomp v1.1.0 h1:i54kxmpmSoOZFcWPMWryuakN0vLxLswASsGa07zkvLU= github.com/ronanh/intcomp v1.1.0/go.mod h1:7FOLy3P3Zj3er/kVrU/pl+Ql7JFZj7bwliMGketo0IU= +github.com/rpcpool/carlet v0.0.4 h1:ZrDMvrS1Jewy4rQkj/ODy0SG8jXG0mljeNcY76kEnYg= +github.com/rpcpool/carlet v0.0.4/go.mod h1:sTpcN668YLQ1cvCT2RcIxifc/soFheI9wbNeDGCJX74= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -905,7 +912,7 @@ go.uber.org/fx v1.20.1 h1:zVwVQGS8zYvhh9Xxcu4w1M6ESyeMzebzj2NbSayZ4Mk= go.uber.org/fx v1.20.1/go.mod h1:iSYNbHf2y55acNCwCXKx7LbWb5WG1Bnue5RDXz1OREg= go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= go.uber.org/mock v0.3.0 h1:3mUxI1No2/60yUYax92Pt8eNOEecx2D3lcXZh2NEZJo= go.uber.org/mock v0.3.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= diff --git a/split-car-fetcher/fetcher.go b/split-car-fetcher/fetcher.go new file mode 100644 index 00000000..ff74ee55 --- /dev/null +++ b/split-car-fetcher/fetcher.go @@ -0,0 +1,354 @@ +package splitcarfetcher + +import ( + "bytes" + "encoding/base64" + "encoding/binary" + "fmt" + "io" + "math" + "net/http" + "os" + + "github.com/anjor/carlet" +) + +type SplitCarReader struct { + files *carlet.CarPiecesAndMetadata + multireader io.ReaderAt + closers []io.Closer +} + +type ReaderAtCloserSize interface { + io.ReaderAt + io.Closer + Size() int64 +} + +type SplitCarFileReaderCreator func(carFile carlet.CarFile) (ReaderAtCloserSize, error) + +type FileSplitCarReader struct { + filepath string + file *os.File + size int64 +} + +func NewFileSplitCarReader(filepath string) (*FileSplitCarReader, error) { + fi, err := os.Open(filepath) + if err != nil { + return nil, fmt.Errorf("failed to open file %q: %s", filepath, err) + } + stat, err := fi.Stat() + if err != nil { + return nil, fmt.Errorf("failed to stat file %q: %s", filepath, err) + } + size := stat.Size() + return &FileSplitCarReader{ + filepath: filepath, + file: fi, + size: size, + }, nil +} + +func (fscr *FileSplitCarReader) ReadAt(p []byte, off int64) (n int, err error) { 
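+ // Delegate to the underlying *os.File, which supports positioned reads.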
+ return fscr.file.ReadAt(p, off) +} + +func (fscr *FileSplitCarReader) Close() error { + return fscr.file.Close() +} + +func (fscr *FileSplitCarReader) Size() int64 { + return fscr.size +} + +func getContentSizeWithHeadOrZeroRange(url string) (int64, error) { + // try sending a HEAD request to the server to get the file size: + resp, err := http.Head(url) + if err != nil { + return 0, err + } + if resp.StatusCode != http.StatusOK { + // try sending a GET request with a zero range to the server to get the file size: + req := &http.Request{ + Method: "GET", + URL: resp.Request.URL, + Header: make(http.Header), + } + req.Header.Set("Range", "bytes=0-0") + resp, err = http.DefaultClient.Do(req) + if err != nil { + return 0, err + } + if resp.StatusCode != http.StatusPartialContent { + return 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + // now find the content length: + contentRange := resp.Header.Get("Content-Range") + if contentRange == "" { + return 0, fmt.Errorf("missing Content-Range header") + } + var contentLength int64 + _, err := fmt.Sscanf(contentRange, "bytes 0-0/%d", &contentLength) + if err != nil { + return 0, err + } + return contentLength, nil + } + return resp.ContentLength, nil +} + +type RemoteFileSplitCarReader struct { + commP string + url string + size int64 + httpClient *http.Client +} + +func NewRemoteFileSplitCarReader(commP string, url string) (*RemoteFileSplitCarReader, error) { + size, err := getContentSizeWithHeadOrZeroRange(url) + if err != nil { + return nil, fmt.Errorf("failed to get content size: %s", err) + } + return &RemoteFileSplitCarReader{ + commP: commP, + url: url, + size: size, + httpClient: http.DefaultClient, + }, nil +} + +func (fscr *RemoteFileSplitCarReader) ReadAt(p []byte, off int64) (n int, err error) { + req, err := http.NewRequest("GET", fscr.url, nil) + if err != nil { + return 0, err + } + req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, off+int64(len(p))-1)) + { + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Keep-Alive", "timeout=600") + } + resp, err := fscr.httpClient.Do(req) + if err != nil { + return 0, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusPartialContent { + return 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + n, err = io.ReadFull(resp.Body, p) + if err != nil { + return 0, err + } + return n, nil +} + +func (fscr *RemoteFileSplitCarReader) Close() error { + fscr.httpClient.CloseIdleConnections() + return nil +} + +func (fscr *RemoteFileSplitCarReader) Size() int64 { + return fscr.size +} + +func NewSplitCarReader( + files *carlet.CarPiecesAndMetadata, + readerCreator SplitCarFileReaderCreator, +) (*SplitCarReader, error) { + scr := &SplitCarReader{ + files: files, + closers: make([]io.Closer, 0), + } + readers := make([]io.ReaderAt, 0) + sizes := make([]int64, 0) + + { + // add the original car header + originalCarHeaderReaderAt, originalCarHeaderSize, err := scr.getOriginalCarHeaderReaderAt() + if err != nil { + return nil, fmt.Errorf("failed to get original car header reader: %s", err) + } + readers = append(readers, originalCarHeaderReaderAt) + sizes = append(sizes, int64(originalCarHeaderSize)) + } + for _, cf := range files.CarPieces { + fi, err := readerCreator(cf) + if err != nil { + return nil, fmt.Errorf("failed to open file %q: %s", cf.Name, err) + } + + size := int(fi.Size()) + + // if local file, check the size: + if _, ok := fi.(*FileSplitCarReader); ok { + expectedSize := int(cf.HeaderSize) + int(cf.ContentSize) // NOTE: 
valid only for pre-upload split CARs. They get padded after upload. + if size != expectedSize { + return nil, fmt.Errorf( + "file %q has unexpected size: saved=%d actual=%d (diff=%d)", + cf.Name, + expectedSize, + size, + expectedSize-size, + ) + } + } + + // if remote, then the file must be at least as header size + content size: + if _, ok := fi.(*RemoteFileSplitCarReader); ok { + expectedMinSize := int(cf.HeaderSize) + int(cf.ContentSize) + if size < expectedMinSize { + return nil, fmt.Errorf( + "remote file %q has unexpected size: expected min size=%d actual=%d (diff=%d)", + cf.CommP.String(), + expectedMinSize, + size, + expectedMinSize-size, + ) + } + } + + scr.closers = append(scr.closers, fi) + sectionReader := io.NewSectionReader(fi, int64(cf.HeaderSize), int64(cf.ContentSize)) + + readers = append(readers, sectionReader) + sizes = append(sizes, int64(cf.ContentSize)) + } + scr.multireader = NewMultiReaderAt(readers, sizes) + return scr, nil +} + +func (scr *SplitCarReader) Close() error { + for _, closer := range scr.closers { + closer.Close() + } + return nil +} + +func (scr *SplitCarReader) ReadAt(p []byte, off int64) (n int, err error) { + return scr.multireader.ReadAt(p, off) +} + +func (scr *SplitCarReader) getOriginalCarHeaderReaderAt() (io.ReaderAt, int, error) { + originalWholeCarHeader, originalWholeCarHeaderSize, err := scr.originalCarHeader() + if err != nil { + return nil, 0, fmt.Errorf("failed to get original car header: %s", err) + } + originalWholeCarHeaderReader := bytes.NewReader(originalWholeCarHeader) + return originalWholeCarHeaderReader, int(originalWholeCarHeaderSize), nil +} + +func (scr *SplitCarReader) originalCarHeader() ([]byte, int64, error) { + accu := int64(0) + + // now add the size of the actual header + headerBytes, err := base64.StdEncoding.DecodeString(scr.files.OriginalCarHeader) + if err != nil { + return nil, 0, fmt.Errorf("failed to decode original car header: %s", err) + } + headerSizePrefix := make([]byte, 0) + headerSizePrefix = binary.AppendUvarint(headerSizePrefix, uint64(len(headerBytes))) + accu += int64(len(headerSizePrefix)) + + totalSize := int(len(headerBytes)) + int(len(headerSizePrefix)) + if totalSize != int(scr.files.OriginalCarHeaderSize) { + return nil, 0, fmt.Errorf("unexpected header size: saved=%d actual=%d", scr.files.OriginalCarHeaderSize, totalSize) + } + accu += int64(len(headerBytes)) + totalHeader := make([]byte, 0) + totalHeader = append(totalHeader, headerSizePrefix...) + totalHeader = append(totalHeader, headerBytes...) 
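+ // The result is uvarint(len(header)) followed by the header bytes, matching the layout at the start of the original CAR file.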
+ return totalHeader, accu, nil +} + +type MultiReaderAt struct { + readers []io.ReaderAt + offsets []int64 +} + +func NewMultiReaderAt(readers []io.ReaderAt, sizes []int64) *MultiReaderAt { + offsets := make([]int64, len(sizes)) + var total int64 = 0 + for i, size := range sizes { + offsets[i] = total + total += size + } + return &MultiReaderAt{ + readers: readers, + offsets: offsets, + } +} + +func (m *MultiReaderAt) ReadAt(p []byte, off int64) (totalN int, err error) { + remaining := len(p) + bufOffset := 0 + reachedEnd := false + + for i, offset := range m.offsets { + if off < offset { + continue + } + + nextOffset := int64(math.MaxInt64) + if i < len(m.offsets)-1 { + nextOffset = m.offsets[i+1] + } + + toRead := int(min(max(0, nextOffset-off), int64(remaining))) + + n, err := m.readers[i].ReadAt(p[bufOffset:bufOffset+toRead], off-offset) + totalN += n + bufOffset += n + remaining -= n + + if err != nil { + if err == io.EOF && i == len(m.readers)-1 { + reachedEnd = true + } else if err != io.EOF { + return totalN, err + } + } + + if n == toRead { + off += int64(n) + } + + if remaining == 0 { + break + } + } + + if remaining > 0 && reachedEnd { + return totalN, io.EOF + } + + return totalN, nil +} + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} + +func max(a, b int64) int64 { + if a > b { + return a + } + return b +} + +func getFileSize(path string) int { + file, err := os.Open(path) + if err != nil { + panic(err) + } + defer file.Close() + fileInfo, err := file.Stat() + if err != nil { + panic(err) + } + fileSize := fileInfo.Size() + return int(fileSize) +} diff --git a/split-car-fetcher/fetcher_test.go b/split-car-fetcher/fetcher_test.go new file mode 100644 index 00000000..15568daf --- /dev/null +++ b/split-car-fetcher/fetcher_test.go @@ -0,0 +1,128 @@ +package splitcarfetcher + +import ( + "bytes" + "fmt" + "io" + "testing" +) + +func TestMulti(t *testing.T) { + reader1 := bytes.NewReader([]byte("Hello ")) + reader2 := bytes.NewReader([]byte("Worlds")) + multiReader := NewMultiReaderAt([]io.ReaderAt{reader1, reader2}, []int64{6, 5}) + { + off := int64(0) + accu := make([]byte, 0) + for { + buf := make([]byte, 1) + n, err := multiReader.ReadAt(buf, off) + if err != nil { + if err == io.EOF { + break + } + panic(err) + } + if n != 1 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + accu = append(accu, buf[0]) + off++ + } + if !bytes.Equal(accu, []byte("Hello Worlds")) { + panic(fmt.Errorf("unexpected accu: %s", accu)) + } + fmt.Printf("accu = %s\n", accu) + } + { + off := int64(0) + accu := make([]byte, 0) + for { + buf := make([]byte, 2) + n, err := multiReader.ReadAt(buf, off) + if err != nil { + if err == io.EOF { + break + } + panic(err) + } + if n != 2 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + accu = append(accu, buf...) 
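+ // advance past the two bytes just read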
+ off += 2 + } + if !bytes.Equal(accu, []byte("Hello Worlds")) { + panic(fmt.Errorf("unexpected accu: %q", accu)) + } + fmt.Printf("accu = %s\n", accu) + } + + { + buf := make([]byte, 11) + n, err := multiReader.ReadAt(buf, 0) + if err != nil { + panic(err) + } + if n != 11 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + fmt.Printf("buf = %s\n", buf) + } + { + buf := make([]byte, 5) + n, err := multiReader.ReadAt(buf, 0) + if err != nil { + panic(err) + } + if n != 5 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + if !bytes.Equal(buf, []byte("Hello")) { + panic(fmt.Errorf("unexpected buf: %s", buf)) + } + fmt.Printf("buf = %s\n", buf) + } + { + buf := make([]byte, 6) + n, err := multiReader.ReadAt(buf, 0) + if err != nil { + panic(err) + } + if n != 6 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + if !bytes.Equal(buf, []byte("Hello ")) { + panic(fmt.Errorf("unexpected buf: %s", buf)) + } + fmt.Printf("buf = %s\n", buf) + } + { + buf := make([]byte, 7) + n, err := multiReader.ReadAt(buf, 0) + if err != nil { + panic(err) + } + if n != 7 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + if !bytes.Equal(buf, []byte("Hello W")) { + panic(fmt.Errorf("unexpected buf: %s", buf)) + } + fmt.Printf("buf = %s\n", buf) + } + { + buf := make([]byte, 7) + n, err := multiReader.ReadAt(buf, 2) + if err != nil { + panic(err) + } + if n != 7 { + panic(fmt.Errorf("unexpected size: %d", n)) + } + if !bytes.Equal(buf, []byte("llo Wor")) { + panic(fmt.Errorf("unexpected buf: %s", buf)) + } + fmt.Printf("buf = %s\n", buf) + } +} diff --git a/split-car-fetcher/miner-info.go b/split-car-fetcher/miner-info.go new file mode 100644 index 00000000..3aef0a33 --- /dev/null +++ b/split-car-fetcher/miner-info.go @@ -0,0 +1,85 @@ +package splitcarfetcher + +import ( + "context" + "encoding/base64" + "fmt" + "time" + + "github.com/gagliardetto/solana-go/rpc/jsonrpc" + "github.com/jellydator/ttlcache/v3" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multiaddr" +) + +type MinerInfoCache struct { + lotusClient jsonrpc.RPCClient + requestTimeout time.Duration + minerInfoCache *ttlcache.Cache[string, *MinerInfo] +} +type MinerInfo struct { + PeerIDEncoded string `json:"PeerID"` + PeerID peer.ID + MultiaddrsBase64Encoded []string `json:"Multiaddrs"` + Multiaddrs []multiaddr.Multiaddr +} + +func NewMinerInfo( + lotusClient jsonrpc.RPCClient, + cacheTTL time.Duration, + requestTimeout time.Duration, +) MinerInfoCache { + minerInfoCache := ttlcache.New[string, *MinerInfo]( + ttlcache.WithTTL[string, *MinerInfo](cacheTTL), + ttlcache.WithDisableTouchOnHit[string, *MinerInfo]()) + + return MinerInfoCache{ + lotusClient: lotusClient, + requestTimeout: requestTimeout, + minerInfoCache: minerInfoCache, + } +} + +func (d MinerInfoCache) GetProviderInfo(ctx context.Context, provider string) (*MinerInfo, error) { + file := d.minerInfoCache.Get(provider) + if file != nil && !file.IsExpired() { + return file.Value(), nil + } + + minerInfo, err := MinerInfoFetcher{Client: d.lotusClient}.GetProviderInfo(ctx, provider) + if err != nil { + return nil, err + } + d.minerInfoCache.Set(provider, minerInfo, ttlcache.DefaultTTL) + return minerInfo, nil +} + +type MinerInfoFetcher struct { + Client jsonrpc.RPCClient +} + +func (m MinerInfoFetcher) GetProviderInfo(ctx context.Context, provider string) (*MinerInfo, error) { + minerInfo := new(MinerInfo) + err := m.Client.CallFor(ctx, minerInfo, "Filecoin.StateMinerInfo", provider, nil) + if err != nil { + return nil, fmt.Errorf("failed to get miner info for 
%s: %w", provider, err) + } + + minerInfo.Multiaddrs = make([]multiaddr.Multiaddr, len(minerInfo.MultiaddrsBase64Encoded)) + for i, addr := range minerInfo.MultiaddrsBase64Encoded { + decoded, err := base64.StdEncoding.DecodeString(addr) + if err != nil { + return nil, fmt.Errorf("failed to decode multiaddr %s: %w", addr, err) + } + minerInfo.Multiaddrs[i], err = multiaddr.NewMultiaddrBytes(decoded) + if err != nil { + return nil, fmt.Errorf("failed to parse multiaddr %s: %w", addr, err) + } + } + minerInfo.PeerID, err = peer.Decode(minerInfo.PeerIDEncoded) + if err != nil { + return nil, fmt.Errorf("failed to decode peer id %s: %w", minerInfo.PeerIDEncoded, err) + } + + return minerInfo, nil +} From 7f6a462697fe46dba230230d1a3dfd82be82eb14 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 21:53:03 +0100 Subject: [PATCH 30/63] Stub jsonParsed --- .gitignore | 1 + Makefile | 31 +- adapters.go | 6 + readers.go | 6 +- request-response.go | 54 +- txstatus/.gitignore | 2 + txstatus/Cargo.lock | 3316 +++++++++++++++++++++++++++++++ txstatus/Cargo.toml | 28 + txstatus/src/byte_order.rs | 5 + txstatus/src/lib.rs | 247 +++ txstatus/src/reader.rs | 398 ++++ txstatus/src/tools.rs | 15 + txstatus/src/type_size.rs | 19 + txstatus/transaction-wrapper.go | 117 ++ txstatus/txstatus-dummy.go | 19 + txstatus/txstatus-ffi.go | 56 + txstatus/types.go | 188 ++ 17 files changed, 4493 insertions(+), 15 deletions(-) create mode 100644 txstatus/.gitignore create mode 100644 txstatus/Cargo.lock create mode 100644 txstatus/Cargo.toml create mode 100644 txstatus/src/byte_order.rs create mode 100644 txstatus/src/lib.rs create mode 100644 txstatus/src/reader.rs create mode 100644 txstatus/src/tools.rs create mode 100644 txstatus/src/type_size.rs create mode 100644 txstatus/transaction-wrapper.go create mode 100644 txstatus/txstatus-dummy.go create mode 100644 txstatus/txstatus-ffi.go create mode 100644 txstatus/types.go diff --git a/.gitignore b/.gitignore index 5afc0108..5abd0d8c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /bin/ *.car _site +/.cargo diff --git a/Makefile b/Makefile index d1cc9a23..259a8198 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,43 @@ DEFAULT:compile IPLD_SCHEMA_PATH := ledger.ipldsch -LD_FLAGS := "-X main.GitCommit=`git rev-parse HEAD` -X main.GitTag=`git symbolic-ref -q --short HEAD || git describe --tags --exact-match || git rev-parse HEAD`" +BASE_LD_FLAGS := -X main.GitCommit=`git rev-parse HEAD` -X main.GitTag=`git symbolic-ref -q --short HEAD || git describe --tags --exact-match || git rev-parse HEAD` +ROOT_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) + +build-rust-wrapper: + rm -rf txstatus/lib + cd txstatus && cargo build --release --lib --target=x86_64-unknown-linux-gnu --target-dir=target + cbindgen ./txstatus -o txstatus/lib/transaction_status.h --lang c + echo "build-rust-wrapper done" +jsonParsed: build-rust-wrapper + # build faithful-cli with jsonParsed format support via ffi (rust) + rm -rf ./bin/faithful-cli_jsonParsed + cp txstatus/target/x86_64-unknown-linux-gnu/release/libdemo_transaction_status_ffi.so ./txstatus/lib/libsolana_transaction_status_wrapper.so + LD_FLAGS="$(BASE_LD_FLAGS) -r $(ROOT_DIR)txstatus/lib" + go build -ldflags=$(LD_FLAGS) -tags ffi -o ./bin/faithful-cli_jsonParsed . 
+ echo "built old-faithful with jsonParsed format support via ffi (rust)" + # LD_LIBRARY_PATH=txstatus/lib:$LD_LIBRARY_PATH ./bin/faithful-cli_jsonParsed + echo "To run the binary, please set LD_LIBRARY_PATH=txstatus/lib:\$$LD_LIBRARY_PATH ./bin/faithful-cli_jsonParsed" + # or: + # sudo cp ./txstatus/lib/libsolana_transaction_status_wrapper.so /usr/local/lib/ + # sudo ldconfig compile: @echo "\nCompiling faithful-cli binary for current platform ..." - go build -ldflags=$(LD_FLAGS) -o ./bin/faithful-cli . + go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/faithful-cli . compile-all: compile-linux compile-mac compile-windows compile-linux: @echo "\nCompiling faithful-cli binary for linux amd64 ..." - GOOS=linux GOARCH=amd64 go build -ldflags=$(LD_FLAGS) -o ./bin/linux/amd64/faithful-cli_linux_amd64 . + GOOS=linux GOARCH=amd64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/linux/amd64/faithful-cli_linux_amd64 . compile-mac: @echo "\nCompiling faithful-cli binary for mac amd64 ..." - GOOS=darwin GOARCH=amd64 go build -ldflags=$(LD_FLAGS) -o ./bin/darwin/amd64/faithful-cli_darwin_amd64 . + GOOS=darwin GOARCH=amd64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/darwin/amd64/faithful-cli_darwin_amd64 . @echo "\nCompiling faithful-cli binary for mac arm64 ..." - GOOS=darwin GOARCH=arm64 go build -ldflags=$(LD_FLAGS) -o ./bin/darwin/arm64/faithful-cli_darwin_arm64 . + GOOS=darwin GOARCH=arm64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/darwin/arm64/faithful-cli_darwin_arm64 . compile-windows: @echo "\nCompiling faithful-cli binary for windows amd64 ..." - GOOS=windows GOARCH=amd64 go build -ldflags=$(LD_FLAGS) -o ./bin/windows/amd64/faithful-cli_windows_amd64.exe . + GOOS=windows GOARCH=amd64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/windows/amd64/faithful-cli_windows_amd64.exe . test: go test -v ./... bindcode: install-deps diff --git a/adapters.go b/adapters.go index 9378e802..e248a5d4 100644 --- a/adapters.go +++ b/adapters.go @@ -22,6 +22,12 @@ func byteSliceAsIntegerSlice(b []byte) []uint64 { // adaptTransactionMetaToExpectedOutput adapts the transaction meta to the expected output // as per what solana RPC server returns. 
 func adaptTransactionMetaToExpectedOutput(m map[string]any) map[string]any {
+	{
+		_, ok := m["blockTime"]
+		if !ok {
+			m["blockTime"] = nil
+		}
+	}
 	meta, ok := m["meta"].(map[string]any)
 	if !ok {
 		return m
diff --git a/readers.go b/readers.go
index 54eac2fa..ad4d3b8f 100644
--- a/readers.go
+++ b/readers.go
@@ -38,11 +38,7 @@ type carReader struct {
 }
 
 func newCarReader(r io.ReadCloser) (*carReader, error) {
-	cachingReader, err := readahead.NewCachingReaderFromReader(r, readahead.DefaultChunkSize)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create caching reader: %s", err)
-	}
-	br := bufio.NewReader(cachingReader)
+	br := bufio.NewReaderSize(r, readahead.DefaultChunkSize)
 	ch, err := readHeader(br)
 	if err != nil {
 		return nil, err
diff --git a/request-response.go b/request-response.go
index 561fc45f..dec5de83 100644
--- a/request-response.go
+++ b/request-response.go
@@ -14,6 +14,7 @@ import (
 	jsoniter "github.com/json-iterator/go"
 	"github.com/mostynb/zstdpool-freelist"
 	"github.com/mr-tron/base58"
+	"github.com/rpcpool/yellowstone-faithful/txstatus"
 	"github.com/sourcegraph/jsonrpc2"
 	"github.com/valyala/fasthttp"
 )
@@ -164,7 +165,7 @@ func (req *GetBlockRequest) Validate() error {
 		solana.EncodingBase64,
 		solana.EncodingBase64Zstd,
 		solana.EncodingJSON,
-		// solana.EncodingJSONParsed, // TODO: add support for this
+		solana.EncodingJSONParsed, // supported only when built with ffi (txstatus)
 	) {
 		return fmt.Errorf("unsupported encoding")
 	}
@@ -292,7 +293,7 @@ func (req *GetTransactionRequest) Validate() error {
 		solana.EncodingBase64,
 		solana.EncodingBase64Zstd,
 		solana.EncodingJSON,
-		// solana.EncodingJSONParsed, // TODO: add support for this
+		solana.EncodingJSONParsed, // supported only when built with ffi (txstatus)
 	) {
 		return fmt.Errorf("unsupported encoding")
 	}
@@ -386,9 +387,54 @@ func encodeTransactionResponseBasedOnWantedEncoding(
 		}
 		return encodeBytesResponseBasedOnWantedEncoding(encoding, txBuf)
 	case solana.EncodingJSONParsed:
-		return nil, fmt.Errorf("unsupported encoding")
+		if !txstatus.IsEnabled() {
+			return nil, fmt.Errorf("unsupported encoding")
+		}
+
+		parsedInstructions := make([]json.RawMessage, 0)
+
+		for _, inst := range tx.Message.Instructions {
+			instrParams := txstatus.Parameters{
+				ProgramID: solana.VoteProgramID, // stub: hard-coded for now; should be resolved from the message's account keys
+				Instruction: txstatus.CompiledInstruction{
+					ProgramIDIndex: uint8(inst.ProgramIDIndex),
+					Accounts: func() []uint8 {
+						out := make([]uint8, len(inst.Accounts))
+						for i, v := range inst.Accounts {
+							out[i] = uint8(v)
+						}
+						return out
+					}(),
+					Data: inst.Data,
+				},
+				AccountKeys: txstatus.AccountKeys{
+					StaticKeys: tx.Message.AccountKeys,
+					// TODO: add support for dynamic keys?
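+					// "dynamic keys" are the addresses a v0 transaction loads at
+					// runtime from on-chain address lookup tables, split into
+					// writable/readonly sets, e.g.: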
+ // DynamicKeys: &LoadedAddresses{ + // Writable: []solana.PublicKey{}, + // Readonly: []solana.PublicKey{ + // solana.TokenLendingProgramID, + // }, + // }, + }, + StackHeight: nil, + } + + parsedInstructionJSON, err := instrParams.ParseInstruction() + if err != nil { + return nil, fmt.Errorf("failed to parse instruction: %w", err) + } + parsedInstructions = append(parsedInstructions, parsedInstructionJSON) + } + + resp, err := txstatus.FromTransaction(tx) + if err != nil { + return nil, fmt.Errorf("failed to convert transaction to txstatus.Transaction: %w", err) + } + resp.Message.Instructions = parsedInstructions + + return resp, nil case solana.EncodingJSON: - // TODO: add support for this return tx, nil default: return nil, fmt.Errorf("unsupported encoding") diff --git a/txstatus/.gitignore b/txstatus/.gitignore new file mode 100644 index 00000000..b1b85578 --- /dev/null +++ b/txstatus/.gitignore @@ -0,0 +1,2 @@ +/target +lib/ diff --git a/txstatus/Cargo.lock b/txstatus/Cargo.lock new file mode 100644 index 00000000..4d38837f --- /dev/null +++ b/txstatus/Cargo.lock @@ -0,0 +1,3316 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aead" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877" +dependencies = [ + "generic-array", +] + +[[package]] +name = "aes" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", + "opaque-debug", +] + +[[package]] +name = "aes-gcm-siv" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589c637f0e68c877bbd59a4599bbe849cac8e5f3e4b5a3ebae8f528cd218dcdc" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "polyval", + "subtle", + "zeroize", +] + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.10", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "getrandom 0.2.10", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + +[[package]] +name = "ark-bn254" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a22f4561524cd949590d78d7d4c5df8f592430d221f7f3c9497bbafd8972120f" +dependencies = [ + "ark-ec", + "ark-ff", + "ark-std", +] + +[[package]] +name = "ark-ec" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defd9a439d56ac24968cca0571f598a61bc8c55f71d50a89cda591cb750670ba" +dependencies = [ + "ark-ff", + "ark-poly", + "ark-serialize", + "ark-std", + "derivative", + "hashbrown 0.13.2", + "itertools", + "num-traits", + "zeroize", +] + +[[package]] +name = "ark-ff" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec847af850f44ad29048935519032c33da8aa03340876d351dfab5660d2966ba" +dependencies = [ + "ark-ff-asm", + "ark-ff-macros", + "ark-serialize", + "ark-std", + "derivative", + "digest 0.10.7", + "itertools", + "num-bigint 0.4.4", + "num-traits", + "paste", + "rustc_version", + "zeroize", +] + +[[package]] +name = "ark-ff-asm" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed4aa4fe255d0bc6d79373f7e31d2ea147bcf486cba1be5ba7ea85abdb92348" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "ark-ff-macros" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abe79b0e4288889c4574159ab790824d0033b9fdcb2a112a3182fac2e514565" +dependencies = [ + "num-bigint 0.4.4", + "num-traits", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "ark-poly" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d320bfc44ee185d899ccbadfa8bc31aab923ce1558716e1997a1e74057fe86bf" +dependencies = [ + "ark-ff", + "ark-serialize", + "ark-std", + "derivative", + "hashbrown 0.13.2", +] + +[[package]] +name = "ark-serialize" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb7b85a02b83d2f22f89bd5cac66c9c89474240cb6207cb1efc16d098e822a5" +dependencies = [ + "ark-serialize-derive", + "ark-std", + "digest 0.10.7", + "num-bigint 0.4.4", +] + +[[package]] +name = "ark-serialize-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae3281bc6d0fd7e549af32b52511e1302185bd688fd3359fa36423346ff682ea" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "ark-std" +version = "0.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94893f1e0c6eeab764ade8dc4c0db24caf4fe7cbbaafc0eba0a9030f447b5185" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "array-bytes" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ad284aeb45c13f2fb4f084de4a420ebf447423bdf9386c0540ce33cb3ef4b8c" + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "ascii" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab1c04a571841102f5345a8fc0f6bb3d31c315dec879b5c6e42e40ce7ffa34e" + +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + +[[package]] +name = "async-compression" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +dependencies = [ + "brotli", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff" + +[[package]] +name = "base64" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitmaps" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" +dependencies = [ + "typenum", +] + +[[package]] +name = "blake3" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +dependencies = [ + 
"arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest 0.10.7", +] + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "block-padding", + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae" + +[[package]] +name = "borsh" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15bf3650200d8bffa99015595e10f1fbd17de07abbc25bb067da79e769939bfa" +dependencies = [ + "borsh-derive 0.9.3", + "hashbrown 0.11.2", +] + +[[package]] +name = "borsh" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" +dependencies = [ + "borsh-derive 0.10.3", + "hashbrown 0.13.2", +] + +[[package]] +name = "borsh-derive" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6441c552f230375d18e3cc377677914d2ca2b0d36e52129fe15450a2dce46775" +dependencies = [ + "borsh-derive-internal 0.9.3", + "borsh-schema-derive-internal 0.9.3", + "proc-macro-crate 0.1.5", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "borsh-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" +dependencies = [ + "borsh-derive-internal 0.10.3", + "borsh-schema-derive-internal 0.10.3", + "proc-macro-crate 0.1.5", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "borsh-derive-internal" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5449c28a7b352f2d1e592a8a28bf139bc71afb0764a14f3c02500935d8c44065" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "borsh-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "borsh-schema-derive-internal" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdbd5696d8bfa21d53d9fe39a714a18538bad11492a42d066dbbc395fb1951c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "borsh-schema-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bs58" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "771fe0050b883fcc3ea2359b1a96bcfbc090b7116eae7c3c512c7a083fdf23d3" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bv" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8834bb1d8ee5dc048ee3124f2c7c1afcc6bc9aed03f11e9dfd8c69470a5db340" +dependencies = [ + "feature-probe", + "serde", +] + +[[package]] +name = "bytemuck" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "cipher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "generic-array", +] + +[[package]] +name = "combine" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680" +dependencies = [ + "ascii", + "byteorder", + "either", + "memchr", + "unreachable", +] + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "console_log" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89f72f65e8501878b8a004d5a1afb780987e2ce2b4532c562e367a72c57499f" +dependencies = [ + "log", + "web-sys", +] + +[[package]] 
+name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "core-foundation-sys" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" + +[[package]] +name = "cpufeatures" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "crypto-mac" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab" +dependencies = [ + "generic-array", + "subtle", +] + +[[package]] +name = "ctr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "049bb91fb4aaf0e3c7efa6cd5ef877dbbbd15b39dad06d9948de4ec8a75761ea" +dependencies = [ + "cipher", +] + +[[package]] +name = "curve25519-dalek" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f9d052967f590a76e62eb387bd0bbb1b000182c3cefe5364db6b7211651bc0" +dependencies = [ + "byteorder", + "digest 0.9.0", + "rand_core 0.5.1", + "serde", + "subtle", + "zeroize", +] + +[[package]] +name = "darling" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.37", +] + +[[package]] +name = "darling_macro" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "demo-transaction-status-ffi" +version = "0.1.0" +dependencies = [ + "bs58", + "byteorder", + "chrono", + "libc", + "serde_json", + "solana-sdk", + "solana-transaction-status", +] + +[[package]] +name = "derivation-path" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e5c37193a1db1d8ed868c03ec7b152175f26160a5b740e5e484143877e0adf0" + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common", + "subtle", +] + +[[package]] +name = "eager" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe71d579d1812060163dff96056261deb5bf6729b100fa2e36a68b9649ba3d3" + +[[package]] +name = "ed25519" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91cff35c70bba8a626e3185d8cd48cc11b5437e1a5bcd15b9b5fa3c64b6dfee7" +dependencies = [ + "signature", +] + +[[package]] +name = "ed25519-dalek" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c762bae6dcaf24c4c84667b8579785430908723d5c889f469d76a41d59cc7a9d" +dependencies = [ + "curve25519-dalek", + "ed25519", + "rand 0.7.3", + "serde", + "sha2 0.9.9", + "zeroize", +] + +[[package]] +name = "ed25519-dalek-bip32" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d2be62a4061b872c8c0873ee4fc6f101ce7b889d039f019c5fa2af471a59908" +dependencies = [ + "derivation-path", + "ed25519-dalek", + "hmac 0.12.1", + "sha2 0.10.7", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "enum-iterator" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7add3873b5dd076766ee79c8e406ad1a472c385476b9e38849f8eec24f1be689" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "env_logger" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "feature-probe" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" + +[[package]] +name = "flate2" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-core", + "futures-io", + "futures-macro", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "serde", + "typenum", + "version_check", +] + +[[package]] +name = "gethostname" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ebd34e35c46e00bb73e81363248d627782724609fe1b6396f553f68fe3862e" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "gimli" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" + +[[package]] +name = "goblin" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7666983ed0dd8d21a6f6576ee00053ca0926fb281a5522577a4dbd0f1b54143" +dependencies = [ + "log", + "plain", + "scroll", +] + +[[package]] +name = "h2" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap 1.9.3", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hash32" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" +dependencies = [ + "byteorder", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash 0.7.6", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash 0.8.3", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "hmac" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "126888268dcc288495a26bf004b38c5fdbb31682f992c84ceb046a1f0fe38840" +dependencies = [ + "crypto-mac", + "digest 0.9.0", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "hmac-drbg" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17ea0a1394df5b6574da6e0c1ade9e78868c9fb0a4e5ef4428e32da4676b85b1" +dependencies = [ + "digest 0.9.0", + "generic-array", + "hmac 0.8.1", +] + +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.4.9", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +dependencies = [ + "futures-util", + "http", + "hyper", + "rustls", + "tokio", + "tokio-rustls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "im" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" +dependencies = [ + "bitmaps", + "rand_core 0.6.4", + "rand_xoshiro", + 
"rayon", + "serde", + "sized-chunks", + "typenum", + "version_check", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + +[[package]] +name = "ipnet" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jobserver" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "keccak" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f6d5ed8676d904364de097082f4e7d240b571b67989ced0240f08b7f966f940" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + +[[package]] +name = "libsecp256k1" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d220bc1feda2ac231cb78c3d26f27676b8cf82c96971f7aeef3d0cf2797c73" +dependencies = [ + "arrayref", + "base64 0.12.3", + "digest 0.9.0", + "hmac-drbg", + "libsecp256k1-core", + "libsecp256k1-gen-ecmult", + "libsecp256k1-gen-genmult", + "rand 0.7.3", + "serde", + "sha2 0.9.9", + "typenum", +] + +[[package]] +name = "libsecp256k1-core" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f6ab710cec28cef759c5f18671a27dae2a5f952cdaaee1d8e2908cb2478a80" +dependencies = [ + "crunchy", + "digest 0.9.0", + "subtle", +] + +[[package]] +name = "libsecp256k1-gen-ecmult" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccab96b584d38fac86a83f07e659f0deafd0253dc096dab5a36d53efe653c5c3" +dependencies = [ + "libsecp256k1-core", +] + +[[package]] +name = "libsecp256k1-gen-genmult" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67abfe149395e3aa1c48a2beb32b068e2334402df8181f818d3aee2b304c4f5d" +dependencies = [ + "libsecp256k1-core", +] + +[[package]] +name = "lock_api" 
+version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" + +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "merlin" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c38e2799fc0978b65dfff8023ec7843e2330bb462f19198840b34b6582397d" +dependencies = [ + "byteorder", + "keccak", + "rand_core 0.6.4", + "zeroize", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys", +] + +[[package]] +name = "num" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36" +dependencies = [ + "num-bigint 0.2.6", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef" +dependencies = [ + "autocfg", + "num-bigint 0.2.6", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.3", + "libc", +] + +[[package]] +name = "num_enum" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" +dependencies = [ + "num_enum_derive 0.5.11", +] + +[[package]] +name = "num_enum" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" +dependencies = [ + "num_enum_derive 0.6.1", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" +dependencies = [ + "proc-macro-crate 1.3.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num_enum_derive" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" +dependencies = [ + "proc-macro-crate 1.3.1", + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "object" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "pbkdf2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "216eaa586a190f0a738f2f918511eecfa90f13295abec0e457cdebcceda80cbd" +dependencies = [ + "crypto-mac", +] + +[[package]] +name = "pbkdf2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "percentage" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd23b938276f14057220b707937bcb42fa76dda7560e57a2da30cb52d557937" +dependencies = [ + "num", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "polyval" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro-crate" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +dependencies = [ + "toml", +] + +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "qstring" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d464fae65fff2680baf48019211ce37aaec0c78e9264c84a3e484717f965104e" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] 
+name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.10", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "reqwest" +version = "0.11.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" +dependencies = [ + "async-compression", + "base64 0.21.4", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-rustls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustls" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2" +dependencies = [ + "base64 0.21.4", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_bytes" +version = "0.11.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab33ec92f677585af6d88c65593ae2375adde54efdbf16d597f2cbc7a6d368ff" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "serde_json" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_with" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ff71d2c147a7b57362cead5e22f772cd52f6ab31cfcd9edcd7f6aeb2a0afbe" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "881b6f881b17d13214e5d494c939ebab463d01264ce1811e9d4ac3a882e7695f" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "sha2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpufeatures", + "digest 0.9.0", + "opaque-debug", +] + +[[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest 0.10.7", +] + +[[package]] +name = "sha3" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81199417d4e5de3f04b1e871023acea7389672c4135918f05aa9cbf2f2fa809" +dependencies = [ + "block-buffer 0.9.0", + "digest 0.9.0", + "keccak", + "opaque-debug", +] + +[[package]] +name = "sha3" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" +dependencies = [ + "digest 0.10.7", + "keccak", +] + +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" + +[[package]] +name = "sized-chunks" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e" +dependencies = [ + "bitmaps", + "typenum", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "socket2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "solana-account-decoder" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b83daa56035885dac1a47f5bd3d4e02379e3fc5915b2c3ce978a9af9eeecf07d" +dependencies = [ + "Inflector", + "base64 0.21.4", + "bincode", + "bs58", + "bv", + "lazy_static", + "serde", + "serde_derive", + "serde_json", + "solana-address-lookup-table-program", + "solana-config-program", + "solana-sdk", + "spl-token", + "spl-token-2022", + "thiserror", + "zstd", +] + +[[package]] +name = "solana-address-lookup-table-program" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dd3f3e85d67e559985fbdc6b5b4d5dd9c8462b78e6079c3b465496c1f3c55d6" +dependencies = [ + "bincode", + "bytemuck", + "log", + "num-derive", + "num-traits", + "rustc_version", + "serde", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-program", + "solana-program-runtime", + "solana-sdk", + "thiserror", +] + +[[package]] +name = "solana-config-program" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a35e4cc9f2996a2ef95aac398443fc4a110ef585521e11a7685b3591648b7cf" +dependencies = [ + "bincode", + "chrono", + "serde", + "serde_derive", + "solana-program-runtime", + "solana-sdk", +] + +[[package]] +name = "solana-frozen-abi" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b9e2169fd13394af838b13f047067c35ce69372aea0fb46e026405b5e931f9" +dependencies = [ + "ahash 0.8.3", + "blake3", + "block-buffer 0.10.4", + "bs58", + "bv", + "byteorder", + "cc", + "either", + "generic-array", + "getrandom 0.1.16", + "im", + "lazy_static", + "log", + "memmap2", + "once_cell", + "rand_core 0.6.4", + "rustc_version", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "sha2 0.10.7", + "solana-frozen-abi-macro", + "subtle", + "thiserror", +] + +[[package]] +name = "solana-frozen-abi-macro" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db08ab0af4007dc0954b900aa5febc0c0ae50d9f9f598be27263c3195d90240b" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.37", +] + +[[package]] +name = "solana-logger" +version = "1.16.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf8a48e734f78a44399516f7c130c114b455911e351f001abc0d96e7c5694efa" +dependencies = [ + "env_logger", + "lazy_static", + "log", +] + +[[package]] +name = "solana-measure" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3529d2ff63ceedd3707c51188aacb9e3c142118de3f55447c40584a78223ffd" +dependencies = [ + "log", + "solana-sdk", +] + +[[package]] +name = "solana-metrics" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4792f29de5378a13c51be3fa9fdd526a20550b5ffabd7d1a57a4e49468e17d90" +dependencies = [ + "crossbeam-channel", + "gethostname", + "lazy_static", + "log", + "reqwest", + "solana-sdk", +] + +[[package]] +name = "solana-program" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f17a1fbcf1e94e282db16153d323b446d6386ac99f597f78e76332265829336" +dependencies = [ + "ark-bn254", + "ark-ec", + "ark-ff", + "ark-serialize", + "array-bytes", + "base64 0.21.4", + "bincode", + "bitflags", + "blake3", + "borsh 0.10.3", + "borsh 0.9.3", + "bs58", + "bv", + "bytemuck", + "cc", + "console_error_panic_hook", + "console_log", + "curve25519-dalek", + "getrandom 0.2.10", + "itertools", + "js-sys", + "lazy_static", + "libc", + "libsecp256k1", + "log", + "memoffset", + "num-bigint 0.4.4", + "num-derive", + "num-traits", + "parking_lot", + "rand 0.7.3", + "rand_chacha 0.2.2", + "rustc_version", + "rustversion", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "sha2 0.10.7", + "sha3 0.10.8", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-sdk-macro", + "thiserror", + "tiny-bip39", + "wasm-bindgen", + "zeroize", +] + +[[package]] +name = "solana-program-runtime" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ff9f0c8043b2e7921e25a3fee88fa253b8cb5dbab1e521a4d83e78e8874c551" +dependencies = [ + "base64 0.21.4", + "bincode", + "eager", + "enum-iterator", + "itertools", + "libc", + "log", + "num-derive", + "num-traits", + "percentage", + "rand 0.7.3", + "rustc_version", + "serde", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-measure", + "solana-metrics", + "solana-sdk", + "solana_rbpf", + "thiserror", +] + +[[package]] +name = "solana-sdk" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74a01f25b9f4022fc222c21c589ef7943027fb0fa2b9f6ae943fc4a65c2c01a2" +dependencies = [ + "assert_matches", + "base64 0.21.4", + "bincode", + "bitflags", + "borsh 0.10.3", + "bs58", + "bytemuck", + "byteorder", + "chrono", + "derivation-path", + "digest 0.10.7", + "ed25519-dalek", + "ed25519-dalek-bip32", + "generic-array", + "hmac 0.12.1", + "itertools", + "js-sys", + "lazy_static", + "libsecp256k1", + "log", + "memmap2", + "num-derive", + "num-traits", + "num_enum 0.6.1", + "pbkdf2 0.11.0", + "qstring", + "rand 0.7.3", + "rand_chacha 0.2.2", + "rustc_version", + "rustversion", + "serde", + "serde_bytes", + "serde_derive", + "serde_json", + "serde_with", + "sha2 0.10.7", + "sha3 0.10.8", + "solana-frozen-abi", + "solana-frozen-abi-macro", + "solana-logger", + "solana-program", + "solana-sdk-macro", + "thiserror", + "uriparse", + "wasm-bindgen", +] + +[[package]] +name = "solana-sdk-macro" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75b33716470fa4a65a23ddc2d4abcb8d28532c6e3ae3f04f4fe79b5e1f8c247" +dependencies = 
[ + "bs58", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.37", +] + +[[package]] +name = "solana-transaction-status" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9266f75afa4163c9a5f29f1066f907e87482858749942380d6538af567b44c7" +dependencies = [ + "Inflector", + "base64 0.21.4", + "bincode", + "borsh 0.9.3", + "bs58", + "lazy_static", + "log", + "serde", + "serde_derive", + "serde_json", + "solana-account-decoder", + "solana-address-lookup-table-program", + "solana-sdk", + "spl-associated-token-account", + "spl-memo", + "spl-token", + "spl-token-2022", + "thiserror", +] + +[[package]] +name = "solana-zk-token-sdk" +version = "1.16.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1669c9d223d850cd96cad69d3ba1a4234bc3e2f83ac837fbdbc0ce774dac7b92" +dependencies = [ + "aes-gcm-siv", + "base64 0.21.4", + "bincode", + "bytemuck", + "byteorder", + "curve25519-dalek", + "getrandom 0.1.16", + "itertools", + "lazy_static", + "merlin", + "num-derive", + "num-traits", + "rand 0.7.3", + "serde", + "serde_json", + "sha3 0.9.1", + "solana-program", + "solana-sdk", + "subtle", + "thiserror", + "zeroize", +] + +[[package]] +name = "solana_rbpf" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3082ec3a1d4ef7879eb5b84916d5acde057abd59733eec3647e0ab8885283ef" +dependencies = [ + "byteorder", + "combine", + "goblin", + "hash32", + "libc", + "log", + "rand 0.8.5", + "rustc-demangle", + "scroll", + "thiserror", + "winapi", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "spl-associated-token-account" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978dba3bcbe88d0c2c58366c254d9ea41c5f73357e72fc0bdee4d6b5fc99c8f4" +dependencies = [ + "assert_matches", + "borsh 0.9.3", + "num-derive", + "num-traits", + "solana-program", + "spl-token", + "spl-token-2022", + "thiserror", +] + +[[package]] +name = "spl-memo" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd0dc6f70db6bacea7ff25870b016a65ba1d1b6013536f08e4fd79a8f9005325" +dependencies = [ + "solana-program", +] + +[[package]] +name = "spl-token" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e85e168a785e82564160dcb87b2a8e04cee9bfd1f4d488c729d53d6a4bd300d" +dependencies = [ + "arrayref", + "bytemuck", + "num-derive", + "num-traits", + "num_enum 0.5.11", + "solana-program", + "thiserror", +] + +[[package]] +name = "spl-token-2022" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0043b590232c400bad5ee9eb983ced003d15163c4c5d56b090ac6d9a57457b47" +dependencies = [ + "arrayref", + "bytemuck", + "num-derive", + "num-traits", + "num_enum 0.5.11", + "solana-program", + "solana-zk-token-sdk", + "spl-memo", + "spl-token", + "thiserror", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + +[[package]] +name = "syn" +version = 
"1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "tiny-bip39" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffc59cb9dfc85bb312c3a78fd6aa8a8582e310b0fa885d5bb877f6dcc601839d" +dependencies = [ + "anyhow", + "hmac 0.8.1", + "once_cell", + "pbkdf2 0.4.0", + "rand 0.7.3", + "rustc-hash", + "sha2 0.9.9", + "thiserror", + "unicode-normalization", + "wasm-bindgen", + "zeroize", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2 0.5.4", + "windows-sys", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap 2.0.0", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "universal-hash" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f214e8f697e925001e66ec2c6e37a4ef93f0f78c2eed7814394e10c62025b05" +dependencies = [ + "generic-array", + "subtle", +] + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +dependencies = [ + "void", +] + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + +[[package]] +name = "uriparse" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0200d0fc04d809396c2ad43f3c95da3582a2556eba8d453c1087f4120ee352ff" +dependencies = [ + "fnv", + "lazy_static", +] + +[[package]] +name = "url" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + +[[package]] +name = "want" +version = "0.3.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.37", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" + +[[package]] +name = "web-sys" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" 
+version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "winnow" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +dependencies = [ + "memchr", +] + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys", +] + +[[package]] +name = "zeroize" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4756f7db3f7b5574938c3eb1c117038b8e07f95ee6718c0efad4ac21508f1efd" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.37", +] + +[[package]] +name = "zstd" +version = 
"0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.8+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] diff --git a/txstatus/Cargo.toml b/txstatus/Cargo.toml new file mode 100644 index 00000000..8ae2752d --- /dev/null +++ b/txstatus/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "demo-transaction-status-ffi" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libc = "0.2.2" +chrono = "0.4.19" +bs58 = "0.4.0" +serde_json = "1.0.107" +byteorder = "1.4.3" +solana-transaction-status = "1.16.13" +solana-sdk = "1.16.13" + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] + +[lib] +# If you only wanted shared lib, you'd use only "cdylib". +# If you only wanted static lib, you'd use only "staticlib". +# This demo shows both. +crate-type = ["staticlib", "cdylib"] + +[profile.release] +lto = true +codegen-units = 1 diff --git a/txstatus/src/byte_order.rs b/txstatus/src/byte_order.rs new file mode 100644 index 00000000..e5862ad9 --- /dev/null +++ b/txstatus/src/byte_order.rs @@ -0,0 +1,5 @@ +#[allow(dead_code)] +pub enum ByteOrder { + BigEndian, + LittleEndian, +} diff --git a/txstatus/src/lib.rs b/txstatus/src/lib.rs new file mode 100644 index 00000000..f24b6ad9 --- /dev/null +++ b/txstatus/src/lib.rs @@ -0,0 +1,247 @@ +use std::ffi::CStr; +use std::ffi::CString; + +use std::slice; +use std::time::Instant; + +mod byte_order; +mod reader; +mod tools; +mod type_size; + +use reader::Decoder; +use solana_sdk::message::v0::LoadedAddresses; +use solana_sdk::{instruction::CompiledInstruction, message::AccountKeys, pubkey::Pubkey}; +use solana_transaction_status::parse_instruction::parse; + +#[no_mangle] +pub extern "C" fn hello_from_rust() { + println!("Hello from Rust at time: {}!", chrono::Local::now()); +} + +#[no_mangle] +pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { + let started_at = Instant::now(); + let bytes = unsafe { + assert!(!bytes.is_null()); + slice::from_raw_parts(bytes, len) + }; + let bytes = bytes.to_vec(); + println!("[rust] params raw bytes: {:?}", bytes); + println!("[rust] params:"); + let mut decoder = Decoder::new(bytes); + { + // read program ID: + let program_id_bytes = decoder.read_bytes(32).unwrap(); + let program_id = solana_sdk::pubkey::Pubkey::new(&program_id_bytes); + let mut instruction = CompiledInstruction { + program_id_index: 0, + accounts: vec![], + data: vec![], + }; + { + instruction.program_id_index = decoder.read_u8().unwrap() as u8; + let accounts_len = decoder.read_u8().unwrap() as usize; + for _ in 0..accounts_len { + let account_index = decoder.read_u8().unwrap() as u8; + instruction.accounts.push(account_index); + } + let data_len = decoder.read_u8().unwrap() as usize; + for _ in 0..data_len { + let data_byte = decoder.read_u8().unwrap() as u8; + instruction.data.push(data_byte); + } + } + + let mut 
parsed_account_keys = Combined { + parent: vec![], + child: None, + }; + let static_account_keys_len = decoder.read_u8().unwrap() as usize; + println!( + "[rust] static_account_keys_len: {:?}", + static_account_keys_len + ); + let mut static_account_keys_vec = vec![]; + for _ in 0..static_account_keys_len { + let account_key_bytes = decoder.read_bytes(32).unwrap(); + let account_key = solana_sdk::pubkey::Pubkey::new(&account_key_bytes); + static_account_keys_vec.push(account_key); + } + + let has_dynamic_account_keys = decoder.read_option().unwrap(); + if has_dynamic_account_keys { + let mut loaded_addresses = LoadedAddresses::default(); + let num_writable_accounts = decoder.read_u8().unwrap() as usize; + println!("[rust] num_writable_accounts: {:?}", num_writable_accounts); + // read 32 bytes for each writable account: + for _ in 0..num_writable_accounts { + let account_key_bytes = decoder.read_bytes(32).unwrap(); + let account_key = solana_sdk::pubkey::Pubkey::new(&account_key_bytes); + loaded_addresses.writable.push(account_key); + } + let num_readonly_accounts = decoder.read_u8().unwrap() as usize; + // read 32 bytes for each readonly account: + for _ in 0..num_readonly_accounts { + let account_key_bytes = decoder.read_bytes(32).unwrap(); + let account_key = solana_sdk::pubkey::Pubkey::new(&account_key_bytes); + loaded_addresses.readonly.push(account_key); + } + + parsed_account_keys = Combined { + parent: static_account_keys_vec, + child: Some(loaded_addresses), + }; + } else { + parsed_account_keys = Combined { + parent: static_account_keys_vec, + child: None, + }; + } + let dynamic_loaded_addresses = &parsed_account_keys + .child + .or(Some(LoadedAddresses::default())) + .unwrap(); + + let account_keys = AccountKeys::new( + &parsed_account_keys.parent, + if has_dynamic_account_keys { + Some(dynamic_loaded_addresses) + } else { + None + }, + ); + + let mut stack_height: Option<u32> = None; + { + let has_stack_height = decoder.read_option().unwrap(); + println!("[rust] has_stack_height: {:?}", has_stack_height); + if has_stack_height { + stack_height = Some( + decoder + .read_u32(byte_order::ByteOrder::LittleEndian) + .unwrap(), + ); + println!("[rust] stack_height: {:?}", stack_height); + } + } + println!("[rust] program_id: {:?}", program_id); + println!("[rust] instruction: {:?}", instruction); + println!( + "[rust] account_keys.static: {:?}", + parsed_account_keys.parent + ); + println!( + "[rust] has_dynamic_account_keys: {:?}", + has_dynamic_account_keys + ); + println!("[rust] account_keys.dynamic: {:?}", dynamic_loaded_addresses); + println!("[rust] stack_height: {:?}", stack_height); + + let parsed = parse( + &program_id, // program_id + &instruction, + &account_keys, + stack_height, + ); + if parsed.is_err() { + println!("[rust] parse error: {:?}", parsed); + let mut response = vec![0; 32]; + // add error string to response: + let error = parsed.err().unwrap(); + let error = format!("{:?}", error); + response.extend_from_slice(error.as_bytes()); + let data = response.as_mut_ptr(); + let len = response.len(); + // intentionally leak the buffer so the returned pointer stays valid; + // the Go caller copies the bytes out of it. + std::mem::forget(response); + + return Response { + buf: Buffer { + data: unsafe { data.add(32) }, + len: len - 32, + }, + status: 1, + }; + } else { + println!( + "[rust] successfully parsed the instruction in {:?}: {:?}", + Instant::now() - started_at, + parsed + ); + let parsed = parsed.unwrap(); + let parsed_json = serde_json::to_vec(&parsed).unwrap(); + { + let parsed_json_str = String::from_utf8(parsed_json.clone()).unwrap(); + println!( + "[rust] parsed instruction as json at {:?}: {}", + Instant::now() - started_at, + parsed_json_str + ); + } + +
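+ // Assemble the successful response: reserve a 32-byte header, append the + // JSON payload, then hand back a pointer advanced past the header + // (data.add(32), len - 32), mirroring the error path above.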
println!("[rust] {:?}", Instant::now() - started_at); + let mut response = vec![0; 32]; + response.extend_from_slice(&parsed_json); + + let data = response.as_mut_ptr(); + let len = response.len(); + println!("[rust] {:?}", Instant::now() - started_at); + return Response { + buf: Buffer { + data: unsafe { data.add(32) }, + len: len - 32, + }, + status: 0, + }; + } + } + let mut response = vec![0; 32]; + for i in 0..32 { + response[i] = i as u8; + } + let data = response.as_mut_ptr(); + let len = response.len(); + std::mem::forget(response); + Response { + buf: Buffer { data, len }, + status: 123, + } +} + +#[repr(C)] +struct Response { + buf: Buffer, + status: i32, +} + +#[repr(C)] +struct Buffer { + data: *mut u8, + len: usize, +} + +extern "C" fn free_buf(buf: Buffer) { + let s = unsafe { std::slice::from_raw_parts_mut(buf.data, buf.len) }; + let s = s.as_mut_ptr(); + unsafe { + Box::from_raw(s); + } +} + +// write a C external function that accepts a string, parses it as json, and returns a string: +#[no_mangle] +pub extern "C" fn accept_json(json: *const libc::c_char) -> *const libc::c_char { + let json = unsafe { CStr::from_ptr(json).to_bytes() }; + let json = String::from_utf8(json.to_vec()).unwrap(); + { + let v: serde_json::Value = serde_json::from_str(&json).unwrap(); + println!("v: {:?}", v); + } + let json = json + "!"; + let json = CString::new(json).unwrap().into_raw(); + json +} + +struct Combined { + parent: Vec, + child: Option, +} diff --git a/txstatus/src/reader.rs b/txstatus/src/reader.rs new file mode 100644 index 00000000..e7391162 --- /dev/null +++ b/txstatus/src/reader.rs @@ -0,0 +1,398 @@ +use crate::byte_order; +use crate::type_size; +use std::error::Error as StdError; + +// declare error type +pub enum Error { + ShortBuffer { msg: String }, + InvalidValue { msg: String }, + GenericError { msg: String }, +} + +impl StdError for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::ShortBuffer { msg } => write!(f, "short buffer: {}", msg), + Error::InvalidValue { msg } => write!(f, "invalid value: {}", msg), + Error::GenericError { msg } => write!(f, "generic error: {}", msg), + } + } +} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Error::ShortBuffer { msg } => write!(f, "short buffer: {}", msg), + Error::InvalidValue { msg } => write!(f, "invalid value: {}", msg), + Error::GenericError { msg } => write!(f, "generic error: {}", msg), + } + } +} + +pub struct Decoder { + data: Vec, + pos: usize, +} + +#[allow(dead_code)] +impl Decoder { + pub fn new(data: Vec) -> Decoder { + Decoder { data, pos: 0 } + } + + pub fn reset(&mut self, data: Vec) { + self.data = data; + self.pos = 0; + } + + pub fn read_byte(&mut self) -> Result { + if self.pos + type_size::BYTE as usize > self.data.len() { + return Err(Error::ShortBuffer { + msg: format!( + "required {} bytes, but only {} bytes available", + type_size::BYTE, + self.remaining() + ), + }); + } + let b = self.data[self.pos]; + self.pos += type_size::BYTE as usize; + Ok(b) + } + + fn read_n_bytes(&mut self, n: usize) -> Result, Error> { + if n == 0 { + return Ok(Vec::new()); + } + if n > 0x7FFF_FFFF { + return Err(Error::ShortBuffer { + msg: format!("n not valid: {}", n), + }); + } + if self.pos + n > self.data.len() { + return Err(Error::ShortBuffer { + msg: format!( + "required {} bytes, but only {} bytes available", + n, + self.remaining() + ), + }); + } + let out = 
self.data[self.pos..self.pos + n].to_vec(); + self.pos += n; + Ok(out) + } + + pub fn remaining(&self) -> usize { + self.data.len() - self.pos + } + + pub fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> { + if self.pos + buf.len() > self.data.len() { + return Err(Error::ShortBuffer { + msg: format!( + "not enough data: {} bytes missing", + self.pos + buf.len() - self.data.len() + ), + }); + } + let num_copied = buf.len(); + buf.copy_from_slice(&self.data[self.pos..self.pos + buf.len()]); + if num_copied != buf.len() { + return Err(Error::ShortBuffer { + msg: format!( + "expected to read {} bytes, but read only {} bytes", + buf.len(), + num_copied + ), + }); + } + self.pos += num_copied; + Ok(num_copied) + } + + pub fn read_bytes(&mut self, n: usize) -> Result<Vec<u8>, Error> { + self.read_n_bytes(n) + } + + pub fn read_option(&mut self) -> Result<bool, Error> { + let b = self.read_byte()?; + let out = b != 0; + Ok(out) + } + + pub fn read_u8(&mut self) -> Result<u8, Error> { + let out = self.read_byte()?; + Ok(out) + } + + pub fn read_u32(&mut self, order: byte_order::ByteOrder) -> Result<u32, Error> { + if self.remaining() < type_size::UINT32 { + return Err(Error::InvalidValue { + msg: format!( + "uint32 requires [{}] bytes, remaining [{}]", + type_size::UINT32, + self.remaining() + ), + }); + } + let buf = self.read_bytes(type_size::UINT32)?; + let buf: [u8; 4] = buf.try_into().unwrap(); + let out = match order { + byte_order::ByteOrder::LittleEndian => u32::from_le_bytes(buf), + byte_order::ByteOrder::BigEndian => u32::from_be_bytes(buf), + }; + Ok(out) + } + + pub fn set_position(&mut self, idx: usize) -> Result<(), Error> { + if idx < self.data.len() { + self.pos = idx; + Ok(()) + } else { + Err(Error::InvalidValue { + msg: format!( + "request to set position to {} outside of buffer (buffer size {})", + idx, + self.data.len() + ), + }) + } + } + + pub fn position(&self) -> usize { + self.pos + } + + pub fn len(&self) -> usize { + self.data.len() + } + + pub fn has_remaining(&self) -> bool { + self.remaining() > 0 + } +} + +// declare TypeID as a [u8; 8] +pub type TypeID = [u8; 8]; + +// use extension trait to add a method to the TypeID type +pub trait TypeIDFromBytes { + fn from_bytes(bytes: Vec<u8>) -> TypeID; +} + +impl TypeIDFromBytes for TypeID { + fn from_bytes(bytes: Vec<u8>) -> TypeID { + let mut type_id = [0u8; 8]; + type_id.copy_from_slice(&bytes); + type_id + } +} + +// func DecodeCompactU16(bytes []byte) (int, int, error) { +// ln := 0 +// size := 0 +// for { +// if len(bytes) == 0 { +// return 0, 0, io.ErrUnexpectedEOF +// } +// elem := int(bytes[0]) +// bytes = bytes[1:] +// ln |= (elem & 0x7f) << (size * 7) +// size += 1 +// if (elem & 0x80) == 0 { +// break +// } +// } +// return ln, size, nil +// } + +pub fn decode_compact_u16(bytes: &[u8]) -> Result<(usize, usize), Error> { + let mut ln = 0; + let mut size = 0; + let mut terminated = false; + for elem in bytes { + ln |= (usize::from(*elem) & 0x7F) << (size * 7); + size += 1; + if (usize::from(*elem) & 0x80) == 0 { + terminated = true; + break; + } + } + if !terminated { + // mirrors the io.ErrUnexpectedEOF case of the Go reference above + return Err(Error::ShortBuffer { + msg: "unexpected EOF while decoding compact-u16".to_string(), + }); + } + Ok((ln, size)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_decoder_uint8() { + let buf = vec![0x63, 0x64]; + + let mut d = Decoder::new(buf); + + let n = d.read_u8().unwrap(); + assert_eq!(99, n); + assert_eq!(1, d.remaining()); + + let n = d.read_u8().unwrap(); + assert_eq!(100, n); + assert_eq!(0, d.remaining()); + } + + #[test] + fn test_decoder_byte() { + let buf = vec![0x00, 0x01]; + + let mut d = Decoder::new(buf); + + let n = d.read_byte().unwrap(); + assert_eq!(0, n); + assert_eq!(1, d.remaining()); + + let n = d.read_byte().unwrap(); + 
assert_eq!(1, n); + assert_eq!(0, d.remaining()); + } + + #[test] + fn test_decoder_read_bytes() { + let mut buf = vec![]; + buf.extend_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]); + let mut decoder = Decoder::new(buf); + let b = decoder.read_bytes(1).unwrap(); + assert_eq!(vec![0xff], b); + assert_eq!(7, decoder.remaining()); + + let b = decoder.read_bytes(2).unwrap(); + assert_eq!(vec![0xff, 0xff], b); + assert_eq!(5, decoder.remaining()); + + decoder.read_bytes(6).unwrap_err(); + + let b = decoder.read_bytes(5).unwrap(); + assert_eq!(vec![0xff, 0xff, 0xff, 0xff, 0xff], b); + assert_eq!(0, decoder.remaining()); + } + + #[test] + fn test_read_n_bytes() { + let mut b1 = vec![]; + b1.extend_from_slice(&[123, 99, 88, 77, 66, 55, 44, 33, 22, 11]); + let mut b2 = vec![]; + b2.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let mut buf = vec![]; + buf.extend_from_slice(&b1); + buf.extend_from_slice(&b2); + let mut decoder = Decoder::new(buf); + + let got = decoder.read_n_bytes(10).unwrap(); + assert_eq!(b1, got); + + let got = decoder.read_n_bytes(10).unwrap(); + assert_eq!(b2, got); + } + + #[test] + fn test_read_n_bytes_error() { + let mut b1 = vec![]; + b1.extend_from_slice(&[123, 99, 88, 77, 66, 55, 44, 33, 22, 11]); + let mut b2 = vec![]; + b2.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let mut buf = vec![]; + buf.extend_from_slice(&b1); + buf.extend_from_slice(&b2); + let mut decoder = Decoder::new(buf); + + let res = decoder.read_n_bytes(9999); + assert!(res.is_err()); + } + + #[test] + fn test_read_bytes() { + let mut b1 = vec![]; + b1.extend_from_slice(&[123, 99, 88, 77, 66, 55, 44, 33, 22, 11]); + let mut b2 = vec![]; + b2.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let mut buf = vec![]; + buf.extend_from_slice(&b1); + buf.extend_from_slice(&b2); + let mut decoder = Decoder::new(buf); + + let got = decoder.read_bytes(10).unwrap(); + assert_eq!(b1, got); + + let got = decoder.read_bytes(10).unwrap(); + assert_eq!(b2, got); + } + + #[test] + fn test_read() { + let mut b1 = vec![]; + b1.extend_from_slice(&[123, 99, 88, 77, 66, 55, 44, 33, 22, 11]); + let mut b2 = vec![]; + b2.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let mut buf = vec![]; + buf.extend_from_slice(&b1); + buf.extend_from_slice(&b2); + let mut decoder = Decoder::new(buf); + + { + let mut got = vec![]; + got.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let num = decoder.read(&mut got).unwrap(); + assert_eq!(b1, got); + assert_eq!(10, num); + } + + { + let mut got = vec![]; + got.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let num = decoder.read(&mut got).unwrap(); + assert_eq!(b2, got); + assert_eq!(10, num); + } + { + let mut got = vec![]; + got.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let res = decoder.read(&mut got); + assert!(res.is_err()); + } + { + let mut got = vec![]; + let num = decoder.read(&mut got).unwrap(); + assert_eq!(0, num); + assert_eq!(vec![] as Vec<u8>, got); + } + } + + #[test] + fn test_decoder_uint32() { + // little endian + let buf = vec![0x28, 0x72, 0x75, 0x10, 0x4f, 0x9f, 0x03, 0x00]; + + let mut d = Decoder::new(buf); + + let n = d.read_u32(byte_order::ByteOrder::LittleEndian).unwrap(); + assert_eq!(276132392, n); + assert_eq!(4, d.remaining()); + + let n = d.read_u32(byte_order::ByteOrder::LittleEndian).unwrap(); + assert_eq!(237391, n); + assert_eq!(0, d.remaining()); + + // big endian + let buf = vec![0x10, 0x75, 0x72, 0x28, 0x00, 0x03, 0x9f, 0x4f]; + + let mut d = Decoder::new(buf); + + let n = 
d.read_u32(byte_order::ByteOrder::BigEndian).unwrap(); + assert_eq!(276132392, n); + assert_eq!(4, d.remaining()); + + let n = d.read_u32(byte_order::ByteOrder::BigEndian).unwrap(); + assert_eq!(237391, n); + assert_eq!(0, d.remaining()); + } +} diff --git a/txstatus/src/tools.rs b/txstatus/src/tools.rs new file mode 100644 index 00000000..9f5a6bbf --- /dev/null +++ b/txstatus/src/tools.rs @@ -0,0 +1,15 @@ +fn read_uint32(bytes: &[u8]) -> u32 { + let mut result = 0; + for i in 0..4 { + result |= (bytes[i] as u32) << (i * 8); + } + result +} + +fn read_n_bytes(bytes: &[u8], n: usize) -> Vec<u8> { + let mut result = Vec::new(); + for i in 0..n { + result.push(bytes[i]); + } + result +} diff --git a/txstatus/src/type_size.rs b/txstatus/src/type_size.rs new file mode 100644 index 00000000..508e346f --- /dev/null +++ b/txstatus/src/type_size.rs @@ -0,0 +1,19 @@ +#[allow(dead_code)] +pub const BOOL: usize = 1; +pub const BYTE: usize = 1; +#[allow(dead_code)] +pub const INT8: usize = 1; +#[allow(dead_code)] +pub const INT16: usize = 2; +#[allow(dead_code)] +pub const UINT8: usize = 1; +pub const UINT16: usize = 2; +pub const UINT32: usize = 4; +pub const UINT64: usize = 8; +pub const UINT128: usize = 16; +pub const FLOAT32: usize = 4; +pub const FLOAT64: usize = 8; +#[allow(dead_code)] +pub const PUBLIC_KEY: usize = 32; +#[allow(dead_code)] +pub const SIGNATURE: usize = 64; diff --git a/txstatus/transaction-wrapper.go b/txstatus/transaction-wrapper.go new file mode 100644 index 00000000..9d2814a8 --- /dev/null +++ b/txstatus/transaction-wrapper.go @@ -0,0 +1,117 @@ +package txstatus + +import ( + "encoding/json" + "fmt" + + "github.com/gagliardetto/solana-go" +) + +type Transaction struct { + Message Message `json:"message"` + Signatures []solana.Signature `json:"signatures"` +} + +type Message struct { + AccountKeys []AccountKey `json:"accountKeys"` + Instructions []json.RawMessage `json:"instructions"` + RecentBlockhash string `json:"recentBlockhash"` +} + +type AccountKey struct { + Pubkey string `json:"pubkey"` + Signer bool `json:"signer"` + Source string `json:"source"` + Writable bool `json:"writable"` +} + +func FromTransaction(solTx solana.Transaction) (Transaction, error) { + tx := Transaction{ + Message: Message{ + AccountKeys: make([]AccountKey, len(solTx.Message.AccountKeys)), + Instructions: make([]json.RawMessage, len(solTx.Message.Instructions)), + }, + Signatures: solTx.Signatures, + } + for i, accKey := range solTx.Message.AccountKeys { + isWr, err := solTx.IsWritable(accKey) + if err != nil { + return tx, fmt.Errorf("failed to check if account key #%d is writable: %w", i, err) + } + tx.Message.AccountKeys[i] = AccountKey{ + Pubkey: accKey.String(), + Signer: solTx.IsSigner(accKey), + Source: "transaction", // TODO: what is this?
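+ // In Solana's jsonParsed encoding this field is "transaction" for static + // message keys and "lookupTable" for keys loaded via address lookup tables.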
+			Writable: isWr,
+		}
+	}
+	for i, inst := range solTx.Message.Instructions {
+		tx.Message.Instructions[i] = json.RawMessage(inst.Data)
+	}
+	tx.Message.RecentBlockhash = solTx.Message.RecentBlockhash.String()
+	return tx, nil
+}
+
+// {
+//   "message": {
+//     "accountKeys": [
+//       {
+//         "pubkey": "GdnSyH3YtwcxFvQrVVJMm1JhTS4QVX7MFsX56uJLUfiZ",
+//         "signer": true,
+//         "source": "transaction",
+//         "writable": true
+//       },
+//       {
+//         "pubkey": "sCtiJieP8B3SwYnXemiLpRFRR8KJLMtsMVN25fAFWjW",
+//         "signer": false,
+//         "source": "transaction",
+//         "writable": true
+//       },
+//       {
+//         "pubkey": "SysvarS1otHashes111111111111111111111111111",
+//         "signer": false,
+//         "source": "transaction",
+//         "writable": false
+//       },
+//       {
+//         "pubkey": "SysvarC1ock11111111111111111111111111111111",
+//         "signer": false,
+//         "source": "transaction",
+//         "writable": false
+//       },
+//       {
+//         "pubkey": "Vote111111111111111111111111111111111111111",
+//         "signer": false,
+//         "source": "transaction",
+//         "writable": false
+//       }
+//     ],
+//     "instructions": [
+//       {
+//         "parsed": {
+//           "info": {
+//             "clockSysvar": "SysvarC1ock11111111111111111111111111111111",
+//             "slotHashesSysvar": "SysvarS1otHashes111111111111111111111111111",
+//             "vote": {
+//               "hash": "EYEnTi2GEy7ApyWm63hvpi6c69Kvfcsc3TtdFu92yLxr",
+//               "slots": [
+//                 431996
+//               ],
+//               "timestamp": null
+//             },
+//             "voteAccount": "sCtiJieP8B3SwYnXemiLpRFRR8KJLMtsMVN25fAFWjW",
+//             "voteAuthority": "GdnSyH3YtwcxFvQrVVJMm1JhTS4QVX7MFsX56uJLUfiZ"
+//           },
+//           "type": "vote"
+//         },
+//         "program": "vote",
+//         "programId": "Vote111111111111111111111111111111111111111",
+//         "stackHeight": null
+//       }
+//     ],
+//     "recentBlockhash": "G9jx9FCto47ebxHgXBomE14hvG1WiwGD8LL3p7pEt1JX"
+//   },
+//   "signatures": [
+//     "55y2u7sCd8mZ5LqtdrWnqJ6WBxVojXGXBd5KVuJFZrMJiC6bzziMdaPB3heNWqK9JpB5KfXSY4wTzf1AbyNSwUPd"
+//   ]
+// }
diff --git a/txstatus/txstatus-dummy.go b/txstatus/txstatus-dummy.go
new file mode 100644
index 00000000..1a623186
--- /dev/null
+++ b/txstatus/txstatus-dummy.go
@@ -0,0 +1,19 @@
+//go:build !ffi
+// +build !ffi
+
+package txstatus
+
+import (
+	"encoding/json"
+	"fmt"
+)
+
+func (inst Parameters) ParseInstruction() (json.RawMessage, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+
+// IsEnabled returns true if the library was built with the necessary
+// flags to enable the FFI features necessary for parsing instructions.
+func IsEnabled() bool {
+	return false
+}
diff --git a/txstatus/txstatus-ffi.go b/txstatus/txstatus-ffi.go
new file mode 100644
index 00000000..506abe25
--- /dev/null
+++ b/txstatus/txstatus-ffi.go
@@ -0,0 +1,56 @@
+//go:build ffi
+// +build ffi
+
+package txstatus
+
+/*
+#cgo LDFLAGS: -L./lib -lsolana_transaction_status_wrapper
+#include "./lib/transaction_status.h"
+*/
+import "C"
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"time"
+	"unsafe"
+
+	"github.com/davecgh/go-spew/spew"
+	bin "github.com/gagliardetto/binary"
+)
+
+func (inst Parameters) ParseInstruction() (json.RawMessage, error) {
+	buf := new(bytes.Buffer)
+	buf.Grow(1024)
+	encoder := bin.NewBinEncoder(buf)
+
+	err := inst.MarshalWithEncoder(encoder)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal Parameters: %w", err)
+	}
+
+	cs := (*C.u_char)(C.CBytes(buf.Bytes()))
+	defer C.free(unsafe.Pointer(cs))
+
+	startedParsingAt := time.Now()
+	got := C.parse_instruction(cs, C.ulong(len(buf.Bytes())))
+	if got.status == 0 {
+		debugln("[golang] got status (OK):", got.status)
+	} else {
+		debugln("[golang] got status (ERR):", got.status)
+	}
+	debugln("[golang] got parsed instruction in:", time.Since(startedParsingAt))
+
+	parsedInstructionJSON := C.GoBytes(unsafe.Pointer(got.buf.data), C.int(got.buf.len))
+	debugln("[golang] got parsed instruction as json:", spew.Sdump(parsedInstructionJSON))
+	debugln("[golang] got parsed instruction as json:", string(parsedInstructionJSON))
+
+	return parsedInstructionJSON, nil
+}
+
+// IsEnabled returns true if the library was built with the necessary
+// flags to enable the FFI features necessary for parsing instructions.
+func IsEnabled() bool {
+	return true
+}
diff --git a/txstatus/types.go b/txstatus/types.go
new file mode 100644
index 00000000..3d903d7c
--- /dev/null
+++ b/txstatus/types.go
@@ -0,0 +1,188 @@
+package txstatus
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	bin "github.com/gagliardetto/binary"
+	"github.com/gagliardetto/solana-go"
+)
+
+type Parameters struct {
+	ProgramID   solana.PublicKey
+	Instruction CompiledInstruction
+	AccountKeys AccountKeys
+	StackHeight *uint32
+}
+
+func (inst Parameters) MarshalWithEncoder(encoder *bin.Encoder) error {
+	_, err := encoder.Write(inst.ProgramID[:])
+	if err != nil {
+		return fmt.Errorf("failed to write ProgramID: %w", err)
+	}
+	err = inst.Instruction.MarshalWithEncoder(encoder)
+	if err != nil {
+		return fmt.Errorf("failed to write Instruction: %w", err)
+	}
+	err = inst.AccountKeys.MarshalWithEncoder(encoder)
+	if err != nil {
+		return fmt.Errorf("failed to write AccountKeys: %w", err)
+	}
+	if inst.StackHeight != nil {
+		err = encoder.WriteOption(true)
+		if err != nil {
+			return fmt.Errorf("failed to write Option(StackHeight): %w", err)
+		}
+		err = encoder.WriteUint32(*inst.StackHeight, binary.LittleEndian)
+		if err != nil {
+			return fmt.Errorf("failed to write StackHeight: %w", err)
+		}
+	} else {
+		err = encoder.WriteOption(false)
+		if err != nil {
+			return fmt.Errorf("failed to write Option(StackHeight): %w", err)
+		}
+	}
+	return nil
+}
+
+type CompiledInstruction struct {
+	ProgramIDIndex uint8
+	Accounts       []uint8
+	Data           []byte
+}
+
+func (inst CompiledInstruction) MarshalWithEncoder(encoder *bin.Encoder) error {
+	{
+		// .compiled_instruction.program_id_index as uint8
+		err := encoder.WriteUint8(inst.ProgramIDIndex)
+		if err != nil {
+			return fmt.Errorf("failed to write ProgramIDIndex: %w", err)
+		}
+		// .compiled_instruction.accounts:
+		{
+			// len uint8
+			err := encoder.WriteUint8(uint8(len(inst.Accounts)))
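+			// NOTE: the wire format used here length-prefixes with a single
+			// byte, so this would silently truncate above 255 entries; compiled
+			// instructions index accounts with u8, so in practice it stays in range.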
+ if err != nil { + return fmt.Errorf("failed to write len(Accounts): %w", err) + } + // values: + _, err = encoder.Write(inst.Accounts) + if err != nil { + return fmt.Errorf("failed to write Accounts: %w", err) + } + } + // .compiled_instruction.data: + { + // len uint8 + err := encoder.WriteUint8(uint8(len(inst.Data))) + if err != nil { + return fmt.Errorf("failed to write len(Data): %w", err) + } + // value: + _, err = encoder.Write(inst.Data) + if err != nil { + return fmt.Errorf("failed to write Data: %w", err) + } + } + } + return nil +} + +type AccountKeys struct { + StaticKeys []solana.PublicKey + DynamicKeys *LoadedAddresses +} + +func (inst AccountKeys) MarshalWithEncoder(encoder *bin.Encoder) error { + { + // account_keys.static_keys: + { + // len uint8 + err := encoder.WriteUint8(uint8(len(inst.StaticKeys))) + if err != nil { + return fmt.Errorf("failed to write len(StaticKeys): %w", err) + } + // keys: + for keyIndex, key := range inst.StaticKeys { + // key + _, err := encoder.Write(key[:]) + if err != nil { + return fmt.Errorf("failed to write StaticKeys[%d]: %w", keyIndex, err) + } + } + } + // account_keys.dynamic_keys: + if inst.DynamicKeys != nil { + err := encoder.WriteOption(true) + if err != nil { + return fmt.Errorf("failed to write Option(DynamicKeys): %w", err) + } + err = inst.DynamicKeys.MarshalWithEncoder(encoder) + if err != nil { + return fmt.Errorf("failed to write DynamicKeys: %w", err) + } + } else { + err := encoder.WriteOption(false) + if err != nil { + return fmt.Errorf("failed to write Option(DynamicKeys): %w", err) + } + } + } + return nil +} + +type LoadedAddresses struct { + Writable []solana.PublicKey + Readonly []solana.PublicKey +} + +func (inst LoadedAddresses) MarshalWithEncoder(encoder *bin.Encoder) error { + { + // account_keys.dynamic_keys.writable: + { + // len uint8 + err := encoder.WriteUint8(uint8(len(inst.Writable))) + if err != nil { + return fmt.Errorf("failed to write len(Writable): %w", err) + } + // keys: + for keyIndex, key := range inst.Writable { + _, err := encoder.Write(key[:]) + if err != nil { + return fmt.Errorf("failed to write Writable[%d]: %w", keyIndex, err) + } + } + } + // account_keys.dynamic_keys.readonly: + { + // len uint8 + err := encoder.WriteUint8(uint8(len(inst.Readonly))) + if err != nil { + return fmt.Errorf("failed to write len(Readonly): %w", err) + } + // keys: + for keyIndex, key := range inst.Readonly { + _, err := encoder.Write(key[:]) + if err != nil { + return fmt.Errorf("failed to write Readonly[%d]: %w", keyIndex, err) + } + } + } + } + return nil +} + +var DebugMode bool + +func debugf(format string, args ...interface{}) { + if DebugMode { + fmt.Printf(format, args...) + } +} + +func debugln(args ...interface{}) { + if DebugMode { + fmt.Println(args...) 
+ } +} From 643f2d7cc84f1c07a9c894c8eb8bf742d21fc8e7 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 22:14:59 +0100 Subject: [PATCH 31/63] Statically link the ffi library --- Makefile | 10 +++------- txstatus/src/lib.rs | 2 +- txstatus/txstatus-ffi.go | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 259a8198..2f739642 100644 --- a/Makefile +++ b/Makefile @@ -13,15 +13,11 @@ build-rust-wrapper: jsonParsed: build-rust-wrapper # build faithful-cli with jsonParsed format support via ffi (rust) rm -rf ./bin/faithful-cli_jsonParsed - cp txstatus/target/x86_64-unknown-linux-gnu/release/libdemo_transaction_status_ffi.so ./txstatus/lib/libsolana_transaction_status_wrapper.so - LD_FLAGS="$(BASE_LD_FLAGS) -r $(ROOT_DIR)txstatus/lib" + # static linking: + cp txstatus/target/x86_64-unknown-linux-gnu/release/libdemo_transaction_status_ffi.a ./txstatus/lib/libsolana_transaction_status_wrapper.a + LD_FLAGS="$(BASE_LD_FLAGS) -extldflags -static" go build -ldflags=$(LD_FLAGS) -tags ffi -o ./bin/faithful-cli_jsonParsed . echo "built old-faithful with jsonParsed format support via ffi (rust)" - # LD_LIBRARY_PATH=txstatus/lib:$LD_LIBRARY_PATH ./bin/faithful-cli_jsonParsed - echo "To run the binary, please set LD_LIBRARY_PATH=txstatus/lib:\$$LD_LIBRARY_PATH ./bin/faithful-cli_jsonParsed" - # or: - # sudo cp ./txstatus/lib/libsolana_transaction_status_wrapper.so /usr/local/lib/ - # sudo ldconfig compile: @echo "\nCompiling faithful-cli binary for current platform ..." go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/faithful-cli . diff --git a/txstatus/src/lib.rs b/txstatus/src/lib.rs index f24b6ad9..c927dd65 100644 --- a/txstatus/src/lib.rs +++ b/txstatus/src/lib.rs @@ -208,7 +208,7 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { } #[repr(C)] -struct Response { +pub struct Response { buf: Buffer, status: i32, } diff --git a/txstatus/txstatus-ffi.go b/txstatus/txstatus-ffi.go index 506abe25..d321fcd0 100644 --- a/txstatus/txstatus-ffi.go +++ b/txstatus/txstatus-ffi.go @@ -4,7 +4,7 @@ package txstatus /* -#cgo LDFLAGS: -L./lib -lsolana_transaction_status_wrapper +#cgo LDFLAGS: -L./lib -lsolana_transaction_status_wrapper -lm -ldl #include "./lib/transaction_status.h" */ import "C" From 805a2399cf942067390212a63a3335d1366a1c18 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 22:20:34 +0100 Subject: [PATCH 32/63] Cleanup rust --- Makefile | 2 +- txstatus/src/lib.rs | 100 ++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 66 deletions(-) diff --git a/Makefile b/Makefile index 2f739642..4f1b7e36 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ build-rust-wrapper: cd txstatus && cargo build --release --lib --target=x86_64-unknown-linux-gnu --target-dir=target cbindgen ./txstatus -o txstatus/lib/transaction_status.h --lang c echo "build-rust-wrapper done" -jsonParsed: build-rust-wrapper +jsonParsed-linux: build-rust-wrapper # build faithful-cli with jsonParsed format support via ffi (rust) rm -rf ./bin/faithful-cli_jsonParsed # static linking: diff --git a/txstatus/src/lib.rs b/txstatus/src/lib.rs index c927dd65..3bdbf7a0 100644 --- a/txstatus/src/lib.rs +++ b/txstatus/src/lib.rs @@ -14,21 +14,16 @@ use solana_sdk::message::v0::LoadedAddresses; use solana_sdk::{instruction::CompiledInstruction, message::AccountKeys, pubkey::Pubkey}; use solana_transaction_status::parse_instruction::parse; -#[no_mangle] -pub extern "C" fn hello_from_rust() { - println!("Hello from Rust at 
time: {}!", chrono::Local::now()); -} - #[no_mangle] pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { - let started_at = Instant::now(); + // let started_at = Instant::now(); let bytes = unsafe { assert!(!bytes.is_null()); slice::from_raw_parts(bytes, len) }; let bytes = bytes.to_vec(); - println!("[rust] params raw bytes: {:?}", bytes); - println!("[rust] params:"); + // println!("[rust] params raw bytes: {:?}", bytes); + // println!("[rust] params:"); let mut decoder = Decoder::new(bytes); { // read program ID: @@ -58,10 +53,10 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { child: None, }; let static_account_keys_len = decoder.read_u8().unwrap() as usize; - println!( - "[rust] static_account_keys_len: {:?}", - static_account_keys_len - ); + // println!( + // "[rust] static_account_keys_len: {:?}", + // static_account_keys_len + // ); let mut static_account_keys_vec = vec![]; for _ in 0..static_account_keys_len { let account_key_bytes = decoder.read_bytes(32).unwrap(); @@ -73,7 +68,7 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { if has_dynamic_account_keys { let mut loaded_addresses = LoadedAddresses::default(); let num_writable_accounts = decoder.read_u8().unwrap() as usize; - println!("[rust] num_writable_accounts: {:?}", num_writable_accounts); + // println!("[rust] num_writable_accounts: {:?}", num_writable_accounts); // read 32 bytes for each writable account: for _ in 0..num_writable_accounts { let account_key_bytes = decoder.read_bytes(32).unwrap(); @@ -115,28 +110,28 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { let mut stack_height: Option = None; { let has_stack_height = decoder.read_option().unwrap(); - println!("[rust] has_stack_height: {:?}", has_stack_height); + // println!("[rust] has_stack_height: {:?}", has_stack_height); if has_stack_height { stack_height = Some( decoder .read_u32(byte_order::ByteOrder::LittleEndian) .unwrap(), ); - println!("[rust] stack_height: {:?}", stack_height); + // println!("[rust] stack_height: {:?}", stack_height); } } - println!("[rust] program_id: {:?}", program_id); - println!("[rust] instruction: {:?}", instruction); - println!( - "[rust] account_keys.static: {:?}", - parsed_account_keys.parent - ); - println!( - "[rust] has_dynamic_account_keys: {:?}", - has_dynamic_account_keys - ); - println!("[rust] account_keys.dynamic: {:?}", sommmm); - println!("[rust] stack_height: {:?}", stack_height); + // println!("[rust] program_id: {:?}", program_id); + // println!("[rust] instruction: {:?}", instruction); + // println!( + // "[rust] account_keys.static: {:?}", + // parsed_account_keys.parent + // ); + // println!( + // "[rust] has_dynamic_account_keys: {:?}", + // has_dynamic_account_keys + // ); + // println!("[rust] account_keys.dynamic: {:?}", sommmm); + // println!("[rust] stack_height: {:?}", stack_height); let parsed = parse( &program_id, // program_id @@ -162,29 +157,29 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { status: 1, }; } else { - println!( - "[rust] successfully parsed the instruction in {:?}: {:?}", - Instant::now() - started_at, - parsed - ); + // println!( + // "[rust] successfully parsed the instruction in {:?}: {:?}", + // Instant::now() - started_at, + // parsed + // ); let parsed = parsed.unwrap(); let parsed_json = serde_json::to_vec(&parsed).unwrap(); { - let parsed_json_str = String::from_utf8(parsed_json.clone()).unwrap(); - println!( - 
"[rust] parsed instruction as json at {:?}: {}", - Instant::now() - started_at, - parsed_json_str - ); + // let parsed_json_str = String::from_utf8(parsed_json.clone()).unwrap(); + // println!( + // "[rust] parsed instruction as json at {:?}: {}", + // Instant::now() - started_at, + // parsed_json_str + // ); } - println!("[rust] {:?}", Instant::now() - started_at); + // println!("[rust] {:?}", Instant::now() - started_at); let mut response = vec![0; 32]; response.extend_from_slice(&parsed_json); let data = response.as_mut_ptr(); let len = response.len(); - println!("[rust] {:?}", Instant::now() - started_at); + // println!("[rust] {:?}", Instant::now() - started_at); return Response { buf: Buffer { data: unsafe { data.add(32) }, @@ -194,17 +189,6 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response { }; } } - let mut response = vec![0; 32]; - for i in 0..32 { - response[i] = i as u8; - } - let data = response.as_mut_ptr(); - let len = response.len(); - std::mem::forget(response); - Response { - buf: Buffer { data, len }, - status: 123, - } } #[repr(C)] @@ -227,20 +211,6 @@ extern "C" fn free_buf(buf: Buffer) { } } -// write a C external function that accepts a string, parses it as json, and returns a string: -#[no_mangle] -pub extern "C" fn accept_json(json: *const libc::c_char) -> *const libc::c_char { - let json = unsafe { CStr::from_ptr(json).to_bytes() }; - let json = String::from_utf8(json.to_vec()).unwrap(); - { - let v: serde_json::Value = serde_json::from_str(&json).unwrap(); - println!("v: {:?}", v); - } - let json = json + "!"; - let json = CString::new(json).unwrap().into_raw(); - json -} - struct Combined { parent: Vec, child: Option, From bc92daae45b21092641afe704d25afffdafca297 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 22:51:29 +0100 Subject: [PATCH 33/63] Cleanup --- multiepoch-getBlock.go | 4 +++- multiepoch-getBlockTime.go | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go index 64a5ee74..d62a6ea1 100644 --- a/multiepoch-getBlock.go +++ b/multiepoch-getBlock.go @@ -396,7 +396,9 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex tim.time("get transactions") var blockResp GetBlockResponse blockResp.Transactions = allTransactions - blockResp.BlockTime = &blocktime + if blocktime != 0 { + blockResp.BlockTime = &blocktime + } blockResp.Blockhash = lastEntryHash.String() blockResp.ParentSlot = uint64(block.Meta.Parent_slot) blockResp.Rewards = rewards diff --git a/multiepoch-getBlockTime.go b/multiepoch-getBlockTime.go index e6528c99..dbcb047b 100644 --- a/multiepoch-getBlockTime.go +++ b/multiepoch-getBlockTime.go @@ -46,7 +46,12 @@ func (multi *MultiEpoch) handleGetBlockTime(ctx context.Context, conn *requestCo err = conn.ReplyRaw( ctx, req.ID, - blockTime, + func() any { + if blockTime != 0 { + return blockTime + } + return nil + }(), ) if err != nil { return nil, fmt.Errorf("failed to reply: %w", err) From 204e8f36e48a2a1da12945c78cc7f4f2406289d9 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 5 Dec 2023 23:21:44 +0100 Subject: [PATCH 34/63] Cleanup FFI --- request-response.go | 29 ++++++++++++++++++++++++----- txstatus/src/lib.rs | 2 +- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/request-response.go b/request-response.go index dec5de83..39552822 100644 --- a/request-response.go +++ b/request-response.go @@ -394,8 +394,9 @@ func encodeTransactionResponseBasedOnWantedEncoding( parsedInstructions 
 	for _, inst := range tx.Message.Instructions {
+		programId, _ := tx.ResolveProgramIDIndex(inst.ProgramIDIndex)
 		instrParams := txstatus.Parameters{
-			ProgramID: solana.VoteProgramID,
+			ProgramID: programId,
 			Instruction: txstatus.CompiledInstruction{
 				ProgramIDIndex: uint8(inst.ProgramIDIndex),
 				Accounts: func() []uint8 {
@@ -409,7 +410,7 @@ func encodeTransactionResponseBasedOnWantedEncoding(
 			},
 			AccountKeys: txstatus.AccountKeys{
 				StaticKeys: tx.Message.AccountKeys,
-				// TODO: add support for dynamic keys?
+				// TODO: add support for dynamic keys? From meta?
 				// DynamicKeys: &LoadedAddresses{
 				// 	Writable: []solana.PublicKey{},
 				// 	Readonly: []solana.PublicKey{
@@ -421,10 +422,28 @@ func encodeTransactionResponseBasedOnWantedEncoding(
 		}
 		parsedInstructionJSON, err := instrParams.ParseInstruction()
-		if err != nil {
-			return nil, fmt.Errorf("failed to parse instruction: %w", err)
+		if err != nil || parsedInstructionJSON == nil || !strings.HasPrefix(strings.TrimSpace(string(parsedInstructionJSON)), "{") {
+			nonParsedInstructionJSON := map[string]any{
+				"accounts": func() []string {
+					out := make([]string, len(inst.Accounts))
+					for i, v := range inst.Accounts {
+						// TODO: add support for dynamic keys? From meta?
+						if v >= uint16(len(tx.Message.AccountKeys)) {
+							continue
+						}
+						out[i] = tx.Message.AccountKeys[v].String()
+					}
+					return out
+				}(),
+				"data":        base58.Encode(inst.Data),
+				"programId":   programId.String(),
+				"stackHeight": nil,
+			}
+			asRaw, _ := jsoniter.ConfigCompatibleWithStandardLibrary.Marshal(nonParsedInstructionJSON)
+			parsedInstructions = append(parsedInstructions, asRaw)
+		} else {
+			parsedInstructions = append(parsedInstructions, parsedInstructionJSON)
 		}
-		parsedInstructions = append(parsedInstructions, parsedInstructionJSON)
 	}
 	resp, err := txstatus.FromTransaction(tx)
diff --git a/txstatus/src/lib.rs b/txstatus/src/lib.rs
index 3bdbf7a0..53c91e7e 100644
--- a/txstatus/src/lib.rs
+++ b/txstatus/src/lib.rs
@@ -140,7 +140,7 @@ pub extern "C" fn parse_instruction(bytes: *const u8, len: usize) -> Response {
         stack_height,
     );
     if parsed.is_err() {
-        println!("[rust] parse error: {:?}", parsed);
+        // println!("[rust] parse error: {:?}", parsed);
         let mut response = vec![0; 32];
         // add error string to response:
         let error = parsed.err().unwrap();

From b26ab621898ba4b420efca71b0543cd9b7632afa Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Wed, 6 Dec 2023 00:13:56 +0100
Subject: [PATCH 35/63] Make network and epoch options more intuitive

---
 cmd-x-index-all.go        | 36 ++++++++++++------------------------
 cmd-x-index-cid2offset.go |  4 +++-
 cmd-x-index-gsfa.go       |  4 +++-
 cmd-x-index-sig-exists.go |  4 +++-
 cmd-x-index-sig2cid.go    |  4 +++-
 cmd-x-index-slot2cid.go   |  4 +++-
 readers.go                | 20 +++++++++++++++-----
 7 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go
index df065dac..e4c59a8c 100644
--- a/cmd-x-index-all.go
+++ b/cmd-x-index-all.go
@@ -11,7 +11,6 @@ import (
 	"path/filepath"
 	"time"
 
-	"github.com/davecgh/go-spew/spew"
 	"github.com/dustin/go-humanize"
 	"github.com/ipfs/go-cid"
 	carv1 "github.com/ipld/go-car"
@@ -25,13 +24,15 @@ import (
 
 func newCmd_Index_all() *cli.Command {
 	var verify bool
-	var epoch uint64
 	var network indexes.Network
 	return &cli.Command{
 		Name:        "all",
 		Description: "Given a CAR file containing a Solana epoch, create all the necessary indexes and save them in the specified index dir.",
 		ArgsUsage:   " ",
 		Before: func(c *cli.Context) error {
+			if network == "" {
+				network = indexes.NetworkMainnet
+			}
 			return
nil }, Flags: []cli.Flag{ @@ -45,23 +46,16 @@ func newCmd_Index_all() *cli.Command { Usage: "temporary directory to use for storing intermediate files", Value: "", }, - &cli.Uint64Flag{ - Name: "epoch", - Usage: "the epoch of the CAR file", - Destination: &epoch, - Required: true, - }, &cli.StringFlag{ Name: "network", Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { - return fmt.Errorf("invalid network: %s", network) + return fmt.Errorf("invalid network: %q", network) } return nil }, - Required: true, }, }, Subcommands: []*cli.Command{}, @@ -89,10 +83,9 @@ func newCmd_Index_all() *cli.Command { }() klog.Infof("Creating all indexes for %s", carPath) klog.Infof("Indexes will be saved in %s", indexDir) - klog.Infof("This CAR file is for epoch %d and cluster %s", epoch, network) + indexPaths, numTotalItems, err := createAllIndexes( c.Context, - epoch, network, tmpDir, carPath, @@ -118,19 +111,8 @@ func newCmd_Index_all() *cli.Command { } } -var veryPlainSdumpConfig = spew.ConfigState{ - Indent: " ", - DisablePointerAddresses: true, - DisableCapacities: true, - DisableMethods: true, - DisablePointerMethods: true, - ContinueOnMethod: true, - SortKeys: true, -} - func createAllIndexes( ctx context.Context, - epoch uint64, network indexes.Network, tmpDir string, carPath string, @@ -168,10 +150,13 @@ func createAllIndexes( klog.Infof("Getting car file size") klog.Infof("Counting items in car file...") - numItems, err := carCountItemsByFirstByte(carPath) + numItems, epochObject, err := carCountItemsByFirstByte(carPath) if err != nil { return nil, 0, fmt.Errorf("failed to count items in car file: %w", err) } + if epochObject == nil { + return nil, 0, fmt.Errorf("failed to find epoch object in the car file") + } fmt.Println() klog.Infof("Found items in car file:") numTotalItems := uint64(0) @@ -185,6 +170,9 @@ func createAllIndexes( } klog.Infof("Total: %s items", humanize.Comma(int64(numTotalItems))) + epoch := uint64(epochObject.Epoch) + klog.Infof("This CAR file is for epoch %d and cluster %s", epoch, network) + cid_to_offset_and_size, err := NewBuilder_CidToOffset( epoch, rootCID, diff --git a/cmd-x-index-cid2offset.go b/cmd-x-index-cid2offset.go index f8a59fc3..ece4960b 100644 --- a/cmd-x-index-cid2offset.go +++ b/cmd-x-index-cid2offset.go @@ -19,6 +19,9 @@ func newCmd_Index_cid2offset() *cli.Command { Description: "Given a CAR file containing a Solana epoch, create an index of the file that maps CIDs to offsets in the CAR file.", ArgsUsage: " ", Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } return nil }, Flags: []cli.Flag{ @@ -48,7 +51,6 @@ func newCmd_Index_cid2offset() *cli.Command { } return nil }, - Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/cmd-x-index-gsfa.go b/cmd-x-index-gsfa.go index 2018af2f..86df0208 100644 --- a/cmd-x-index-gsfa.go +++ b/cmd-x-index-gsfa.go @@ -38,6 +38,9 @@ func newCmd_Index_gsfa() *cli.Command { Description: "Create GSFA index from a CAR file", ArgsUsage: " ", Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } return nil }, Flags: []cli.Flag{ @@ -68,7 +71,6 @@ func newCmd_Index_gsfa() *cli.Command { Name: "network", Usage: "network", Destination: (*string)(&network), - Required: true, Action: func(c *cli.Context, v string) error { if !indexes.IsValidNetwork(indexes.Network(v)) { return fmt.Errorf("invalid network: 
%s", v) diff --git a/cmd-x-index-sig-exists.go b/cmd-x-index-sig-exists.go index acc92358..f357feff 100644 --- a/cmd-x-index-sig-exists.go +++ b/cmd-x-index-sig-exists.go @@ -37,6 +37,9 @@ func newCmd_Index_sigExists() *cli.Command { Description: "Create sig-exists index from a CAR file", ArgsUsage: " ", Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } return nil }, Flags: []cli.Flag{ @@ -67,7 +70,6 @@ func newCmd_Index_sigExists() *cli.Command { Name: "network", Usage: "network", Destination: (*string)(&network), - Required: true, Action: func(c *cli.Context, v string) error { if !indexes.IsValidNetwork(indexes.Network(v)) { return fmt.Errorf("invalid network: %s", v) diff --git a/cmd-x-index-sig2cid.go b/cmd-x-index-sig2cid.go index 9a3a94e7..12a22331 100644 --- a/cmd-x-index-sig2cid.go +++ b/cmd-x-index-sig2cid.go @@ -19,6 +19,9 @@ func newCmd_Index_sig2cid() *cli.Command { Description: "Given a CAR file containing a Solana epoch, create an index of the file that maps transaction signatures to CIDs.", ArgsUsage: " ", Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } return nil }, Flags: []cli.Flag{ @@ -48,7 +51,6 @@ func newCmd_Index_sig2cid() *cli.Command { } return nil }, - Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/cmd-x-index-slot2cid.go b/cmd-x-index-slot2cid.go index 450c126c..5f69a575 100644 --- a/cmd-x-index-slot2cid.go +++ b/cmd-x-index-slot2cid.go @@ -19,6 +19,9 @@ func newCmd_Index_slot2cid() *cli.Command { Description: "Given a CAR file containing a Solana epoch, create an index of the file that maps slot numbers to CIDs.", ArgsUsage: " ", Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } return nil }, Flags: []cli.Flag{ @@ -48,7 +51,6 @@ func newCmd_Index_slot2cid() *cli.Command { } return nil }, - Required: true, }, }, Subcommands: []*cli.Command{}, diff --git a/readers.go b/readers.go index ad4d3b8f..cd0430b7 100644 --- a/readers.go +++ b/readers.go @@ -15,6 +15,8 @@ import ( "github.com/ipfs/go-libipfs/blocks" carv1 "github.com/ipld/go-car" "github.com/ipld/go-car/util" + "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" + "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/rpcpool/yellowstone-faithful/readahead" ) @@ -211,28 +213,29 @@ func carCountItems(carPath string) (uint64, error) { return count, nil } -func carCountItemsByFirstByte(carPath string) (map[byte]uint64, error) { +func carCountItemsByFirstByte(carPath string) (map[byte]uint64, *ipldbindcode.Epoch, error) { file, err := os.Open(carPath) if err != nil { - return nil, err + return nil, nil, err } defer file.Close() rd, err := newCarReader(file) if err != nil { - return nil, fmt.Errorf("failed to open car file: %w", err) + return nil, nil, fmt.Errorf("failed to open car file: %w", err) } numTotalItems := uint64(0) counts := make(map[byte]uint64) startedCountAt := time.Now() + var epochObject *ipldbindcode.Epoch for { _, _, block, err := rd.NextNode() if err != nil { if errors.Is(err, io.EOF) { break } - return nil, err + return nil, nil, err } // the first data byte is the block type (after the CBOR tag) firstDataByte := block.RawData()[1] @@ -244,13 +247,20 @@ func carCountItemsByFirstByte(carPath string) (map[byte]uint64, error) { fmt.Sprintf("\rCounted %s items", humanize.Comma(int64(numTotalItems))), ) } + + if iplddecoders.Kind(firstDataByte) == iplddecoders.KindEpoch { + epochObject, err = 
iplddecoders.DecodeEpoch(block.RawData()) + if err != nil { + return nil, nil, fmt.Errorf("failed to decode Epoch node: %w", err) + } + } } printToStderr( fmt.Sprintf("\rCounted %s items in %s\n", humanize.Comma(int64(numTotalItems)), time.Since(startedCountAt).Truncate(time.Second)), ) - return counts, nil + return counts, epochObject, err } func printToStderr(msg string) { From 3c87215d8ddd3cb594e82233967c3ad5d0712483 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Wed, 6 Dec 2023 00:27:41 +0100 Subject: [PATCH 36/63] Cleanup --- cmd-x-index-cid2offset.go | 2 +- cmd-x-index-sig2cid.go | 2 +- cmd-x-index-slot2cid.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd-x-index-cid2offset.go b/cmd-x-index-cid2offset.go index ece4960b..1af14a87 100644 --- a/cmd-x-index-cid2offset.go +++ b/cmd-x-index-cid2offset.go @@ -47,7 +47,7 @@ func newCmd_Index_cid2offset() *cli.Command { Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { - return fmt.Errorf("invalid network: %s", network) + return fmt.Errorf("invalid network: %q", network) } return nil }, diff --git a/cmd-x-index-sig2cid.go b/cmd-x-index-sig2cid.go index 12a22331..1755c633 100644 --- a/cmd-x-index-sig2cid.go +++ b/cmd-x-index-sig2cid.go @@ -47,7 +47,7 @@ func newCmd_Index_sig2cid() *cli.Command { Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { - return fmt.Errorf("invalid network: %s", network) + return fmt.Errorf("invalid network: %q", network) } return nil }, diff --git a/cmd-x-index-slot2cid.go b/cmd-x-index-slot2cid.go index 5f69a575..0b09b941 100644 --- a/cmd-x-index-slot2cid.go +++ b/cmd-x-index-slot2cid.go @@ -47,7 +47,7 @@ func newCmd_Index_slot2cid() *cli.Command { Action: func(c *cli.Context, s string) error { network = indexes.Network(s) if !indexes.IsValidNetwork(network) { - return fmt.Errorf("invalid network: %s", network) + return fmt.Errorf("invalid network: %q", network) } return nil }, From 190f35327dc2dcc067e8e928d9c840a920b115af Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Wed, 6 Dec 2023 00:46:48 +0100 Subject: [PATCH 37/63] Add getFirstAvailableBlock; closes #41 --- epoch.go | 40 ++++++++++++++++++++++++++++ multiepoch-getFirstAvailableBlock.go | 29 ++++++++++++++++++++ multiepoch.go | 27 ++++++++++++++++--- 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 multiepoch-getFirstAvailableBlock.go diff --git a/epoch.go b/epoch.go index 461cb26d..f0db8699 100644 --- a/epoch.go +++ b/epoch.go @@ -16,6 +16,7 @@ import ( carv1 "github.com/ipld/go-car" "github.com/ipld/go-car/util" carv2 "github.com/ipld/go-car/v2" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/libp2p/go-libp2p/core/peer" "github.com/rpcpool/yellowstone-faithful/bucketteer" "github.com/rpcpool/yellowstone-faithful/gsfa" @@ -40,6 +41,7 @@ type Epoch struct { localCarReader *carv2.Reader remoteCarReader ReaderAtCloser carHeaderSize uint64 + rootCid cid.Cid cidToOffsetAndSizeIndex *indexes.CidToOffsetAndSize_Reader slotToCidIndex *indexes.SlotToCid_Reader sigToCidIndex *indexes.SigToCid_Reader @@ -333,6 +335,8 @@ func NewEpochFromConfig( } } + ep.rootCid = lastRootCid + return ep, nil } @@ -359,6 +363,42 @@ func (r *Epoch) Config() *Config { return r.config } +func (s *Epoch) GetFirstAvailableBlock(ctx context.Context) (*ipldbindcode.Block, error) { + // get root object, then get the first subset, then the first block. 
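+	// Layout assumed here: the Epoch node links Subset CIDs in slot order, and
+	// each Subset links Block CIDs in slot order, so the first block of the
+	// first subset carries the lowest slot available in this epoch.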
+ rootCid := s.rootCid + rootNode, err := s.GetNodeByCid(ctx, rootCid) + if err != nil { + return nil, fmt.Errorf("failed to get root node: %w", err) + } + epochNode, err := iplddecoders.DecodeEpoch(rootNode) + if err != nil { + return nil, fmt.Errorf("failed to decode epoch node: %w", err) + } + if len(epochNode.Subsets) == 0 { + return nil, fmt.Errorf("no subsets found") + } + subsetNode, err := s.GetNodeByCid(ctx, epochNode.Subsets[0].(cidlink.Link).Cid) + if err != nil { + return nil, fmt.Errorf("failed to get subset node: %w", err) + } + subset, err := iplddecoders.DecodeSubset(subsetNode) + if err != nil { + return nil, fmt.Errorf("failed to decode subset node: %w", err) + } + if len(subset.Blocks) == 0 { + return nil, fmt.Errorf("no blocks found") + } + blockNode, err := s.GetNodeByCid(ctx, subset.Blocks[0].(cidlink.Link).Cid) + if err != nil { + return nil, fmt.Errorf("failed to get block node: %w", err) + } + block, err := iplddecoders.DecodeBlock(blockNode) + if err != nil { + return nil, fmt.Errorf("failed to decode block node: %w", err) + } + return block, nil +} + func (s *Epoch) prefetchSubgraph(ctx context.Context, wantedCid cid.Cid) error { if s.lassieFetcher != nil { // Fetch the subgraph from lassie diff --git a/multiepoch-getFirstAvailableBlock.go b/multiepoch-getFirstAvailableBlock.go new file mode 100644 index 00000000..c6fcf017 --- /dev/null +++ b/multiepoch-getFirstAvailableBlock.go @@ -0,0 +1,29 @@ +package main + +import ( + "context" + "fmt" + + "github.com/sourcegraph/jsonrpc2" +) + +func (multi *MultiEpoch) handleGetFirstAvailableBlock(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) (*jsonrpc2.Error, error) { + firstBlock, err := multi.GetFirstAvailableBlock(ctx) + if err != nil { + return &jsonrpc2.Error{ + Code: CodeNotFound, + Message: fmt.Sprintf("Internal error"), + }, fmt.Errorf("failed to get first available block: %w", err) + } + + slotNumber := uint64(firstBlock.Slot) + err = conn.ReplyRaw( + ctx, + req.ID, + slotNumber, + ) + if err != nil { + return nil, fmt.Errorf("failed to reply: %w", err) + } + return nil, nil +} diff --git a/multiepoch.go b/multiepoch.go index 70217e9a..f70662eb 100644 --- a/multiepoch.go +++ b/multiepoch.go @@ -14,6 +14,7 @@ import ( "github.com/goware/urlx" "github.com/libp2p/go-reuseport" "github.com/mr-tron/base58" + "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/sourcegraph/jsonrpc2" "github.com/valyala/fasthttp" "k8s.io/klog/v2" @@ -138,7 +139,7 @@ func (m *MultiEpoch) GetEpochNumbers() []uint64 { return epochNumbers } -func (m *MultiEpoch) GetFirstAvailableEpoch() (*Epoch, error) { +func (m *MultiEpoch) GetMostRecentAvailableEpoch() (*Epoch, error) { m.mu.RLock() defer m.mu.RUnlock() numbers := m.GetEpochNumbers() @@ -148,7 +149,25 @@ func (m *MultiEpoch) GetFirstAvailableEpoch() (*Epoch, error) { return nil, fmt.Errorf("no epochs available") } -func (m *MultiEpoch) GetFirstAvailableEpochNumber() (uint64, error) { +func (m *MultiEpoch) GetOldestAvailableEpoch() (*Epoch, error) { + m.mu.RLock() + defer m.mu.RUnlock() + numbers := m.GetEpochNumbers() + if len(numbers) > 0 { + return m.epochs[numbers[len(numbers)-1]], nil + } + return nil, fmt.Errorf("no epochs available") +} + +func (m *MultiEpoch) GetFirstAvailableBlock(ctx context.Context) (*ipldbindcode.Block, error) { + oldestEpoch, err := m.GetOldestAvailableEpoch() + if err != nil { + return nil, err + } + return oldestEpoch.GetFirstAvailableBlock(ctx) +} + +func (m *MultiEpoch) GetMostRecentAvailableEpochNumber() (uint64, 
error) { numbers := m.GetEpochNumbers() if len(numbers) > 0 { return numbers[0], nil @@ -423,7 +442,7 @@ func sanitizeMethod(method string) string { func isValidLocalMethod(method string) bool { switch method { - case "getBlock", "getTransaction", "getSignaturesForAddress", "getBlockTime", "getGenesisHash": + case "getBlock", "getTransaction", "getSignaturesForAddress", "getBlockTime", "getGenesisHash", "getFirstAvailableBlock": return true default: return false @@ -443,6 +462,8 @@ func (ser *MultiEpoch) handleRequest(ctx context.Context, conn *requestContext, return ser.handleGetBlockTime(ctx, conn, req) case "getGenesisHash": return ser.handleGetGenesisHash(ctx, conn, req) + case "getFirstAvailableBlock": + return ser.handleGetFirstAvailableBlock(ctx, conn, req) default: return &jsonrpc2.Error{ Code: jsonrpc2.CodeMethodNotFound, From fef16209f033f4968891b51341d8648ded853b96 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Wed, 6 Dec 2023 00:54:19 +0100 Subject: [PATCH 38/63] Add getSlot; maybe closes #42 --- epoch.go | 36 ++++++++++++++++++++++++++++++++++++ multiepoch-getSlot.go | 30 ++++++++++++++++++++++++++++++ multiepoch.go | 12 +++++++++++- 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 multiepoch-getSlot.go diff --git a/epoch.go b/epoch.go index f0db8699..5fbb8ebc 100644 --- a/epoch.go +++ b/epoch.go @@ -363,6 +363,42 @@ func (r *Epoch) Config() *Config { return r.config } +func (s *Epoch) GetMostRecentAvailableBlock(ctx context.Context) (*ipldbindcode.Block, error) { + // get root object, then get the last subset, then the last block. + rootCid := s.rootCid + rootNode, err := s.GetNodeByCid(ctx, rootCid) + if err != nil { + return nil, fmt.Errorf("failed to get root node: %w", err) + } + epochNode, err := iplddecoders.DecodeEpoch(rootNode) + if err != nil { + return nil, fmt.Errorf("failed to decode epoch node: %w", err) + } + if len(epochNode.Subsets) == 0 { + return nil, fmt.Errorf("no subsets found") + } + subsetNode, err := s.GetNodeByCid(ctx, epochNode.Subsets[len(epochNode.Subsets)-1].(cidlink.Link).Cid) + if err != nil { + return nil, fmt.Errorf("failed to get subset node: %w", err) + } + subset, err := iplddecoders.DecodeSubset(subsetNode) + if err != nil { + return nil, fmt.Errorf("failed to decode subset node: %w", err) + } + if len(subset.Blocks) == 0 { + return nil, fmt.Errorf("no blocks found") + } + blockNode, err := s.GetNodeByCid(ctx, subset.Blocks[len(subset.Blocks)-1].(cidlink.Link).Cid) + if err != nil { + return nil, fmt.Errorf("failed to get block node: %w", err) + } + block, err := iplddecoders.DecodeBlock(blockNode) + if err != nil { + return nil, fmt.Errorf("failed to decode block node: %w", err) + } + return block, nil +} + func (s *Epoch) GetFirstAvailableBlock(ctx context.Context) (*ipldbindcode.Block, error) { // get root object, then get the first subset, then the first block. rootCid := s.rootCid diff --git a/multiepoch-getSlot.go b/multiepoch-getSlot.go new file mode 100644 index 00000000..64ffb44f --- /dev/null +++ b/multiepoch-getSlot.go @@ -0,0 +1,30 @@ +package main + +import ( + "context" + "fmt" + + "github.com/sourcegraph/jsonrpc2" +) + +func (multi *MultiEpoch) handleGetSlot(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) (*jsonrpc2.Error, error) { + // TODO: parse params? 
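+	// NOTE: on Solana, getSlot optionally takes a commitment config object,
+	// e.g. {"commitment": "finalized"}. A minimal parse here could look like
+	// the following (sketch only; any params are currently ignored and the
+	// most recent indexed slot is returned):
+	//
+	//	var params []any
+	//	if req.Params != nil {
+	//		_ = fasterJson.Unmarshal(*req.Params, &params)
+	//	}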
+	lastBlock, err := multi.GetMostRecentAvailableBlock(ctx)
+	if err != nil {
+		return &jsonrpc2.Error{
+			Code:    CodeNotFound,
+			Message: "Internal error",
+		}, fmt.Errorf("failed to get most recent available block: %w", err)
+	}
+
+	slotNumber := uint64(lastBlock.Slot)
+	err = conn.ReplyRaw(
+		ctx,
+		req.ID,
+		slotNumber,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to reply: %w", err)
+	}
+	return nil, nil
+}
diff --git a/multiepoch.go b/multiepoch.go
index f70662eb..88f841c7 100644
--- a/multiepoch.go
+++ b/multiepoch.go
@@ -167,6 +167,14 @@ func (m *MultiEpoch) GetFirstAvailableBlock(ctx context.Context) (*ipldbindcode.
 	return oldestEpoch.GetFirstAvailableBlock(ctx)
+}
+
+func (m *MultiEpoch) GetMostRecentAvailableBlock(ctx context.Context) (*ipldbindcode.Block, error) {
+	mostRecentEpoch, err := m.GetMostRecentAvailableEpoch()
+	if err != nil {
+		return nil, err
+	}
+	return mostRecentEpoch.GetMostRecentAvailableBlock(ctx)
+}
+
 func (m *MultiEpoch) GetMostRecentAvailableEpochNumber() (uint64, error) {
 	numbers := m.GetEpochNumbers()
 	if len(numbers) > 0 {
@@ -442,7 +450,7 @@ func sanitizeMethod(method string) string {
 
 func isValidLocalMethod(method string) bool {
 	switch method {
-	case "getBlock", "getTransaction", "getSignaturesForAddress", "getBlockTime", "getGenesisHash", "getFirstAvailableBlock":
+	case "getBlock", "getTransaction", "getSignaturesForAddress", "getBlockTime", "getGenesisHash", "getFirstAvailableBlock", "getSlot":
 		return true
 	default:
 		return false
@@ -464,6 +472,8 @@ func (ser *MultiEpoch) handleRequest(ctx context.Context, conn *requestContext,
 		return ser.handleGetGenesisHash(ctx, conn, req)
 	case "getFirstAvailableBlock":
 		return ser.handleGetFirstAvailableBlock(ctx, conn, req)
+	case "getSlot":
+		return ser.handleGetSlot(ctx, conn, req)
 	default:
 		return &jsonrpc2.Error{
 			Code: jsonrpc2.CodeMethodNotFound,

From 69d1c1d9953929368c9fb198b3852155bd952b60 Mon Sep 17 00:00:00 2001
From: gagliardetto
Date: Wed, 6 Dec 2023 00:56:28 +0100
Subject: [PATCH 39/63] Add comment

---
 multiepoch.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/multiepoch.go b/multiepoch.go
index 88f841c7..f6e65dc3 100644
--- a/multiepoch.go
+++ b/multiepoch.go
@@ -126,6 +126,7 @@ func (m *MultiEpoch) CountEpochs() int {
 	return len(m.epochs)
 }
 
+// GetEpochNumbers returns a list of epoch numbers, sorted from most recent to oldest.
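+// Callers rely on this ordering: GetMostRecentAvailableEpoch takes the first
+// element, and GetOldestAvailableEpoch takes the last.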
func (m *MultiEpoch) GetEpochNumbers() []uint64 { m.mu.RLock() defer m.mu.RUnlock() From d09abf8ff3bd75303a43adb538c08d3bfb9c6b23 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 10 Dec 2023 22:44:06 +0100 Subject: [PATCH 40/63] Seal all indexes at the same time --- cmd-x-index-all.go | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index e4c59a8c..c4a9d9c9 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -19,6 +19,7 @@ import ( "github.com/rpcpool/yellowstone-faithful/indexmeta" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/urfave/cli/v2" + "golang.org/x/sync/errgroup" "k8s.io/klog/v2" ) @@ -334,53 +335,63 @@ func createAllIndexes( paths.SignatureExists = sigExistsFilepath { + wg := new(errgroup.Group) + // seal the indexes - { + wg.Go(func() error { klog.Infof("Sealing cid_to_offset_and_size index...") err = cid_to_offset_and_size.Seal(ctx, indexDir) if err != nil { - return nil, 0, fmt.Errorf("failed to seal cid_to_offset_and_size index: %w", err) + return fmt.Errorf("failed to seal cid_to_offset_and_size index: %w", err) } paths.CidToOffsetAndSize = cid_to_offset_and_size.GetFilepath() klog.Infof("Successfully sealed cid_to_offset_and_size index: %s", paths.CidToOffsetAndSize) - } + return nil + }) - { + wg.Go(func() error { klog.Infof("Sealing slot_to_cid index...") err = slot_to_cid.Seal(ctx, indexDir) if err != nil { - return nil, 0, fmt.Errorf("failed to seal slot_to_cid index: %w", err) + return fmt.Errorf("failed to seal slot_to_cid index: %w", err) } paths.SlotToCid = slot_to_cid.GetFilepath() klog.Infof("Successfully sealed slot_to_cid index: %s", paths.SlotToCid) - } + return nil + }) - { + wg.Go(func() error { klog.Infof("Sealing sig_to_cid index...") err = sig_to_cid.Seal(ctx, indexDir) if err != nil { - return nil, 0, fmt.Errorf("failed to seal sig_to_cid index: %w", err) + return fmt.Errorf("failed to seal sig_to_cid index: %w", err) } paths.SignatureToCid = sig_to_cid.GetFilepath() klog.Infof("Successfully sealed sig_to_cid index: %s", paths.SignatureToCid) - } + return nil + }) - { + wg.Go(func() error { klog.Infof("Sealing sig_exists index...") meta := indexmeta.Meta{} if err := meta.AddUint64(indexmeta.MetadataKey_Epoch, epoch); err != nil { - return nil, 0, fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) + return fmt.Errorf("failed to add epoch to sig_exists index metadata: %w", err) } if err := meta.AddCid(indexmeta.MetadataKey_RootCid, rootCID); err != nil { - return nil, 0, fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) + return fmt.Errorf("failed to add root cid to sig_exists index metadata: %w", err) } if err := meta.AddString(indexmeta.MetadataKey_Network, string(network)); err != nil { - return nil, 0, fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) + return fmt.Errorf("failed to add network to sig_exists index metadata: %w", err) } if _, err = sig_exists.Seal(meta); err != nil { - return nil, 0, fmt.Errorf("failed to seal sig_exists index: %w", err) + return fmt.Errorf("failed to seal sig_exists index: %w", err) } klog.Infof("Successfully sealed sig_exists index: %s", paths.SignatureExists) + return nil + }) + + if err := wg.Wait(); err != nil { + return nil, 0, err } } From 77d24438d1a621e20e86e5a851b828f67f9c908f Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 10 Dec 2023 22:44:43 +0100 Subject: [PATCH 41/63] Refactor config --- config.go | 
65 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/config.go b/config.go index ad43c908..08f002fb 100644 --- a/config.go +++ b/config.go @@ -99,14 +99,11 @@ type Config struct { Epoch *uint64 `json:"epoch" yaml:"epoch"` Data struct { Car *struct { - URI URI `json:"uri" yaml:"uri"` - SplitMetadata *struct { - URI URI `json:"uri" yaml:"uri"` // Local path to the split metadata file. - Miners []struct { - MinerID string `json:"miner_id" yaml:"miner_id"` - // If the miner is a Filecoin miner, then the provider is the miner's peer ID. - } `json:"miners" yaml:"miners"` - } `json:"split_metadata" yaml:"split_metadata"` + URI URI `json:"uri" yaml:"uri"` + FromPieces *struct { + Metadata URI `json:"uri" yaml:"uri"` // Local path to the split metadata file. + Deals URI `json:"deals" yaml:"deals"` // Local path to the split deals file. + } `json:"from_pieces" yaml:"from_pieces"` } `json:"car" yaml:"car"` Filecoin *struct { // Enable enables Filecoin mode. If false, or if this section is not present, CAR mode is used. @@ -164,7 +161,7 @@ func (c *Config) IsFilecoinMode() bool { } func (c *Config) IsSplitCarMode() bool { - return c.Data.Car != nil && c.Data.Car.SplitMetadata != nil && len(c.Data.Car.SplitMetadata.Miners) > 0 + return c.Data.Car != nil && c.Data.Car.FromPieces != nil && !c.Data.Car.FromPieces.Metadata.IsZero() && !c.Data.Car.FromPieces.Deals.IsZero() } type ConfigSlice []*Config @@ -222,7 +219,7 @@ func (c *Config) Validate() error { if c.Data.Car == nil { return fmt.Errorf("car-mode=true; data.car must be set") } - if c.Data.Car.URI.IsZero() && c.Data.Car.SplitMetadata == nil { + if c.Data.Car.URI.IsZero() && c.Data.Car.FromPieces == nil { return fmt.Errorf("data.car.uri or data.car.split_metadata must be set") } if !c.Data.Car.URI.IsZero() { @@ -230,19 +227,25 @@ func (c *Config) Validate() error { return err } } - if c.Data.Car.SplitMetadata != nil { - if c.Data.Car.SplitMetadata.URI.IsZero() { - return fmt.Errorf("data.car.split_metadata.uri must be set") - } - if !c.Data.Car.SplitMetadata.URI.IsLocal() { - return fmt.Errorf("data.car.split_metadata.uri must be a local file") - } - if len(c.Data.Car.SplitMetadata.Miners) == 0 { - return fmt.Errorf("data.car.split_metadata.miners must not be empty") + // can't have both: + if !c.Data.Car.URI.IsZero() && c.Data.Car.FromPieces != nil { + return fmt.Errorf("data.car.uri and data.car.split_metadata cannot both be set") + } + if c.Data.Car.FromPieces != nil { + { + if c.Data.Car.FromPieces.Metadata.IsZero() { + return fmt.Errorf("data.car.from_pieces.metadata.uri must be set") + } + if !c.Data.Car.FromPieces.Metadata.IsLocal() { + return fmt.Errorf("data.car.from_pieces.metadata.uri must be a local file") + } } - for minerIndex, miner := range c.Data.Car.SplitMetadata.Miners { - if miner.MinerID == "" { - return fmt.Errorf("data.car.split_metadata.miners[%d].miner_id must not be empty", minerIndex) + { + if c.Data.Car.FromPieces.Deals.IsZero() { + return fmt.Errorf("data.car.from_pieces.deals.uri must be set") + } + if !c.Data.Car.FromPieces.Deals.IsLocal() { + return fmt.Errorf("data.car.from_pieces.deals.uri must be a local file") } } } @@ -260,7 +263,6 @@ func (c *Config) Validate() error { return fmt.Errorf("data.filecoin.root_cid must be set") } // validate providers: - for providerIndex, provider := range c.Data.Filecoin.Providers { if provider == "" { return fmt.Errorf("data.filecoin.providers must not be empty") @@ -302,16 +304,19 @@ func (c *Config) Validate() 
error { { // check that the URIs are valid if isCarMode { - if !c.Data.Car.URI.IsValid() { - return fmt.Errorf("data.car.uri is invalid") - } if !c.Indexes.CidToOffsetAndSize.URI.IsValid() { return fmt.Errorf("indexes.cid_to_offset_and_size.uri is invalid") } - - if c.Data.Car.SplitMetadata != nil { - if !c.Data.Car.SplitMetadata.URI.IsValid() { - return fmt.Errorf("data.car.split_metadata.uri is invalid") + if c.Data.Car.FromPieces != nil { + if !c.Data.Car.FromPieces.Metadata.IsValid() { + return fmt.Errorf("data.car.from_pieces.metadata.uri is invalid") + } + if !c.Data.Car.FromPieces.Deals.IsValid() { + return fmt.Errorf("data.car.from_pieces.deals.uri is invalid") + } + } else { + if !c.Data.Car.URI.IsValid() { + return fmt.Errorf("data.car.uri is invalid") } } } From c6362955565447989670a97cc39af33c1e259fb7 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 11 Dec 2023 17:55:34 +0100 Subject: [PATCH 42/63] Add support for remote split car pieces --- config.go | 26 ++++--- epoch.go | 88 +++++++++++++++++++++-- getSignaturesForAddress.go | 6 +- go.mod | 6 +- go.sum | 27 +++++++ multiepoch-getBlock.go | 8 ++- multiepoch-getVersion.go | 16 ++--- multiepoch.go | 3 +- request-response.go | 6 +- split-car-fetcher/deals.go | 121 ++++++++++++++++++++++++++++++++ split-car-fetcher/fetcher.go | 8 +-- split-car-fetcher/metadata.go | 29 ++++++++ split-car-fetcher/miner-info.go | 11 +-- tools.go | 3 +- 14 files changed, 311 insertions(+), 47 deletions(-) create mode 100644 split-car-fetcher/deals.go create mode 100644 split-car-fetcher/metadata.go diff --git a/config.go b/config.go index 08f002fb..a6341ee4 100644 --- a/config.go +++ b/config.go @@ -101,8 +101,12 @@ type Config struct { Car *struct { URI URI `json:"uri" yaml:"uri"` FromPieces *struct { - Metadata URI `json:"uri" yaml:"uri"` // Local path to the split metadata file. - Deals URI `json:"deals" yaml:"deals"` // Local path to the split deals file. + Metadata struct { + URI URI `json:"uri" yaml:"uri"` // Local path to the metadata file. + } `json:"metadata" yaml:"metadata"` + Deals struct { + URI URI `json:"uri" yaml:"uri"` // Local path to the deals file. 
+ } `json:"deals" yaml:"deals"` } `json:"from_pieces" yaml:"from_pieces"` } `json:"car" yaml:"car"` Filecoin *struct { @@ -161,7 +165,7 @@ func (c *Config) IsFilecoinMode() bool { } func (c *Config) IsSplitCarMode() bool { - return c.Data.Car != nil && c.Data.Car.FromPieces != nil && !c.Data.Car.FromPieces.Metadata.IsZero() && !c.Data.Car.FromPieces.Deals.IsZero() + return c.Data.Car != nil && c.Data.Car.FromPieces != nil && !c.Data.Car.FromPieces.Metadata.URI.IsZero() && !c.Data.Car.FromPieces.Deals.URI.IsZero() } type ConfigSlice []*Config @@ -220,7 +224,7 @@ func (c *Config) Validate() error { return fmt.Errorf("car-mode=true; data.car must be set") } if c.Data.Car.URI.IsZero() && c.Data.Car.FromPieces == nil { - return fmt.Errorf("data.car.uri or data.car.split_metadata must be set") + return fmt.Errorf("data.car.uri or data.car.from_pieces must be set") } if !c.Data.Car.URI.IsZero() { if err := isSupportedURI(c.Data.Car.URI, "data.car.uri"); err != nil { @@ -229,22 +233,22 @@ func (c *Config) Validate() error { } // can't have both: if !c.Data.Car.URI.IsZero() && c.Data.Car.FromPieces != nil { - return fmt.Errorf("data.car.uri and data.car.split_metadata cannot both be set") + return fmt.Errorf("data.car.uri and data.car.from_pieces cannot both be set") } if c.Data.Car.FromPieces != nil { { - if c.Data.Car.FromPieces.Metadata.IsZero() { + if c.Data.Car.FromPieces.Metadata.URI.IsZero() { return fmt.Errorf("data.car.from_pieces.metadata.uri must be set") } - if !c.Data.Car.FromPieces.Metadata.IsLocal() { + if !c.Data.Car.FromPieces.Metadata.URI.IsLocal() { return fmt.Errorf("data.car.from_pieces.metadata.uri must be a local file") } } { - if c.Data.Car.FromPieces.Deals.IsZero() { + if c.Data.Car.FromPieces.Deals.URI.IsZero() { return fmt.Errorf("data.car.from_pieces.deals.uri must be set") } - if !c.Data.Car.FromPieces.Deals.IsLocal() { + if !c.Data.Car.FromPieces.Deals.URI.IsLocal() { return fmt.Errorf("data.car.from_pieces.deals.uri must be a local file") } } @@ -308,10 +312,10 @@ func (c *Config) Validate() error { return fmt.Errorf("indexes.cid_to_offset_and_size.uri is invalid") } if c.Data.Car.FromPieces != nil { - if !c.Data.Car.FromPieces.Metadata.IsValid() { + if !c.Data.Car.FromPieces.Metadata.URI.IsValid() { return fmt.Errorf("data.car.from_pieces.metadata.uri is invalid") } - if !c.Data.Car.FromPieces.Deals.IsValid() { + if !c.Data.Car.FromPieces.Deals.URI.IsValid() { return fmt.Errorf("data.car.from_pieces.deals.uri is invalid") } } else { diff --git a/epoch.go b/epoch.go index 5fbb8ebc..1bdd9b4e 100644 --- a/epoch.go +++ b/epoch.go @@ -11,6 +11,11 @@ import ( "io" "time" + "github.com/multiformats/go-multiaddr" + "github.com/ybbus/jsonrpc/v3" + + "github.com/anjor/carlet" + "github.com/davecgh/go-spew/spew" "github.com/gagliardetto/solana-go" "github.com/ipfs/go-cid" carv1 "github.com/ipld/go-car" @@ -26,6 +31,7 @@ import ( "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" "github.com/rpcpool/yellowstone-faithful/iplddecoders" "github.com/rpcpool/yellowstone-faithful/radiance/genesis" + splitcarfetcher "github.com/rpcpool/yellowstone-faithful/split-car-fetcher" "github.com/urfave/cli/v2" "k8s.io/klog/v2" ) @@ -245,12 +251,84 @@ func NewEpochFromConfig( } if isCarMode { - localCarReader, remoteCarReader, err := openCarStorage(c.Context, string(config.Data.Car.URI)) - if err != nil { - return nil, fmt.Errorf("failed to open CAR file: %w", err) - } + var localCarReader *carv2.Reader + var remoteCarReader ReaderAtCloser + var err error if config.IsSplitCarMode() { - 
// TODO: load the remote split CAR files. + + metadata, err := splitcarfetcher.MetadataFromYaml(string(config.Data.Car.FromPieces.Metadata.URI)) + if err != nil { + return nil, fmt.Errorf("failed to read pieces metadata: %w", err) + } + + dealRegistry, err := splitcarfetcher.DealsFromCSV(string(config.Data.Car.FromPieces.Deals.URI)) + if err != nil { + return nil, fmt.Errorf("failed to read deals: %w", err) + } + + lotusAPIAddress := "https://api.node.glif.io" + cl := jsonrpc.NewClient(lotusAPIAddress) + dm := splitcarfetcher.NewMinerInfo( + cl, + 5*time.Minute, + 5*time.Second, + ) + + scr, err := splitcarfetcher.NewSplitCarReader(metadata.CarPieces, + func(piece carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { + minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) + if !ok { + return nil, fmt.Errorf("failed to find miner for piece CID %s", piece.CommP) + } + klog.Infof("piece CID %s is stored on miner %s", piece.CommP, minerID) + minerInfo, err := dm.GetProviderInfo(c.Context, minerID) + if err != nil { + return nil, fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) + } + if len(minerInfo.Multiaddrs) == 0 { + return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) + } + spew.Dump(minerInfo) + // extract the IP address from the multiaddr: + split := multiaddr.Split(minerInfo.Multiaddrs[0]) + if len(split) < 2 { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + component0 := split[0].(*multiaddr.Component) + component1 := split[1].(*multiaddr.Component) + + var ip string + var port string + + if component0.Protocol().Code == multiaddr.P_IP4 { + ip = component0.Value() + port = component1.Value() + } else if component1.Protocol().Code == multiaddr.P_IP4 { + ip = component1.Value() + port = component0.Value() + } else { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + // reset the port to 80: + // TODO: use the appropriate port (80, better if 443 with TLS) + port = "80" + minerIP := fmt.Sprintf("%s:%s", ip, port) + klog.Infof("piece CID %s is stored on miner %s (%s)", piece.CommP, minerID, minerIP) + formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) + return splitcarfetcher.NewRemoteFileSplitCarReader( + piece.CommP.String(), + formattedURL, + ) + }) + if err != nil { + return nil, fmt.Errorf("failed to open CAR file from pieces: %w", err) + } + remoteCarReader = scr + } else { + localCarReader, remoteCarReader, err = openCarStorage(c.Context, string(config.Data.Car.URI)) + if err != nil { + return nil, fmt.Errorf("failed to open CAR file: %w", err) + } } if localCarReader != nil { ep.onClose = append(ep.onClose, localCarReader.Close) diff --git a/getSignaturesForAddress.go b/getSignaturesForAddress.go index b999c633..855cbf44 100644 --- a/getSignaturesForAddress.go +++ b/getSignaturesForAddress.go @@ -19,7 +19,7 @@ type GetSignaturesForAddressParams struct { func parseGetSignaturesForAddressParams(raw *json.RawMessage) (*GetSignaturesForAddressParams, error) { var params []any - if err := json.Unmarshal(*raw, ¶ms); err != nil { + if err := fasterJson.Unmarshal(*raw, ¶ms); err != nil { return nil, fmt.Errorf("failed to unmarshal params: %w", err) } if len(params) < 1 { @@ -94,13 +94,13 @@ var ( func parseTransactionError(v any) (map[string]any, error) { // TODO: if any of the following fails, return the original value. 
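 	// The marshal/unmarshal round-trip below coerces an arbitrary error value
 	// into a generic map[string]any so it can be reshaped for the RPC response
 	// (the concrete Go type of v is lost in the process).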
// marshal to json - b, err := json.Marshal(v) + b, err := fasterJson.Marshal(v) if err != nil { return nil, err } // then unmarshal to map var m map[string]any - err = json.Unmarshal(b, &m) + err = fasterJson.Unmarshal(b, &m) if err != nil { return nil, err } diff --git a/go.mod b/go.mod index d3ed9f7f..279a4540 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/rpcpool/yellowstone-faithful -go 1.20 +go 1.21 + +toolchain go1.21.4 replace github.com/anjor/carlet => github.com/rpcpool/carlet v0.0.4 @@ -73,7 +75,9 @@ require ( github.com/ryanuber/go-glob v1.0.0 github.com/tejzpr/ordered-concurrently/v3 v3.0.1 github.com/valyala/fasthttp v1.47.0 + github.com/ybbus/jsonrpc/v3 v3.1.5 golang.org/x/exp v0.0.0-20231006140011-7918f672742d + gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/klog v1.0.0 ) diff --git a/go.sum b/go.sum index 44078685..9c1e67d4 100644 --- a/go.sum +++ b/go.sum @@ -59,6 +59,7 @@ github.com/akavel/rsrc v0.8.0/go.mod h1:uLoCtb9J+EyAqh+26kdrTgmzRBFPGOolLWKpdxkK github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= +github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/allegro/bigcache/v3 v3.1.0 h1:H2Vp8VOvxcrB91o86fUSVJFqeuz8kpyyB02eH3bSzwk= github.com/allegro/bigcache/v3 v3.1.0/go.mod h1:aPyh7jEvrog9zAwx5N7+JUQX5dZTSGpxF1LAR4dr35I= github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 h1:MzBOUgng9orim59UnfUTLRjMpd09C5uEVQ6RPGeCaVI= @@ -116,6 +117,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3 h1:HVTnpeuvF6Owjd5mniCL8DEXo7uYXdQEmOP4FJbV5tg= +github.com/crackcomm/go-gitignore v0.0.0-20170627025303-887ab5e44cc3/go.mod h1:p1d6YEZWvFzEh4KLyvBcVSnrfNDDvK2zfK/4x2v/4pE= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis= github.com/daaku/go.zipexe v1.0.0/go.mod h1:z8IiR6TsVLEYKwXAoE/I+8ys/sDkgTzSL0CLnGVd57E= @@ -125,6 +127,7 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c h1:pFUpOrbxDR6AkioZ1ySsx5yxlDQZ8stG2b88gTPxgJU= github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c/go.mod h1:6UhI8N9EjYm1c2odKpFpAYeR8dsBeM7PtzQhRgxRr9U= github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5ilcvdfma9wOH6Y= +github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/dfuse-io/logging v0.0.0-20201110202154-26697de88c79/go.mod h1:V+ED4kT/t/lKtH99JQmKIb0v9WL3VaYkJ36CfHlVECI= @@ -204,6 +207,7 @@ github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwU github.com/francoispqt/gojay v1.2.13 
h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJnXKk= github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= @@ -286,6 +290,7 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -370,6 +375,7 @@ github.com/ipfs/boxo v0.11.1-0.20230817065640-7ec68c5e5adf/go.mod h1:8IfDmp+FzFG github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= +github.com/ipfs/go-bitswap v0.11.0/go.mod h1:05aE8H3XOU+LXpTedeAS0OZpcO1WFsj5niYQH9a1Tmk= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= github.com/ipfs/go-block-format v0.0.3/go.mod h1:4LmD4ZUw0mhO+JSKdpWwrzATiEfM7WWgQ8H5l6P8MVk= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= @@ -400,6 +406,7 @@ github.com/ipfs/go-hamt-ipld v0.1.1/go.mod h1:1EZCr2v0jlCnhpa+aZ0JZYp8Tt2w16+JJO github.com/ipfs/go-ipfs-blockstore v1.3.0 h1:m2EXaWgwTzAfsmt5UdJ7Is6l4gJcaM/A12XwJyvYvMM= github.com/ipfs/go-ipfs-blockstore v1.3.0/go.mod h1:KgtZyc9fq+P2xJUiCAzbRdhhqJHvsw8u2Dlqy2MyRTE= github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IWFJMcGIPQ= +github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= github.com/ipfs/go-ipfs-chunker v0.0.6 h1:+EBescK+ekHPMfmX3VYXRyOn/f80RXv7V8tGgLEYvo8= github.com/ipfs/go-ipfs-chunker v0.0.6/go.mod h1:whszqTIBqWNUvYvjkKvBSoR1akrsSpZbZCYMFbekMjE= github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= @@ -410,11 +417,15 @@ github.com/ipfs/go-ipfs-ds-help v1.1.0/go.mod h1:YR5+6EaebOhfcqVCyqemItCLthrpVNo github.com/ipfs/go-ipfs-exchange-interface v0.2.0 h1:8lMSJmKogZYNo2jjhUs0izT+dck05pqUw4mWNW9Pw6Y= github.com/ipfs/go-ipfs-exchange-interface v0.2.0/go.mod h1:z6+RhJuDQbqKguVyslSOuVDhqF9JtTrO3eptSAiW2/Y= github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uYokgWRFidfvEkuA= +github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s= github.com/ipfs/go-ipfs-files v0.3.0 h1:fallckyc5PYjuMEitPNrjRfpwl7YFt69heCOUhsbGxQ= +github.com/ipfs/go-ipfs-files v0.3.0/go.mod h1:xAUtYMwB+iu/dtf6+muHNSFQCJG2dSiStR2P6sn9tIM= github.com/ipfs/go-ipfs-posinfo v0.0.1 
h1:Esoxj+1JgSjX0+ylc0hUmJCOv6V2vFoZiETLR6OtpRs= +github.com/ipfs/go-ipfs-posinfo v0.0.1/go.mod h1:SwyeVP+jCwiDu0C313l/8jg6ZxM0qqtlt2a0vILTc1A= github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE= github.com/ipfs/go-ipfs-pq v0.0.3/go.mod h1:btNw5hsHBpRcSSgZtiNm/SLj5gYIZ18AKtv3kERkRb4= github.com/ipfs/go-ipfs-routing v0.3.0 h1:9W/W3N+g+y4ZDeffSgqhgo7BsBSJwPMcyssET9OWevc= +github.com/ipfs/go-ipfs-routing v0.3.0/go.mod h1:dKqtTFIql7e1zYsEuWLyuOU+E0WJWW8JjbTPLParDWo= github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= github.com/ipfs/go-ipfs-util v0.0.2/go.mod h1:CbPtkWJzjLdEcezDns2XYaehFVNXG9zrdrtMecczcsQ= github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= @@ -452,6 +463,7 @@ github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= github.com/ipfs/go-peertaskqueue v0.8.1/go.mod h1:Oxxd3eaK279FxeydSPPVGHzbwVeHjatZ2GA8XD+KbPU= github.com/ipfs/go-unixfs v0.4.5 h1:wj8JhxvV1G6CD7swACwSKYa+NgtdWC1RUit+gFnymDU= +github.com/ipfs/go-unixfs v0.4.5/go.mod h1:BIznJNvt/gEx/ooRMI4Us9K8+qeGO7vx1ohnbk8gjFg= github.com/ipfs/go-unixfsnode v1.9.0 h1:ubEhQhr22sPAKO2DNsyVBW7YB/zA8Zkif25aBvz8rc8= github.com/ipfs/go-unixfsnode v1.9.0/go.mod h1:HxRu9HYHOjK6HUqFBAi++7DVoWAHn0o4v/nZ/VA+0g8= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= @@ -465,9 +477,11 @@ github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYt github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH9C2E= github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ= github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd h1:gMlw/MhNr2Wtp5RwGdsW23cs+yCuj9k2ON7i9MiJlRo= +github.com/ipld/go-ipld-prime/storage/bsadapter v0.0.0-20230102063945-1a409dc236dd/go.mod h1:wZ8hH8UxeryOs4kJEJaiui/s00hDSbE37OKsL47g+Sw= github.com/ipld/go-trustless-utils v0.4.1 h1:puA14381Hg2LzH724mZ5ZFKFx+FFjjT5fPFs01vwlgM= github.com/ipld/go-trustless-utils v0.4.1/go.mod h1:DgGuyfJ33goYwYVisjnxrlra0HVmZuHWVisVIkzVo1o= github.com/ipld/ipld/specs v0.0.0-20231012031213-54d3b21deda4 h1:0VXv637/xpI0Pb5J8K+K8iRtTw4DOcxs0MB1HMzfwNY= +github.com/ipld/ipld/specs v0.0.0-20231012031213-54d3b21deda4/go.mod h1:WcT0DfRe+e2QFY0kcbsOnuT6jL5Q0JNZ83I5DHIdStg= github.com/ipni/go-libipni v0.5.3 h1:OtyQsetnTjIeXFMHcuEFUmCyAlrKEiOfZrv4FpCFj5A= github.com/ipni/go-libipni v0.5.3/go.mod h1:lwecr1Bn32BtroPW3Dnb9qzWGQ3IsB4STr1Cs+gS8TA= github.com/ipsn/go-secp256k1 v0.0.0-20180726113642-9d62b9f0bc52 h1:QG4CGBqCeuBo6aZlGAamSkxWdgWfZGeE49eUOWJPA4c= @@ -476,6 +490,7 @@ github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7Bd github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/jbenet/go-cienv v0.1.0/go.mod h1:TqNnHUmJgXau0nCzC7kXWeotg3J9W34CUv5Djy1+FlA= github.com/jbenet/go-random v0.0.0-20190219211222-123a90aedc0c h1:uUx61FiAa1GI6ZmVd2wf2vULeQZIKG66eybjNXKYCz4= +github.com/jbenet/go-random v0.0.0-20190219211222-123a90aedc0c/go.mod h1:sdx1xVM9UuLw1tXnhJWN3piypTUO3vCIHYmG15KE/dU= github.com/jbenet/go-temp-err-catcher v0.1.0 h1:zpb3ZH6wIE8Shj2sKS+khgRvf7T7RABoLk/+KKHggpk= github.com/jbenet/go-temp-err-catcher v0.1.0/go.mod h1:0kJRvmDZXNMIiJirNPEYfhpPwbGVtZVWC34vc5WLsDk= github.com/jbenet/goprocess v0.0.0-20160826012719-b497e2f366b8/go.mod h1:Ly/wlsjFq/qrU3Rar62tu1gASgGw6chQbSh/XgIIXCY= @@ 
-518,10 +533,12 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38yPW7c= @@ -537,6 +554,7 @@ github.com/libp2p/go-libp2p-record v0.2.0/go.mod h1:I+3zMkvvg5m2OcSdoL0KPljyJyvN github.com/libp2p/go-libp2p-routing-helpers v0.7.1 h1:kc0kWCZecbBPAiFEHhxfGJZPqjg1g9zV+X+ovR4Tmnc= github.com/libp2p/go-libp2p-routing-helpers v0.7.1/go.mod h1:cHStPSRC/wgbfpb5jYdMP7zaSmc2wWcb1mkzNr6AR8o= github.com/libp2p/go-libp2p-testing v0.12.0 h1:EPvBb4kKMWO29qP4mZGyhVzUyR25dvfUIK5WDu6iPUA= +github.com/libp2p/go-libp2p-testing v0.12.0/go.mod h1:KcGDRXyN7sQCllucn1cOOS+Dmm7ujhfEyXQL5lvkcPg= github.com/libp2p/go-msgio v0.3.0 h1:mf3Z8B1xcFN314sWX+2vOTShIE0Mmn2TXn3YCUQGNj0= github.com/libp2p/go-msgio v0.3.0/go.mod h1:nyRM819GmVaF9LX3l03RMh10QdOroF++NBbxAb0mmDM= github.com/libp2p/go-nat v0.2.0 h1:Tyz+bUFAYqGyJ/ppPPymMGbIgNRH+WqC5QrT5fKrrGk= @@ -664,6 +682,7 @@ github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xl github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= +github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= @@ -727,6 +746,7 @@ github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/ronanh/intcomp v1.1.0 h1:i54kxmpmSoOZFcWPMWryuakN0vLxLswASsGa07zkvLU= github.com/ronanh/intcomp v1.1.0/go.mod h1:7FOLy3P3Zj3er/kVrU/pl+Ql7JFZj7bwliMGketo0IU= github.com/rpcpool/carlet v0.0.4 h1:ZrDMvrS1Jewy4rQkj/ODy0SG8jXG0mljeNcY76kEnYg= @@ -772,6 +792,7 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1 github.com/smartystreets/assertions v1.0.1/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= 
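(The go.sum additions throughout this patch follow one pattern: each module
version is pinned by a pair of lines, an "h1:" hash over the module's file
tree and a "/go.mod h1:" hash over just its go.mod file, for example:

    go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
    go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=

Most lines added here are previously missing "/go.mod" halves, consistent with
re-running go mod tidy under the newly declared Go 1.21 toolchain; the one
genuinely new dependency is github.com/ybbus/jsonrpc/v3, added for the Lotus
JSON-RPC client used by the split-car fetcher.)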
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs= +github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8= github.com/smartystreets/goconvey v0.0.0-20190222223459-a17d461953aa/go.mod h1:2RVY1rIf+2J2o/IM9+vPq9RzmHDSseB7FoXiSNIUsoU= github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= @@ -842,6 +863,7 @@ github.com/vbauerster/mpb/v8 v8.2.1/go.mod h1:DqGePwrIYW6Bs5pXaGAuGgP0PYgu5VZKIj github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= github.com/warpfork/go-testmark v0.12.1 h1:rMgCpJfwy1sJ50x0M0NgyphxYYPMOODIJHhsXyEHU0s= +github.com/warpfork/go-testmark v0.12.1/go.mod h1:kHwy7wfvGSPh1rQJYKayD4AbtNaeyZdcGi9tNJTaa5Y= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= @@ -864,6 +886,7 @@ github.com/whyrusleeping/cbor-gen v0.0.0-20210303213153-67a261a1d291/go.mod h1:f github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25 h1:yVYDLoN2gmB3OdBXFW8e1UwgVbmCvNlnAKhvHPaNARI= github.com/whyrusleeping/cbor-gen v0.0.0-20230818171029-f91ae536ca25/go.mod h1:fgkXqYy7bV2cFeIEOkVTZS/WjXARfBqSH6Q2qHL33hQ= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= +github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= @@ -873,6 +896,8 @@ github.com/xlab/pkgconfig v0.0.0-20170226114623-cea12a0fd245/go.mod h1:C+diUUz7p github.com/xorcare/golden v0.6.0/go.mod h1:7T39/ZMvaSEZlBPoYfVFmsBLmUl3uz9IuzWj/U6FtvQ= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/ybbus/jsonrpc/v3 v3.1.5 h1:0cC/QzS8OCuXYqqDbYnKKhsEe+IZLrNlDx8KPCieeW0= +github.com/ybbus/jsonrpc/v3 v3.1.5/go.mod h1:U1QbyNfL5Pvi2roT0OpRbJeyvGxfWYSgKJHjxWdAEeE= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -898,6 +923,7 @@ go.opentelemetry.io/otel v1.16.0/go.mod h1:vl0h9NUa1D5s1nv3A5vZOYWn8av4K8Ml6JDeH go.opentelemetry.io/otel/metric v1.16.0 h1:RbrpwVG1Hfv85LgnZ7+txXioPDoh6EdbZHo26Q3hqOo= go.opentelemetry.io/otel/metric v1.16.0/go.mod h1:QE47cpOmkwipPiefDwo2wDzwJrlfxxNYodqc4xnGCo4= go.opentelemetry.io/otel/sdk v1.14.0 h1:PDCppFRDq8A1jL9v6KMI6dYesaq+DFcDZvjsoGvxGzY= 
+go.opentelemetry.io/otel/sdk v1.14.0/go.mod h1:bwIC5TjrNG6QDCHNWvW4HLHtUQ4I+VQDsnjhvyZCALM= go.opentelemetry.io/otel/trace v1.16.0 h1:8JRpaObFoW0pxuVPapkgH8UhHQj+bJW8jJsCZEu5MQs= go.opentelemetry.io/otel/trace v1.16.0/go.mod h1:Yt9vYq1SdNz3xdjZZK7wcXv1qv2pwLkqr2QVwea0ef0= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -913,6 +939,7 @@ go.uber.org/fx v1.20.1/go.mod h1:iSYNbHf2y55acNCwCXKx7LbWb5WG1Bnue5RDXz1OREg= go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= go.uber.org/mock v0.3.0 h1:3mUxI1No2/60yUYax92Pt8eNOEecx2D3lcXZh2NEZJo= go.uber.org/mock v0.3.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go index d62a6ea1..496e3755 100644 --- a/multiepoch-getBlock.go +++ b/multiepoch-getBlock.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "context" - "encoding/json" "errors" "fmt" "io" @@ -16,6 +15,7 @@ import ( "github.com/ipfs/go-cid" "github.com/ipld/go-car/util" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + jsoniter "github.com/json-iterator/go" "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/rpcpool/yellowstone-faithful/ipld/ipldbindcode" solanablockrewards "github.com/rpcpool/yellowstone-faithful/solana-block-rewards" @@ -24,6 +24,8 @@ import ( "k8s.io/klog/v2" ) +var fasterJson = jsoniter.ConfigCompatibleWithStandardLibrary + func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContext, req *jsonrpc2.Request) (*jsonrpc2.Error, error) { tim := newTimer() params, err := parseGetBlockRequest(req.Params) @@ -281,7 +283,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex } else { { // encode rewards as JSON, then decode it as a map - buf, err := json.Marshal(actualRewards) + buf, err := fasterJson.Marshal(actualRewards) if err != nil { return &jsonrpc2.Error{ Code: jsonrpc2.CodeInternalError, @@ -289,7 +291,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex }, fmt.Errorf("failed to encode rewards: %v", err) } var m map[string]any - err = json.Unmarshal(buf, &m) + err = fasterJson.Unmarshal(buf, &m) if err != nil { return &jsonrpc2.Error{ Code: jsonrpc2.CodeInternalError, diff --git a/multiepoch-getVersion.go b/multiepoch-getVersion.go index 9201c56e..f635ee18 100644 --- a/multiepoch-getVersion.go +++ b/multiepoch-getVersion.go @@ -9,7 +9,7 @@ import ( func (ser *MultiEpoch) tryEnrichGetVersion(body []byte) ([]byte, error) { var decodedRemote jsonrpc2.Response - if err := json.Unmarshal(body, &decodedRemote); err != nil { + if err := fasterJson.Unmarshal(body, &decodedRemote); err != nil { return nil, err } if decodedRemote.Error != nil || decodedRemote.Result == nil { @@ -17,7 +17,7 @@ func (ser *MultiEpoch) tryEnrichGetVersion(body []byte) ([]byte, error) { } // node decode the result: var decodedResult map[string]any - if err := json.Unmarshal(*decodedRemote.Result, &decodedResult); err != nil { + if err := fasterJson.Unmarshal(*decodedRemote.Result, &decodedResult); err != nil { return nil, fmt.Errorf("failed to decode result: %w", err) } // enrich the result: @@ -25,13 +25,13 @@ func (ser 
*MultiEpoch) tryEnrichGetVersion(body []byte) ([]byte, error) { decodedResult["faithful"] = faithfulVersion // re-encode the result: - encodedResult, err := json.Marshal(decodedResult) + encodedResult, err := fasterJson.Marshal(decodedResult) if err != nil { return nil, fmt.Errorf("failed to re-encode result: %w", err) } // re-encode the response: decodedRemote.Result = (*json.RawMessage)(&encodedResult) - encodedResponse, err := json.Marshal(decodedRemote) + encodedResponse, err := fasterJson.Marshal(decodedRemote) if err != nil { return nil, fmt.Errorf("failed to re-encode response: %w", err) } @@ -49,8 +49,8 @@ func (ser *MultiEpoch) GetFaithfulVersionInfo() map[string]any { // This function should return the solana version we are compatible with func (ser *MultiEpoch) GetSolanaVersionInfo() map[string]any { - solanaVersion := make(map[string]any) - solanaVersion["feature-set"] = 1879391783 - solanaVersion["solana-core"] = "1.16.7" - return solanaVersion + solanaVersion := make(map[string]any) + solanaVersion["feature-set"] = 1879391783 + solanaVersion["solana-core"] = "1.16.7" + return solanaVersion } diff --git a/multiepoch.go b/multiepoch.go index f6e65dc3..ec2e1711 100644 --- a/multiepoch.go +++ b/multiepoch.go @@ -3,7 +3,6 @@ package main import ( "context" "crypto/rand" - "encoding/json" "fmt" "net/http" "sort" @@ -310,7 +309,7 @@ func newMultiEpochHandler(handler *MultiEpoch, lsConf *ListenerConfig) func(ctx // parse request var rpcRequest jsonrpc2.Request - if err := json.Unmarshal(body, &rpcRequest); err != nil { + if err := fasterJson.Unmarshal(body, &rpcRequest); err != nil { klog.Errorf("[%s] failed to parse request body: %v", err) replyJSON(reqCtx, http.StatusBadRequest, jsonrpc2.Response{ Error: &jsonrpc2.Error{ diff --git a/request-response.go b/request-response.go index 39552822..2983bb1d 100644 --- a/request-response.go +++ b/request-response.go @@ -174,7 +174,7 @@ func (req *GetBlockRequest) Validate() error { func parseGetBlockRequest(raw *json.RawMessage) (*GetBlockRequest, error) { var params []any - if err := json.Unmarshal(*raw, ¶ms); err != nil { + if err := fasterJson.Unmarshal(*raw, ¶ms); err != nil { return nil, fmt.Errorf("failed to unmarshal params: %w", err) } if len(params) < 1 { @@ -311,7 +311,7 @@ func isAnyEncodingOf(s solana.EncodingType, anyOf ...solana.EncodingType) bool { func parseGetTransactionRequest(raw *json.RawMessage) (*GetTransactionRequest, error) { var params []any - if err := json.Unmarshal(*raw, ¶ms); err != nil { + if err := fasterJson.Unmarshal(*raw, ¶ms); err != nil { return nil, fmt.Errorf("failed to unmarshal params: %w", err) } if len(params) < 1 { @@ -483,7 +483,7 @@ func encodeBytesResponseBasedOnWantedEncoding( func parseGetBlockTimeRequest(raw *json.RawMessage) (uint64, error) { var params []any - if err := json.Unmarshal(*raw, ¶ms); err != nil { + if err := fasterJson.Unmarshal(*raw, ¶ms); err != nil { return 0, fmt.Errorf("failed to unmarshal params: %w", err) } if len(params) < 1 { diff --git a/split-car-fetcher/deals.go b/split-car-fetcher/deals.go new file mode 100644 index 00000000..42f442a6 --- /dev/null +++ b/split-car-fetcher/deals.go @@ -0,0 +1,121 @@ +package splitcarfetcher + +import ( + "encoding/csv" + "fmt" + "io" + "os" + "strconv" + + "github.com/filecoin-project/go-address" + "github.com/ipfs/go-cid" +) + +// provider,deal_uuid,file_name,url,commp_piece_cid,file_size,padded_size,payload_cid +type Deal struct { + Provider address.Address + DealUUID string + FileName string + URL string + CommpPieceCID cid.Cid + 
FileSize int64 + PaddedFileSize int64 + PayloadCID string +} + +type DealRegistry struct { + pieceToDeal map[cid.Cid]Deal +} + +func NewDealRegistry() *DealRegistry { + return &DealRegistry{ + pieceToDeal: make(map[cid.Cid]Deal), + } +} + +// DealsFromCSV reads a CSV file and returns a DealRegistry. +func DealsFromCSV(path string) (*DealRegistry, error) { + file, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open file %q: %w", path, err) + } + defer file.Close() + + r := csv.NewReader(file) + r.FieldsPerRecord = 8 + r.Comment = '#' + r.TrimLeadingSpace = true + + registry := NewDealRegistry() + + // read header + if header, err := r.Read(); err != nil { + return registry, err + } else { + // check that the header is correct + if header[0] != "provider" || + header[1] != "deal_uuid" || + header[2] != "file_name" || + header[3] != "url" || + header[4] != "commp_piece_cid" || + header[5] != "file_size" || + header[6] != "padded_size" || + header[7] != "payload_cid" { + return registry, fmt.Errorf("invalid header: %v", header) + } + } + for { + record, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return registry, fmt.Errorf("failed to read csv record line: %w", err) + } + + maddr, err := address.NewFromString(record[0]) + if err != nil { + return registry, fmt.Errorf("failed to parse miner address: %w", err) + } + + fileSize, err := strconv.ParseInt(record[5], 10, 64) + if err != nil { + return registry, fmt.Errorf("failed to parse file_size: %w", err) + } + + paddedFileSize, err := strconv.ParseInt(record[6], 10, 64) + if err != nil { + return registry, fmt.Errorf("failed to parse padded_size: %w", err) + } + + deal := Deal{ + Provider: maddr, + DealUUID: record[1], + FileName: record[2], + URL: record[3], + CommpPieceCID: cid.MustParse(record[4]), + FileSize: fileSize, + PaddedFileSize: paddedFileSize, + PayloadCID: record[7], + } + + registry.pieceToDeal[deal.CommpPieceCID] = deal + } + + return registry, nil +} + +// GetDeal returns the deal associated with the given piece CID. +func (r *DealRegistry) GetDeal(pieceCID cid.Cid) (Deal, bool) { + deal, ok := r.pieceToDeal[pieceCID] + return deal, ok +} + +// GetMinerByPieceCID returns the miner associated with the given piece CID. 
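// To illustrate the layout DealsFromCSV parses, a row matching the header
// checked above might look like this (all values hypothetical):
//
//	provider,deal_uuid,file_name,url,commp_piece_cid,file_size,padded_size,payload_cid
//	f01234,5b6e0a1c-...,epoch-0-piece-1.car,http://example.invalid/piece.car,baga6ea4seaq...,1048576,2097152,bafybeib...
//
// Each parsed row is stored in pieceToDeal keyed by its commp_piece_cid,
// which is what GetDeal above and GetMinerByPieceCID below consult.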
+func (r *DealRegistry) GetMinerByPieceCID(pieceCID cid.Cid) (address.Address, bool) { + deal, ok := r.pieceToDeal[pieceCID] + if !ok { + return address.Address{}, false + } + return deal.Provider, true +} diff --git a/split-car-fetcher/fetcher.go b/split-car-fetcher/fetcher.go index ff74ee55..acf4ce1b 100644 --- a/split-car-fetcher/fetcher.go +++ b/split-car-fetcher/fetcher.go @@ -108,7 +108,7 @@ type RemoteFileSplitCarReader struct { func NewRemoteFileSplitCarReader(commP string, url string) (*RemoteFileSplitCarReader, error) { size, err := getContentSizeWithHeadOrZeroRange(url) if err != nil { - return nil, fmt.Errorf("failed to get content size: %s", err) + return nil, fmt.Errorf("failed to get content size from %q: %s", url, err) } return &RemoteFileSplitCarReader{ commP: commP, @@ -134,7 +134,7 @@ func (fscr *RemoteFileSplitCarReader) ReadAt(p []byte, off int64) (n int, err er } defer resp.Body.Close() if resp.StatusCode != http.StatusPartialContent { - return 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + return 0, fmt.Errorf("GET %q: unexpected status code: %d", fscr.url, resp.StatusCode) } n, err = io.ReadFull(resp.Body, p) if err != nil { @@ -175,7 +175,7 @@ func NewSplitCarReader( for _, cf := range files.CarPieces { fi, err := readerCreator(cf) if err != nil { - return nil, fmt.Errorf("failed to open file %q: %s", cf.Name, err) + return nil, fmt.Errorf("failed to open remote file %q: %s", cf.CommP, err) } size := int(fi.Size()) @@ -185,7 +185,7 @@ func NewSplitCarReader( expectedSize := int(cf.HeaderSize) + int(cf.ContentSize) // NOTE: valid only for pre-upload split CARs. They get padded after upload. if size != expectedSize { return nil, fmt.Errorf( - "file %q has unexpected size: saved=%d actual=%d (diff=%d)", + "remote file %q has unexpected size: saved=%d actual=%d (diff=%d)", cf.Name, expectedSize, size, diff --git a/split-car-fetcher/metadata.go b/split-car-fetcher/metadata.go new file mode 100644 index 00000000..6dd021a5 --- /dev/null +++ b/split-car-fetcher/metadata.go @@ -0,0 +1,29 @@ +package splitcarfetcher + +import ( + "fmt" + "os" + + "github.com/anjor/carlet" + "gopkg.in/yaml.v2" +) + +type Metadata struct { + CarPieces *carlet.CarPiecesAndMetadata `yaml:"car_pieces_meta"` +} + +func MetadataFromYaml(path string) (*Metadata, error) { + var meta Metadata + + metadataFileContent, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read pieces metadata file: %w", err) + } + + // read the yaml file + err = yaml.Unmarshal(metadataFileContent, &meta) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal pieces metadata: %w", err) + } + return &meta, nil +} diff --git a/split-car-fetcher/miner-info.go b/split-car-fetcher/miner-info.go index 3aef0a33..54c22806 100644 --- a/split-car-fetcher/miner-info.go +++ b/split-car-fetcher/miner-info.go @@ -6,10 +6,11 @@ import ( "fmt" "time" - "github.com/gagliardetto/solana-go/rpc/jsonrpc" + "github.com/filecoin-project/go-address" "github.com/jellydator/ttlcache/v3" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multiaddr" + "github.com/ybbus/jsonrpc/v3" ) type MinerInfoCache struct { @@ -40,17 +41,17 @@ func NewMinerInfo( } } -func (d MinerInfoCache) GetProviderInfo(ctx context.Context, provider string) (*MinerInfo, error) { - file := d.minerInfoCache.Get(provider) +func (d MinerInfoCache) GetProviderInfo(ctx context.Context, provider address.Address) (*MinerInfo, error) { + file := d.minerInfoCache.Get(provider.String()) if file != nil && 
!file.IsExpired() { return file.Value(), nil } - minerInfo, err := MinerInfoFetcher{Client: d.lotusClient}.GetProviderInfo(ctx, provider) + minerInfo, err := MinerInfoFetcher{Client: d.lotusClient}.GetProviderInfo(ctx, provider.String()) if err != nil { return nil, err } - d.minerInfoCache.Set(provider, minerInfo, ttlcache.DefaultTTL) + d.minerInfoCache.Set(provider.String(), minerInfo, ttlcache.DefaultTTL) return minerInfo, nil } diff --git a/tools.go b/tools.go index 2ad5d100..8d6f4f11 100644 --- a/tools.go +++ b/tools.go @@ -1,7 +1,6 @@ package main import ( - "encoding/json" "fmt" "os" "time" @@ -58,7 +57,7 @@ func loadFromJSON(configFilepath string, dst any) error { return fmt.Errorf("failed to open config file: %w", err) } defer file.Close() - return json.NewDecoder(file).Decode(dst) + return fasterJson.NewDecoder(file).Decode(dst) } // loadFromYAML loads a YAML file into dst (which must be a pointer). From 3a3e63c212042837fa44f2b669cdcade52e038a9 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 14 Dec 2023 12:27:38 +0100 Subject: [PATCH 43/63] Fix tests --- indexes/index-cid-to-offset-and-size_test.go | 6 ++---- indexes/index-sig-to-cid_test.go | 6 ++---- indexes/index-slot-to-cid_test.go | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/indexes/index-cid-to-offset-and-size_test.go b/indexes/index-cid-to-offset-and-size_test.go index 74f5d451..79e3d609 100644 --- a/indexes/index-cid-to-offset-and-size_test.go +++ b/indexes/index-cid-to-offset-and-size_test.go @@ -72,10 +72,8 @@ func TestCidToOffsetAndSize(t *testing.T) { } } { - // if try to close the index before sealing it, should panic - require.Panics(t, func() { - require.NoError(t, writer.Close()) - }) + // if try to close the index before sealing it, it should fail + require.Error(t, writer.Close()) } // seal the index diff --git a/indexes/index-sig-to-cid_test.go b/indexes/index-sig-to-cid_test.go index 69d36424..2c79de57 100644 --- a/indexes/index-sig-to-cid_test.go +++ b/indexes/index-sig-to-cid_test.go @@ -57,10 +57,8 @@ func TestSigToCid(t *testing.T) { } } { - // if try to close the index before sealing it, should panic - require.Panics(t, func() { - require.NoError(t, writer.Close()) - }) + // if try to close the index before sealing it, it should fail + require.Error(t, writer.Close()) } // seal the index diff --git a/indexes/index-slot-to-cid_test.go b/indexes/index-slot-to-cid_test.go index 007a67eb..a236e2ea 100644 --- a/indexes/index-slot-to-cid_test.go +++ b/indexes/index-slot-to-cid_test.go @@ -51,10 +51,8 @@ func TestSlotToCid(t *testing.T) { } } { - // if try to close the index before sealing it, should panic - require.Panics(t, func() { - require.NoError(t, writer.Close()) - }) + // if try to close the index before sealing it, it should fail + require.Error(t, writer.Close()) } // seal the index From a18e67ea0ed8022bafaef73d01d0567ffd10bbec Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Mon, 18 Dec 2023 16:11:10 +0100 Subject: [PATCH 44/63] Add json tags --- go.mod | 2 +- ipld/ipldbindcode/methods.go | 100 +++++++++++++++++++++-- ipld/ipldbindcode/methods_test.go | 73 +++++++++++++++++ ipld/ipldbindcode/non-generated-types.go | 67 +++++++++++++++ ipld/ipldbindcode/types.go | 78 +++++++++--------- iplddecoders/decoders.go | 34 +++++++- 6 files changed, 308 insertions(+), 46 deletions(-) create mode 100644 ipld/ipldbindcode/methods_test.go diff --git a/go.mod b/go.mod index 279a4540..1932a9ef 100644 --- a/go.mod +++ b/go.mod @@ -63,6 +63,7 @@ require ( require ( 
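// (Related: this commit also moves github.com/filecoin-project/go-address
// from the indirect block into the direct requirements below, matching the
// new direct imports of address.Address in split-car-fetcher/deals.go and
// split-car-fetcher/miner-info.go.)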
github.com/allegro/bigcache/v3 v3.1.0 github.com/anjor/carlet v0.0.0-00010101000000-000000000000 + github.com/filecoin-project/go-address v1.1.0 github.com/fsnotify/fsnotify v1.5.4 github.com/goware/urlx v0.3.2 github.com/ipld/go-car v0.5.0 @@ -106,7 +107,6 @@ require ( github.com/docker/go-units v0.5.0 // indirect github.com/elastic/gosigar v0.14.2 // indirect github.com/fatih/color v1.14.1 // indirect - github.com/filecoin-project/go-address v1.1.0 // indirect github.com/filecoin-project/go-amt-ipld/v4 v4.1.0 // indirect github.com/filecoin-project/go-cbor-util v0.0.1 // indirect github.com/filecoin-project/go-ds-versioning v0.1.2 // indirect diff --git a/ipld/ipldbindcode/methods.go b/ipld/ipldbindcode/methods.go index ce3a0158..463fec10 100644 --- a/ipld/ipldbindcode/methods.go +++ b/ipld/ipldbindcode/methods.go @@ -1,9 +1,15 @@ package ipldbindcode import ( + "encoding/json" "fmt" "hash/crc64" "hash/fnv" + "strconv" + "strings" + + "github.com/ipfs/go-cid" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" ) // DataFrame.HasHash returns whether the 'Hash' field is present. @@ -13,11 +19,11 @@ func (n DataFrame) HasHash() bool { // GetHash returns the value of the 'Hash' field and // a flag indicating whether the field has a value. -func (n DataFrame) GetHash() (int, bool) { +func (n DataFrame) GetHash() (uint64, bool) { if n.Hash == nil || *n.Hash == nil { return 0, false } - return **n.Hash, true + return uint64(**n.Hash), true } // HasIndex returns whether the 'Index' field is present. @@ -85,10 +91,10 @@ func checksumCrc64(buf []byte) uint64 { // VerifyHash verifies that the provided data matches the provided hash. // In case of DataFrames, the hash is stored in the 'Hash' field, and // it is the hash of the concatenated 'Data' fields of all the DataFrames. -func VerifyHash(data []byte, hash int) error { - if checksumCrc64(data) != uint64(hash) { +func VerifyHash(data []byte, hash uint64) error { + if checksumCrc64(data) != (hash) { // Maybe it's the legacy checksum function? - if checksumFnv(data) != uint64(hash) { + if checksumFnv(data) != (hash) { return fmt.Errorf("data hash mismatch") } } @@ -119,3 +125,87 @@ func (n Block) GetBlockHeight() (uint64, bool) { } return uint64(**n.Meta.Block_height), true } + +// DataFrame.MarshalJSON implements the json.Marshaler interface. +func (n DataFrame) MarshalJSON() ([]byte, error) { + out := new(strings.Builder) + out.WriteString(`{"kind":`) + out.WriteString(fmt.Sprintf("%d", n.Kind)) + if n.Hash != nil && *n.Hash != nil { + out.WriteString(`,"hash":`) + out.WriteString(fmt.Sprintf(`"%d"`, uint64(**n.Hash))) + } else { + out.WriteString(`,"hash":null`) + } + + if n.Index != nil && *n.Index != nil { + out.WriteString(`,"index":`) + out.WriteString(fmt.Sprintf("%d", **n.Index)) + } else { + out.WriteString(`,"index":null`) + } + if n.Total != nil && *n.Total != nil { + out.WriteString(`,"total":`) + out.WriteString(fmt.Sprintf("%d", **n.Total)) + } else { + out.WriteString(`,"total":null`) + } + out.WriteString(`,"data":`) + out.WriteString(fmt.Sprintf("%q", n.Data.String())) + if n.Next != nil && *n.Next != nil { + out.WriteString(`,"next":`) + nextAsJSON, err := json.Marshal(**n.Next) + if err != nil { + return nil, err + } + out.Write(nextAsJSON) + } else { + out.WriteString(`,"next":null`) + } + out.WriteString("}") + return []byte(out.String()), nil +} + +// DataFrame.UnmarshalJSON implements the json.Unmarshaler interface. 
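// (On the hash round trip: MarshalJSON above writes "hash":"456" as a JSON
// string rather than a bare number, and UnmarshalJSON below parses it back
// with strconv.ParseUint. crc64 checksums span the full uint64 range;
// serializing them as strings avoids corruption by consumers that decode
// JSON numbers as float64, which is only exact up to 2^53.)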
+func (n *DataFrame) UnmarshalJSON(data []byte) error { + // We have to use a custom unmarshaler because we need to + // unmarshal the 'data' field as a string, and then convert + // it to a byte slice. + type Alias DataFrame + + type CidObj map[string]string + aux := &struct { + Data string `json:"data"` + Hash string `json:"hash"` + Next []CidObj `json:"next"` + *Alias + }{ + Alias: (*Alias)(n), + } + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + n.Data.FromString(aux.Data) + if aux.Hash != "" { + hash, err := strconv.ParseUint(aux.Hash, 10, 64) + if err != nil { + return err + } + h := int(hash) + hp := &h + n.Hash = &hp + } + if len(aux.Next) > 0 { + next := List__Link{} + for _, c := range aux.Next { + decoded, err := cid.Decode(c["/"]) + if err != nil { + return err + } + next = append(next, cidlink.Link{Cid: decoded}) + } + nextP := &next + n.Next = &nextP + } + return nil +} diff --git a/ipld/ipldbindcode/methods_test.go b/ipld/ipldbindcode/methods_test.go new file mode 100644 index 00000000..7eec8e63 --- /dev/null +++ b/ipld/ipldbindcode/methods_test.go @@ -0,0 +1,73 @@ +package ipldbindcode + +import ( + "encoding/json" + "testing" + + "github.com/ipfs/go-cid" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/stretchr/testify/require" +) + +func TestDataFrame(t *testing.T) { + { + df := DataFrame{ + Kind: 6, + } + { + h := 456 + hp := &h + df.Hash = &hp + } + { + i := 123 + ip := &i + df.Index = &ip + } + { + t := 789 + tp := &t + df.Total = &tp + } + df.Data = []uint8{1, 2, 3} + { + got, err := df.MarshalJSON() + require.NoError(t, err) + want := `{"kind":6,"hash":"456","index":123,"total":789,"data":"AQID","next":null}` + if string(got) != want { + t.Fatalf("got %s, want %s", got, want) + } + { + // try unmarshal + var df2 DataFrame + err := json.Unmarshal(got, &df2) + require.NoError(t, err) + + require.Equal(t, df, df2) + } + } + // now add some next values + parsedCid, err := cid.Parse("bafyreigggzehcmuibshwtq35acyie6cyuahqjklwe5stxnqoqosuevz6w4") + require.NoError(t, err) + next := &List__Link{ + cidlink.Link{Cid: parsedCid}, + } + df.Next = &next + { + got, err := df.MarshalJSON() + require.NoError(t, err) + want := `{"kind":6,"hash":"456","index":123,"total":789,"data":"AQID","next":[{"/":"bafyreigggzehcmuibshwtq35acyie6cyuahqjklwe5stxnqoqosuevz6w4"}]}` + if string(got) != want { + t.Fatalf("got %s, want %s", got, want) + } + { + // try unmarshal + var df2 DataFrame + err := json.Unmarshal(got, &df2) + require.NoError(t, err) + + require.Equal(t, df, df2) + } + } + } +} diff --git a/ipld/ipldbindcode/non-generated-types.go b/ipld/ipldbindcode/non-generated-types.go index eb381e0a..2a672564 100644 --- a/ipld/ipldbindcode/non-generated-types.go +++ b/ipld/ipldbindcode/non-generated-types.go @@ -1,6 +1,73 @@ package ipldbindcode +import ( + "encoding/base64" + "encoding/hex" + "strconv" +) + type ( Hash []uint8 Buffer []uint8 ) + +// Hash.String() returns the string representation of the Hash in hex. +func (h Hash) String() string { + return hex.EncodeToString(h) +} + +// Buffer.String() returns the string representation of the Buffer in base64. +func (b Buffer) String() string { + return base64.StdEncoding.EncodeToString(b) +} + +func (b *Buffer) FromString(s string) error { + decoded, err := base64.StdEncoding.DecodeString(s) + if err != nil { + return err + } + *b = decoded + return nil +} + +// Buffer.MarshalJSON() returns the JSON representation of the Buffer in base64. 
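// For example (matching the fixtures in methods_test.go above):
//
//	Buffer{1, 2, 3}.String()   // "AQID" (standard base64 of 0x01 0x02 0x03)
//	Hash{0xde, 0xad}.String()  // "dead" (lowercase hex)
//
// which is why a DataFrame whose Data is []uint8{1, 2, 3} serializes with
// "data":"AQID" in the test expectations.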
+func (b Buffer) MarshalJSON() ([]byte, error) { + return []byte("\"" + b.String() + "\""), nil +} + +// Buffer.UnmarshalJSON() decodes the JSON representation of the Buffer in base64. +func (b *Buffer) UnmarshalJSON(data []byte) error { + // strip the quotes + dataAsString, err := strconv.Unquote(string(data)) + if err != nil { + return err + } + // decode the base64 + decoded, err := base64.StdEncoding.DecodeString(dataAsString) + if err != nil { + return err + } + *b = decoded + return nil +} + +// Hash.MarshalJSON() returns the JSON representation of the Hash in hex. +func (h Hash) MarshalJSON() ([]byte, error) { + return []byte("\"" + h.String() + "\""), nil +} + +// Hash.UnmarshalJSON() decodes the JSON representation of the Hash in hex. +func (h *Hash) UnmarshalJSON(data []byte) error { + // strip the quotes + dataAsString, err := strconv.Unquote(string(data)) + if err != nil { + return err + } + // decode the hex + decoded, err := hex.DecodeString(dataAsString) + if err != nil { + return err + } + *h = decoded + return nil +} diff --git a/ipld/ipldbindcode/types.go b/ipld/ipldbindcode/types.go index 399837e7..11a94ec2 100644 --- a/ipld/ipldbindcode/types.go +++ b/ipld/ipldbindcode/types.go @@ -1,64 +1,68 @@ package ipldbindcode -import "github.com/ipld/go-ipld-prime/datamodel" +import ( + "github.com/ipld/go-ipld-prime/datamodel" +) type ( List__Link []datamodel.Link Epoch struct { - Kind int - Epoch int - Subsets List__Link + Kind int `json:"kind" yaml:"kind"` + Epoch int `json:"epoch" yaml:"epoch"` + Subsets List__Link `json:"subsets" yaml:"subsets"` } ) + type Subset struct { - Kind int - First int - Last int - Blocks List__Link + Kind int `json:"kind" yaml:"kind"` + First int `json:"first" yaml:"first"` + Last int `json:"last" yaml:"last"` + Blocks List__Link `json:"blocks" yaml:"blocks"` } type ( List__Shredding []Shredding Block struct { - Kind int - Slot int - Shredding List__Shredding - Entries List__Link - Meta SlotMeta - Rewards datamodel.Link + Kind int `json:"kind" yaml:"kind"` + Slot int `json:"slot" yaml:"slot"` + Shredding List__Shredding `json:"shredding" yaml:"shredding"` + Entries List__Link `json:"entries" yaml:"entries"` + Meta SlotMeta `json:"meta" yaml:"meta"` + Rewards datamodel.Link `json:"rewards" yaml:"rewards"` } ) + type Rewards struct { - Kind int - Slot int - Data DataFrame + Kind int `json:"kind" yaml:"kind"` + Slot int `json:"slot" yaml:"slot"` + Data DataFrame `json:"data" yaml:"data"` } type SlotMeta struct { - Parent_slot int - Blocktime int - Block_height **int + Parent_slot int `json:"parent_slot" yaml:"parent_slot"` + Blocktime int `json:"blocktime" yaml:"blocktime"` + Block_height **int `json:"block_height" yaml:"block_height"` } type Shredding struct { - EntryEndIdx int - ShredEndIdx int + EntryEndIdx int `json:"entry_end_idx" yaml:"entry_end_idx"` + ShredEndIdx int `json:"shred_end_idx" yaml:"shred_end_idx"` } type Entry struct { - Kind int - NumHashes int - Hash []uint8 - Transactions List__Link + Kind int `json:"kind" yaml:"kind"` + NumHashes int `json:"num_hashes" yaml:"num_hashes"` + Hash Hash `json:"hash" yaml:"hash"` + Transactions List__Link `json:"transactions" yaml:"transactions"` } type Transaction struct { - Kind int - Data DataFrame - Metadata DataFrame - Slot int - Index **int + Kind int `json:"kind" yaml:"kind"` + Data DataFrame `json:"data" yaml:"data"` + Metadata DataFrame `json:"metadata" yaml:"metadata"` + Slot int `json:"slot" yaml:"slot"` + Index **int `json:"index" yaml:"index"` } type DataFrame struct { - Kind int - 
Hash **int - Index **int - Total **int - Data []uint8 - Next **List__Link + Kind int `json:"kind" yaml:"kind"` + Hash **int `json:"hash" yaml:"hash"` + Index **int `json:"index" yaml:"index"` + Total **int `json:"total" yaml:"total"` + Data Buffer `json:"data" yaml:"data"` + Next **List__Link `json:"next" yaml:"next"` } diff --git a/iplddecoders/decoders.go b/iplddecoders/decoders.go index 55cda5ed..d363e5fe 100644 --- a/iplddecoders/decoders.go +++ b/iplddecoders/decoders.go @@ -20,6 +20,26 @@ const ( KindDataFrame ) +type KindSlice []Kind + +func (ks KindSlice) Has(k Kind) bool { + for _, kind := range ks { + if kind == k { + return true + } + } + return false +} + +func (ks KindSlice) HasAny(kinds ...Kind) bool { + for _, kind := range kinds { + if ks.Has(kind) { + return true + } + } + return false +} + // String returns the string representation of the Kind. func (k Kind) String() string { switch k { @@ -127,10 +147,10 @@ func DecodeDataFrame(dataFrameRaw []byte) (*ipldbindcode.DataFrame, error) { } func DecodeAny(anyRaw []byte) (any, error) { - if len(anyRaw) == 0 { - return nil, fmt.Errorf("empty bytes") + kind, err := GetKind(anyRaw) + if err != nil { + return nil, err } - kind := Kind(anyRaw[1]) switch kind { case KindTransaction: @@ -151,3 +171,11 @@ func DecodeAny(anyRaw []byte) (any, error) { return nil, fmt.Errorf("unknown kind %d", int(kind)) } } + +func GetKind(anyRaw []byte) (Kind, error) { + if len(anyRaw) == 0 { + return Kind(0), fmt.Errorf("empty bytes") + } + kind := Kind(anyRaw[1]) + return kind, nil +} From e813ba2ab3b4c2f7d2f7a61d0daacfc8c6fcf757 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 19 Dec 2023 18:56:57 +0100 Subject: [PATCH 45/63] Fix filename of sig-exists indexes --- cmd-x-index-all.go | 2 +- cmd-x-index-sig-exists.go | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go index c4a9d9c9..1e6c0e59 100644 --- a/cmd-x-index-all.go +++ b/cmd-x-index-all.go @@ -210,7 +210,7 @@ func createAllIndexes( } defer sig_to_cid.Close() - sigExistsFilepath := formatSigExistsIndexFilePath(indexDir, carPath, rootCID.String()) + sigExistsFilepath := formatSigExistsIndexFilePath(indexDir, epoch, rootCID, network) sig_exists, err := bucketteer.NewWriter( sigExistsFilepath, ) diff --git a/cmd-x-index-sig-exists.go b/cmd-x-index-sig-exists.go index f357feff..37517139 100644 --- a/cmd-x-index-sig-exists.go +++ b/cmd-x-index-sig-exists.go @@ -16,6 +16,7 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" "github.com/gagliardetto/solana-go" + "github.com/ipfs/go-cid" "github.com/ipfs/go-libipfs/blocks" "github.com/ipld/go-car" "github.com/rpcpool/yellowstone-faithful/bucketteer" @@ -122,7 +123,7 @@ func newCmd_Index_sigExists() *cli.Command { } klog.Infof("Creating sig-exists index for %s", carPath) - indexFilePath := formatSigExistsIndexFilePath(indexDir, carPath, rootCID.String()) + indexFilePath := formatSigExistsIndexFilePath(indexDir, epoch, rootCID, network) index, err := bucketteer.NewWriter( indexFilePath, ) @@ -264,8 +265,21 @@ func newCmd_Index_sigExists() *cli.Command { } } -func formatSigExistsIndexFilePath(indexDir string, carPath string, rootCID string) string { - return filepath.Join(indexDir, fmt.Sprintf("%s.%s.sig-exists.index", filepath.Base(carPath), rootCID)) +func formatSigExistsIndexFilePath(indexDir string, epoch uint64, rootCID cid.Cid, network indexes.Network) string { + return filepath.Join( + indexDir, + formatFilename_SigExists(epoch, rootCID, 
network), + ) +} + +func formatFilename_SigExists(epoch uint64, rootCid cid.Cid, network indexes.Network) string { + return fmt.Sprintf( + "epoch-%d-%s-%s-%s", + epoch, + rootCid.String(), + network, + "sig-exists.index", + ) } var classicSpewConfig = spew.ConfigState{ From b1ad395fdb6606b0a3449de5ff47a0a37678a1e5 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 12 Jan 2024 14:34:36 +0100 Subject: [PATCH 46/63] Add check-deals command --- cmd-check-deals.go | 158 +++++++++++++++++++++++++++++++++++ main.go | 1 + split-car-fetcher/fetcher.go | 4 +- 3 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 cmd-check-deals.go diff --git a/cmd-check-deals.go b/cmd-check-deals.go new file mode 100644 index 00000000..5bc5b6ae --- /dev/null +++ b/cmd-check-deals.go @@ -0,0 +1,158 @@ +package main + +import ( + "fmt" + "time" + + "github.com/multiformats/go-multiaddr" + "github.com/ybbus/jsonrpc/v3" + + "github.com/anjor/carlet" + splitcarfetcher "github.com/rpcpool/yellowstone-faithful/split-car-fetcher" + "github.com/urfave/cli/v2" + "k8s.io/klog/v2" +) + +func newCmd_check_deals() *cli.Command { + var includePatterns cli.StringSlice + var excludePatterns cli.StringSlice + return &cli.Command{ + Name: "check-deals", + Description: "Validate remote split car retrieval for the given config files", + ArgsUsage: "", + Before: func(c *cli.Context) error { + return nil + }, + Flags: []cli.Flag{ + &cli.StringSliceFlag{ + Name: "include", + Usage: "Include files or dirs matching the given glob patterns", + Value: cli.NewStringSlice(), + Destination: &includePatterns, + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "Exclude files or dirs matching the given glob patterns", + Value: cli.NewStringSlice(".git"), + Destination: &excludePatterns, + }, + }, + Action: func(c *cli.Context) error { + src := c.Args().Slice() + configFiles, err := GetListOfConfigFiles( + src, + includePatterns.Value(), + excludePatterns.Value(), + ) + if err != nil { + return cli.Exit(err.Error(), 1) + } + klog.Infof("Found %d config files:", len(configFiles)) + for _, configFile := range configFiles { + fmt.Printf(" - %s\n", configFile) + } + + // Load configs: + configs := make(ConfigSlice, 0) + for _, configFile := range configFiles { + config, err := LoadConfig(configFile) + if err != nil { + return cli.Exit(fmt.Sprintf("failed to load config file %q: %s", configFile, err.Error()), 1) + } + configs = append(configs, config) + } + + configs.SortByEpoch() + klog.Infof("Loaded %d epoch configs (NO VALIDATION)", len(configs)) + klog.Info("Will check remote storage pieces for each epoch config") + + // Check deals: + for _, config := range configs { + epoch := *config.Epoch + klog.Infof("Checking pieces for epoch %d", epoch) + isLassieMode := config.IsFilecoinMode() + isCarMode := !isLassieMode + if isCarMode && config.IsSplitCarMode() { + klog.Infof("Checking pieces for epoch %d, CAR mode", epoch) + + metadata, err := splitcarfetcher.MetadataFromYaml(string(config.Data.Car.FromPieces.Metadata.URI)) + if err != nil { + return fmt.Errorf("failed to read pieces metadata: %w", err) + } + + dealRegistry, err := splitcarfetcher.DealsFromCSV(string(config.Data.Car.FromPieces.Deals.URI)) + if err != nil { + return fmt.Errorf("failed to read deals: %w", err) + } + + lotusAPIAddress := "https://api.node.glif.io" + cl := jsonrpc.NewClient(lotusAPIAddress) + dm := splitcarfetcher.NewMinerInfo( + cl, + 5*time.Minute, + 5*time.Second, + ) + + _, err = splitcarfetcher.NewSplitCarReader(metadata.CarPieces, + func(piece 
carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { + minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) + if !ok { + return nil, fmt.Errorf("failed to find miner for piece CID %s", piece.CommP) + } + klog.Infof("piece CID %s is supposedly stored on miner %s", piece.CommP, minerID) + minerInfo, err := dm.GetProviderInfo(c.Context, minerID) + if err != nil { + return nil, fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) + } + if len(minerInfo.Multiaddrs) == 0 { + return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) + } + // spew.Dump(minerInfo) + // extract the IP address from the multiaddr: + split := multiaddr.Split(minerInfo.Multiaddrs[0]) + if len(split) < 2 { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + component0 := split[0].(*multiaddr.Component) + component1 := split[1].(*multiaddr.Component) + + var ip string + + if component0.Protocol().Code == multiaddr.P_IP4 { + ip = component0.Value() + } else if component1.Protocol().Code == multiaddr.P_IP4 { + ip = component1.Value() + } else { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + // reset the port to 80: + // TODO: use the appropriate port (80, better if 443 with TLS) + port := "80" + minerIP := fmt.Sprintf("%s:%s", ip, port) + klog.Infof("epoch %d: piece CID %s is stored on miner %s (%s)", epoch, piece.CommP, minerID, minerIP) + formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) + + size, err := splitcarfetcher.GetContentSizeWithHeadOrZeroRange(formattedURL) + if err != nil { + return nil, fmt.Errorf("epoch %d: failed to get content size from %q: %s", epoch, formattedURL, err) + } + klog.Infof("[OK] content size for piece CID %s is %d", piece.CommP, size) + return splitcarfetcher.NewRemoteFileSplitCarReader( + piece.CommP.String(), + formattedURL, + ) + }) + if err != nil { + return fmt.Errorf("epoch %d: failed to open CAR file from pieces: %w", epoch, err) + } else { + klog.Infof("[OK] Pieces for epoch %d are all retrievable", epoch) + } + } else { + klog.Infof("Car file for epoch %d is not stored as split pieces, skipping", epoch) + } + } + + return nil + }, + } +} diff --git a/main.go b/main.go index 1925f19a..b8b08ca9 100644 --- a/main.go +++ b/main.go @@ -57,6 +57,7 @@ func main() { newCmd_XTraverse(), newCmd_Version(), newCmd_rpc(), + newCmd_check_deals(), }, } diff --git a/split-car-fetcher/fetcher.go b/split-car-fetcher/fetcher.go index acf4ce1b..74ab1a23 100644 --- a/split-car-fetcher/fetcher.go +++ b/split-car-fetcher/fetcher.go @@ -62,7 +62,7 @@ func (fscr *FileSplitCarReader) Size() int64 { return fscr.size } -func getContentSizeWithHeadOrZeroRange(url string) (int64, error) { +func GetContentSizeWithHeadOrZeroRange(url string) (int64, error) { // try sending a HEAD request to the server to get the file size: resp, err := http.Head(url) if err != nil { @@ -106,7 +106,7 @@ type RemoteFileSplitCarReader struct { } func NewRemoteFileSplitCarReader(commP string, url string) (*RemoteFileSplitCarReader, error) { - size, err := getContentSizeWithHeadOrZeroRange(url) + size, err := GetContentSizeWithHeadOrZeroRange(url) if err != nil { return nil, fmt.Errorf("failed to get content size from %q: %s", url, err) } From bdfafc4dbfde280d819b1a8e0488480aef6c0a24 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Tue, 16 Jan 2024 15:31:47 +0100 Subject: [PATCH 47/63] More logs for check-deals --- cmd-check-deals.go | 30 +++++++++++++++++++++++------- 1 file 
changed, 23 insertions(+), 7 deletions(-) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index 5bc5b6ae..a4387894 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -4,6 +4,7 @@ import ( "fmt" "time" + "github.com/davecgh/go-spew/spew" "github.com/multiformats/go-multiaddr" "github.com/ybbus/jsonrpc/v3" @@ -69,11 +70,10 @@ func newCmd_check_deals() *cli.Command { // Check deals: for _, config := range configs { epoch := *config.Epoch - klog.Infof("Checking pieces for epoch %d", epoch) isLassieMode := config.IsFilecoinMode() isCarMode := !isLassieMode if isCarMode && config.IsSplitCarMode() { - klog.Infof("Checking pieces for epoch %d, CAR mode", epoch) + klog.Infof("Checking pieces for epoch %d from %q", epoch, config.ConfigFilepath()) metadata, err := splitcarfetcher.MetadataFromYaml(string(config.Data.Car.FromPieces.Metadata.URI)) if err != nil { @@ -107,7 +107,7 @@ func newCmd_check_deals() *cli.Command { if len(minerInfo.Multiaddrs) == 0 { return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) } - // spew.Dump(minerInfo) + spew.Dump(minerInfo) // extract the IP address from the multiaddr: split := multiaddr.Split(minerInfo.Multiaddrs[0]) if len(split) < 2 { @@ -134,18 +134,34 @@ func newCmd_check_deals() *cli.Command { size, err := splitcarfetcher.GetContentSizeWithHeadOrZeroRange(formattedURL) if err != nil { - return nil, fmt.Errorf("epoch %d: failed to get content size from %q: %s", epoch, formattedURL, err) + return nil, fmt.Errorf( + "failed to get content size from %q (miner=%s): %s", + formattedURL, + minerID, + err, + ) } - klog.Infof("[OK] content size for piece CID %s is %d", piece.CommP, size) + klog.Infof( + "[OK] content size for piece CID %s is %d (from miner %s, resolved to %s)", + piece.CommP, + size, + minerID, + minerIP, + ) return splitcarfetcher.NewRemoteFileSplitCarReader( piece.CommP.String(), formattedURL, ) }) if err != nil { - return fmt.Errorf("epoch %d: failed to open CAR file from pieces: %w", epoch, err) + return fmt.Errorf( + "epoch %d from %q: failed to open CAR file from pieces: %w", + epoch, + config.ConfigFilepath(), + err, + ) } else { - klog.Infof("[OK] Pieces for epoch %d are all retrievable", epoch) + klog.Infof("[OK] Pieces for epoch %d from %q are all retrievable", epoch, config.ConfigFilepath()) } } else { klog.Infof("Car file for epoch %d is not stored as split pieces, skipping", epoch) From f0eabe7bd0d5b86718957e5a53617b4f6c8d2ec6 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Wed, 17 Jan 2024 15:20:13 +0100 Subject: [PATCH 48/63] Check all pieces before returning an error (all errors together) --- cmd-check-deals.go | 159 +++++++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 62 deletions(-) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index a4387894..6bda4f4d 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -1,14 +1,16 @@ package main import ( + "context" + "errors" "fmt" "time" + "github.com/anjor/carlet" "github.com/davecgh/go-spew/spew" "github.com/multiformats/go-multiaddr" "github.com/ybbus/jsonrpc/v3" - "github.com/anjor/carlet" splitcarfetcher "github.com/rpcpool/yellowstone-faithful/split-car-fetcher" "github.com/urfave/cli/v2" "k8s.io/klog/v2" @@ -93,69 +95,16 @@ func newCmd_check_deals() *cli.Command { 5*time.Second, ) - _, err = splitcarfetcher.NewSplitCarReader(metadata.CarPieces, - func(piece carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { - minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) - if !ok { - return nil, fmt.Errorf("failed to 
find miner for piece CID %s", piece.CommP) - } - klog.Infof("piece CID %s is supposedly stored on miner %s", piece.CommP, minerID) - minerInfo, err := dm.GetProviderInfo(c.Context, minerID) - if err != nil { - return nil, fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) - } - if len(minerInfo.Multiaddrs) == 0 { - return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) - } - spew.Dump(minerInfo) - // extract the IP address from the multiaddr: - split := multiaddr.Split(minerInfo.Multiaddrs[0]) - if len(split) < 2 { - return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) - } - component0 := split[0].(*multiaddr.Component) - component1 := split[1].(*multiaddr.Component) - - var ip string - - if component0.Protocol().Code == multiaddr.P_IP4 { - ip = component0.Value() - } else if component1.Protocol().Code == multiaddr.P_IP4 { - ip = component1.Value() - } else { - return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) - } - // reset the port to 80: - // TODO: use the appropriate port (80, better if 443 with TLS) - port := "80" - minerIP := fmt.Sprintf("%s:%s", ip, port) - klog.Infof("epoch %d: piece CID %s is stored on miner %s (%s)", epoch, piece.CommP, minerID, minerIP) - formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) - - size, err := splitcarfetcher.GetContentSizeWithHeadOrZeroRange(formattedURL) - if err != nil { - return nil, fmt.Errorf( - "failed to get content size from %q (miner=%s): %s", - formattedURL, - minerID, - err, - ) - } - klog.Infof( - "[OK] content size for piece CID %s is %d (from miner %s, resolved to %s)", - piece.CommP, - size, - minerID, - minerIP, - ) - return splitcarfetcher.NewRemoteFileSplitCarReader( - piece.CommP.String(), - formattedURL, - ) - }) + err = checkAllPieces( + c.Context, + epoch, + metadata, + dealRegistry, + &dm, + ) if err != nil { return fmt.Errorf( - "epoch %d from %q: failed to open CAR file from pieces: %w", + "error while checking pieces for epoch %d from %q: failed to open CAR file from pieces: %w", epoch, config.ConfigFilepath(), err, @@ -172,3 +121,89 @@ func newCmd_check_deals() *cli.Command { }, } } + +func checkAllPieces( + ctx context.Context, + epoch uint64, + meta *splitcarfetcher.Metadata, + dealRegistry *splitcarfetcher.DealRegistry, + dm *splitcarfetcher.MinerInfoCache, +) error { + errs := make([]error, 0) + numPieces := len(meta.CarPieces.CarPieces) + for pieceIndex, piece := range meta.CarPieces.CarPieces { + pieceIndex := pieceIndex + err := func(piece carlet.CarFile) error { + minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) + if !ok { + return fmt.Errorf("failed to find miner for piece CID %s", piece.CommP) + } + klog.Infof( + "piece %d/%d with CID %s is supposedly stored on miner %s", + pieceIndex+1, + numPieces, + piece.CommP, + minerID, + ) + minerInfo, err := dm.GetProviderInfo(ctx, minerID) + if err != nil { + return fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) + } + if len(minerInfo.Multiaddrs) == 0 { + return fmt.Errorf("miner %s has no multiaddrs", minerID) + } + spew.Dump(minerInfo) + // extract the IP address from the multiaddr: + split := multiaddr.Split(minerInfo.Multiaddrs[0]) + if len(split) < 2 { + return fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + component0 := split[0].(*multiaddr.Component) + component1 := split[1].(*multiaddr.Component) + + var ip string + + if component0.Protocol().Code == 
multiaddr.P_IP4 { + ip = component0.Value() + } else if component1.Protocol().Code == multiaddr.P_IP4 { + ip = component1.Value() + } else { + return fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + // reset the port to 80: + // TODO: use the appropriate port (80, better if 443 with TLS) + port := "80" + minerIP := fmt.Sprintf("%s:%s", ip, port) + klog.Infof("epoch %d: piece CID %s is stored on miner %s (%s)", epoch, piece.CommP, minerID, minerIP) + formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) + + size, err := splitcarfetcher.GetContentSizeWithHeadOrZeroRange(formattedURL) + if err != nil { + return fmt.Errorf( + "piece %d/%d with CID %s is supposedly stored on miner %s (%s), but failed to get content size from %q: %w", + pieceIndex+1, + numPieces, + piece.CommP, + minerID, + minerIP, + formattedURL, + err, + ) + } + klog.Infof( + "[OK] piece %d/%d: content size for piece CID %s is %d (from miner %s, resolved to %s)", + pieceIndex+1, + numPieces, + piece.CommP, + size, + minerID, + minerIP, + ) + return nil + }(piece) + if err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) +} From c996ad02323ec60f6cd681bef7741fdfba3a554f Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 18:59:42 +0100 Subject: [PATCH 49/63] Add deprecated indexes --- deprecated/bucketteer/bucketteer.go | 38 ++ deprecated/bucketteer/bucketteer_test.go | 188 ++++++++ deprecated/bucketteer/example/main.go | 146 ++++++ deprecated/bucketteer/read.go | 203 ++++++++ deprecated/bucketteer/write.go | 292 ++++++++++++ deprecated/compactindex/LICENSE | 202 ++++++++ deprecated/compactindex/README.md | 132 ++++++ deprecated/compactindex/build.go | 301 ++++++++++++ deprecated/compactindex/build_test.go | 248 ++++++++++ deprecated/compactindex/compactindex.go | 277 +++++++++++ deprecated/compactindex/compactindex_test.go | 84 ++++ deprecated/compactindex/fallocate_fake.go | 27 ++ deprecated/compactindex/fallocate_generic.go | 11 + deprecated/compactindex/fallocate_linux.go | 17 + deprecated/compactindex/query.go | 212 +++++++++ deprecated/compactindex/query_test.go | 58 +++ deprecated/compactindex36/LICENSE | 202 ++++++++ deprecated/compactindex36/README.md | 137 ++++++ deprecated/compactindex36/build.go | 310 +++++++++++++ deprecated/compactindex36/build_test.go | 438 ++++++++++++++++++ deprecated/compactindex36/compactindex.go | 280 +++++++++++ .../compactindex36/compactindex_test.go | 89 ++++ deprecated/compactindex36/fallocate_fake.go | 27 ++ .../compactindex36/fallocate_generic.go | 11 + deprecated/compactindex36/fallocate_linux.go | 17 + deprecated/compactindex36/query.go | 219 +++++++++ deprecated/compactindex36/query_test.go | 58 +++ indexes/deprecated-index-cid-to-offset.go | 76 +++ indexes/deprecated.go | 45 ++ 29 files changed, 4345 insertions(+) create mode 100644 deprecated/bucketteer/bucketteer.go create mode 100644 deprecated/bucketteer/bucketteer_test.go create mode 100644 deprecated/bucketteer/example/main.go create mode 100644 deprecated/bucketteer/read.go create mode 100644 deprecated/bucketteer/write.go create mode 100644 deprecated/compactindex/LICENSE create mode 100644 deprecated/compactindex/README.md create mode 100644 deprecated/compactindex/build.go create mode 100644 deprecated/compactindex/build_test.go create mode 100644 deprecated/compactindex/compactindex.go create mode 100644 deprecated/compactindex/compactindex_test.go create mode 100644 deprecated/compactindex/fallocate_fake.go create mode 100644 
deprecated/compactindex/fallocate_generic.go create mode 100644 deprecated/compactindex/fallocate_linux.go create mode 100644 deprecated/compactindex/query.go create mode 100644 deprecated/compactindex/query_test.go create mode 100644 deprecated/compactindex36/LICENSE create mode 100644 deprecated/compactindex36/README.md create mode 100644 deprecated/compactindex36/build.go create mode 100644 deprecated/compactindex36/build_test.go create mode 100644 deprecated/compactindex36/compactindex.go create mode 100644 deprecated/compactindex36/compactindex_test.go create mode 100644 deprecated/compactindex36/fallocate_fake.go create mode 100644 deprecated/compactindex36/fallocate_generic.go create mode 100644 deprecated/compactindex36/fallocate_linux.go create mode 100644 deprecated/compactindex36/query.go create mode 100644 deprecated/compactindex36/query_test.go create mode 100644 indexes/deprecated-index-cid-to-offset.go create mode 100644 indexes/deprecated.go diff --git a/deprecated/bucketteer/bucketteer.go b/deprecated/bucketteer/bucketteer.go new file mode 100644 index 00000000..43bf18ca --- /dev/null +++ b/deprecated/bucketteer/bucketteer.go @@ -0,0 +1,38 @@ +package bucketteer + +import ( + "sort" + + "github.com/cespare/xxhash/v2" +) + +var _Magic = [8]byte{'b', 'u', 'c', 'k', 'e', 't', 't', 'e'} + +func Magic() [8]byte { + return _Magic +} + +const Version = uint64(1) + +func sortWithCompare[T any](a []T, compare func(i, j int) int) { + sort.Slice(a, func(i, j int) bool { + return compare(i, j) < 0 + }) + sorted := make([]T, len(a)) + eytzinger(a, sorted, 0, 1) + copy(a, sorted) +} + +func eytzinger[T any](in, out []T, i, k int) int { + if k <= len(in) { + i = eytzinger(in, out, i, 2*k) + out[k-1] = in[i] + i++ + i = eytzinger(in, out, i, 2*k+1) + } + return i +} + +func Hash(sig [64]byte) uint64 { + return xxhash.Sum64(sig[:]) +} diff --git a/deprecated/bucketteer/bucketteer_test.go b/deprecated/bucketteer/bucketteer_test.go new file mode 100644 index 00000000..99e4e2a5 --- /dev/null +++ b/deprecated/bucketteer/bucketteer_test.go @@ -0,0 +1,188 @@ +package bucketteer + +import ( + "os" + "path/filepath" + "testing" + + bin "github.com/gagliardetto/binary" + "github.com/stretchr/testify/require" + "golang.org/x/exp/mmap" +) + +func TestBucketteer(t *testing.T) { + path := filepath.Join(t.TempDir(), "test-bucketteer") + wr, err := NewWriter(path) + require.NoError(t, err) + firstSig := [64]byte{1, 2, 3, 4} + wr.Put(firstSig) + + if !wr.Has(firstSig) { + t.Fatal("expected to have firstSig") + } + { + sig := [64]byte{1, 2, 3, 5} + require.False(t, wr.Has(sig)) + wr.Put(sig) + require.True(t, wr.Has(sig)) + } + { + sig := [64]byte{1, 2, 3, 6} + require.False(t, wr.Has(sig)) + wr.Put(sig) + require.True(t, wr.Has(sig)) + } + { + sig := [64]byte{22, 2, 3, 6} + require.False(t, wr.Has(sig)) + wr.Put(sig) + require.True(t, wr.Has(sig)) + } + { + sig := [64]byte{99, 2, 3, 6} + require.False(t, wr.Has(sig)) + wr.Put(sig) + require.True(t, wr.Has(sig)) + } + require.Equal(t, 3, len(wr.prefixToHashes)) + { + gotSize, err := wr.Seal(map[string]string{ + "epoch": "test", + }) + require.NoError(t, err) + require.NoError(t, wr.Close()) + realSize, err := getFizeSize(path) + require.NoError(t, err) + require.Equal(t, realSize, gotSize) + + fileContent, err := os.ReadFile(path) + require.NoError(t, err) + + reader := bin.NewBorshDecoder(fileContent) + + // read header size: + headerSize, err := reader.ReadUint32(bin.LE) + require.NoError(t, err) + require.Equal(t, 
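/* = 79 bytes: 8 magic + 8 version + 8 numMeta + (4+5) "epoch" + (4+4) "test" + 8 numPrefixes + 3*(2+8) prefix/offset pairs */ 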
uint32(8+8+8+(8+(4+5)+(4+4))+(3*(2+8))), headerSize) + + // magic: + { + magicBuf := [8]byte{} + _, err := reader.Read(magicBuf[:]) + require.NoError(t, err) + require.Equal(t, _Magic, magicBuf) + } + // version: + { + got, err := reader.ReadUint64(bin.LE) + require.NoError(t, err) + require.Equal(t, Version, got) + } + { + // read meta: + numMeta, err := reader.ReadUint64(bin.LE) + require.NoError(t, err) + require.Equal(t, uint64(1), numMeta) + + key, err := reader.ReadString() + require.NoError(t, err) + require.Equal(t, "epoch", key) + + value, err := reader.ReadString() + require.NoError(t, err) + require.Equal(t, "test", value) + } + // numPrefixes: + numPrefixes, err := reader.ReadUint64(bin.LE) + require.NoError(t, err) + require.Equal(t, uint64(3), numPrefixes) + // prefix -> offset: + prefixToOffset := make(map[[2]byte]uint64) + { + for i := 0; i < int(numPrefixes); i++ { + var prefix [2]byte + _, err := reader.Read(prefix[:]) + require.NoError(t, err) + offset, err := reader.ReadUint64(bin.LE) + require.NoError(t, err) + prefixToOffset[prefix] = offset + } + } + { + require.Equal(t, + map[[2]uint8]uint64{ + {0x1, 0x2}: 0x0, + {0x16, 0x2}: 0x1c, + {0x63, 0x2}: 0x28, + }, prefixToOffset) + } + contentBuf, err := reader.ReadNBytes(reader.Remaining()) + require.NoError(t, err) + require.Equal(t, + []byte{ + 0x3, 0x0, 0x0, 0x0, // num entries + 0x49, 0xd7, 0xaf, 0x9e, 0x94, 0x4d, 0x9a, 0x6f, + 0x2f, 0x12, 0xdb, 0x5b, 0x1, 0x62, 0xae, 0x1a, + 0x3b, 0xb6, 0x71, 0x5f, 0x4, 0x4f, 0x36, 0xf2, + 0x1, 0x0, 0x0, 0x0, // num entries + 0x58, 0xe1, 0x9d, 0xde, 0x7c, 0xfb, 0xeb, 0x5a, + 0x1, 0x0, 0x0, 0x0, // num entries + 0x4c, 0xbd, 0xa3, 0xed, 0xd3, 0x8b, 0xa8, 0x44, + }, + contentBuf, + ) + contentReader := bin.NewBorshDecoder(contentBuf) + { + for prefix, offset := range prefixToOffset { + // Now read the bucket: + { + err := contentReader.SetPosition(uint(offset)) + require.NoError(t, err) + numHashes, err := contentReader.ReadUint32(bin.LE) + require.NoError(t, err) + switch prefix { + case [2]byte{1, 2}: + require.Equal(t, uint32(3), numHashes) + case [2]byte{22, 2}: + require.Equal(t, uint32(1), numHashes) + case [2]byte{99, 2}: + require.Equal(t, uint32(1), numHashes) + } + + for i := 0; i < int(numHashes); i++ { + hash, err := contentReader.ReadUint64(bin.LE) + require.NoError(t, err) + found := false + for _, h := range wr.prefixToHashes[prefix] { + if h == hash { + found = true + break + } + } + require.True(t, found) + } + } + } + } + { + // read temp file: + require.NoError(t, err) + mmr, err := mmap.Open(path) + require.NoError(t, err) + defer mmr.Close() + reader, err := NewReader(mmr) + require.NoError(t, err) + ok, err := reader.Has(firstSig) + require.NoError(t, err) + require.True(t, ok) + } + } +} + +func getFizeSize(path string) (int64, error) { + info, err := os.Stat(path) + if err != nil { + return 0, err + } + return info.Size(), nil +} diff --git a/deprecated/bucketteer/example/main.go b/deprecated/bucketteer/example/main.go new file mode 100644 index 00000000..f6afffcf --- /dev/null +++ b/deprecated/bucketteer/example/main.go @@ -0,0 +1,146 @@ +package main + +import ( + "crypto/rand" + "flag" + "fmt" + "os" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/dustin/go-humanize" + "github.com/rpcpool/yellowstone-faithful/bucketteer" + "golang.org/x/exp/mmap" +) + +func main() { + startedAt := time.Now() + defer func() { + fmt.Printf("took: %v\n", time.Since(startedAt)) + }() + var numItemsToInsert int + flag.IntVar(&numItemsToInsert, "num", 1_000_000, "num") 
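+	// The first positional argument is the Bucketteer file path; -num controls
+	// how many random signatures are inserted before sealing.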
+ flag.Parse() + + file := flag.Arg(0) // "bucketteer.bin" + if file == "" { + panic("no file specified") + } + + samples := make([][64]byte, 0) + if !fileExistsAndIsNotEmpty(file) { + fmt.Println("File does not exist or is empty, creating it...") + fmt.Println("Items to insert:", humanize.Comma(int64(numItemsToInsert))) + totalWriteStartedAt := time.Now() + buWr, err := bucketteer.NewWriter(file) + if err != nil { + panic(err) + } + defer buWr.Close() + tookBatch := time.Duration(0) + for i := 1; i <= numItemsToInsert; i++ { + sig := newRandomSignature() + startedSet := time.Now() + buWr.Put(sig) + tookBatch += time.Since(startedSet) + if i%100_000 == 0 { + fmt.Print(".") + samples = append(samples, sig) + } + if i%1_000_000 == 0 { + fmt.Print(humanize.Comma(int64(i))) + fmt.Printf( + " · took: %v (%s per item)\n", + tookBatch, + tookBatch/time.Duration(1_000_000), + ) + tookBatch = 0 + } + } + + fmt.Println("writing to file...") + writeStartedAt := time.Now() + _, err = buWr.Seal(nil) + if err != nil { + panic(err) + } + fmt.Println("writing to file took:", time.Since(writeStartedAt)) + fmt.Println("total write took:", time.Since(totalWriteStartedAt)) + } + mmr, err := mmap.Open(file) + if err != nil { + panic(err) + } + defer mmr.Close() + buRd, err := bucketteer.NewReader(mmr) + if err != nil { + panic(err) + } + spew.Dump(buRd.Meta()) + if len(samples) > 0 { + fmt.Println("testing search with samples from the inserted signatures...") + tookBatch := time.Duration(0) + for _, sig := range samples { + startedSearch := time.Now() + found, err := buRd.Has(sig) + if err != nil { + panic(err) + } + if !found { + panic("not found") + } + tookBatch += time.Since(startedSearch) + } + fmt.Println("\n"+" num samples:", len(samples)) + fmt.Println(" search took:", tookBatch) + fmt.Println("avg search took:", tookBatch/time.Duration(len(samples))) + } + if true { + // now search for random signatures that are not in the Bucketteer: + numSearches := 100_000_000 + fmt.Println( + "testing search for random signatures that are not in the Bucketteer (numSearches:", + humanize.Comma(int64(numSearches)), + ")...", + ) + tookBatch := time.Duration(0) + for i := 1; i <= numSearches; i++ { + sig := newRandomSignature() + startedSearch := time.Now() + found, err := buRd.Has(sig) + if err != nil { + panic(err) + } + if found { + panic("found") + } + tookBatch += time.Since(startedSearch) + if i%100_000 == 0 { + fmt.Print(".") + } + } + fmt.Println("\n"+" num candidates:", humanize.Comma(int64(numSearches))) + fmt.Println(" search took:", tookBatch) + fmt.Println("avg search took:", tookBatch/time.Duration(numSearches)) + } +} + +func newRandomSignature() [64]byte { + var sig [64]byte + rand.Read(sig[:]) + return sig +} + +func fileExistsAndIsNotEmpty(path string) bool { + info, err := os.Stat(path) + if os.IsNotExist(err) { + return false + } + if err != nil { + panic(err) + } + if info.Size() == 0 { + return false + } + return true +} diff --git a/deprecated/bucketteer/read.go b/deprecated/bucketteer/read.go new file mode 100644 index 00000000..7c7d2c95 --- /dev/null +++ b/deprecated/bucketteer/read.go @@ -0,0 +1,203 @@ +package bucketteer + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + + bin "github.com/gagliardetto/binary" + "golang.org/x/exp/mmap" +) + +type Reader struct { + contentReader io.ReaderAt + meta map[string]string + prefixToOffset map[[2]byte]uint64 +} + +// Open opens a Bucketteer file in read-only mode, +// using memory-mapped IO. 
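+//
+// A minimal usage sketch (the file path is illustrative only):
+//
+//	rd, _ := bucketteer.Open("epoch-107.sig-exists.index")
+//	defer rd.Close()
+//	ok, _ := rd.Has(sig) // sig is a [64]byte signature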
+func Open(path string) (*Reader, error) {
+	file, err := mmap.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	return NewReader(file)
+}
+
+func NewReader(reader io.ReaderAt) (*Reader, error) {
+	r := &Reader{
+		prefixToOffset: make(map[[2]byte]uint64),
+	}
+	prefixToOffset, meta, headerTotalSize, err := readHeader(reader)
+	if err != nil {
+		return nil, err
+	}
+	r.meta = meta
+	r.prefixToOffset = prefixToOffset
+	r.contentReader = io.NewSectionReader(reader, headerTotalSize, 1<<63-1)
+	return r, nil
+}
+
+func (r *Reader) Close() error {
+	if closer, ok := r.contentReader.(io.Closer); ok {
+		return closer.Close()
+	}
+	return nil
+}
+
+func (r *Reader) Meta() map[string]string {
+	return r.meta
+}
+
+// GetMeta returns the value of the given key.
+// Returns an empty string if the key does not exist.
+func (r *Reader) GetMeta(key string) string {
+	return r.meta[key]
+}
+
+func readHeaderSize(reader io.ReaderAt) (int64, error) {
+	// read header size:
+	headerSizeBuf := make([]byte, 4)
+	if _, err := reader.ReadAt(headerSizeBuf, 0); err != nil {
+		return 0, err
+	}
+	headerSize := int64(binary.LittleEndian.Uint32(headerSizeBuf))
+	return headerSize, nil
+}
+
+func readHeader(reader io.ReaderAt) (map[[2]byte]uint64, map[string]string, int64, error) {
+	// read header size:
+	headerSize, err := readHeaderSize(reader)
+	if err != nil {
+		return nil, nil, 0, err
+	}
+	// read header bytes:
+	headerBuf := make([]byte, headerSize)
+	if _, err := reader.ReadAt(headerBuf, 4); err != nil {
+		return nil, nil, 0, err
+	}
+	// decode header:
+	decoder := bin.NewBorshDecoder(headerBuf)
+
+	// magic:
+	{
+		magicBuf := make([]byte, len(_Magic[:]))
+		_, err := decoder.Read(magicBuf)
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		if !bytes.Equal(magicBuf, _Magic[:]) {
+			return nil, nil, 0, fmt.Errorf("invalid magic: %x", string(magicBuf))
+		}
+	}
+	// version:
+	{
+		got, err := decoder.ReadUint64(bin.LE)
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		if got != Version {
+			return nil, nil, 0, fmt.Errorf("expected version %d, got %d", Version, got)
+		}
+	}
+	var meta map[string]string
+	{
+		// read meta:
+		numMeta, err := decoder.ReadUint64(bin.LE)
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		meta = make(map[string]string, numMeta)
+		for i := uint64(0); i < numMeta; i++ {
+			key, err := decoder.ReadString()
+			if err != nil {
+				return nil, nil, 0, err
+			}
+			value, err := decoder.ReadString()
+			if err != nil {
+				return nil, nil, 0, err
+			}
+			meta[key] = value
+		}
+	}
+	// numPrefixes:
+	numPrefixes, err := decoder.ReadUint64(bin.LE)
+	if err != nil {
+		return nil, nil, 0, err
+	}
+	// prefix -> offset:
+	prefixToOffset := make(map[[2]byte]uint64, numPrefixes)
+	for i := uint64(0); i < numPrefixes; i++ {
+		var prefix [2]byte
+		_, err := decoder.Read(prefix[:])
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		offset, err := decoder.ReadUint64(bin.LE)
+		if err != nil {
+			return nil, nil, 0, err
+		}
+		prefixToOffset[prefix] = offset
+	}
+	return prefixToOffset, meta, headerSize + 4, err
+}
+
+func (r *Reader) Has(sig [64]byte) (bool, error) {
+	prefix := [2]byte{sig[0], sig[1]}
+	offset, ok := r.prefixToOffset[prefix]
+	if !ok {
+		return false, nil
+	}
+	// numHashes:
+	numHashesBuf := make([]byte, 4)
+	_, err := r.contentReader.ReadAt(numHashesBuf, int64(offset))
+	if err != nil {
+		return false, err
+	}
+	numHashes := binary.LittleEndian.Uint32(numHashesBuf)
+	bucketReader := io.NewSectionReader(r.contentReader, int64(offset)+4, int64(numHashes*8))
+
+	// hashes:
+	wantedHash := Hash(sig)
+	got, err := searchEytzinger(0, 
int(numHashes), wantedHash, func(index int) (uint64, error) { + pos := int64(index * 8) + return readUint64Le(bucketReader, pos) + }) + if err != nil { + if err == ErrNotFound { + return false, nil + } + return false, err + } + return got == wantedHash, nil +} + +func searchEytzinger(min int, max int, x uint64, getter func(int) (uint64, error)) (uint64, error) { + var index int + for index < max { + k, err := getter(index) + if err != nil { + return 0, err + } + if k == x { + return k, nil + } + index = index<<1 | 1 + if k < x { + index++ + } + } + return 0, ErrNotFound +} + +var ErrNotFound = fmt.Errorf("not found") + +func readUint64Le(reader io.ReaderAt, pos int64) (uint64, error) { + buf := make([]byte, 8) + _, err := reader.ReadAt(buf, pos) + if err != nil { + return 0, err + } + return binary.LittleEndian.Uint64(buf), nil +} diff --git a/deprecated/bucketteer/write.go b/deprecated/bucketteer/write.go new file mode 100644 index 00000000..5837f1be --- /dev/null +++ b/deprecated/bucketteer/write.go @@ -0,0 +1,292 @@ +package bucketteer + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "os" + "sort" + + bin "github.com/gagliardetto/binary" +) + +type Writer struct { + destination *os.File + writer *bufio.Writer + prefixToHashes map[[2]byte][]uint64 // prefix -> hashes +} + +const ( + _MiB = 1024 * 1024 + writeBufSize = _MiB * 10 +) + +func NewWriter(path string) (*Writer, error) { + if ok, err := isDir(path); err != nil { + return nil, err + } else if ok { + return nil, fmt.Errorf("path is a directory") + } + if ok, err := fileIsBlank(path); err != nil { + return nil, err + } else if !ok { + return nil, fmt.Errorf("file is not blank") + } + file, err := os.Create(path) + if err != nil { + return nil, err + } + return &Writer{ + writer: bufio.NewWriterSize(file, writeBufSize), + destination: file, + prefixToHashes: make(map[[2]byte][]uint64), + }, nil +} + +// Put adds the given signature to the Bucketteer. +// Cannot be called concurrently. +func (b *Writer) Put(sig [64]byte) { + var prefix [2]byte + copy(prefix[:], sig[:2]) + b.prefixToHashes[prefix] = append(b.prefixToHashes[prefix], Hash(sig)) +} + +// Has returns true if the Bucketteer has seen the given signature. +func (b *Writer) Has(sig [64]byte) bool { + var prefix [2]byte + copy(prefix[:], sig[:2]) + hash := Hash(sig) + for _, h := range b.prefixToHashes[prefix] { + if h == hash { + return true + } + } + return false +} + +func (b *Writer) Close() error { + return b.destination.Close() +} + +// Seal writes the Bucketteer's state to the given writer. 
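+// It truncates the destination file, writes out all buckets, then overwrites
+// the draft header in place; the returned int64 is the total number of bytes
+// written (header plus buckets).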
+func (b *Writer) Seal(meta map[string]string) (int64, error) { + // truncate file and seek to beginning: + if err := b.destination.Truncate(0); err != nil { + return 0, err + } + if _, err := b.destination.Seek(0, 0); err != nil { + return 0, err + } + newHeader, size, err := seal(b.writer, b.prefixToHashes, meta) + if err != nil { + return 0, err + } + return size, overwriteFileContentAt(b.destination, 0, newHeader) +} + +func createHeader( + magic [8]byte, + version uint64, + headerSizeIn uint32, + meta map[string]string, + prefixToOffset map[[2]byte]uint64, +) ([]byte, error) { + tmpHeaderBuf := new(bytes.Buffer) + headerWriter := bin.NewBorshEncoder(tmpHeaderBuf) + + // write header size: + if err := headerWriter.WriteUint32(headerSizeIn, binary.LittleEndian); err != nil { + return nil, err + } + // write magic: + if n, err := headerWriter.Write(magic[:]); err != nil { + return nil, err + } else { + if n != 8 { + return nil, fmt.Errorf("invalid number of bytes written for magic: %d", n) + } + } + // write version uint64 + if err := headerWriter.WriteUint64(version, binary.LittleEndian); err != nil { + return nil, err + } + // write meta + { + // write num meta entries + if err := headerWriter.WriteUint64(uint64(len(meta)), binary.LittleEndian); err != nil { + return nil, err + } + // write meta entries + for k, v := range meta { + if err := headerWriter.WriteString(k); err != nil { + return nil, err + } + if err := headerWriter.WriteString(v); err != nil { + return nil, err + } + } + } + // write num buckets + if err := headerWriter.WriteUint64(uint64(len(prefixToOffset)), binary.LittleEndian); err != nil { + return nil, err + } + + prefixes := getSortedPrefixes(prefixToOffset) + // write prefix+offset pairs + for _, prefix := range prefixes { + if _, err := headerWriter.Write(prefix[:]); err != nil { + return nil, err + } + offset := prefixToOffset[prefix] + if err := headerWriter.WriteUint64(offset, binary.LittleEndian); err != nil { + return nil, err + } + } + return tmpHeaderBuf.Bytes(), nil +} + +func overwriteFileContentAt( + file *os.File, + offset int64, + data []byte, +) error { + wrote, err := file.WriteAt(data, offset) + if err != nil { + return err + } + if wrote != len(data) { + return fmt.Errorf("wrote %d bytes, expected to write %d bytes", wrote, len(data)) + } + return err +} + +func getSortedPrefixes[K any](prefixToHashes map[[2]byte]K) [][2]byte { + prefixes := make([][2]byte, 0, len(prefixToHashes)) + for prefix := range prefixToHashes { + prefixes = append(prefixes, prefix) + } + sort.Slice(prefixes, func(i, j int) bool { + return bytes.Compare(prefixes[i][:], prefixes[j][:]) < 0 + }) + return prefixes +} + +func seal( + out *bufio.Writer, + prefixToHashes map[[2]byte][]uint64, + meta map[string]string, +) ([]byte, int64, error) { + prefixes := getSortedPrefixes(prefixToHashes) + prefixToOffset := make(map[[2]byte]uint64, len(prefixes)) + for _, prefix := range prefixes { + // initialize all offsets to 0: + prefixToOffset[prefix] = 0 + } + + totalWritten := int64(0) + // create and write draft header: + header, err := createHeader( + _Magic, + Version, + 0, // header size + meta, + prefixToOffset, + ) + if err != nil { + return nil, 0, err + } + headerSize, err := out.Write(header) + if err != nil { + return nil, 0, err + } + totalWritten += int64(headerSize) + + previousOffset := uint64(0) + for _, prefix := range prefixes { + entries := getCleanSet(prefixToHashes[prefix]) + if len(entries) != len(prefixToHashes[prefix]) { + panic(fmt.Sprintf("duplicate hashes for 
prefix %v", prefix)) + } + sortWithCompare(entries, func(i, j int) int { + if entries[i] < entries[j] { + return -1 + } else if entries[i] > entries[j] { + return 1 + } + return 0 + }) + + thisSize := 4 + len(entries)*8 + // write the clean set to the buckets buffer + if err := binary.Write(out, binary.LittleEndian, uint32(len(entries))); err != nil { + return nil, 0, err + } + for _, h := range entries { + if err := binary.Write(out, binary.LittleEndian, h); err != nil { + return nil, 0, err + } + } + + prefixToOffset[prefix] = previousOffset + previousOffset = previousOffset + uint64(thisSize) + totalWritten += int64(thisSize) + } + + // flush the buckets buffer: + if err := out.Flush(); err != nil { + return nil, 0, err + } + + // write final header by overwriting the draft header: + updatedHeader, err := createHeader( + _Magic, + Version, + uint32(headerSize-4), // -4 because we don't count the header size itself + meta, + prefixToOffset, + ) + if err != nil { + return nil, 0, err + } + return updatedHeader, totalWritten, err +} + +// getCleanSet returns a sorted, deduplicated copy of getCleanSet. +func getCleanSet(entries []uint64) []uint64 { + // sort: + sort.Slice(entries, func(i, j int) bool { + return entries[i] < entries[j] + }) + // dedup: + out := make([]uint64, 0, len(entries)) + for i := 0; i < len(entries); i++ { + if i > 0 && entries[i] == entries[i-1] { + continue + } + out = append(out, entries[i]) + } + return out +} + +func fileIsBlank(path string) (bool, error) { + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return true, nil + } + return false, err + } + return info.Size() == 0, nil +} + +func isDir(path string) (bool, error) { + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + return info.IsDir(), nil +} diff --git a/deprecated/compactindex/LICENSE b/deprecated/compactindex/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/deprecated/compactindex/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/deprecated/compactindex/README.md b/deprecated/compactindex/README.md new file mode 100644 index 00000000..d6398818 --- /dev/null +++ b/deprecated/compactindex/README.md @@ -0,0 +1,132 @@ +# a fast flat-file index for constant datasets + +This package specifies a file format and Go implementation for indexing constant datasets. 
+
+*`compactindex` …*
+- is an immutable file format;
+- maps arbitrary keys into offsets in an external flat file;
+- consumes a constant amount of space per entry
+  - ~6-8 bytes, regardless of key size
+  - 3 bytes of entry hash, plus a byte-aligned value sized to the target file (typically 3-5 bytes)
+- `O(1)` complexity queries, with `2 + log2(10000)` lookups worst- & average-case (binary search);
+- during construction, requires near-constant memory and `O(n)` scratch space on disk, where `n` is the number of entries per file;
+- during construction, sustains a write rate above 500k entries/s per core (2.5 GHz Intel laptop);
+- works on any storage supporting random reads (regular files, HTTP range requests, on-chain, ...);
+- is based on the "FKS method", which uses perfect (collision-free) hash functions in a two-level hashtable; [^1]
+- is inspired by D. J. Bernstein's "constant database"; [^2]
+- uses the xxHash64 non-cryptographic hash function. [^3]
+
+Refer to the Go documentation for the algorithms used and implementation details.
+
+[![Go Reference](https://pkg.go.dev/badge/go.firedancer.io/radiance/pkg/compactindex.svg)](https://pkg.go.dev/go.firedancer.io/radiance/pkg/compactindex)
+
+[^1]: Fredman, M. L., Komlós, J., & Szemerédi, E. (1984). Storing a Sparse Table with O(1) Worst Case Access Time. Journal of the ACM, 31(3), 538–544. https://doi.org/10.1145/828.1884
+[^2]: cdb by D. J. Bernstein: https://cr.yp.to/cdb.html
+[^3]: Go implementation of xxHash by @cespare: https://github.com/cespare/xxhash/
+
+## Interface
+
+In programming terms:
+
+```rs
+fn lookup(key: &[u8]) -> Option<u64>
+```
+
+Given an arbitrary key, the index
+- states whether the key exists in the index
+- if it exists, maps the key to an integer (usually an offset into a file)
+
+## Examples
+
+Here are some example scenarios where `compactindex` is useful:
+
+- When working with immutable data structures
+  - Example: Indexing [IPLD CAR files][3] carrying Merkle-DAGs of content-addressable data
+- When working with archived/constant data
+  - Example: Indexing files in `.tar` archives
+- When dealing with immutable remote storage such as S3-like object storage
+  - Example: Storing the index and target file in S3, then using [HTTP range requests][4] to efficiently query data
+
+[3]: https://ipld.io/specs/transport/car/
+[4]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
+
+Here are some things compactindex cannot do:
+
+- Cannot add more entries to an existing index
+  - Reason 1: indexes are tightly packed, so there is no space to insert new entries (though `fallocate(2)` with `FALLOC_FL_INSERT_RANGE` would technically work)
+  - Reason 2: the second-level hashtable uses a perfect hash function ensuring collision-free indexing of a subset of entries;
+    inserting new entries might cause a collision, requiring the affected bucket's hash function to be re-mined
+  - Reason 3: adding too many entries will eventually create an imbalance in the first-level hashtable;
+    fixing this imbalance effectively requires re-constructing the file from scratch
+- Cannot iterate over keys
+  - Reason: compactindex stores hashes, not the entries themselves.
+    This saves space and still allows the efficient random reads used during binary search
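+
+As a sketch in Go (these are the names exposed by this package's Go implementation; the file name and elided error handling are illustrative only):
+
+```go
+f, _ := os.Open("file.index")     // a previously built index
+db, _ := compactindex.Open(f)     // parses the file header
+bucket, _ := db.LookupBucket(key) // first level: hash key to bucket
+value, _ := bucket.Lookup(key)    // second level: binary search in bucket
+```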
+
+## File Format (v0)
+
+**Encoding**
+
+The file format contains binary packed structures with byte alignment.
+
+Integers are encoded as little endian.
+
+**File Header**
+
+The file begins with a 32 byte file header.
+
+```rust
+#[repr(packed)]
+struct FileHeader {
+    magic: [u8; 8],       // 0x00
+    max_value: u64,       // 0x08
+    num_buckets: u32,     // 0x10
+    padding_14: [u8; 12], // 0x14
+}
+```
+
+- `magic` is set to the UTF-8 string `"rdcecidx"`.
+  The reader should reject files that don't start with this string.
+- `num_buckets` is set to the number of hashtable buckets.
+- `max_value` is the highest value an index entry can hold; it determines the integer width of index values.
+- `padding_14` must be zero. (reserved for future use)
+
+**Bucket Header Table**
+
+The file header is followed by a vector of bucket headers.
+Their number is set by `num_buckets` in the file header.
+
+Each bucket header is 16 bytes long.
+
+```rust
+#[repr(packed)]
+struct BucketHeader {
+    hash_domain: u32, // 0x00
+    num_entries: u32, // 0x04
+    hash_len: u8,     // 0x08
+    padding_09: u8,   // 0x09
+    file_offset: u48, // 0x0a
+}
+```
+
+- `hash_domain` is a "salt" to the per-bucket hash function.
+- `num_entries` is set to the number of records in the bucket.
+- `hash_len` is the size of the per-record hash in bytes, currently hardcoded to `3`.
+- `padding_09` must be zero.
+- `file_offset` is an offset from the beginning of the file header to the start of the bucket entries.
+
+**Bucket Entry Table**
+
+Each bucket has a vector of entries with length `num_entries`.
+This structure makes up the vast majority of the index.
+
+```rust
+#[repr(packed)]
+struct Entry {
+    hash: u??,
+    value: u??,
+}
+```
+
+The size of an entry is static within a bucket. It is determined by its components:
+- The size of `hash` in bytes equals `hash_len`
+- The size of `value` in bytes equals the byte-aligned integer width that is minimally required to represent `max_value`
+
+For example, with `hash_len = 3` and a `max_value` that requires 5 bytes, each entry occupies 3 + 5 = 8 bytes.
diff --git a/deprecated/compactindex/build.go b/deprecated/compactindex/build.go
new file mode 100644
index 00000000..57eb06c0
--- /dev/null
+++ b/deprecated/compactindex/build.go
@@ -0,0 +1,301 @@
+package compactindex
+
+import (
+	"bufio"
+	"context"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"os"
+	"path/filepath"
+	"sort"
+	"syscall"
+)
+
+// Builder creates new compactindex files.
+type Builder struct {
+	Header
+	buckets []tempBucket
+	dir     string
+}
+
+// NewBuilder creates a new index builder.
+//
+// If dir is an empty string, a random temporary directory is used.
+//
+// numItems refers to the number of items in the index.
+//
+// targetFileSize is the size of the file that index entries point to.
+// Can be set to zero if unknown, which results in a less efficient (larger) index.
+func NewBuilder(dir string, numItems uint, targetFileSize uint64) (*Builder, error) {
+	if dir == "" {
+		var err error
+		dir, err = os.MkdirTemp("", "compactindex-")
+		if err != nil {
+			return nil, fmt.Errorf("failed to create temp dir: %w", err)
+		}
+	}
+	if targetFileSize == 0 {
+		targetFileSize = math.MaxUint64
+	}
+
+	numBuckets := (numItems + targetEntriesPerBucket - 1) / targetEntriesPerBucket
+	buckets := make([]tempBucket, numBuckets)
+	for i := range buckets {
+		name := filepath.Join(dir, fmt.Sprintf("keys-%d", i))
+		f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666)
+		if err != nil {
+			return nil, err
+		}
+		buckets[i].file = f
+		buckets[i].writer = bufio.NewWriter(f)
+	}
+
+	return &Builder{
+		Header: Header{
+			FileSize:   targetFileSize,
+			NumBuckets: uint32(numBuckets),
+		},
+		buckets: buckets,
+		dir:     dir,
+	}, nil
+}
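+
+// A minimal end-to-end sketch of building an index (Builder, Insert, Seal and
+// Close are this package's API; the map, file name, and elided error handling
+// are illustrative only):
+//
+//	builder, _ := NewBuilder("", uint(len(kv)), targetFileSize)
+//	defer builder.Close()
+//	for key, offset := range kv {
+//		_ = builder.Insert([]byte(key), offset)
+//	}
+//	f, _ := os.Create("file.index")
+//	_ = builder.Seal(context.TODO(), f)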
+
+// Insert writes a key-value mapping to the index.
+//
+// Index generation will fail if the same key is inserted twice.
+// The writer must not pass a value greater than targetFileSize.
+func (b *Builder) Insert(key []byte, value uint64) error {
+	return b.buckets[b.Header.BucketHash(key)].writeTuple(key, value)
+}
+
+// Seal writes the final index to the provided file.
+// This process is CPU-intensive; use the context to abort prematurely.
+//
+// The file should be opened with access mode os.O_RDWR.
+// Passing a non-empty file will result in a corrupted index.
+func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) {
+	// TODO support in-place writing.
+
+	// Write header.
+	var headerBuf [headerSize]byte
+	b.Header.Store(&headerBuf)
+	_, err = f.Write(headerBuf[:])
+	if err != nil {
+		return fmt.Errorf("failed to write header: %w", err)
+	}
+	// Create hole to leave space for bucket header table.
+	bucketTableLen := int64(b.NumBuckets) * bucketHdrLen
+	err = fallocate(f, headerSize, bucketTableLen)
+	if errors.Is(err, syscall.EOPNOTSUPP) {
+		// The underlying file system may not support fallocate
+		err = fake_fallocate(f, headerSize, bucketTableLen)
+		if err != nil {
+			return fmt.Errorf("failed to fake fallocate() bucket table: %w", err)
+		}
+	}
+	if err != nil {
+		return fmt.Errorf("failed to fallocate() bucket table: %w", err)
+	}
+	// Seal each bucket.
+	for i := range b.buckets {
+		if err := b.sealBucket(ctx, i, f); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// sealBucket mines a perfect hash function for bucket i, appends the bucket's
+// entries to the file, and then writes the bucket header.
+func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error {
+	// Produce perfect hash table for bucket.
+	bucket := &b.buckets[i]
+	if err := bucket.flush(); err != nil {
+		return err
+	}
+	const mineAttempts uint32 = 1000
+	entries, domain, err := bucket.mine(ctx, mineAttempts)
+	if err != nil {
+		return fmt.Errorf("failed to mine bucket %d: %w", i, err)
+	}
+	// Find current file length.
+	offset, err := f.Seek(0, io.SeekEnd)
+	if err != nil {
+		return fmt.Errorf("failed to seek to EOF: %w", err)
+	}
+	if offset < 0 {
+		panic("os.File.Seek() < 0")
+	}
+	// Assemble the bucket descriptor.
+	desc := BucketDescriptor{
+		BucketHeader: BucketHeader{
+			HashDomain: domain,
+			NumEntries: uint32(bucket.records),
+			HashLen:    3, // TODO remove hardcoded constant
+			FileOffset: uint64(offset),
+		},
+		Stride:      3 + intWidth(b.FileSize), // TODO remove hardcoded constant
+		OffsetWidth: intWidth(b.FileSize),
+	}
+	// Write entries to file.
+	wr := bufio.NewWriter(f)
+	entryBuf := make([]byte, desc.HashLen+intWidth(b.FileSize)) // TODO remove hardcoded constant
+	for _, entry := range entries {
+		desc.marshalEntry(entryBuf, entry)
+		if _, err := wr.Write(entryBuf[:]); err != nil {
+			return fmt.Errorf("failed to write record to index: %w", err)
+		}
+	}
+	if err := wr.Flush(); err != nil {
+		return fmt.Errorf("failed to flush bucket to index: %w", err)
+	}
+	// Write header to file.
+	if err := desc.BucketHeader.writeTo(f, uint(i)); err != nil {
+		return fmt.Errorf("failed to write bucket header %d: %w", i, err)
+	}
+	return nil
+}
+
+func (b *Builder) Close() error {
+	return os.RemoveAll(b.dir)
+}
+
+// tempBucket represents the "temporary bucket" file,
+// a disk buffer containing a vector of key-value-tuples.
+type tempBucket struct {
+	records uint
+	file    *os.File
+	writer  *bufio.Writer
+}
+
+// writeTuple performs a buffered write of a KV-tuple.
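+// The on-disk layout is: a 2-byte little-endian key length, an 8-byte
+// little-endian value, followed by the raw key bytes.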
+func (b *tempBucket) writeTuple(key []byte, value uint64) (err error) { + b.records++ + var static [10]byte + binary.LittleEndian.PutUint16(static[0:2], uint16(len(key))) + binary.LittleEndian.PutUint64(static[2:10], value) + if _, err = b.writer.Write(static[:]); err != nil { + return err + } + _, err = b.writer.Write(key) + return +} + +// flush empties the in-memory write buffer to the file. +func (b *tempBucket) flush() error { + if err := b.writer.Flush(); err != nil { + return fmt.Errorf("failed to flush writer: %w", err) + } + b.writer = nil + return nil +} + +// mine repeatedly hashes the set of entries with different nonces. +// +// Returns a sorted list of hashtable entries upon finding a set of hashes without collisions. +// If a number of attempts was made without success, returns ErrCollision instead. +func (b *tempBucket) mine(ctx context.Context, attempts uint32) (entries []Entry, domain uint32, err error) { + entries = make([]Entry, b.records) + bitmap := make([]byte, 1<<21) + + rd := bufio.NewReader(b.file) + for domain = uint32(0); domain < attempts; domain++ { + if err = ctx.Err(); err != nil { + return + } + // Reset bitmap + for i := range bitmap { + bitmap[i] = 0 + } + // Reset reader + if _, err = b.file.Seek(0, io.SeekStart); err != nil { + return + } + rd.Reset(b.file) + + if hashErr := hashBucket(rd, entries, bitmap, domain); errors.Is(hashErr, ErrCollision) { + continue + } else if hashErr != nil { + return nil, 0, hashErr + } + + return // ok + } + + return nil, domain, ErrCollision +} + +// hashBucket reads and hashes entries from a temporary bucket file. +// +// Uses a 2^24 wide bitmap to detect collisions. +func hashBucket(rd *bufio.Reader, entries []Entry, bitmap []byte, nonce uint32) error { + // TODO Don't hardcode this, choose hash depth dynamically + mask := uint64(0xffffff) + + // Scan provided reader for entries and hash along the way. + for i := range entries { + // Read next key from file (as defined by writeTuple) + var static [10]byte + if _, err := io.ReadFull(rd, static[:]); err != nil { + return err + } + keyLen := binary.LittleEndian.Uint16(static[0:2]) + value := binary.LittleEndian.Uint64(static[2:10]) + key := make([]byte, keyLen) + if _, err := io.ReadFull(rd, key); err != nil { + return err + } + + // Hash to entry + hash := EntryHash64(nonce, key) & mask + + // Check for collision in bitmap + bi, bj := hash/8, hash%8 + chunk := bitmap[bi] + if (chunk>>bj)&1 == 1 { + return ErrCollision + } + bitmap[bi] = chunk | (1 << bj) + + // Export entry + entries[i] = Entry{ + Hash: hash, + Value: value, + } + } + + // Sort entries. 
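+	// (Note: sortWithCompare sorts by hash and then rewrites the slice into
+	// Eytzinger/BFS order; see its definition below.)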
+ sortWithCompare(entries, func(i, j int) int { + if entries[i].Hash < entries[j].Hash { + return -1 + } else if entries[i].Hash > entries[j].Hash { + return 1 + } + return 0 + }) + + return nil +} + +var ErrCollision = errors.New("hash collision") + +func sortWithCompare[T any](a []T, compare func(i, j int) int) { + sort.Slice(a, func(i, j int) bool { + return compare(i, j) < 0 + }) + sorted := make([]T, len(a)) + eytzinger(a, sorted, 0, 1) + copy(a, sorted) +} + +func eytzinger[T any](in, out []T, i, k int) int { + if k <= len(in) { + i = eytzinger(in, out, i, 2*k) + out[k-1] = in[i] + i++ + i = eytzinger(in, out, i, 2*k+1) + } + return i +} diff --git a/deprecated/compactindex/build_test.go b/deprecated/compactindex/build_test.go new file mode 100644 index 00000000..ccf0b8a1 --- /dev/null +++ b/deprecated/compactindex/build_test.go @@ -0,0 +1,248 @@ +package compactindex + +import ( + "context" + "encoding/binary" + "errors" + "io" + "io/fs" + "math" + "math/rand" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbauerster/mpb/v8/decor" +) + +func TestBuilder(t *testing.T) { + const numBuckets = 3 + const maxValue = math.MaxUint64 + + // Create a table with 3 buckets. + builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue) + require.NoError(t, err) + require.NotNil(t, builder) + assert.Len(t, builder.buckets, 3) + defer builder.Close() + + // Insert a few entries. + require.NoError(t, builder.Insert([]byte("hello"), 1)) + require.NoError(t, builder.Insert([]byte("world"), 2)) + require.NoError(t, builder.Insert([]byte("blub"), 3)) + + // Create index file. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal index. + require.NoError(t, builder.Seal(context.TODO(), targetFile)) + + // Assert binary content. + buf, err := os.ReadFile(targetFile.Name()) + require.NoError(t, err) + assert.Equal(t, []byte{ + // --- File header + // magic + 0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78, + // max file size + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + // num buckets + 0x03, 0x00, 0x00, 0x00, + // padding + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 0 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 1 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x5b, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket header 2 + // hash domain + 0x00, 0x00, 0x00, 0x00, + // num entries + 0x01, 0x00, 0x00, 0x00, + // hash len + 0x03, + // padding + 0x00, + // file offset + 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 0 + // hash + 0xe2, 0xdb, 0x55, + // value + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 1 + // hash + 0x92, 0xcd, 0xbb, + // value + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // --- Bucket 2 + // hash + 0xe3, 0x09, 0x6b, + // value + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, buf) + + // Reset file offset. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + + // Open index. 
+ db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + require.NotNil(t, db) + + // File header assertions. + assert.Equal(t, Header{ + FileSize: maxValue, + NumBuckets: numBuckets, + }, db.Header) + + // Get bucket handles. + buckets := make([]*Bucket, numBuckets) + for i := range buckets { + buckets[i], err = db.GetBucket(uint(i)) + require.NoError(t, err) + } + + // Ensure out-of-bounds bucket accesses fail. + _, wantErr := db.GetBucket(numBuckets) + assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") + + // Bucket header assertions. + assert.Equal(t, BucketDescriptor{ + BucketHeader: BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x50, + }, + Stride: 11, // 3 + 8 + OffsetWidth: 8, + }, buckets[0].BucketDescriptor) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x5b, + }, buckets[1].BucketHeader) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x66, + }, buckets[2].BucketHeader) + + // Test lookups. + entries, err := buckets[2].Load( /*batchSize*/ 4) + require.NoError(t, err) + assert.Equal(t, []Entry{ + { + Hash: 0x6b09e3, + Value: 3, + }, + }, entries) +} + +func TestBuilder_Random(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long test") + } + + const numKeys = uint(500000) + const keySize = uint(16) + const maxOffset = uint64(1000000) + const queries = int(10000) + + // Create new builder session. + builder, err := NewBuilder("", numKeys, maxOffset) + require.NoError(t, err) + require.NotNil(t, builder) + require.NotEmpty(t, builder.buckets) + + // Ensure we cleaned up after ourselves. + defer func() { + _, statErr := os.Stat(builder.dir) + assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) + }() + defer builder.Close() + + // Insert items to temp buckets. + preInsert := time.Now() + key := make([]byte, keySize) + for i := uint(0); i < numKeys; i++ { + binary.LittleEndian.PutUint64(key, uint64(i)) + err := builder.Insert(key, uint64(rand.Int63n(int64(maxOffset)))) + require.NoError(t, err) + } + t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) + + // Create file for final index. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal to final index. + preSeal := time.Now() + sealErr := builder.Seal(context.TODO(), targetFile) + require.NoError(t, sealErr, "Seal failed") + t.Logf("Sealed in %s", time.Since(preSeal)) + + // Print some stats. + targetStat, err := targetFile.Stat() + require.NoError(t, err) + t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) + t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) + t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) + + // Open index. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + + // Run query benchmark. 
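+	// Each query hashes the key to a bucket, reads that bucket's header, and
+	// then searches the bucket's fixed-size entries (see the package docs).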
+	preQuery := time.Now()
+	for i := queries; i != 0; i-- {
+		keyN := uint64(rand.Int63n(int64(numKeys)))
+		binary.LittleEndian.PutUint64(key, keyN)
+
+		bucket, err := db.LookupBucket(key)
+		require.NoError(t, err)
+
+		value, err := bucket.Lookup(key)
+		require.NoError(t, err)
+		require.True(t, value > 0)
+	}
+	t.Logf("Queried %d items", queries)
+	t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
+}
diff --git a/deprecated/compactindex/compactindex.go b/deprecated/compactindex/compactindex.go
new file mode 100644
index 00000000..1aeb18e7
--- /dev/null
+++ b/deprecated/compactindex/compactindex.go
@@ -0,0 +1,277 @@
+// Package compactindex is an immutable hashtable index format inspired by djb's constant database (cdb).
+//
+// # Design
+//
+// Compactindex is used to create secondary indexes over arbitrary flat files.
+// Each index is a single, immutable flat file.
+//
+// Index files consist of a space-optimized and query-optimized key-value-like table.
+//
+// Instead of storing actual keys, the format stores FKS dynamic perfect hashes.
+// And instead of storing values, the format contains offsets into some file.
+//
+// As a result, the database effectively only supports two operations, similarly to cdb.
+// (Note that the actual Go interface is a bit more flexible).
+//
+//	func Create(kv map[[]byte]uint64) *Index
+//	func (*Index) Lookup(key []byte) (value uint64, exist bool)
+//
+// # Buckets
+//
+// The set of items is split into buckets of approx 10000 records.
+// The number of buckets is unlimited.
+//
+// The key-to-bucket assignment is determined by xxHash64 using uniform discrete hashing over the key space.
+//
+// The index file header also mentions the number of buckets and the file offset of each bucket.
+//
+// # Tables
+//
+// Each bucket contains a table of entries, indexed by a collision-free hash function.
+//
+// The hash function used in the entry table is xxHash.
+// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme).
+// This hash domain is also recorded at the bucket header.
+//
+// Each bucket entry is a constant-size record consisting of a 3-byte hash and an offset to the value.
+// The size of the offset integer is the minimal byte-aligned integer width that can represent the target file size.
+//
+// # Querying
+//
+// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads.
+// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc...
+//
+// The DB struct itself performs zero memory allocations and therefore also doesn't cache.
+// It is therefore recommended to provide an io.ReaderAt backed by a cache to improve performance.
+//
+// Given a key, the query strategy is simple:
+//
+// 1. Hash key to bucket using global hash function
+// 2. Retrieve bucket offset from bucket header table
+// 3. Hash key to entry using per-bucket hash function
+// 4. Search for entry in bucket (binary search)
+//
+// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium.
+//
+// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time.
+// When doing range requests against high-latency remote storage (e.g. S3 buckets),
+// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once.
+// +// # Construction +// +// Constructing a compactindex requires upfront knowledge of the number of items and highest possible target offset (read: target file size). +// +// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency. +// +// The process works as follows: +// +// 1. Determine number of buckets and offset integer width +// based on known input params (item count and target file size). +// 2. Linear pass over input data, populating temporary files that +// contain the unsorted entries of each bucket. +// 3. For each bucket, brute force a perfect hash function that +// defines a bijection between hash values and keys in the bucket. +// 4. For each bucket, sort by hash values. +// 5. Store to index. +// +// An alternative construction approach is available when the number of items or target file size is unknown. +// In this case, a set of keys is first serialized to a flat file. +package compactindex + +import ( + "encoding/binary" + "fmt" + "math" + "math/bits" + "sort" + + "github.com/cespare/xxhash/v2" +) + +// Magic are the first eight bytes of an index. +var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} + +const Version = uint8(1) + +// Header occurs once at the beginning of the index. +type Header struct { + FileSize uint64 + NumBuckets uint32 +} + +// headerSize is the size of the header at the beginning of the file. +const headerSize = 32 + +// Load checks the Magic sequence and loads the header fields. +func (h *Header) Load(buf *[headerSize]byte) error { + // Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream. + if *(*[8]byte)(buf[:8]) != Magic { + return fmt.Errorf("not a radiance compactindex file") + } + *h = Header{ + FileSize: binary.LittleEndian.Uint64(buf[8:16]), + NumBuckets: binary.LittleEndian.Uint32(buf[16:20]), + } + // Check version. + if buf[20] != Version { + return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20]) + } + // 11 bytes to spare for now. Might use it in the future. + // Force to zero for now. + for _, b := range buf[21:32] { + if b != 0x00 { + return fmt.Errorf("unsupported index version") + } + } + return nil +} + +func (h *Header) Store(buf *[headerSize]byte) { + copy(buf[0:8], Magic[:]) + binary.LittleEndian.PutUint64(buf[8:16], h.FileSize) + binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets) + buf[20] = Version + for i := 21; i < 32; i++ { + buf[i] = 0 + } +} + +// BucketHash returns the bucket index for the given key. +// +// Uses a truncated xxHash64 rotated until the result fits. +func (h *Header) BucketHash(key []byte) uint { + u := xxhash.Sum64(key) + n := uint64(h.NumBuckets) + r := (-n) % n + for u < r { + u = hashUint64(u) + } + return uint(u % n) +} + +// hashUint64 is a reversible uint64 permutation based on Google's +// Murmur3 hash finalizer (public domain) +func hashUint64(x uint64) uint64 { + x ^= x >> 33 + x *= 0xff51afd7ed558ccd + x ^= x >> 33 + x *= 0xc4ceb9fe1a85ec53 + x ^= x >> 33 + return x +} + +// BucketHeader occurs at the beginning of each bucket. +type BucketHeader struct { + HashDomain uint32 + NumEntries uint32 + HashLen uint8 + FileOffset uint64 +} + +// bucketHdrLen is the size of the header preceding the hash table entries. 
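+// The 16 bytes are laid out as hash_domain u32 | num_entries u32 | hash_len u8 |
+// zero padding u8 | file_offset u48 (all little-endian), as encoded by Store below.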
+const bucketHdrLen = 16
+
+func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) {
+	binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain)
+	binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries)
+	buf[8] = b.HashLen
+	buf[9] = 0
+	putUintLe(buf[10:16], b.FileOffset)
+}
+
+func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) {
+	b.HashDomain = binary.LittleEndian.Uint32(buf[0:4])
+	b.NumEntries = binary.LittleEndian.Uint32(buf[4:8])
+	b.HashLen = buf[8]
+	b.FileOffset = uintLe(buf[10:16])
+}
+
+// Hash returns the per-bucket hash of a key.
+func (b *BucketHeader) Hash(key []byte) uint64 {
+	xsum := EntryHash64(b.HashDomain, key)
+	// Mask sum by hash length.
+	return xsum & (math.MaxUint64 >> (64 - b.HashLen*8))
+}
+
+type BucketDescriptor struct {
+	BucketHeader
+	Stride      uint8 // size of one entry in bucket
+	OffsetWidth uint8 // width of offset field in bucket
+}
+
+func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) {
+	e.Hash = uintLe(buf[0:b.HashLen])
+	e.Value = uintLe(buf[b.HashLen : b.HashLen+b.OffsetWidth])
+	return
+}
+
+func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) {
+	if len(buf) < int(b.Stride) {
+		panic("marshalEntry: buf too small")
+	}
+	putUintLe(buf[0:b.HashLen], e.Hash)
+	putUintLe(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value)
+}
+
+// SearchSortedEntries performs an in-memory binary search for a given hash.
+func SearchSortedEntries(entries []Entry, hash uint64) *Entry {
+	i, found := sort.Find(len(entries), func(i int) int {
+		other := entries[i].Hash
+		// Note: This is safe because neither side exceeds 2^24.
+		return int(hash) - int(other)
+	})
+	if !found {
+		return nil
+	}
+	if i >= len(entries) || entries[i].Hash != hash {
+		return nil
+	}
+	return &entries[i]
+}
+
+// EntryHash64 is an xxHash-based hash function using an arbitrary prefix.
+func EntryHash64(prefix uint32, key []byte) uint64 {
+	const blockSize = 32
+	var prefixBlock [blockSize]byte
+	binary.LittleEndian.PutUint32(prefixBlock[:4], prefix)
+
+	var digest xxhash.Digest
+	digest.Reset()
+	digest.Write(prefixBlock[:])
+	digest.Write(key)
+	return digest.Sum64()
+}
+
+// Entry is a single element in a hash table.
+type Entry struct {
+	Hash  uint64
+	Value uint64
+}
+
+// intWidth returns the number of bytes minimally required to represent the given integer.
+func intWidth(n uint64) uint8 {
+	msb := 64 - bits.LeadingZeros64(n)
+	return uint8((msb + 7) / 8)
+}
+
+// maxCls64 returns the max integer that has the same number of leading zeros as n.
+func maxCls64(n uint64) uint64 {
+	return math.MaxUint64 >> bits.LeadingZeros64(n)
+}
+
+// uintLe decodes an unsigned little-endian integer without bounds assertions.
+// Out-of-bounds bits are set to zero.
+func uintLe(buf []byte) uint64 {
+	var full [8]byte
+	copy(full[:], buf)
+	return binary.LittleEndian.Uint64(full[:])
+}
+
+// putUintLe encodes an unsigned little-endian integer without bounds assertions.
+// Returns true if the integer fully fit in the provided buffer.
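+//
+// For example, putUintLe(buf[:3], 0x123456) stores {0x56, 0x34, 0x12} and returns true,
+// while putUintLe(buf[:3], 0x12345678) keeps only the low three bytes and returns false.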
+func putUintLe(buf []byte, x uint64) bool { + var full [8]byte + binary.LittleEndian.PutUint64(full[:], x) + copy(buf, full[:]) + return int(intWidth(x)) <= len(buf) +} diff --git a/deprecated/compactindex/compactindex_test.go b/deprecated/compactindex/compactindex_test.go new file mode 100644 index 00000000..75e56082 --- /dev/null +++ b/deprecated/compactindex/compactindex_test.go @@ -0,0 +1,84 @@ +package compactindex + +import ( + "math" + "math/rand" + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMaxCls64(t *testing.T) { + cases := [][2]uint64{ + {0x0000_0000_0000_0000, 0x0000_0000_0000_0000}, + {0x0000_0000_0000_0001, 0x0000_0000_0000_0001}, + {0x0000_0000_0000_0003, 0x0000_0000_0000_0002}, + {0x0000_0000_0000_0003, 0x0000_0000_0000_0003}, + {0x0000_0000_0000_0007, 0x0000_0000_0000_0004}, + {0x0000_0000_FFFF_FFFF, 0x0000_0000_F000_000F}, + {0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF}, + } + for _, tc := range cases { + assert.Equal(t, tc[0], maxCls64(tc[1])) + } +} + +func TestHeader_BucketHash(t *testing.T) { + const numItems = 500000 + const numBuckets = 1000 + + header := Header{ + NumBuckets: numBuckets, + } + + keys := make([][]byte, numItems) + hits := make([]int, numBuckets) + for i := range keys { + var buf [16]byte + n, _ := rand.Read(buf[:]) + keys[i] = buf[:n] + } + + // Bounds check and count hits. + for _, key := range keys { + idx := header.BucketHash(key) + require.True(t, idx < numBuckets) + hits[idx]++ + } + + // Calculate standard deviation. + mean := float64(numItems) / float64(numBuckets) + var cumVariance float64 + for _, bucketHits := range hits { + delta := float64(bucketHits) - mean + cumVariance += (delta * delta) + } + variance := cumVariance / float64(len(hits)) + stddev := math.Sqrt(variance) + t.Logf("mean % 12.2f", mean) + normStddev := stddev / mean + t.Logf("stddev % 10.2f", stddev) + t.Logf("1σ / mean % 7.2f%%", 100*normStddev) + + const failNormStddev = 0.08 + if normStddev > failNormStddev { + t.Logf("FAIL: > %f%%", 100*failNormStddev) + t.Fail() + } else { + t.Logf(" OK: <= %f%%", 100*failNormStddev) + } + + // Print percentiles. 
+ sort.Ints(hits) + t.Logf("min % 10d", hits[0]) + t.Logf("p01 % 10d", hits[int(math.Round(0.01*float64(len(hits))))]) + t.Logf("p05 % 10d", hits[int(math.Round(0.05*float64(len(hits))))]) + t.Logf("p10 % 10d", hits[int(math.Round(0.10*float64(len(hits))))]) + t.Logf("p50 % 10d", hits[int(math.Round(0.50*float64(len(hits))))]) + t.Logf("p90 % 10d", hits[int(math.Round(0.90*float64(len(hits))))]) + t.Logf("p95 % 10d", hits[int(math.Round(0.95*float64(len(hits))))]) + t.Logf("p99 % 10d", hits[int(math.Round(0.99*float64(len(hits))))]) + t.Logf("max % 10d", hits[len(hits)-1]) +} diff --git a/deprecated/compactindex/fallocate_fake.go b/deprecated/compactindex/fallocate_fake.go new file mode 100644 index 00000000..d345a40f --- /dev/null +++ b/deprecated/compactindex/fallocate_fake.go @@ -0,0 +1,27 @@ +package compactindex + +import ( + "fmt" + "os" +) + +func fake_fallocate(f *os.File, offset int64, size int64) error { + const blockSize = 4096 + var zero [blockSize]byte + + for size > 0 { + step := size + if step > blockSize { + step = blockSize + } + + if _, err := f.Write(zero[:step]); err != nil { + return fmt.Errorf("failure while generic fallocate: %w", err) + } + + offset += step + size -= step + } + + return nil +} diff --git a/deprecated/compactindex/fallocate_generic.go b/deprecated/compactindex/fallocate_generic.go new file mode 100644 index 00000000..e0fb1b33 --- /dev/null +++ b/deprecated/compactindex/fallocate_generic.go @@ -0,0 +1,11 @@ +//go:build !linux + +package compactindex + +import ( + "os" +) + +func fallocate(f *os.File, offset int64, size int64) error { + return fake_fallocate(f, offset, size) +} diff --git a/deprecated/compactindex/fallocate_linux.go b/deprecated/compactindex/fallocate_linux.go new file mode 100644 index 00000000..5cdde837 --- /dev/null +++ b/deprecated/compactindex/fallocate_linux.go @@ -0,0 +1,17 @@ +//go:build linux + +package compactindex + +import ( + "fmt" + "os" + "syscall" +) + +func fallocate(f *os.File, offset int64, size int64) error { + err := syscall.Fallocate(int(f.Fd()), 0, offset, size) + if err != nil { + return fmt.Errorf("failure while linux fallocate: %w", err) + } + return nil +} diff --git a/deprecated/compactindex/query.go b/deprecated/compactindex/query.go new file mode 100644 index 00000000..49b5b31d --- /dev/null +++ b/deprecated/compactindex/query.go @@ -0,0 +1,212 @@ +package compactindex + +import ( + "errors" + "fmt" + "io" +) + +// DB is a compactindex handle. +type DB struct { + Header + Stream io.ReaderAt + prefetch bool +} + +// Open returns a handle to access a compactindex. +// +// The provided stream must start with the Magic byte sequence. +// Tip: Use io.NewSectionReader to create aligned substreams when dealing with a file that contains multiple indexes. +func Open(stream io.ReaderAt) (*DB, error) { + // Read the static 32-byte header. + // Ignore errors if the read fails after filling the buffer (e.g. EOF). + var fileHeader [headerSize]byte + n, readErr := stream.ReadAt(fileHeader[:], 0) + if n < len(fileHeader) { + // ReadAt must return non-nil error here. + return nil, readErr + } + db := new(DB) + if err := db.Header.Load(&fileHeader); err != nil { + return nil, err + } + db.Stream = stream + return db, nil +} + +func (db *DB) Prefetch(yes bool) { + db.prefetch = yes +} + +// Lookup queries for a key in the index and returns the value (offset), if any. +// +// Returns ErrNotFound if the key is unknown. 
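+//
+// A minimal usage sketch (the index file name here is hypothetical):
+//
+//	f, _ := os.Open("data.index")
+//	defer f.Close()
+//	db, _ := Open(f)
+//	offset, err := db.Lookup([]byte("my-key"))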
+func (db *DB) Lookup(key []byte) (uint64, error) { + bucket, err := db.LookupBucket(key) + if err != nil { + return 0, err + } + return bucket.Lookup(key) +} + +// LookupBucket returns a handle to the bucket that might contain the given key. +func (db *DB) LookupBucket(key []byte) (*Bucket, error) { + return db.GetBucket(db.Header.BucketHash(key)) +} + +// GetBucket returns a handle to the bucket at the given index. +func (db *DB) GetBucket(i uint) (*Bucket, error) { + if i >= uint(db.Header.NumBuckets) { + return nil, fmt.Errorf("out of bounds bucket index: %d >= %d", i, db.Header.NumBuckets) + } + + // Fill bucket handle. + bucket := &Bucket{ + BucketDescriptor: BucketDescriptor{ + Stride: db.entryStride(), + OffsetWidth: intWidth(db.FileSize), + }, + } + // Read bucket header. + readErr := bucket.BucketHeader.readFrom(db.Stream, i) + if readErr != nil { + return nil, readErr + } + bucket.Entries = io.NewSectionReader(db.Stream, int64(bucket.FileOffset), int64(bucket.NumEntries)*int64(bucket.Stride)) + if db.prefetch { + // TODO: find good value for numEntriesToPrefetch + numEntriesToPrefetch := minInt64(3_000, int64(bucket.NumEntries)) + prefetchSize := (4 + 3) * numEntriesToPrefetch + buf := make([]byte, prefetchSize) + _, err := bucket.Entries.ReadAt(buf, 0) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + } + return bucket, nil +} + +func minInt64(a, b int64) int64 { + if a < b { + return a + } + return b +} + +func (db *DB) entryStride() uint8 { + hashSize := 3 // TODO remove hardcoded constant + offsetSize := intWidth(db.FileSize) + return uint8(hashSize) + offsetSize +} + +func bucketOffset(i uint) int64 { + return headerSize + int64(i)*bucketHdrLen +} + +func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { + var buf [bucketHdrLen]byte + n, err := rd.ReadAt(buf[:], bucketOffset(i)) + if n < len(buf) { + return err + } + b.Load(&buf) + return nil +} + +func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error { + var buf [bucketHdrLen]byte + b.Store(&buf) + _, err := wr.WriteAt(buf[:], bucketOffset(i)) + return err +} + +// Bucket is a database handle pointing to a subset of the index. +type Bucket struct { + BucketDescriptor + Entries *io.SectionReader +} + +// maxEntriesPerBucket is the hardcoded maximum permitted number of entries per bucket. +const maxEntriesPerBucket = 1 << 24 // (16 * stride) MiB + +// targetEntriesPerBucket is the average number of records in each hashtable bucket we aim for. +const targetEntriesPerBucket = 10000 + +// Load retrieves all entries in the hashtable. +func (b *Bucket) Load(batchSize int) ([]Entry, error) { + if batchSize <= 0 { + batchSize = 512 // default to reasonable batch size + } + // TODO bounds check + if b.NumEntries > maxEntriesPerBucket { + return nil, fmt.Errorf("refusing to load bucket with %d entries", b.NumEntries) + } + entries := make([]Entry, 0, b.NumEntries) + + stride := int(b.Stride) + buf := make([]byte, batchSize*stride) + off := int64(0) + for { + // Read another chunk. + n, err := b.Entries.ReadAt(buf, off) + // Decode all entries in it. + sub := buf[:n] + for len(sub) >= stride { + entries = append(entries, b.unmarshalEntry(sub)) + sub = sub[stride:] + off += int64(stride) + } + // Handle error. + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + break + } else if err != nil { + return nil, err + } + } + + return entries, nil +} + +// TODO: This binary search algo is not optimized for high-latency remotes yet. + +// Lookup queries for a key using binary search. 
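+// The entries on disk are laid out in Eytzinger (BFS) order by the builder, so the
+// search walks an implicit binary tree: the children of entry i sit at 2i+1 and 2i+2.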
+func (b *Bucket) Lookup(key []byte) (uint64, error) { + return b.binarySearch(b.Hash(key)) +} + +func (b *Bucket) binarySearch(target uint64) (uint64, error) { + low := 0 + high := int(b.NumEntries) + return searchEytzinger(low, high, target, b.loadEntry) +} + +func searchEytzinger(min int, max int, x uint64, getter func(int) (Entry, error)) (uint64, error) { + var index int + for index < max { + k, err := getter(index) + if err != nil { + return 0, err + } + if k.Hash == x { + return k.Value, nil + } + index = index<<1 | 1 + if k.Hash < x { + index++ + } + } + return 0, ErrNotFound +} + +func (b *Bucket) loadEntry(i int) (Entry, error) { + off := int64(i) * int64(b.Stride) + buf := make([]byte, b.Stride) + n, err := b.Entries.ReadAt(buf, off) + if n != len(buf) { + return Entry{}, err + } + return b.unmarshalEntry(buf), nil +} + +// ErrNotFound marks a missing entry. +var ErrNotFound = errors.New("not found") diff --git a/deprecated/compactindex/query_test.go b/deprecated/compactindex/query_test.go new file mode 100644 index 00000000..0908372a --- /dev/null +++ b/deprecated/compactindex/query_test.go @@ -0,0 +1,58 @@ +package compactindex + +import ( + "bytes" + "errors" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type failReader struct{ err error } + +func (rd failReader) ReadAt([]byte, int64) (int, error) { + return 0, rd.err +} + +func TestOpen_ReadFail(t *testing.T) { + err := errors.New("oh no!") + db, dbErr := Open(failReader{err}) + require.Nil(t, db) + require.Same(t, err, dbErr) +} + +func TestOpen_InvalidMagic(t *testing.T) { + var buf [32]byte + rand.Read(buf[:]) + buf[1] = '.' // make test deterministic + + db, dbErr := Open(bytes.NewReader(buf[:])) + require.Nil(t, db) + require.EqualError(t, dbErr, "not a radiance compactindex file") +} + +func TestOpen_HeaderOnly(t *testing.T) { + buf := [32]byte{ + // Magic + 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x', + // FileSize + 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // NumBuckets + 0x42, 0x00, 0x00, 0x00, + // Padding + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + } + + db, dbErr := Open(bytes.NewReader(buf[:])) + require.NotNil(t, db) + require.NoError(t, dbErr) + + assert.NotNil(t, db.Stream) + assert.Equal(t, Header{ + FileSize: 0x1337, + NumBuckets: 0x42, + }, db.Header) +} diff --git a/deprecated/compactindex36/LICENSE b/deprecated/compactindex36/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/deprecated/compactindex36/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/deprecated/compactindex36/README.md b/deprecated/compactindex36/README.md
new file mode 100644
index 00000000..ef24d1e4
--- /dev/null
+++ b/deprecated/compactindex36/README.md
@@ -0,0 +1,137 @@
+# a fast flat-file index for constant datasets
+
+This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+The following changes have been made:
+ - The package has been renamed to `compactindex36` to avoid conflicts with the original package
+ - The values it indexes are 36-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) directly.
+
+This package specifies a file format and Go implementation for indexing constant datasets.
+
+*`compactindex` …*
+- is an immutable file format;
+- maps arbitrary keys into offsets in an external flat file;
+- consumes a constant amount of space per entry
+  - ~6-8 bytes in the original format, regardless of key size
+  - 39 bytes per entry in this fork (3-byte entry hash + 36-byte value)
+- `O(1)` complexity queries, with `2 + log2(10000)` lookups worst- & average-case (binary search);
+- during construction, requires near-constant memory space and `O(n)` scratch space with regard to entries per file;
+- during construction, features a constant >500k entry/s per-core write rate (2.5 GHz Intel laptop);
+- works on any storage supporting random reads (regular files, HTTP range requests, on-chain, ...);
+- is based on the "FKS method" which uses perfect (collision-free) hash functions in a two-level hashtable; [^1]
+- is inspired by D. J. Bernstein's "constant database"; [^2]
+- uses the xxHash64 non-cryptographic hash-function; [^3]
+
+Refer to the Go documentation for the algorithms used and implementation details.
+
+[![Go Reference](https://pkg.go.dev/badge/go.firedancer.io/radiance/pkg/compactindex.svg)](https://pkg.go.dev/go.firedancer.io/radiance/pkg/compactindex)
+
+[^1]: Fredman, M. L., Komlós, J., & Szemerédi, E. (1984). Storing a Sparse Table with O(1) Worst Case Access Time. Journal of the ACM, 31(3), 538–544. https://doi.org/10.1145/828.1884
+[^2]: cdb by D. J. Bernstein https://cr.yp.to/cdb.html
+[^3]: Go implementation of xxHash by @cespare: https://github.com/cespare/xxhash/
+
+## Interface
+
+In programming terms:
+
+```rs
+fn lookup(key: &[u8]) -> Option<u64>
+```
+
+Given an arbitrary key, the index
+- states whether the key exists in the index
+- if it exists, maps the key to an integer (usually an offset into a file)
+
+## Examples
+
+Here are some example scenarios where `compactindex` is useful:
+
+- When working with immutable data structures
+  - Example: Indexing [IPLD CAR files][3] carrying Merkle-DAGs of content-addressable data
+- When working with archived/constant data
+  - Example: Indexing files in `.tar` archives
+- When dealing with immutable remote storage such as S3-like object storage
+  - Example: Storing the index and target file in S3, then using [HTTP range requests][4] to efficiently query data
+
+[3]: https://ipld.io/specs/transport/car/
+[4]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
+
+Here are some things compactindex cannot do:
+
+- Cannot add more entries to an existing index
+  - Reason 1: indexes are tightly packed, so there is no space to insert new entries (though `fallocate(2)` with `FALLOC_FL_INSERT_RANGE` would technically work)
+  - Reason 2: the second-level hashtable uses a perfect hash function ensuring collision-free indexing of a subset of entries;
+    inserting new entries might cause a collision, requiring the affected bucket's hash function to be re-mined and its entries rewritten
+  - Reason 3: adding too many entries will eventually create an imbalance in the first-level hashtable;
+    fixing this imbalance effectively requires re-constructing the file from scratch
+- Cannot iterate over keys
+  - Reason: compactindex stores hashes, not the entries themselves.
+    This saves space but also allows for efficient random reads used during binary search
+
+## File Format (v0)
+
+**Encoding**
+
+The file format contains binary packed structures with byte alignment.
+
+Integers are encoded as little endian.
+
+**File Header**
+
+The file begins with a 32 byte file header.
+
+```rust
+#[repr(packed)]
+struct FileHeader {
+    magic: [u8; 8],       // 0x00
+    max_value: u64,       // 0x08
+    num_buckets: u32,     // 0x10
+    padding_14: [u8; 12], // 0x14
+}
+```
+
+- `magic` is set to the UTF-8 string `"rdcecidx"`.
+  The reader should reject files that don't start with this string.
+- `num_buckets` is set to the number of hashtable buckets.
+- `max_value` indicates the integer width of index values.
+- `padding_14` must be zero. (reserved for future use)
+
+**Bucket Header Table**
+
+The file header is followed by a vector of bucket headers.
+The number of bucket headers is set by `num_buckets` in the file header.
+
+Each bucket header is 16 bytes long.
+
+```rust
+#[repr(packed)]
+struct BucketHeader {
+    hash_domain: u32, // 0x00
+    num_entries: u32, // 0x04
+    hash_len: u8,     // 0x08
+    padding_09: u8,   // 0x09
+    file_offset: u48, // 0x0a
+}
+```
+
+- `hash_domain` is a "salt" to the per-bucket hash function.
+- `num_entries` is set to the number of records in the bucket.
+- `hash_len` is the size of the per-record hash in bytes and currently hardcoded to `3`.
+- `padding_09` must be zero.
+- `file_offset` is an offset from the beginning of the file header to the start of the bucket entries.
+
+**Bucket Entry Table**
+
+Each bucket has a vector of entries with length `num_entries`.
+This structure makes up the vast majority of the index.
+
+```rust
+#[repr(packed)]
+struct Entry {
+    hash: u??,
+    value: u??,
+}
+```
+
+The size of entry is static within a bucket.
It is determined by its components:
+- The size of `hash` in bytes equals `hash_len`
+- The size of `value` in bytes equals the byte aligned integer width that is minimally required to represent `max_value` (in this fork, `value` is always a 36-byte CID)
diff --git a/deprecated/compactindex36/build.go b/deprecated/compactindex36/build.go
new file mode 100644
index 00000000..c1499673
--- /dev/null
+++ b/deprecated/compactindex36/build.go
@@ -0,0 +1,310 @@
+package compactindex36
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+//   - The package has been renamed to `compactindex36` to avoid conflicts with the original package
+//   - The values it indexes are 36-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) directly.
+
+import (
+	"bufio"
+	"context"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"os"
+	"path/filepath"
+	"sort"
+	"syscall"
+)
+
+// Builder creates new compactindex files.
+type Builder struct {
+	Header
+	buckets []tempBucket
+	dir     string
+}
+
+// NewBuilder creates a new index builder.
+//
+// If dir is an empty string, a random temporary directory is used.
+//
+// numItems refers to the number of items in the index.
+//
+// targetFileSize is the size of the file that index entries point to.
+// Can be set to zero if unknown, which results in a less efficient (larger) index.
+func NewBuilder(dir string, numItems uint, targetFileSize uint64) (*Builder, error) {
+	if dir == "" {
+		var err error
+		dir, err = os.MkdirTemp("", "compactindex-")
+		if err != nil {
+			return nil, fmt.Errorf("failed to create temp dir: %w", err)
+		}
+	}
+	if targetFileSize == 0 {
+		targetFileSize = math.MaxUint64
+	}
+
+	numBuckets := (numItems + targetEntriesPerBucket - 1) / targetEntriesPerBucket
+	buckets := make([]tempBucket, numBuckets)
+	for i := range buckets {
+		name := filepath.Join(dir, fmt.Sprintf("keys-%d", i))
+		f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0o666)
+		if err != nil {
+			return nil, err
+		}
+		buckets[i].file = f
+		buckets[i].writer = bufio.NewWriter(f)
+	}
+
+	return &Builder{
+		Header: Header{
+			FileSize:   targetFileSize,
+			NumBuckets: uint32(numBuckets),
+		},
+		buckets: buckets,
+		dir:     dir,
+	}, nil
+}
+
+// Insert writes a key-value mapping to the index.
+//
+// Index generation will fail if the same key is inserted twice.
+// In this fork, values are fixed-size 36-byte arrays (e.g. sha256+CBOR CIDs).
+func (b *Builder) Insert(key []byte, value [36]byte) error {
+	return b.buckets[b.Header.BucketHash(key)].writeTuple(key, value)
+}
+
+// Seal writes the final index to the provided file.
+// This process is CPU-intensive; use the context to abort prematurely.
+//
+// The file should be opened with access mode os.O_RDWR.
+// Passing a non-empty file will result in a corrupted index.
+func (b *Builder) Seal(ctx context.Context, f *os.File) (err error) {
+	// TODO support in-place writing.
+
+	// Write header.
+	var headerBuf [headerSize]byte
+	b.Header.Store(&headerBuf)
+	_, err = f.Write(headerBuf[:])
+	if err != nil {
+		return fmt.Errorf("failed to write header: %w", err)
+	}
+	// Create hole to leave space for bucket header table.
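+	// The table spans NumBuckets*bucketHdrLen bytes directly after the 32-byte file header;
+	// each header is back-filled by sealBucket once its bucket's entries have been written.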
+	bucketTableLen := int64(b.NumBuckets) * bucketHdrLen
+	err = fallocate(f, headerSize, bucketTableLen)
+	if errors.Is(err, syscall.EOPNOTSUPP) {
+		// The underlying file system may not support fallocate
+		err = fake_fallocate(f, headerSize, bucketTableLen)
+		if err != nil {
+			return fmt.Errorf("failed to fake fallocate() bucket table: %w", err)
+		}
+	}
+	if err != nil {
+		return fmt.Errorf("failed to fallocate() bucket table: %w", err)
+	}
+	// Seal each bucket.
+	for i := range b.buckets {
+		if err := b.sealBucket(ctx, i, f); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// sealBucket will mine a bucket hashtable, write its entries to the file, and write the bucket header.
+func (b *Builder) sealBucket(ctx context.Context, i int, f *os.File) error {
+	// Produce perfect hash table for bucket.
+	bucket := &b.buckets[i]
+	if err := bucket.flush(); err != nil {
+		return err
+	}
+	const mineAttempts uint32 = 1000
+	entries, domain, err := bucket.mine(ctx, mineAttempts)
+	if err != nil {
+		return fmt.Errorf("failed to mine bucket %d: %w", i, err)
+	}
+	// Find current file length.
+	offset, err := f.Seek(0, io.SeekEnd)
+	if err != nil {
+		return fmt.Errorf("failed to seek to EOF: %w", err)
+	}
+	if offset < 0 {
+		panic("os.File.Seek() < 0")
+	}
+	// Build the bucket descriptor.
+	desc := BucketDescriptor{
+		BucketHeader: BucketHeader{
+			HashDomain: domain,
+			NumEntries: uint32(bucket.records),
+			HashLen:    3, // TODO remove hardcoded constant
+			FileOffset: uint64(offset),
+		},
+		Stride:      3 + valueLength(), // TODO remove hardcoded constant
+		OffsetWidth: valueLength(),
+	}
+	// Write entries to file.
+	wr := bufio.NewWriter(f)
+	entryBuf := make([]byte, desc.HashLen+valueLength()) // TODO remove hardcoded constant
+	for _, entry := range entries {
+		desc.marshalEntry(entryBuf, entry)
+		if _, err := wr.Write(entryBuf[:]); err != nil {
+			return fmt.Errorf("failed to write record to index: %w", err)
+		}
+	}
+	if err := wr.Flush(); err != nil {
+		return fmt.Errorf("failed to flush bucket to index: %w", err)
+	}
+	// Write header to file.
+	if err := desc.BucketHeader.writeTo(f, uint(i)); err != nil {
+		return fmt.Errorf("failed to write bucket header %d: %w", i, err)
+	}
+	return nil
+}
+
+func (b *Builder) Close() error {
+	return os.RemoveAll(b.dir)
+}
+
+// tempBucket represents the "temporary bucket" file,
+// a disk buffer containing a vector of key-value-tuples.
+type tempBucket struct {
+	records uint
+	file    *os.File
+	writer  *bufio.Writer
+}
+
+// writeTuple performs a buffered write of a KV-tuple.
+func (b *tempBucket) writeTuple(key []byte, value [36]byte) (err error) {
+	b.records++
+	var static [38]byte
+	binary.LittleEndian.PutUint16(static[0:2], uint16(len(key)))
+	copy(static[2:38], value[:])
+	if _, err = b.writer.Write(static[:]); err != nil {
+		return err
+	}
+	_, err = b.writer.Write(key)
+	return
+}
+
+// flush empties the in-memory write buffer to the file.
+func (b *tempBucket) flush() error {
+	if err := b.writer.Flush(); err != nil {
+		return fmt.Errorf("failed to flush writer: %w", err)
+	}
+	b.writer = nil
+	return nil
+}
+
+// mine repeatedly hashes the set of entries with different nonces.
+//
+// Returns the bucket's hashtable entries (in Eytzinger search order) upon finding a set of hashes without collisions.
+// If no collision-free set is found within the given number of attempts, returns ErrCollision instead.
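+//
+// Each attempt uses the loop counter as the hash domain; the first domain that yields
+// no collisions among the bucket's 24-bit entry hashes is the one recorded in the header.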
+func (b *tempBucket) mine(ctx context.Context, attempts uint32) (entries []Entry, domain uint32, err error) { + entries = make([]Entry, b.records) + bitmap := make([]byte, 1<<21) + + rd := bufio.NewReader(b.file) + for domain = uint32(0); domain < attempts; domain++ { + if err = ctx.Err(); err != nil { + return + } + // Reset bitmap + for i := range bitmap { + bitmap[i] = 0 + } + // Reset reader + if _, err = b.file.Seek(0, io.SeekStart); err != nil { + return + } + rd.Reset(b.file) + + if hashErr := hashBucket(rd, entries, bitmap, domain); errors.Is(hashErr, ErrCollision) { + continue + } else if hashErr != nil { + return nil, 0, hashErr + } + + return // ok + } + + return nil, domain, ErrCollision +} + +// hashBucket reads and hashes entries from a temporary bucket file. +// +// Uses a 2^24 wide bitmap to detect collisions. +func hashBucket(rd *bufio.Reader, entries []Entry, bitmap []byte, nonce uint32) error { + // TODO Don't hardcode this, choose hash depth dynamically + mask := uint64(0xffffff) + + // Scan provided reader for entries and hash along the way. + for i := range entries { + // Read next key from file (as defined by writeTuple) + var static [38]byte + if _, err := io.ReadFull(rd, static[:]); err != nil { + return err + } + keyLen := binary.LittleEndian.Uint16(static[0:2]) + var value [36]byte + copy(value[:], static[2:38]) + key := make([]byte, keyLen) + if _, err := io.ReadFull(rd, key); err != nil { + return err + } + + // Hash to entry + hash := EntryHash64(nonce, key) & mask + + // Check for collision in bitmap + bi, bj := hash/8, hash%8 + chunk := bitmap[bi] + if (chunk>>bj)&1 == 1 { + return ErrCollision + } + bitmap[bi] = chunk | (1 << bj) + + // Export entry + entries[i] = Entry{ + Hash: hash, + Value: value, + } + } + + // Sort entries. + // sort.Slice(entries, func(i, j int) bool { + // return entries[i].Hash < entries[j].Hash + // }) + sortWithCompare(entries, func(i, j int) int { + if entries[i].Hash < entries[j].Hash { + return -1 + } else if entries[i].Hash > entries[j].Hash { + return 1 + } + return 0 + }) + + return nil +} + +var ErrCollision = errors.New("hash collision") + +func sortWithCompare[T any](a []T, compare func(i, j int) int) { + sort.Slice(a, func(i, j int) bool { + return compare(i, j) < 0 + }) + sorted := make([]T, len(a)) + eytzinger(a, sorted, 0, 1) + copy(a, sorted) +} + +func eytzinger[T any](in, out []T, i, k int) int { + if k <= len(in) { + i = eytzinger(in, out, i, 2*k) + out[k-1] = in[i] + i++ + i = eytzinger(in, out, i, 2*k+1) + } + return i +} diff --git a/deprecated/compactindex36/build_test.go b/deprecated/compactindex36/build_test.go new file mode 100644 index 00000000..46fa70b1 --- /dev/null +++ b/deprecated/compactindex36/build_test.go @@ -0,0 +1,438 @@ +package compactindex36 + +// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex +// The following changes have been made: +// - The package has been renamed to `compactindex36` to avoid conflicts with the original package +// - The values it indexes are 36-bit values instead of 8-bit values. This allows to index CIDs (in particular sha256+CBOR CIDs) directly. 
+ +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "io/fs" + "math" + "math/rand" + "os" + "strings" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/ipfs/go-cid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vbauerster/mpb/v8/decor" +) + +var testCidStrings = []string{ + "bafyreiba5kzq6wf6neax6ascsh5khxhuy7zc6vqsu6zac32i7ilv4u62nm", + "bafyreie42alzugm43fiqv64ss3h5fh3xriaeamul7d7qmrrbxe6fpjo5b4", + "bafyreidam5koitaftfx7sydge5ta3ig2j5qbabqcql4umpom3yuia4sbm4", + "bafyreia3pebgypo4oqgdg4pqpjfybmcdbsbavcdscotji4wj2gfc3r4icm", + "bafyreigudmeashua4432mbq3tawwnsz3qfpmm5tjpwahopn7cxttotqdge", + "bafyreic3azak2ds4fomkw35pmvsznu46sgonmketlnfaqnoc6owi4t64my", + "bafyreib6t4ooiajnebkwgk4z57fhcvejc663a6haq6cb6tjjluj4fuulla", + "bafyreidmohyrgchkgavx7wubebip5agb4ngisnlkqaot4kz2eo635ny5m4", + "bafyreicpmxvpxwjemofmic6aka72dliueqxtsklrilkofwbqgn6ffuz7ka", + "bafyreifkjdmj3kmi2hkoqcqweunbktouxo6sy362rysl34ffyjinebylim", + "bafyreidzql2rmbs3chtq2cmbnncvfyz2tjclwqx4vnowvyph77fomh26qi", + "bafyreig4kpaq6rf5y46qgqhdzgr5uauubfqyevbmj6pmtaxxhh3tkyzury", + "bafyreianxqyomvh6dl533cs25z7yfda2z62ity3w7sdqf3kk4tmogu7t24", + "bafyreicaq6dv5jsq4du2tqiefr3baepnj4ei3bpxvg5g6np7ygacgbw5aq", + "bafyreia4b2nleifcp54w4scrjy7fgctsoy6zz4mkot3gw6xydqkrc2wdtq", + "bafyreierpgsryprxfgshtzjarnb662d5akhg7om6utubggjwtlg6qwwj5i", + "bafyreidufcwvs7fvot2blqnwciaxre35s3ip6xxkncrus4voci3ktots2q", + "bafyreif23uzartrw62g5pywtrsz3xsl2wdw73o4fvtsf76gqgx37mfpqjm", + "bafyreianu4oifizvqyop753ao4hrocftlbnn6kzm7xtsm4ryaz6uawkgmu", + "bafyreidekyir7cti4jch652nnmimrftoqynjxl6vzjimfkqxh42rx27yiy", + "bafyreia3zuym3akg4gp5ewlmdwxnybrsqrab4m6tpsgxq65az6z7r5jtba", + "bafyreihherovnppygar6h5hu4we4hkxrvoqtpkawwcmn7hkyeq6cisouyu", + "bafyreicmqd5dhn2hv2qcskf27vsml36p6srz6zoxjfjkmnu7ltczqtbkbe", + "bafyreihy2c7lomf3i3nucd5bbwvox3luhtnzujkybchgmyxenmanon7rxe", + "bafyreicld6buy3mr4ibs2jzakoaptdj7xvpjo4pwhwiuywnrzfzoh5ahqi", + "bafyreifyapa5a5ii72hfmqiwgsjto6iarshfwwvrrxdw3bhr62ucuutl4e", + "bafyreigrlvwdaivwthwvihcbyrnl5pl7jfor72xlaivi2f6ajypy4yku3a", + "bafyreiamvgkbpuahegu3mhxxujzvxk2t5hrykhrfw47yurlxqumkv243iy", + "bafyreib4qf7qpjmpr2eqi7mqwqxw2fznnkvhzkpj3udiloxqay5fhk5wui", + "bafyreidbol6tdhj42rdpchpafszgmnmg7tgvi2uwou7s2whiamznzawhk4", + "bafyreidrpejzimhuwq6j74jzv2odzriuitwmdkp2ibojzcax6jdpqiztti", + "bafyreidrgb4vmgvsreebrj6apscopszfbgw5e7llh22kk2cdayyeoyggwy", + "bafyreigpzlopkl2ttxfdf6n5sgxyda4bvlglre7nkjq37uecmvf47f6ttm", + "bafyreidcq3csrifsyeed42fbky42w7bxhvg6fd42l7qkw3cnxliab4e7nu", + "bafyreibchdux4qchrrz67kikde273mjth475fedjisvoazf3zhmodlkx7a", + "bafyreie4rdlgpfcrrdlonofkwlrefh6z5hcwieasatkddozvyknwqahh4q", + "bafyreibhwuih7ekso6zypyr4uwl37xewyu7foy2clqvz4l7lbgwxpslyyu", + "bafyreigltijqq3m6h7h6du5o4ynqwmimtslnsmyu3njwlnpuyadyev6awa", + "bafyreihwtszo3p7ujg2wsuhsqon5tidxxnyin2t42uhj7zq6xta7fo2suy", + "bafyreie2uggjajncn2lna6ytq2sw2uu4xw724pe6wj4ihhiawnnjm5sgwa", + "bafyreignb5gdw7fwfycoipjqbkvkve7dkuugr3s5ylkaucn3ks7klxh4te", + "bafyreib3iwnufpnoxgf7z5w3vtygu2z2kcqxj3quxypupfgmr53tyt6wdq", + "bafyreic7kxsh7nmfpxmrm727yug2rfnrhfuavmpll3cms4r6cpnbbuwgqm", + "bafyreig2o4yrzlwo74eom4v65tenr6yjh2v23vbl7sjffrppzceenxs3eq", + "bafyreidletnh5bxnc6k2p3idnul5qatfcf4qqrgmkjxolgpu7wolye47hm", + "bafyreigv2nni66nb6be5dchkonpb2t556qplv5xz4vdolwmyz4m32aufdi", + "bafyreid66pezal5svaidpvxc3zz6w5eksxcjn6omelhsqhj5jmcmxhgjhm", + "bafyreihjhwpvm2soq5syyovsiqrchsuojsdk4imj2gqk6pikc4rxdqtmny", + 
"bafyreidt3oveadwf5jrmxatrwa5bdxvfyxnrucypmtqwiu2pvrrztrj5xe", + "bafyreid6y6r44wqcwql5yyitmw5mpfmrrlsois2unbqzmtlvyeefqahnnu", + "bafyreic6evvtf3y3slkbwhzbjuvspqu2jxf7qr267rhigmox6f4a5a36eq", + "bafyreiekep5a55yvebqzzi6x7xyotse57zfwcpyeh2xermqkvxlkvpxh24", + "bafyreigwb22sgfg56dc2jnnvxttjyhwfp4itevlukqj2wfz5ebru72elv4", + "bafyreiebz2fxh64dqvbiwmqnyj5rj63txl5u7abmets2imhn2su6tcuvyu", + "bafyreigcm7wkxlsyc26acgb7nfjho2twh6au2pbk35w6bsbv2qt7rt7iaq", + "bafyreieiuq6g74i25huoumvey7oynljndt2d4qvbddqkhpysrexu7ixsuy", + "bafyreihuhj5slybgbqzdr4mpkyo5dwvqjxfhicardbph6htiyeut2frol4", + "bafyreiaskg4kwqrpdcatnymvno4xf54uewysdiz3357fdct2tlnx2gpkqq", + "bafyreicakit2lbmg3wo4uoox4rc2gv3odzrrkrr32zwk7qaolpoc7uyz5u", + "bafyreih5jcnhw4evhq5j4n75miruqfofo2dv46hdtqyd5ht2eqeu7g5cme", + "bafyreicwtl6ulct4ckjnq57gmctw3wjo6ctvjbbr7l4bwfbzpj3y3g6unm", + "bafyreiebgoqj3nawzcwjy4t67uljnmvfh55fiqaxsskld6qpjvd2majesq", + "bafyreif472dxwhnyjhxmxoto3czfblhssgmhrpsqcmrwzprywk45wqdtmi", + "bafyreiaz444on546zihfuygqchlw4r4vu2tuw5xnelm6dsodqcno23pvzu", + "bafyreidgzghcd2lfdcylsccvlj43f5ujj7xtriu6ojp7jog5iainecagka", + "bafyreiehvi56dn3zm2ltfgecss2ydfmcb2hmf6hk76b6ebpoxhquajawze", + "bafyreie4wcortvdsirbontddokin6wgm25xg46lu3qxcyyjj6rgkuk5cca", + "bafyreicurlgiukht7wnxy3za3hz5fzs2a62ggc6i3rqhzhck4p2lgt5754", + "bafyreihn2zwm7m3tqfwa53me4qxiit66yiny5sxtkvvjewjfkbjrgmeswu", + "bafyreid7m33qok7d66vsyc5mq257rya5sg24rzv5qwbghwsimclt5ll7pi", +} + +var testCids = func() []cid.Cid { + var cids []cid.Cid + for _, s := range testCidStrings { + c, err := cid.Decode(s) + if err != nil { + panic(err) + } + cids = append(cids, c) + } + return cids +}() + +func concatBytes(bs ...[]byte) []byte { + var out []byte + for _, b := range bs { + out = append(out, b...) + } + return out +} + +func numberToHexBytes(n int) string { + return (fmt.Sprintf("0x%02x", n)) +} + +func FormatByteSlice(buf []byte) string { + elems := make([]string, 0) + for _, v := range buf { + elems = append(elems, numberToHexBytes(int(v))) + } + + return "{" + strings.Join(elems, ", ") + "}" + fmt.Sprintf("(len=%v)", len(elems)) +} + +func splitBufferWithProvidedSizes(buf []byte, sizes []int) [][]byte { + var out [][]byte + var offset int + for _, size := range sizes { + out = append(out, buf[offset:offset+size]) + offset += size + } + return out +} + +func compareBufferArrays(a, b [][]byte) []bool { + var out []bool + + for i := 0; i < len(a); i++ { + out = append(out, bytes.Equal(a[i], b[i])) + } + + return out +} + +func TestBuilder(t *testing.T) { + const numBuckets = 3 + const maxValue = math.MaxUint64 + + // Create a table with 3 buckets. + builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue) + require.NoError(t, err) + require.NotNil(t, builder) + assert.Len(t, builder.buckets, 3) + defer builder.Close() + + // Insert a few entries. + keys := []string{"hello", "world", "blub", "foo"} + for i, key := range keys { + require.NoError(t, builder.Insert([]byte(key), [36]byte(testCids[i].Bytes()))) + } + { + // print test values + for _, tc := range testCids { + spew.Dump(FormatByteSlice(tc.Bytes())) + } + } + + // Create index file. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal index. + require.NoError(t, builder.Seal(context.TODO(), targetFile)) + + // Assert binary content. 
+ buf, err := os.ReadFile(targetFile.Name()) + require.NoError(t, err) + expected := concatBytes( + // --- File header + // magic + []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, // 0 + // max file size + []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // 1 + // num buckets + []byte{0x03, 0x00, 0x00, 0x00}, // 2 + // padding + []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 3 + + // --- Bucket header 0 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 4 + // num entries + []byte{0x01, 0x00, 0x00, 0x00}, // 5 + // hash len + []byte{0x03}, // 6 + // padding + []byte{0x00}, // 7 + // file offset + []byte{0x50, 0x00, 0x00, 0x00, 0x00, 0x00}, // 8 + + // --- Bucket header 1 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 9 + // num entries + []byte{0x01, 0x00, 0x00, 0x00}, // 10 + // hash len + []byte{0x03}, // 11 + // padding + []byte{0x00}, // 12 + // file offset + []byte{0x77, 0x00, 0x00, 0x00, 0x00, 0x00}, // 13 + + // --- Bucket header 2 + // hash domain + []byte{0x00, 0x00, 0x00, 0x00}, // 14 + // num entries + []byte{0x02, 0x00, 0x00, 0x00}, // 15 + // hash len + []byte{0x03}, // 16 + // padding + []byte{0x00}, // 17 + // file offset + []byte{0x9e, 0x00, 0x00, 0x00, 0x00, 0x00}, // 18 + + // --- Bucket 0 + // hash + []byte{0xe2, 0xdb, 0x55}, // 19 + // value + []byte{0x1, 0x71, 0x12, 0x20, 0x20, 0xea, 0xb3, 0xf, 0x58, 0xbe, 0x69, 0x1, 0x7f, 0x2, 0x42, 0x91, 0xfa, 0xa3, 0xdc, 0xf4, 0xc7, 0xf2, 0x2f, 0x56, 0x12, 0xa7, 0xb2, 0x1, 0x6f, 0x48, 0xfa, 0x17, 0x5e, 0x53, 0xda, 0x6b}, // 20 + + // --- Bucket 2 + // hash + []byte{0x92, 0xcd, 0xbb}, // 21 + // value + []byte{0x01, 0x71, 0x12, 0x20, 0x9c, 0xd0, 0x17, 0x9a, 0x19, 0x9c, 0xd9, 0x51, 0x0a, 0xfb, 0x92, 0x96, 0xcf, 0xd2, 0x9f, 0x77, 0x8a, 0x00, 0x40, 0x32, 0x8b, 0xf8, 0xff, 0x06, 0x46, 0x21, 0xb9, 0x3c, 0x57, 0xa5, 0xdd, 0x0f}, // 22 + // hash + []byte{0x98, 0x3d, 0xbd}, // 25 + // value + []byte{0x01, 0x71, 0x12, 0x20, 0x1b, 0x79, 0x02, 0x6c, 0x3d, 0xdc, 0x74, 0x0c, 0x33, 0x71, 0xf0, 0x7a, 0x4b, 0x80, 0xb0, 0x43, 0x0c, 0x82, 0x0a, 0x88, 0x72, 0x13, 0xa6, 0x94, 0x72, 0xc9, 0xd1, 0x8a, 0x2d, 0xc7, 0x88, 0x13}, // 26 + // hash + []byte{0xe3, 0x09, 0x6b}, // 23 + // value + []byte{0x1, 0x71, 0x12, 0x20, 0x60, 0x67, 0x54, 0xe4, 0x4c, 0x5, 0x99, 0x6f, 0xf9, 0x60, 0x66, 0x27, 0x66, 0xd, 0xa0, 0xda, 0x4f, 0x60, 0x10, 0x6, 0x2, 0x82, 0xf9, 0x46, 0x3d, 0xcc, 0xde, 0x28, 0x80, 0x72, 0x41, 0x67}, // 24 + ) + assert.Equal(t, expected, buf) + + { + splitSizes := []int{ + // --- File header + 8, 8, 4, 12, + // --- Bucket header 0 + 4, 4, 1, 1, 6, + // --- Bucket header 1 + 4, 4, 1, 1, 6, + // --- Bucket header 2 + 4, 4, 1, 1, 6, + // --- Bucket 0 + 3, 36, + // --- Bucket 2 + 3, 36, 3, 36, 3, 36, + } + splitExpected := splitBufferWithProvidedSizes(expected, splitSizes) + splitGot := splitBufferWithProvidedSizes(buf, splitSizes) + + comparations := compareBufferArrays(splitExpected, splitGot) + + for i, equal := range comparations { + if !equal { + t.Errorf("%d: \nexpected: %v, \n got: %v", i, FormatByteSlice(splitExpected[i]), FormatByteSlice(splitGot[i])) + } + } + + } + + // Reset file offset. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + + // Open index. + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + require.NotNil(t, db) + + // File header assertions. + assert.Equal(t, Header{ + FileSize: maxValue, + NumBuckets: numBuckets, + }, db.Header) + + // Get bucket handles. 
+ buckets := make([]*Bucket, numBuckets) + for i := range buckets { + buckets[i], err = db.GetBucket(uint(i)) + require.NoError(t, err) + } + + // Ensure out-of-bounds bucket accesses fail. + _, wantErr := db.GetBucket(numBuckets) + assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3") + + // Bucket header assertions. + assert.Equal(t, BucketDescriptor{ + BucketHeader: BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 0x50, + }, + Stride: 39, // 3 + 36 + OffsetWidth: 36, + }, buckets[0].BucketDescriptor) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 1, + HashLen: 3, + FileOffset: 119, + }, buckets[1].BucketHeader) + assert.Equal(t, BucketHeader{ + HashDomain: 0x00, + NumEntries: 2, + HashLen: 3, + FileOffset: 158, + }, buckets[2].BucketHeader) + + // Test lookups. + entries, err := buckets[2].Load( /*batchSize*/ 3) + require.NoError(t, err) + assert.Equal(t, []Entry{ + { + Hash: 12402072, + Value: [36]byte(testCids[3].Bytes()), + }, + { + Hash: 7014883, + Value: [36]byte(testCids[2].Bytes()), + }, + }, entries) + + { + for i, keyString := range keys { + key := []byte(keyString) + bucket, err := db.LookupBucket(key) + require.NoError(t, err) + + value, err := bucket.Lookup(key) + require.NoError(t, err) + assert.Equal(t, [36]byte(testCids[i].Bytes()), value) + } + } +} + +func TestBuilder_Random(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long test") + } + + numKeys := uint(len(testCids)) + const keySize = uint(16) + const maxOffset = uint64(1000000) + const queries = int(10000) + + // Create new builder session. + builder, err := NewBuilder("", numKeys, maxOffset) + require.NoError(t, err) + require.NotNil(t, builder) + require.NotEmpty(t, builder.buckets) + + // Ensure we cleaned up after ourselves. + defer func() { + _, statErr := os.Stat(builder.dir) + assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr) + }() + defer builder.Close() + + // Insert items to temp buckets. + preInsert := time.Now() + key := make([]byte, keySize) + for i := uint(0); i < numKeys; i++ { + binary.LittleEndian.PutUint64(key, uint64(i)) + err := builder.Insert(key, [36]byte(testCids[i].Bytes())) + require.NoError(t, err) + } + t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert)) + + // Create file for final index. + targetFile, err := os.CreateTemp("", "compactindex-final-") + require.NoError(t, err) + defer os.Remove(targetFile.Name()) + defer targetFile.Close() + + // Seal to final index. + preSeal := time.Now() + sealErr := builder.Seal(context.TODO(), targetFile) + require.NoError(t, sealErr, "Seal failed") + t.Logf("Sealed in %s", time.Since(preSeal)) + + // Print some stats. + targetStat, err := targetFile.Stat() + require.NoError(t, err) + t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size())) + t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys)) + t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds()) + + // Open index. + _, seekErr := targetFile.Seek(0, io.SeekStart) + require.NoError(t, seekErr) + db, err := Open(targetFile) + require.NoError(t, err, "Failed to open generated index") + + // Run query benchmark. 
+	preQuery := time.Now()
+	for i := queries; i != 0; i-- {
+		keyN := uint64(rand.Int63n(int64(numKeys)))
+		binary.LittleEndian.PutUint64(key, keyN)
+
+		bucket, err := db.LookupBucket(key)
+		require.NoError(t, err)
+
+		value, err := bucket.Lookup(key)
+		require.NoError(t, err)
+		require.Equal(t, [36]byte(testCids[keyN].Bytes()), value)
+	}
+	t.Logf("Queried %d items", queries)
+	t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
+}
diff --git a/deprecated/compactindex36/compactindex.go b/deprecated/compactindex36/compactindex.go
new file mode 100644
index 00000000..4720795d
--- /dev/null
+++ b/deprecated/compactindex36/compactindex.go
@@ -0,0 +1,280 @@
+// Package compactindex is an immutable hashtable index format inspired by djb's constant database (cdb).
+//
+// # Design
+//
+// Compactindex is used to create secondary indexes over arbitrary flat files.
+// Each index is a single, immutable flat file.
+//
+// Index files consist of a space-optimized and query-optimized key-value-like table.
+//
+// Instead of storing actual keys, the format stores FKS dynamic perfect hashes.
+// And instead of storing values, the format contains offsets into some file.
+//
+// As a result, the database effectively only supports two operations, similarly to cdb.
+// (Note that the actual Go interface is a bit more flexible).
+//
+//	func Create(kv map[[]byte]uint64) *Index
+//	func (*Index) Lookup(key []byte) (value uint64, exist bool)
+//
+// # Buckets
+//
+// The set of items is split into buckets of approx 10000 records.
+// The number of buckets is unlimited.
+//
+// The key-to-bucket assignment is determined by xxHash3 using uniform discrete hashing over the key space.
+//
+// The index file header also mentions the number of buckets and the file offset of each bucket.
+//
+// # Tables
+//
+// Each bucket contains a table of entries, indexed by a collision-free hash function.
+//
+// The hash function used in the entry table is xxHash.
+// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme).
+// This hash domain is also recorded at the bucket header.
+//
+// Each bucket entry is a constant-size record consisting of a 3-byte hash and an offset to the value.
+// The size of the offset integer is the minimal byte-aligned integer width that can represent the target file size.
+//
+// # Querying
+//
+// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads.
+// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc...
+//
+// The DB struct itself performs zero memory allocations and therefore also doesn't cache.
+// It is therefore recommended to provide an io.ReaderAt backed by a cache to improve performance.
+//
+// Given a key, the query strategy is simple:
+//
+// 1. Hash key to bucket using global hash function
+// 2. Retrieve bucket offset from bucket header table
+// 3. Hash key to entry using per-bucket hash function
+// 4. Search for entry in bucket (binary search)
+//
+// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium.
+//
+// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time.
+// When doing range requests against high-latency remote storage (e.g. S3 buckets),
+// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once.
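+//
+// As a rough sketch of the read path (Open and Lookup are defined in this package;
+// "file" below stands for any io.ReaderAt, e.g. an *os.File):
+//
+//	db, err := Open(file)
+//	if err != nil { /* handle error */ }
+//	value, err := db.Lookup(key) // value is the 36-byte payload stored for key;
+//	                             // err is ErrNotFound if the key is unknown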
+//
+// # Construction
+//
+// Constructing a compactindex requires upfront knowledge of the number of items and highest possible target offset (read: target file size).
+//
+// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency.
+//
+// The process works as follows:
+//
+// 1. Determine number of buckets and offset integer width
+//    based on known input params (item count and target file size).
+// 2. Linear pass over input data, populating temporary files that
+//    contain the unsorted entries of each bucket.
+// 3. For each bucket, brute force a perfect hash function that
+//    defines a bijection between hash values and keys in the bucket.
+// 4. For each bucket, sort by hash values.
+// 5. Store to index.
+//
+// An alternative construction approach is available when the number of items or target file size is unknown.
+// In this case, a set of keys is first serialized to a flat file.
+package compactindex36

+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindex36` to avoid conflicts with the original package
+// - The values it indexes are 36-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) directly.

+import (
+	"encoding/binary"
+	"fmt"
+	"math"
+	"math/bits"
+	"sort"
+
+	"github.com/cespare/xxhash/v2"
+)
+
+// Magic are the first eight bytes of an index.
+var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'}
+
+const Version = uint8(1)
+
+// Header occurs once at the beginning of the index.
+type Header struct {
+	FileSize   uint64
+	NumBuckets uint32
+}
+
+// headerSize is the size of the header at the beginning of the file.
+const headerSize = 32
+
+// Load checks the Magic sequence and loads the header fields.
+func (h *Header) Load(buf *[headerSize]byte) error {
+	// Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream.
+	if *(*[8]byte)(buf[:8]) != Magic {
+		return fmt.Errorf("not a radiance compactindex file")
+	}
+	*h = Header{
+		FileSize:   binary.LittleEndian.Uint64(buf[8:16]),
+		NumBuckets: binary.LittleEndian.Uint32(buf[16:20]),
+	}
+	// Check version.
+	if buf[20] != Version {
+		return fmt.Errorf("unsupported index version: want %d, got %d", Version, buf[20])
+	}
+	// 11 bytes to spare for now. Might use it in the future.
+	// Force to zero for now.
+	for _, b := range buf[21:32] {
+		if b != 0x00 {
+			return fmt.Errorf("unsupported index version")
+		}
+	}
+	return nil
+}
+
+func (h *Header) Store(buf *[headerSize]byte) {
+	copy(buf[0:8], Magic[:])
+	binary.LittleEndian.PutUint64(buf[8:16], h.FileSize)
+	binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets)
+	buf[20] = Version
+	for i := 21; i < 32; i++ {
+		buf[i] = 0
+	}
+}
+
+// BucketHash returns the bucket index for the given key.
+//
+// Uses a truncated xxHash64 rotated until the result fits.
+func (h *Header) BucketHash(key []byte) uint {
+	u := xxhash.Sum64(key)
+	n := uint64(h.NumBuckets)
+	r := (-n) % n
+	for u < r {
+		u = hashUint64(u)
+	}
+	return uint(u % n)
+}
+
+// hashUint64 is a reversible uint64 permutation based on Google's
+// Murmur3 hash finalizer (public domain).
+func hashUint64(x uint64) uint64 {
+	x ^= x >> 33
+	x *= 0xff51afd7ed558ccd
+	x ^= x >> 33
+	x *= 0xc4ceb9fe1a85ec53
+	x ^= x >> 33
+	return x
+}
+
+// BucketHeader occurs at the beginning of each bucket.
+type BucketHeader struct {
+	HashDomain uint32
+	NumEntries uint32
+	HashLen    uint8
+	FileOffset uint64
+}
+
+// bucketHdrLen is the size of the header preceding the hash table entries.
+const bucketHdrLen = 16
+
+func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) {
+	binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain)
+	binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries)
+	buf[8] = b.HashLen
+	buf[9] = 0
+	putUintLe(buf[10:16], b.FileOffset)
+}
+
+func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) {
+	b.HashDomain = binary.LittleEndian.Uint32(buf[0:4])
+	b.NumEntries = binary.LittleEndian.Uint32(buf[4:8])
+	b.HashLen = buf[8]
+	b.FileOffset = uintLe(buf[10:16])
+}
+
+// Hash returns the per-bucket hash of a key.
+func (b *BucketHeader) Hash(key []byte) uint64 {
+	xsum := EntryHash64(b.HashDomain, key)
+	// Mask sum by hash length.
+	return xsum & (math.MaxUint64 >> (64 - b.HashLen*8))
+}
+
+type BucketDescriptor struct {
+	BucketHeader
+	Stride      uint8 // size of one entry in bucket
+	OffsetWidth uint8 // width of offset field in bucket
+}
+
+func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) {
+	e.Hash = uintLe(buf[0:b.HashLen])
+	copy(e.Value[:], buf[b.HashLen:b.HashLen+b.OffsetWidth])
+	return
+}
+
+func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) {
+	if len(buf) < int(b.Stride) {
+		panic("marshalEntry: buf too small")
+	}
+	putUintLe(buf[0:b.HashLen], e.Hash)
+	copy(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value[:])
+}
+
+// SearchSortedEntries performs an in-memory binary search for a given hash.
+func SearchSortedEntries(entries []Entry, hash uint64) *Entry {
+	i, found := sort.Find(len(entries), func(i int) int {
+		other := entries[i].Hash
+		// Note: This is safe because neither side exceeds 2^24.
+		return int(hash) - int(other)
+	})
+	if !found {
+		return nil
+	}
+	if i >= len(entries) || entries[i].Hash != hash {
+		return nil
+	}
+	return &entries[i]
+}
+
+// EntryHash64 is a xxHash-based hash function using an arbitrary prefix.
+func EntryHash64(prefix uint32, key []byte) uint64 {
+	const blockSize = 32
+	var prefixBlock [blockSize]byte
+	binary.LittleEndian.PutUint32(prefixBlock[:4], prefix)
+
+	var digest xxhash.Digest
+	digest.Reset()
+	digest.Write(prefixBlock[:])
+	digest.Write(key)
+	return digest.Sum64()
+}
+
+// Entry is a single element in a hash table.
+type Entry struct {
+	Hash  uint64
+	Value [36]byte
+}
+
+func valueLength() uint8 {
+	return 36 // 36 is the length of the CIDs we use.
+}
+
+// maxCls64 returns the max integer that has the same number of leading zeros as n.
+func maxCls64(n uint64) uint64 {
+	return math.MaxUint64 >> bits.LeadingZeros64(n)
+}
+
+// uintLe decodes an unsigned little-endian integer without bounds assertions.
+// Out-of-bounds bits are set to zero.
+func uintLe(buf []byte) uint64 {
+	var full [8]byte
+	copy(full[:], buf)
+	return binary.LittleEndian.Uint64(full[:])
+}
+
+// putUintLe encodes an unsigned little-endian integer without bounds assertions.
+// Returns true if the integer fully fits in the provided buffer.
+func putUintLe(buf []byte, x uint64) bool {
+	var full [8]byte
+	binary.LittleEndian.PutUint64(full[:], x)
+	copy(buf, full[:])
+	return int(valueLength()) <= len(buf)
+}
diff --git a/deprecated/compactindex36/compactindex_test.go b/deprecated/compactindex36/compactindex_test.go
new file mode 100644
index 00000000..f8bdebe1
--- /dev/null
+++ b/deprecated/compactindex36/compactindex_test.go
@@ -0,0 +1,89 @@
+package compactindex36
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindex36` to avoid conflicts with the original package
+// - The values it indexes are 36-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) directly.
+
+import (
+	"math"
+	"math/rand"
+	"sort"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestMaxCls64(t *testing.T) {
+	cases := [][2]uint64{
+		{0x0000_0000_0000_0000, 0x0000_0000_0000_0000},
+		{0x0000_0000_0000_0001, 0x0000_0000_0000_0001},
+		{0x0000_0000_0000_0003, 0x0000_0000_0000_0002},
+		{0x0000_0000_0000_0003, 0x0000_0000_0000_0003},
+		{0x0000_0000_0000_0007, 0x0000_0000_0000_0004},
+		{0x0000_0000_FFFF_FFFF, 0x0000_0000_F000_000F},
+		{0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF_FFFF_FFFF},
+	}
+	for _, tc := range cases {
+		assert.Equal(t, tc[0], maxCls64(tc[1]))
+	}
+}
+
+func TestHeader_BucketHash(t *testing.T) {
+	const numItems = 500000
+	const numBuckets = 1000
+
+	header := Header{
+		NumBuckets: numBuckets,
+	}
+
+	keys := make([][]byte, numItems)
+	hits := make([]int, numBuckets)
+	for i := range keys {
+		var buf [16]byte
+		n, _ := rand.Read(buf[:])
+		keys[i] = buf[:n]
+	}
+
+	// Bounds check and count hits.
+	for _, key := range keys {
+		idx := header.BucketHash(key)
+		require.True(t, idx < numBuckets)
+		hits[idx]++
+	}
+
+	// Calculate standard deviation.
+	mean := float64(numItems) / float64(numBuckets)
+	var cumVariance float64
+	for _, bucketHits := range hits {
+		delta := float64(bucketHits) - mean
+		cumVariance += (delta * delta)
+	}
+	variance := cumVariance / float64(len(hits))
+	stddev := math.Sqrt(variance)
+	t.Logf("mean % 12.2f", mean)
+	normStddev := stddev / mean
+	t.Logf("stddev % 10.2f", stddev)
+	t.Logf("1σ / mean % 7.2f%%", 100*normStddev)
+
+	const failNormStddev = 0.08
+	if normStddev > failNormStddev {
+		t.Logf("FAIL: > %f%%", 100*failNormStddev)
+		t.Fail()
+	} else {
+		t.Logf(" OK: <= %f%%", 100*failNormStddev)
+	}
+
+	// Print percentiles.
+	sort.Ints(hits)
+	t.Logf("min % 10d", hits[0])
+	t.Logf("p01 % 10d", hits[int(math.Round(0.01*float64(len(hits))))])
+	t.Logf("p05 % 10d", hits[int(math.Round(0.05*float64(len(hits))))])
+	t.Logf("p10 % 10d", hits[int(math.Round(0.10*float64(len(hits))))])
+	t.Logf("p50 % 10d", hits[int(math.Round(0.50*float64(len(hits))))])
+	t.Logf("p90 % 10d", hits[int(math.Round(0.90*float64(len(hits))))])
+	t.Logf("p95 % 10d", hits[int(math.Round(0.95*float64(len(hits))))])
+	t.Logf("p99 % 10d", hits[int(math.Round(0.99*float64(len(hits))))])
+	t.Logf("max % 10d", hits[len(hits)-1])
+}
diff --git a/deprecated/compactindex36/fallocate_fake.go b/deprecated/compactindex36/fallocate_fake.go
new file mode 100644
index 00000000..434ca8b8
--- /dev/null
+++ b/deprecated/compactindex36/fallocate_fake.go
@@ -0,0 +1,27 @@
+package compactindex36
+
+import (
+	"fmt"
+	"os"
+)
+
+func fake_fallocate(f *os.File, offset int64, size int64) error {
+	const blockSize = 4096
+	var zero [blockSize]byte
+
+	for size > 0 {
+		step := size
+		if step > blockSize {
+			step = blockSize
+		}
+
+		if _, err := f.Write(zero[:step]); err != nil {
+			return fmt.Errorf("failure while generic fallocate: %w", err)
+		}
+
+		offset += step
+		size -= step
+	}
+
+	return nil
+}
diff --git a/deprecated/compactindex36/fallocate_generic.go b/deprecated/compactindex36/fallocate_generic.go
new file mode 100644
index 00000000..6b4a0210
--- /dev/null
+++ b/deprecated/compactindex36/fallocate_generic.go
@@ -0,0 +1,11 @@
+//go:build !linux
+
+package compactindex36
+
+import (
+	"os"
+)
+
+func fallocate(f *os.File, offset int64, size int64) error {
+	return fake_fallocate(f, offset, size)
+}
diff --git a/deprecated/compactindex36/fallocate_linux.go b/deprecated/compactindex36/fallocate_linux.go
new file mode 100644
index 00000000..eeebd9bb
--- /dev/null
+++ b/deprecated/compactindex36/fallocate_linux.go
@@ -0,0 +1,17 @@
+//go:build linux
+
+package compactindex36
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+func fallocate(f *os.File, offset int64, size int64) error {
+	err := syscall.Fallocate(int(f.Fd()), 0, offset, size)
+	if err != nil {
+		return fmt.Errorf("failure while linux fallocate: %w", err)
+	}
+	return nil
+}
diff --git a/deprecated/compactindex36/query.go b/deprecated/compactindex36/query.go
new file mode 100644
index 00000000..c8e06a5c
--- /dev/null
+++ b/deprecated/compactindex36/query.go
@@ -0,0 +1,219 @@
+package compactindex36
+
+// This is a fork of the original project at https://github.com/firedancer-io/radiance/tree/main/pkg/compactindex
+// The following changes have been made:
+// - The package has been renamed to `compactindex36` to avoid conflicts with the original package
+// - The values it indexes are 36-byte values instead of 8-byte values. This allows indexing CIDs (in particular sha256+CBOR CIDs) directly.
+
+import (
+	"errors"
+	"fmt"
+	"io"
+)
+
+// DB is a compactindex handle.
+type DB struct {
+	Header
+	Stream   io.ReaderAt
+	prefetch bool
+}
+
+// Open returns a handle to access a compactindex.
+//
+// The provided stream must start with the Magic byte sequence.
+// Tip: Use io.NewSectionReader to create aligned substreams when dealing with a file that contains multiple indexes.
+func Open(stream io.ReaderAt) (*DB, error) {
+	// Read the static 32-byte header.
+	// Ignore errors if the read fails after filling the buffer (e.g. EOF).
+	var fileHeader [headerSize]byte
+	n, readErr := stream.ReadAt(fileHeader[:], 0)
+	if n < len(fileHeader) {
+		// ReadAt must return non-nil error here.
+ return nil, readErr + } + db := new(DB) + if err := db.Header.Load(&fileHeader); err != nil { + return nil, err + } + db.Stream = stream + return db, nil +} + +func (db *DB) Prefetch(yes bool) { + db.prefetch = yes +} + +// Lookup queries for a key in the index and returns the value (offset), if any. +// +// Returns ErrNotFound if the key is unknown. +func (db *DB) Lookup(key []byte) ([36]byte, error) { + bucket, err := db.LookupBucket(key) + if err != nil { + return Empty, err + } + return bucket.Lookup(key) +} + +// LookupBucket returns a handle to the bucket that might contain the given key. +func (db *DB) LookupBucket(key []byte) (*Bucket, error) { + return db.GetBucket(db.Header.BucketHash(key)) +} + +// GetBucket returns a handle to the bucket at the given index. +func (db *DB) GetBucket(i uint) (*Bucket, error) { + if i >= uint(db.Header.NumBuckets) { + return nil, fmt.Errorf("out of bounds bucket index: %d >= %d", i, db.Header.NumBuckets) + } + + // Fill bucket handle. + bucket := &Bucket{ + BucketDescriptor: BucketDescriptor{ + Stride: db.entryStride(), + OffsetWidth: valueLength(), + }, + } + // Read bucket header. + readErr := bucket.BucketHeader.readFrom(db.Stream, i) + if readErr != nil { + return nil, readErr + } + bucket.Entries = io.NewSectionReader(db.Stream, int64(bucket.FileOffset), int64(bucket.NumEntries)*int64(bucket.Stride)) + if db.prefetch { + // TODO: find good value for numEntriesToPrefetch + numEntriesToPrefetch := minInt64(3_000, int64(bucket.NumEntries)) + prefetchSize := (36 + 3) * numEntriesToPrefetch + buf := make([]byte, prefetchSize) + _, err := bucket.Entries.ReadAt(buf, 0) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + } + return bucket, nil +} + +func minInt64(a, b int64) int64 { + if a < b { + return a + } + return b +} + +func (db *DB) entryStride() uint8 { + hashSize := 3 // TODO remove hardcoded constant + offsetSize := valueLength() + return uint8(hashSize) + offsetSize +} + +func bucketOffset(i uint) int64 { + return headerSize + int64(i)*bucketHdrLen +} + +func (b *BucketHeader) readFrom(rd io.ReaderAt, i uint) error { + var buf [bucketHdrLen]byte + n, err := rd.ReadAt(buf[:], bucketOffset(i)) + if n < len(buf) { + return err + } + b.Load(&buf) + return nil +} + +func (b *BucketHeader) writeTo(wr io.WriterAt, i uint) error { + var buf [bucketHdrLen]byte + b.Store(&buf) + _, err := wr.WriteAt(buf[:], bucketOffset(i)) + return err +} + +// Bucket is a database handle pointing to a subset of the index. +type Bucket struct { + BucketDescriptor + Entries *io.SectionReader +} + +// maxEntriesPerBucket is the hardcoded maximum permitted number of entries per bucket. +const maxEntriesPerBucket = 1 << 24 // (16 * stride) MiB + +// targetEntriesPerBucket is the average number of records in each hashtable bucket we aim for. +const targetEntriesPerBucket = 10000 + +// Load retrieves all entries in the hashtable. +func (b *Bucket) Load(batchSize int) ([]Entry, error) { + if batchSize <= 0 { + batchSize = 512 // default to reasonable batch size + } + // TODO bounds check + if b.NumEntries > maxEntriesPerBucket { + return nil, fmt.Errorf("refusing to load bucket with %d entries", b.NumEntries) + } + entries := make([]Entry, 0, b.NumEntries) + + stride := int(b.Stride) + buf := make([]byte, batchSize*stride) + off := int64(0) + for { + // Read another chunk. + n, err := b.Entries.ReadAt(buf, off) + // Decode all entries in it. 
+ sub := buf[:n] + for len(sub) >= stride { + entries = append(entries, b.unmarshalEntry(sub)) + sub = sub[stride:] + off += int64(stride) + } + // Handle error. + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + break + } else if err != nil { + return nil, err + } + } + + return entries, nil +} + +// TODO: This binary search algo is not optimized for high-latency remotes yet. + +// Lookup queries for a key using binary search. +func (b *Bucket) Lookup(key []byte) ([36]byte, error) { + return b.binarySearch(b.Hash(key)) +} + +var Empty [36]byte + +func (b *Bucket) binarySearch(target uint64) ([36]byte, error) { + low := 0 + high := int(b.NumEntries) + return searchEytzinger(low, high, target, b.loadEntry) +} + +func (b *Bucket) loadEntry(i int) (Entry, error) { + off := int64(i) * int64(b.Stride) + buf := make([]byte, b.Stride) + n, err := b.Entries.ReadAt(buf, off) + if n != len(buf) { + return Entry{}, err + } + return b.unmarshalEntry(buf), nil +} + +// ErrNotFound marks a missing entry. +var ErrNotFound = errors.New("not found") + +func searchEytzinger(min int, max int, x uint64, getter func(int) (Entry, error)) ([36]byte, error) { + var index int + for index < max { + k, err := getter(index) + if err != nil { + return Empty, err + } + if k.Hash == x { + return k.Value, nil + } + index = index<<1 | 1 + if k.Hash < x { + index++ + } + } + return Empty, ErrNotFound +} diff --git a/deprecated/compactindex36/query_test.go b/deprecated/compactindex36/query_test.go new file mode 100644 index 00000000..64efd84d --- /dev/null +++ b/deprecated/compactindex36/query_test.go @@ -0,0 +1,58 @@ +package compactindex36 + +import ( + "bytes" + "errors" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type failReader struct{ err error } + +func (rd failReader) ReadAt([]byte, int64) (int, error) { + return 0, rd.err +} + +func TestOpen_ReadFail(t *testing.T) { + err := errors.New("oh no!") + db, dbErr := Open(failReader{err}) + require.Nil(t, db) + require.Same(t, err, dbErr) +} + +func TestOpen_InvalidMagic(t *testing.T) { + var buf [32]byte + rand.Read(buf[:]) + buf[1] = '.' 
// make test deterministic + + db, dbErr := Open(bytes.NewReader(buf[:])) + require.Nil(t, db) + require.EqualError(t, dbErr, "not a radiance compactindex file") +} + +func TestOpen_HeaderOnly(t *testing.T) { + buf := [32]byte{ + // Magic + 'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x', + // FileSize + 0x37, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // NumBuckets + 0x42, 0x00, 0x00, 0x00, + // Padding + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + } + + db, dbErr := Open(bytes.NewReader(buf[:])) + require.NotNil(t, db) + require.NoError(t, dbErr) + + assert.NotNil(t, db.Stream) + assert.Equal(t, Header{ + FileSize: 0x1337, + NumBuckets: 0x42, + }, db.Header) +} diff --git a/indexes/deprecated-index-cid-to-offset.go b/indexes/deprecated-index-cid-to-offset.go new file mode 100644 index 00000000..2e18602a --- /dev/null +++ b/indexes/deprecated-index-cid-to-offset.go @@ -0,0 +1,76 @@ +package indexes + +import ( + "fmt" + "io" + "os" + + "github.com/ipfs/go-cid" + "github.com/rpcpool/yellowstone-faithful/deprecated/compactindex" +) + +type Deprecated_CidToOffset_Reader struct { + file io.Closer + index *compactindex.DB +} + +func Deprecated_Open_CidToOffset(file string) (*Deprecated_CidToOffset_Reader, error) { + is, err := IsFileOldFormatByPath(file) + if err != nil { + return nil, err + } + if !is { + return nil, fmt.Errorf("not old format") + } + reader, err := os.Open(file) + if err != nil { + return nil, fmt.Errorf("failed to open index file: %w", err) + } + return Deprecated_OpenWithReader_CidToOffset(reader) +} + +func Deprecated_OpenWithReader_CidToOffset(reader ReaderAtCloser) (*Deprecated_CidToOffset_Reader, error) { + index, err := compactindex.Open(reader) + if err != nil { + return nil, fmt.Errorf("failed to open index: %w", err) + } + // meta, err := getDefaultMetadata(index) + // if err != nil { + // return nil, err + // } + // if !IsValidNetwork(meta.Network) { + // return nil, fmt.Errorf("invalid network") + // } + // if meta.RootCid == cid.Undef { + // return nil, fmt.Errorf("root cid is undefined") + // } + // if err := meta.AssertIndexKind(Kind_CidToOffset); err != nil { + // return nil, err + // } + return &Deprecated_CidToOffset_Reader{ + file: reader, + index: index, + }, nil +} + +// Get returns the offset for the given cid. +func (r *Deprecated_CidToOffset_Reader) Get(cid_ cid.Cid) (uint64, error) { + if cid_ == cid.Undef { + return 0, fmt.Errorf("cid is undefined") + } + key := cid_.Bytes() + return r.index.Lookup(key) +} + +func (r *Deprecated_CidToOffset_Reader) Close() error { + return r.file.Close() +} + +// Meta returns the metadata for the index. 
+func (r *Deprecated_CidToOffset_Reader) Meta() *Metadata { + return nil +} + +func (r *Deprecated_CidToOffset_Reader) Prefetch(b bool) { + r.index.Prefetch(b) +} diff --git a/indexes/deprecated.go b/indexes/deprecated.go new file mode 100644 index 00000000..2462ea03 --- /dev/null +++ b/indexes/deprecated.go @@ -0,0 +1,45 @@ +package indexes + +import ( + "fmt" + "io" + "os" + + "github.com/rpcpool/yellowstone-faithful/compactindexsized" +) + +var oldMagic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} + +func IsOldMagic(magicBytes [8]byte) bool { + return magicBytes == oldMagic +} + +func IsFileOldFormatByPath(path string) (bool, error) { + file, err := os.Open(path) + if err != nil { + return false, fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + return IsFileOldFormat(file) +} + +func IsFileOldFormat(file io.ReaderAt) (bool, error) { + var magic [8]byte + if _, err := file.ReadAt(magic[:], 0); err != nil { + return false, fmt.Errorf("failed to read magic: %w", err) + } + return IsOldMagic(magic), nil +} + +func IsFileNewFormat(path string) (bool, error) { + file, err := os.Open(path) + if err != nil { + return false, fmt.Errorf("failed to open file: %w", err) + } + defer file.Close() + var magic [8]byte + if _, err := io.ReadFull(file, magic[:]); err != nil { + return false, fmt.Errorf("failed to read magic: %w", err) + } + return magic == compactindexsized.Magic, nil +} From 4aeb564e2c65ba78e56a205cc04ee4c10cab02ce Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 19:00:04 +0100 Subject: [PATCH 50/63] chmod +x --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 4f1b7e36..703ee48c 100644 --- a/Makefile +++ b/Makefile @@ -21,10 +21,12 @@ jsonParsed-linux: build-rust-wrapper compile: @echo "\nCompiling faithful-cli binary for current platform ..." go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/faithful-cli . + chmod +x ./bin/faithful-cli compile-all: compile-linux compile-mac compile-windows compile-linux: @echo "\nCompiling faithful-cli binary for linux amd64 ..." GOOS=linux GOARCH=amd64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/linux/amd64/faithful-cli_linux_amd64 . + chmod +x ./bin/linux/amd64/faithful-cli_linux_amd64 compile-mac: @echo "\nCompiling faithful-cli binary for mac amd64 ..." GOOS=darwin GOARCH=amd64 go build -ldflags="$(BASE_LD_FLAGS)" -o ./bin/darwin/amd64/faithful-cli_darwin_amd64 . From 4bd8ca645a6f6ff7f8c98cd4274f5370f8468518 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 19:03:05 +0100 Subject: [PATCH 51/63] Add support for deprecated indexes --- compactindexsized/compactindex.go | 2 +- config.go | 37 +++- epoch.go | 287 ++++++++++++++++++++---------- gsfa/gsfa-read.go | 4 + gsfa/manifest/manifest.go | 28 +-- indexes/index-sig-to-cid.go | 46 ++++- indexes/index-slot-to-cid.go | 46 ++++- indexes/uints.go | 4 +- multiepoch-getBlock.go | 4 +- multiepoch-getTransaction.go | 9 +- storage.go | 2 +- 11 files changed, 347 insertions(+), 122 deletions(-) diff --git a/compactindexsized/compactindex.go b/compactindexsized/compactindex.go index 22be1581..c1555829 100644 --- a/compactindexsized/compactindex.go +++ b/compactindexsized/compactindex.go @@ -96,7 +96,7 @@ import ( ) // Magic are the first eight bytes of an index. 
-var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} +var Magic = [8]byte{'c', 'o', 'm', 'p', 'i', 's', 'z', 'd'} const Version = uint8(1) diff --git a/config.go b/config.go index a6341ee4..68b1ec31 100644 --- a/config.go +++ b/config.go @@ -119,7 +119,10 @@ type Config struct { Indexes struct { CidToOffsetAndSize struct { URI URI `json:"uri" yaml:"uri"` - } `json:"cid_to_offset_and_size" yaml:"cid_to_offset_and_size"` + } `json:"cid_to_offset_and_size" yaml:"cid_to_offset_and_size"` // Latest index version. Includes offset and size. + CidToOffset struct { + URI URI `json:"uri" yaml:"uri"` + } `json:"cid_to_offset" yaml:"cid_to_offset"` // Legacy index, deprecated. Only includes offset. SlotToCid struct { URI URI `json:"uri" yaml:"uri"` } `json:"slot_to_cid" yaml:"slot_to_cid"` @@ -138,6 +141,12 @@ type Config struct { } `json:"genesis" yaml:"genesis"` } +// IsDeprecatedIndexes returns true if the config is using the deprecated indexes version. +func (c *Config) IsDeprecatedIndexes() bool { + // CidToOffsetAndSize is not set and CidToOffset is set. + return c.Indexes.CidToOffsetAndSize.URI.IsZero() && !c.Indexes.CidToOffset.URI.IsZero() +} + func (c *Config) ConfigFilepath() string { return c.originalFilepath } @@ -253,11 +262,24 @@ func (c *Config) Validate() error { } } } - if c.Indexes.CidToOffsetAndSize.URI.IsZero() { - return fmt.Errorf("indexes.cid_to_offset_and_size.uri must be set") + // CidToOffsetAndSize and CidToOffset cannot be both set or both unset. + if !c.Indexes.CidToOffsetAndSize.URI.IsZero() && !c.Indexes.CidToOffset.URI.IsZero() { + return fmt.Errorf("indexes.cid_to_offset_and_size.uri and indexes.cid_to_offset.uri cannot both be set") + } + if c.Indexes.CidToOffsetAndSize.URI.IsZero() && c.Indexes.CidToOffset.URI.IsZero() { + return fmt.Errorf("indexes.cid_to_offset_and_size.uri and indexes.cid_to_offset.uri cannot both be unset") + } + // validate CidToOffsetAndSize URI: + if !c.Indexes.CidToOffsetAndSize.URI.IsZero() { + if err := isSupportedURI(c.Indexes.CidToOffsetAndSize.URI, "indexes.cid_to_offset_and_size.uri"); err != nil { + return err + } } - if err := isSupportedURI(c.Indexes.CidToOffsetAndSize.URI, "indexes.cid_to_offset_and_size.uri"); err != nil { - return err + // validate CidToOffset URI: + if !c.Indexes.CidToOffset.URI.IsZero() { + if err := isSupportedURI(c.Indexes.CidToOffset.URI, "indexes.cid_to_offset.uri"); err != nil { + return err + } } } else { if c.Data.Filecoin == nil { @@ -308,9 +330,12 @@ func (c *Config) Validate() error { { // check that the URIs are valid if isCarMode { - if !c.Indexes.CidToOffsetAndSize.URI.IsValid() { + if !c.Indexes.CidToOffsetAndSize.URI.IsZero() && !c.Indexes.CidToOffsetAndSize.URI.IsValid() { return fmt.Errorf("indexes.cid_to_offset_and_size.uri is invalid") } + if !c.Indexes.CidToOffset.URI.IsZero() && !c.Indexes.CidToOffset.URI.IsValid() { + return fmt.Errorf("indexes.cid_to_offset.uri is invalid") + } if c.Data.Car.FromPieces != nil { if !c.Data.Car.FromPieces.Metadata.URI.IsValid() { return fmt.Errorf("data.car.from_pieces.metadata.uri is invalid") diff --git a/epoch.go b/epoch.go index 1bdd9b4e..cb45dd5e 100644 --- a/epoch.go +++ b/epoch.go @@ -24,6 +24,7 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/libp2p/go-libp2p/core/peer" "github.com/rpcpool/yellowstone-faithful/bucketteer" + deprecatedbucketter "github.com/rpcpool/yellowstone-faithful/deprecated/bucketteer" "github.com/rpcpool/yellowstone-faithful/gsfa" hugecache "github.com/rpcpool/yellowstone-faithful/huge-cache" 
"github.com/rpcpool/yellowstone-faithful/indexes" @@ -43,18 +44,19 @@ type Epoch struct { // genesis: genesis *GenesisContainer // contains indexes and block data for the epoch - lassieFetcher *lassieWrapper - localCarReader *carv2.Reader - remoteCarReader ReaderAtCloser - carHeaderSize uint64 - rootCid cid.Cid - cidToOffsetAndSizeIndex *indexes.CidToOffsetAndSize_Reader - slotToCidIndex *indexes.SlotToCid_Reader - sigToCidIndex *indexes.SigToCid_Reader - sigExists *bucketteer.Reader - gsfaReader *gsfa.GsfaReader - onClose []func() error - allCache *hugecache.Cache + lassieFetcher *lassieWrapper + localCarReader *carv2.Reader + remoteCarReader ReaderAtCloser + carHeaderSize uint64 + rootCid cid.Cid + cidToOffsetAndSizeIndex *indexes.CidToOffsetAndSize_Reader + deprecated_cidToOffsetIndex *indexes.Deprecated_CidToOffset_Reader + slotToCidIndex *indexes.SlotToCid_Reader + sigToCidIndex *indexes.SigToCid_Reader + sigExists SigExistsIndex + gsfaReader *gsfa.GsfaReader + onClose []func() error + allCache *hugecache.Cache } func (r *Epoch) GetCache() *hugecache.Cache { @@ -122,30 +124,52 @@ func NewEpochFromConfig( } } if isCarMode { - // The CAR-mode requires a cid-to-offset index. - cidToOffsetAndSizeIndexFile, err := openIndexStorage( - c.Context, - string(config.Indexes.CidToOffsetAndSize.URI), - DebugMode, - ) - if err != nil { - return nil, fmt.Errorf("failed to open cid-to-offset index file: %w", err) - } - ep.onClose = append(ep.onClose, cidToOffsetAndSizeIndexFile.Close) + if config.IsDeprecatedIndexes() { + // The CAR-mode requires a cid-to-offset index. + cidToOffsetIndexFile, err := openIndexStorage( + c.Context, + string(config.Indexes.CidToOffset.URI), + DebugMode, + ) + if err != nil { + return nil, fmt.Errorf("failed to open cid-to-offset index file: %w", err) + } + ep.onClose = append(ep.onClose, cidToOffsetIndexFile.Close) - cidToOffsetIndex, err := indexes.OpenWithReader_CidToOffsetAndSize(cidToOffsetAndSizeIndexFile) - if err != nil { - return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) - } - if config.Indexes.CidToOffsetAndSize.URI.IsRemoteWeb() { - cidToOffsetIndex.Prefetch(true) - } - ep.cidToOffsetAndSizeIndex = cidToOffsetIndex + cidToOffsetIndex, err := indexes.Deprecated_OpenWithReader_CidToOffset(cidToOffsetIndexFile) + if err != nil { + return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) + } + if config.Indexes.CidToOffsetAndSize.URI.IsRemoteWeb() { + cidToOffsetIndex.Prefetch(true) + } + ep.deprecated_cidToOffsetIndex = cidToOffsetIndex + } else { + // The CAR-mode requires a cid-to-offset index. 
+ cidToOffsetAndSizeIndexFile, err := openIndexStorage( + c.Context, + string(config.Indexes.CidToOffsetAndSize.URI), + DebugMode, + ) + if err != nil { + return nil, fmt.Errorf("failed to open cid-to-offset index file: %w", err) + } + ep.onClose = append(ep.onClose, cidToOffsetAndSizeIndexFile.Close) - if ep.Epoch() != cidToOffsetIndex.Meta().Epoch { - return nil, fmt.Errorf("epoch mismatch in cid-to-offset-and-size index: expected %d, got %d", ep.Epoch(), cidToOffsetIndex.Meta().Epoch) + cidToOffsetAndSizeIndex, err := indexes.OpenWithReader_CidToOffsetAndSize(cidToOffsetAndSizeIndexFile) + if err != nil { + return nil, fmt.Errorf("failed to open cid-to-offset index: %w", err) + } + if config.Indexes.CidToOffsetAndSize.URI.IsRemoteWeb() { + cidToOffsetAndSizeIndex.Prefetch(true) + } + ep.cidToOffsetAndSizeIndex = cidToOffsetAndSizeIndex + + if ep.Epoch() != cidToOffsetAndSizeIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in cid-to-offset-and-size index: expected %d, got %d", ep.Epoch(), cidToOffsetAndSizeIndex.Meta().Epoch) + } + lastRootCid = cidToOffsetAndSizeIndex.Meta().RootCid } - lastRootCid = cidToOffsetIndex.Meta().RootCid } { @@ -168,13 +192,15 @@ func NewEpochFromConfig( } ep.slotToCidIndex = slotToCidIndex - if ep.Epoch() != slotToCidIndex.Meta().Epoch { - return nil, fmt.Errorf("epoch mismatch in slot-to-cid index: expected %d, got %d", ep.Epoch(), slotToCidIndex.Meta().Epoch) - } - if lastRootCid != cid.Undef && !lastRootCid.Equals(slotToCidIndex.Meta().RootCid) { - return nil, fmt.Errorf("root CID mismatch in slot-to-cid index: expected %s, got %s", lastRootCid, slotToCidIndex.Meta().RootCid) + if !slotToCidIndex.IsDeprecatedOldVersion() { + if ep.Epoch() != slotToCidIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in slot-to-cid index: expected %d, got %d", ep.Epoch(), slotToCidIndex.Meta().Epoch) + } + if lastRootCid != cid.Undef && !lastRootCid.Equals(slotToCidIndex.Meta().RootCid) { + return nil, fmt.Errorf("root CID mismatch in slot-to-cid index: expected %s, got %s", lastRootCid, slotToCidIndex.Meta().RootCid) + } + lastRootCid = slotToCidIndex.Meta().RootCid } - lastRootCid = slotToCidIndex.Meta().RootCid } { @@ -197,12 +223,13 @@ func NewEpochFromConfig( } ep.sigToCidIndex = sigToCidIndex - if ep.Epoch() != sigToCidIndex.Meta().Epoch { - return nil, fmt.Errorf("epoch mismatch in sig-to-cid index: expected %d, got %d", ep.Epoch(), sigToCidIndex.Meta().Epoch) - } - - if !lastRootCid.Equals(sigToCidIndex.Meta().RootCid) { - return nil, fmt.Errorf("root CID mismatch in sig-to-cid index: expected %s, got %s", lastRootCid, sigToCidIndex.Meta().RootCid) + if !sigToCidIndex.IsDeprecatedOldVersion() { + if ep.Epoch() != sigToCidIndex.Meta().Epoch { + return nil, fmt.Errorf("epoch mismatch in sig-to-cid index: expected %d, got %d", ep.Epoch(), sigToCidIndex.Meta().Epoch) + } + if !lastRootCid.Equals(sigToCidIndex.Meta().RootCid) { + return nil, fmt.Errorf("root CID mismatch in sig-to-cid index: expected %s, got %s", lastRootCid, sigToCidIndex.Meta().RootCid) + } } } @@ -215,20 +242,22 @@ func NewEpochFromConfig( ep.onClose = append(ep.onClose, gsfaIndex.Close) ep.gsfaReader = gsfaIndex - gotIndexEpoch, ok := gsfaIndex.Meta().GetUint64(indexmeta.MetadataKey_Epoch) - if !ok { - return nil, fmt.Errorf("the gsfa index does not have the epoch metadata") - } - if ep.Epoch() != gotIndexEpoch { - return nil, fmt.Errorf("epoch mismatch in gsfa index: expected %d, got %d", ep.Epoch(), gotIndexEpoch) - } - - gotRootCid, ok := 
gsfaIndex.Meta().GetCid(indexmeta.MetadataKey_RootCid) - if !ok { - return nil, fmt.Errorf("the gsfa index does not have the root CID metadata") - } - if !lastRootCid.Equals(gotRootCid) { - return nil, fmt.Errorf("root CID mismatch in gsfa index: expected %s, got %s", lastRootCid, gotRootCid) + if gsfaIndex.Version() >= 2 { + gotIndexEpoch, ok := gsfaIndex.Meta().GetUint64(indexmeta.MetadataKey_Epoch) + if !ok { + return nil, fmt.Errorf("the gsfa index does not have the epoch metadata") + } + if ep.Epoch() != gotIndexEpoch { + return nil, fmt.Errorf("epoch mismatch in gsfa index: expected %d, got %d", ep.Epoch(), gotIndexEpoch) + } + + gotRootCid, ok := gsfaIndex.Meta().GetCid(indexmeta.MetadataKey_RootCid) + if !ok { + return nil, fmt.Errorf("the gsfa index does not have the root CID metadata") + } + if !lastRootCid.Equals(gotRootCid) { + return nil, fmt.Errorf("root CID mismatch in gsfa index: expected %s, got %s", lastRootCid, gotRootCid) + } } } } @@ -380,36 +409,53 @@ func NewEpochFromConfig( } ep.onClose = append(ep.onClose, sigExistsFile.Close) - sigExists, err := bucketteer.NewReader(sigExistsFile) - if err != nil { - return nil, fmt.Errorf("failed to open sig-exists index: %w", err) - } - ep.onClose = append(ep.onClose, sigExists.Close) + if config.IsDeprecatedIndexes() { + sigExists, err := deprecatedbucketter.NewReader(sigExistsFile) + if err != nil { + return nil, fmt.Errorf("failed to open sig-exists index: %w", err) + } + ep.onClose = append(ep.onClose, sigExists.Close) - { - // warm up the cache - for i := 0; i < 100_000; i++ { - sigExists.Has(newRandomSignature()) + { + // warm up the cache + for i := 0; i < 100_000; i++ { + sigExists.Has(newRandomSignature()) + } } - } - ep.sigExists = sigExists + ep.sigExists = sigExists + } else { + sigExists, err := bucketteer.NewReader(sigExistsFile) + if err != nil { + return nil, fmt.Errorf("failed to open sig-exists index: %w", err) + } + ep.onClose = append(ep.onClose, sigExists.Close) - gotEpoch, ok := sigExists.Meta().GetUint64(indexmeta.MetadataKey_Epoch) - if !ok { - return nil, fmt.Errorf("the sig-exists index does not have the epoch metadata") - } - if ep.Epoch() != gotEpoch { - return nil, fmt.Errorf("epoch mismatch in sig-exists index: expected %d, got %d", ep.Epoch(), gotEpoch) - } + { + // warm up the cache + for i := 0; i < 100_000; i++ { + sigExists.Has(newRandomSignature()) + } + } - gotRootCid, ok := sigExists.Meta().GetCid(indexmeta.MetadataKey_RootCid) - if !ok { - return nil, fmt.Errorf("the sig-exists index does not have the root CID metadata") - } + ep.sigExists = sigExists + + gotEpoch, ok := sigExists.Meta().GetUint64(indexmeta.MetadataKey_Epoch) + if !ok { + return nil, fmt.Errorf("the sig-exists index does not have the epoch metadata") + } + if ep.Epoch() != gotEpoch { + return nil, fmt.Errorf("epoch mismatch in sig-exists index: expected %d, got %d", ep.Epoch(), gotEpoch) + } + + gotRootCid, ok := sigExists.Meta().GetCid(indexmeta.MetadataKey_RootCid) + if !ok { + return nil, fmt.Errorf("the sig-exists index does not have the root CID metadata") + } - if !lastRootCid.Equals(gotRootCid) { - return nil, fmt.Errorf("root CID mismatch in sig-exists index: expected %s, got %s", lastRootCid, gotRootCid) + if !lastRootCid.Equals(gotRootCid) { + return nil, fmt.Errorf("root CID mismatch in sig-exists index: expected %s, got %s", lastRootCid, gotRootCid) + } } } @@ -552,14 +598,14 @@ func (s *Epoch) GetNodeByCid(ctx context.Context, wantedCid cid.Cid) ([]byte, er klog.Errorf("failed to get node from lassie: %v", err) 
		return nil, err
 	}
-	// Find CAR file offset for CID in index.
-	offset, err := s.FindOffsetFromCid(ctx, wantedCid)
+	// Find CAR file offset and size for CID in index.
+	oas, err := s.FindOffsetAndSizeFromCid(ctx, wantedCid)
 	if err != nil {
 		klog.Errorf("failed to find offset for CID %s: %v", wantedCid, err)
 		// not found or error
 		return nil, err
 	}
-	return s.GetNodeByOffset(ctx, wantedCid, offset)
+	return s.GetNodeByOffsetAndSize(ctx, wantedCid, oas)
 }
 
 func (s *Epoch) ReadAtFromCar(ctx context.Context, offset uint64, length uint64) ([]byte, error) {
@@ -586,7 +632,7 @@ func (s *Epoch) ReadAtFromCar(ctx context.Context, offset uint64, length uint64)
 	return data, nil
 }
 
-func (s *Epoch) GetNodeByOffset(ctx context.Context, wantedCid cid.Cid, offsetAndSize *indexes.OffsetAndSize) ([]byte, error) {
+func (s *Epoch) GetNodeByOffsetAndSize(ctx context.Context, wantedCid cid.Cid, offsetAndSize *indexes.OffsetAndSize) ([]byte, error) {
 	if offsetAndSize == nil {
 		return nil, fmt.Errorf("offsetAndSize must not be nil")
 	}
@@ -600,7 +646,7 @@ func (s *Epoch) GetNodeByOffsetAn
 	if s.remoteCarReader == nil {
 		return nil, fmt.Errorf("no CAR reader available")
 	}
-	return readNodeFromReaderAt(s.remoteCarReader, wantedCid, offset, length)
+	return readNodeFromReaderAtWithOffsetAndSize(s.remoteCarReader, wantedCid, offset, length)
 }
 	// Get reader and seek to offset, then read node.
 	dr, err := s.localCarReader.DataReader()
@@ -614,6 +660,39 @@
+func (s *Epoch) getNodeSize(ctx context.Context, offset uint64) (uint64, error) {
+	if s.localCarReader == nil {
+		// try remote reader
+		if s.remoteCarReader == nil {
+			return 0, fmt.Errorf("no CAR reader available")
+		}
+		return readNodeSizeFromReaderAtWithOffset(s.remoteCarReader, offset)
+	}
+	// Get reader and seek to offset, then read the node size.
+ dr, err := s.localCarReader.DataReader() + if err != nil { + klog.Errorf("failed to get data reader: %v", err) + return 0, err + } + return readNodeSizeFromReaderAtWithOffset(dr, offset) +} + +func readNodeSizeFromReaderAtWithOffset(reader io.ReaderAt, offset uint64) (uint64, error) { + // read MaxVarintLen64 bytes + lenBuf := make([]byte, binary.MaxVarintLen64) + _, err := reader.ReadAt(lenBuf, int64(offset)) + if err != nil { + return 0, err + } + // read uvarint + dataLen, n := binary.Uvarint(lenBuf) + dataLen += uint64(n) + if dataLen > uint64(util.MaxAllowedSectionSize) { // Don't OOM + return 0, errors.New("malformed car; header is bigger than util.MaxAllowedSectionSize") + } + return dataLen, nil +} + func readNodeWithKnownSize(br *bufio.Reader, wantedCid cid.Cid, length uint64) ([]byte, error) { section := make([]byte, length) _, err := io.ReadFull(br, section) @@ -674,10 +753,14 @@ func (ser *Epoch) FindCidFromSignature(ctx context.Context, sig solana.Signature return ser.sigToCidIndex.Get(sig) } -func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (os *indexes.OffsetAndSize, e error) { +func (ser *Epoch) FindOffsetAndSizeFromCid(ctx context.Context, cid cid.Cid) (os *indexes.OffsetAndSize, e error) { startedAt := time.Now() defer func() { - klog.Infof("Found offset and size for CID %s in %s: o=%d s=%d", cid, time.Since(startedAt), os.Offset, os.Size) + if os != nil { + klog.Infof("Found offset and size for CID %s in %s: o=%d s=%d", cid, time.Since(startedAt), os.Offset, os.Size) + } else { + klog.Infof("Offset and size for CID %s in %s: not found", cid, time.Since(startedAt)) + } }() // try from cache @@ -686,6 +769,30 @@ func (ser *Epoch) FindOffsetFromCid(ctx context.Context, cid cid.Cid) (os *index } else if has { return osi, nil } + + if ser.config.IsDeprecatedIndexes() { + offset, err := ser.deprecated_cidToOffsetIndex.Get(cid) + if err != nil { + return nil, err + } + + klog.Infof("Found offset for CID %s in %s: %d", cid, time.Since(startedAt), offset) + + size, err := ser.getNodeSize(ctx, offset) + if err != nil { + return nil, err + } + + klog.Infof("Found size for CID %s in %s: %d", cid, time.Since(startedAt), size) + + found := &indexes.OffsetAndSize{ + Offset: offset, + Size: size, + } + ser.GetCache().PutCidToOffsetAndSize(cid, found) + return found, nil + } + found, err := ser.cidToOffsetAndSizeIndex.Get(cid) if err != nil { return nil, err diff --git a/gsfa/gsfa-read.go b/gsfa/gsfa-read.go index ebf009cd..40c1ced8 100644 --- a/gsfa/gsfa-read.go +++ b/gsfa/gsfa-read.go @@ -100,6 +100,10 @@ func (index *GsfaReader) Meta() indexmeta.Meta { return index.man.Meta() } +func (index *GsfaReader) Version() uint64 { + return index.man.Version() +} + func (index *GsfaReader) Get( ctx context.Context, pk solana.PublicKey, diff --git a/gsfa/manifest/manifest.go b/gsfa/manifest/manifest.go index f7ded229..bfc6f2a4 100644 --- a/gsfa/manifest/manifest.go +++ b/gsfa/manifest/manifest.go @@ -60,17 +60,19 @@ func readHeader(file *os.File) (*Header, error) { if err != nil { return nil, err } - var meta indexmeta.Meta - err = meta.UnmarshalWithDecoder(bufio.NewReader(file)) - if err != nil { - return nil, err + header := &Header{ + version: version, + } + if version >= 2 { + var meta indexmeta.Meta + err = meta.UnmarshalWithDecoder(bufio.NewReader(file)) + if err != nil { + return nil, err + } + header.metaByteSize = int64(len(meta.Bytes())) + header.meta = meta } - metaByteSize := len(meta.Bytes()) - return &Header{ - version: version, - metaByteSize: 
int64(metaByteSize), - meta: meta, - }, nil + return header, nil } func writeHeader(file *os.File, meta indexmeta.Meta, version uint64) error { @@ -122,7 +124,7 @@ func NewManifest(filename string, meta indexmeta.Meta) (*Manifest, error) { if err != nil { return nil, err } - if header.Version() != _Version { + if header.Version() != _Version && header.Version() != 1 { return nil, fmt.Errorf("unsupported manifest version: %d", header.Version()) } man.header = header @@ -192,6 +194,10 @@ func (m *Manifest) getContentLength() (int64, error) { return currentFileSize - int64(headerLenWithoutMeta) - m.header.metaByteSize, nil } +func (m *Manifest) Version() uint64 { + return m.header.version +} + // Put appends the given uint64 tuple to the file. func (m *Manifest) Put(key, value uint64) error { m.mu.Lock() diff --git a/indexes/index-sig-to-cid.go b/indexes/index-sig-to-cid.go index 226aaccb..69d7558c 100644 --- a/indexes/index-sig-to-cid.go +++ b/indexes/index-sig-to-cid.go @@ -10,6 +10,7 @@ import ( "github.com/gagliardetto/solana-go" "github.com/ipfs/go-cid" "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/deprecated/compactindex36" ) type SigToCid_Writer struct { @@ -121,9 +122,10 @@ func (w *SigToCid_Writer) GetFilepath() string { } type SigToCid_Reader struct { - file io.Closer - meta *Metadata - index *compactindexsized.DB + file io.Closer + meta *Metadata + index *compactindexsized.DB + deprecatedIndex *compactindex36.DB } func Open_SigToCid(filepath string) (*SigToCid_Reader, error) { @@ -140,6 +142,13 @@ type ReaderAtCloser interface { } func OpenWithReader_SigToCid(reader ReaderAtCloser) (*SigToCid_Reader, error) { + isOld, err := IsFileOldFormat(reader) + if err != nil { + return nil, err + } + if isOld { + return OpenWithReader_SigToCid_Deprecated(reader) + } index, err := compactindexsized.Open(reader) if err != nil { return nil, err @@ -164,10 +173,37 @@ func OpenWithReader_SigToCid(reader ReaderAtCloser) (*SigToCid_Reader, error) { }, nil } +func OpenWithReader_SigToCid_Deprecated(reader ReaderAtCloser) (*SigToCid_Reader, error) { + index, err := compactindex36.Open(reader) + if err != nil { + return nil, err + } + return &SigToCid_Reader{ + file: reader, + deprecatedIndex: index, + }, nil +} + +func (r *SigToCid_Reader) IsDeprecatedOldVersion() bool { + return r.deprecatedIndex != nil +} + func (r *SigToCid_Reader) Get(sig solana.Signature) (cid.Cid, error) { if sig.IsZero() { return cid.Undef, fmt.Errorf("sig is undefined") } + if r.IsDeprecatedOldVersion() { + key := sig[:] + value, err := r.deprecatedIndex.Lookup(key) + if err != nil { + return cid.Undef, err + } + _, c, err := cid.CidFromBytes(value[:]) + if err != nil { + return cid.Undef, err + } + return c, nil + } key := sig[:] value, err := r.index.Lookup(key) if err != nil { @@ -190,5 +226,9 @@ func (r *SigToCid_Reader) Meta() *Metadata { } func (r *SigToCid_Reader) Prefetch(b bool) { + if r.IsDeprecatedOldVersion() { + r.deprecatedIndex.Prefetch(b) + return + } r.index.Prefetch(b) } diff --git a/indexes/index-slot-to-cid.go b/indexes/index-slot-to-cid.go index 14088a8b..59f9cc44 100644 --- a/indexes/index-slot-to-cid.go +++ b/indexes/index-slot-to-cid.go @@ -9,6 +9,7 @@ import ( "github.com/ipfs/go-cid" "github.com/rpcpool/yellowstone-faithful/compactindexsized" + "github.com/rpcpool/yellowstone-faithful/deprecated/compactindex36" ) type SlotToCid_Writer struct { @@ -120,9 +121,10 @@ func (w *SlotToCid_Writer) GetFilepath() string { } type SlotToCid_Reader struct { 
- file io.Closer - meta *Metadata - index *compactindexsized.DB + file io.Closer + meta *Metadata + index *compactindexsized.DB + deprecatedIndex *compactindex36.DB } func Open_SlotToCid(filepath string) (*SlotToCid_Reader, error) { @@ -134,6 +136,13 @@ func Open_SlotToCid(filepath string) (*SlotToCid_Reader, error) { } func OpenWithReader_SlotToCid(reader ReaderAtCloser) (*SlotToCid_Reader, error) { + isOld, err := IsFileOldFormat(reader) + if err != nil { + return nil, err + } + if isOld { + return OpenWithReader_SlotToCid_Deprecated(reader) + } index, err := compactindexsized.Open(reader) if err != nil { return nil, err @@ -158,7 +167,34 @@ func OpenWithReader_SlotToCid(reader ReaderAtCloser) (*SlotToCid_Reader, error) }, nil } +func OpenWithReader_SlotToCid_Deprecated(reader ReaderAtCloser) (*SlotToCid_Reader, error) { + index, err := compactindex36.Open(reader) + if err != nil { + return nil, err + } + return &SlotToCid_Reader{ + file: reader, + deprecatedIndex: index, + }, nil +} + +func (r *SlotToCid_Reader) IsDeprecatedOldVersion() bool { + return r.deprecatedIndex != nil +} + func (r *SlotToCid_Reader) Get(slot uint64) (cid.Cid, error) { + if r.IsDeprecatedOldVersion() { + key := uint64tob(slot) + value, err := r.deprecatedIndex.Lookup(key) + if err != nil { + return cid.Undef, err + } + _, c, err := cid.CidFromBytes(value[:]) + if err != nil { + return cid.Undef, err + } + return c, nil + } key := uint64tob(slot) value, err := r.index.Lookup(key) if err != nil { @@ -181,5 +217,9 @@ func (r *SlotToCid_Reader) Meta() *Metadata { } func (r *SlotToCid_Reader) Prefetch(b bool) { + if r.IsDeprecatedOldVersion() { + r.deprecatedIndex.Prefetch(b) + return + } r.index.Prefetch(b) } diff --git a/indexes/uints.go b/indexes/uints.go index f98682c5..b9844b34 100644 --- a/indexes/uints.go +++ b/indexes/uints.go @@ -57,14 +57,14 @@ func btoUint48(buf []byte) uint64 { return binary.LittleEndian.Uint64(cloneAndPad(buf, 2)) } -// uint64tob converts a uint64 to an 8-byte slice. +// uint64tob converts a uint64 to an 8-byte little-endian slice. func uint64tob(v uint64) []byte { buf := make([]byte, 8) binary.LittleEndian.PutUint64(buf, v) return buf } -// btoUint64 converts an 8-byte slice to a uint64. +// btoUint64 converts an 8-byte little-endian slice to a uint64. 
func btoUint64(buf []byte) uint64 { _ = buf[7] // bounds check hint to compiler return binary.LittleEndian.Uint64(buf) diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go index 496e3755..6ee05b65 100644 --- a/multiepoch-getBlock.go +++ b/multiepoch-getBlock.go @@ -117,7 +117,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex var blockOffset, parentOffset uint64 wg := new(errgroup.Group) wg.Go(func() (err error) { - offsetAndSize, err := epochHandler.FindOffsetFromCid(ctx, blockCid) + offsetAndSize, err := epochHandler.FindOffsetAndSizeFromCid(ctx, blockCid) if err != nil { return err } @@ -130,7 +130,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex parentOffset = epochHandler.carHeaderSize return nil } - offsetAndSize, err := epochHandler.FindOffsetFromCid(ctx, parentBlockCid) + offsetAndSize, err := epochHandler.FindOffsetAndSizeFromCid(ctx, parentBlockCid) if err != nil { return err } diff --git a/multiepoch-getTransaction.go b/multiepoch-getTransaction.go index 64ffd8f0..4d1c8dc5 100644 --- a/multiepoch-getTransaction.go +++ b/multiepoch-getTransaction.go @@ -8,16 +8,19 @@ import ( "time" "github.com/gagliardetto/solana-go" - "github.com/rpcpool/yellowstone-faithful/bucketteer" "github.com/rpcpool/yellowstone-faithful/compactindexsized" "github.com/sourcegraph/jsonrpc2" "k8s.io/klog/v2" ) -func (multi *MultiEpoch) getAllBucketteers() map[uint64]*bucketteer.Reader { +type SigExistsIndex interface { + Has(sig [64]byte) (bool, error) +} + +func (multi *MultiEpoch) getAllBucketteers() map[uint64]SigExistsIndex { multi.mu.RLock() defer multi.mu.RUnlock() - bucketteers := make(map[uint64]*bucketteer.Reader) + bucketteers := make(map[uint64]SigExistsIndex) for _, epoch := range multi.epochs { if epoch.sigExists != nil { bucketteers[epoch.Epoch()] = epoch.sigExists diff --git a/storage.go b/storage.go index dfd2bd25..28574292 100644 --- a/storage.go +++ b/storage.go @@ -90,7 +90,7 @@ func readSectionFromReaderAt(reader ReaderAtCloser, offset uint64, length uint64 return data, nil } -func readNodeFromReaderAt(reader ReaderAtCloser, wantedCid cid.Cid, offset uint64, length uint64) ([]byte, error) { +func readNodeFromReaderAtWithOffsetAndSize(reader ReaderAtCloser, wantedCid cid.Cid, offset uint64, length uint64) ([]byte, error) { // read MaxVarintLen64 bytes section := make([]byte, length) _, err := reader.ReadAt(section, int64(offset)) From 184ed3299a636b383ac710eaeacb53e6345d4515 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 20:26:29 +0100 Subject: [PATCH 52/63] Add more documentation to README file. --- README.md | 170 +++++++++++++++++++++----- cmd-rpc.go | 1 + cmd-version.go | 3 +- cmd-x-index-all.go | 1 + cmd-x-index.go | 1 + site/filecoin-rpc.markdown | 34 ++++-- site/indexes.markdown | 30 ++--- site/installation.markdown | 4 +- site/old-faithful-net.markdown | 8 +- site/rpc-server.markdown | 2 +- tools/run-rpc-server-local-indexes.sh | 12 +- tools/run-rpc-server-local.sh | 7 +- tools/run-rpc-server-remote.sh | 11 +- 13 files changed, 201 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index 6c76e518..e44d9922 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,115 @@ This repo provides the `faithful-cli` command line interface. 
 This tool allows you to:
 
 - getBlock
 - getTransaction
 - getSignaturesForAddress
+ - getBlockTime
+ - getGenesisHash (for epoch 0)
+ - getFirstAvailableBlock
+ - getSlot
+ - getVersion
+
+## RPC server
+
+The RPC server is available via the `faithful-cli rpc` command.
+
+The command accepts a list of [epoch config files](#epoch-configuration-files) and dirs as arguments. Each config file is specific to an epoch and provides the location of the block/transaction data and the indexes for that epoch. The indexes are used to map Solana block numbers, transaction signatures and addresses to their respective CIDs. The indexes are generated from the CAR file and can be generated via the `faithful-cli index` command (see [Index generation](#index-generation)).
+
+It supports the following flags:
+
+- `--listen`: The address to listen on, e.g. `--listen=:8888`
+- `--include`: You can specify one or more (reuse the same flag multiple times) glob patterns to include files or dirs that match them, e.g. `--include=/path/epoch-*.yml`.
+- `--exclude`: You can specify one or more (reuse the same flag multiple times) glob patterns to exclude files or dirs that match them, e.g. `--exclude=/something-*/epoch-*.yml`.
+- `--debug`: Enable debug logging.
+- `--proxy`: Proxy requests to a downstream RPC server if the data can't be found in the archive, e.g. `--proxy=/path/to/my-rpc.json`. See [RPC server proxying](#rpc-server-proxying) for more details.
+- `--gsfa-only-signatures`: When enabled, the RPC server will only return signatures for getSignaturesForAddress requests instead of the full transaction data.
+- `--watch`: When specified, all the provided epoch files and dirs will be watched for changes and the RPC server will automatically reload the data when changes are detected. Usage: `--watch` (boolean flag). This is useful when you want to provide just a folder and then add new epochs to it without having to restart the server.
+- `--epoch-load-concurrency=2`: How many epochs to load in parallel when starting the RPC server. Defaults to the number of CPUs. This is useful when you have a lot of epochs and want to speed up the initial load time.
+- `--max-cache=<size-in-MB>`: How much memory to use for caching. Defaults to 0 (no limit). This is useful when you want to limit the memory usage of the RPC server.
+
+NOTES:
+
+- By default, the RPC server doesn't support the `jsonParsed` format. You need to build the RPC server with the `make jsonParsed-linux` target to enable this.
+
+## Epoch configuration files
+
+To run a Faithful RPC server you need to specify configuration files for the epoch(s) you want to host. An epoch config file looks like this:
+
+```yml
+epoch: 0 # epoch number (required)
+data: # data section (required)
+  car:
+    # Source the data from a CAR file (car-mode).
+    # The URI can be a local filepath or an HTTP url.
+    # This makes the indexes.cid_to_offset_and_size required.
+    # If you are running in filecoin-mode, you can omit the car section entirely.
+    uri: /media/runner/solana/cars/epoch-0.car
+  filecoin:
+    # filecoin-mode section: source the data directly from filecoin.
+    # If you are running in car-mode, you can omit this section.
+    # if enable=true, then the data will be sourced from filecoin.
+    # if enable=false, then the data will be sourced from a CAR file (see 'car' section above).
+    enable: false
+genesis: # genesis section (required for epoch 0 only)
+  # Local filepath to the genesis tarball.
+  # You can download the genesis tarball from
+  # wget https://api.mainnet-beta.solana.com/genesis.tar.bz2
+  uri: /media/runner/solana/genesis.tar.bz2
+indexes: # indexes section (required)
+  cid_to_offset_and_size:
+    # Required when using a CAR file; you can provide either a local filepath or a HTTP url.
+    # Not used when running in filecoin-mode.
+    uri: '/media/runner/solana/indexes/epoch-0/epoch-0-bafyreifljyxj55v6jycjf2y7tdibwwwqx75eqf5mn2thip2sswyc536zqq-mainnet-cid-to-offset-and-size.index'
+  slot_to_cid:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-0/epoch-0-bafyreifljyxj55v6jycjf2y7tdibwwwqx75eqf5mn2thip2sswyc536zqq-mainnet-slot-to-cid.index'
+  sig_to_cid:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-0/epoch-0-bafyreifljyxj55v6jycjf2y7tdibwwwqx75eqf5mn2thip2sswyc536zqq-mainnet-sig-to-cid.index'
+  sig_exists:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-0/epoch-0-bafyreifljyxj55v6jycjf2y7tdibwwwqx75eqf5mn2thip2sswyc536zqq-mainnet-sig-exists.index'
+  gsfa: # getSignaturesForAddress index
+    # optional; must be a local directory path.
+    uri: '/media/runner/solana/indexes/epoch-0/gsfa/epoch-0-bafyreifljyxj55v6jycjf2y7tdibwwwqx75eqf5mn2thip2sswyc536zqq-gsfa.indexdir'
+```
+
+NOTES:
+
+- The `uri` parameter supports both HTTP URIs as well as file based ones (where not specified otherwise).
+- If you specify an HTTP URI, you need to make sure that the url supports HTTP Range requests. S3 or similar APIs will support this.
+
+## Index generation
+
+To run the old-faithful RPC server you need to generate indexes for the CAR files. You can do this via the `faithful-cli index` command.
+
+- `faithful-cli index all <car-path> <index-dir>`: Generate all **required** indexes for a CAR file.
+- `faithful-cli index gsfa <car-path> <index-dir>`: Generate the gsfa index for a CAR file.
+
+NOTES:
+
+- You need to have the CAR file available locally.
+- The `cid_to_offset_and_size` index has an older version, which you can specify with `cid_to_offset` instead of `cid_to_offset_and_size`.
+
+Flags:
+
+- `--tmp-dir=/path/to/tmp/dir`: Where to store temporary files. Defaults to the system temp dir. (optional)
+- `--verify`: Verify the indexes after generation. (optional)
+- `--network=<network>`: Which network to use for the gsfa index. Defaults to `mainnet` (other options: `testnet`, `devnet`). (optional)
+
+## RPC server proxying
+
+The RPC server provides a proxy mode which allows it to forward traffic it can't serve to a downstream RPC server. To configure this, simply provide the command line argument `--proxy=/path/to/faithful-proxy-config.json` pointing it to a config file. The config file should look like this:
+
+```json
+{
+  "target": "https://api.mainnet-beta.solana.com",
+  "headers": {
+    "My-Header": "My-Value"
+  },
+  "proxyFailedRequests": true
+}
+```
+
+The `proxyFailedRequests` flag will make the RPC server proxy not only RPC methods that it doesn't support, but also retry requests that failed to be served from the archives (e.g. a `getBlock` request that failed to be served from the archives because that epoch is not available).
 
 ### RPC server from old-faithful.net
 
@@ -47,19 +156,15 @@ $ ../tools/download-gsfa.sh 0 ./epoch0
 ```
 
 If you have a local copy of a CAR archive and the indexes, you can run an RPC server servicing data from them.
 For example:
 
-```
-/usr/local/bin/faithful-cli rpc-server-car \
+```bash
+/usr/local/bin/faithful-cli rpc \
     --listen $PORT \
-    epoch-455.car \
-    epoch-455.car.*.cid-to-offset.index \
-    epoch-455.car.*.slot-to-cid.index \
-    epoch-455.car.*.sig-to-cid.index \
-    epoch-455.car-*-gsfa-index
+    /path/to/epoch-455.yml
 ```
 
 You can download the CAR files either via Filecoin or via the bucket provided by Triton. There are helper scripts in the `tools` folder. To download the full epoch data:
 
-```
+```bash
 $ mkdir epoch0
 $ cd epoch0
 $ ../tools/download-epoch.sh 0
@@ -68,7 +173,7 @@ $ ../tools/download-gsfa.sh 0
 ```
 
 Once files are downloaded there are also utility scripts to run the server:
-```
+```bash
 $ ./tools/run-rpc-server-local.sh 0 ./epoch0
 ```
 
@@ -80,15 +185,16 @@ The filecoin RPC server allows provide getBlock, getTransaction and getSignature
 
 You can run it in the following way:
 
-```
-faithful-cli rpc-server-filecoin -config 455.yml
+```bash
+faithful-cli rpc 455.yml
 ```
 
 The config file points faithful to the location of the required indexes (`455.yml`):
-```
+```yml
 indexes:
   slot_to_cid: './epoch-455.car.bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y.slot-to-cid.index'
   sig_to_cid: './epoch-455.car.bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y.sig-to-cid.index'
+  sig_exists: './epoch-455.car.bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y.sig-exists.index'
   gsfa: './epoch-455.car.gsfa.index'
 ```
 
@@ -96,8 +202,8 @@ Due to latency in fetching signatures, typically the getSignaturesForAddress ind
 
 There is a mode in which you can use a remote gSFA index, which limits it to only return signatures and not additional transaction metadata. To enable this mode, run faithful-cli in the following way:
 
-```
-faithful-cli rpc-server-filecoin -config 455.yml -gsfa-only-signatures=true
+```bash
+faithful-cli rpc -gsfa-only-signatures=true 455.yml
 ```
 
 ### Filecoin fetch via CID
 
@@ -110,7 +216,7 @@ The production RPC server is accessible via `faithful-cli rpc`. More documentati
 
 ### Limitations
 
-The testing server (`rpc-server-car` and `rpc-server-filecoin`) only supports single epoch access. The production server supports handling a full set of epochs.
+The (deprecated) testing server (`rpc-server-car` and `rpc-server-filecoin`) only supports single epoch access. The production server supports handling a full set of epochs.
 
 Filecoin retrievals without a CDN can also be slow. We are working on integration with Filecoin CDNs and other caching solutions. Fastest retrievals will happen if you serve from local disk.
 
@@ -127,8 +233,8 @@ Indexes will be needed to map Solana's block numbers, transaction signatures and
 
   - slot-to-cid: Lookup a CID based on a slot number
   - tx-to-cid: Lookup a CID based on a transaction signature
   - gsfa: An index mapping Solana addresses to a list of signatures
-  - cid-to-offset: Index for a specific CAR file, used by the local rpc server (see above) to find CIDs in a car file
-  - sig-exists: An index to speed up lookups for signatures when using multiepoch support in the production server
+  - cid-to-offset-and-size: Index for a specific CAR file, used by the local rpc server (see above) to find CIDs in a car file
+  - sig-exists: An index to speed up lookups for signatures when using multiepoch support in the production server.
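
Taken together, these indexes turn a getBlock call into a short chain of constant-time lookups. The sketch below is a hypothetical, self-contained illustration of that chain; the interface names are stand-ins, not the real API (in this codebase the equivalent steps are performed by helpers such as `FindOffsetAndSizeFromCid` and `readNodeFromReaderAtWithOffsetAndSize`, shown elsewhere in this patch series):

```go
// Package lookupsketch illustrates, with stand-in interfaces (not the real
// API), how the indexes above compose into a block fetch.
package lookupsketch

import (
	"fmt"
	"io"
)

// SlotToCID resolves a slot number to the CID of that slot's block node.
type SlotToCID interface {
	Get(slot uint64) (cid string, err error)
}

// CIDToOffsetAndSize resolves a CID to an exact byte range in the epoch CAR file.
type CIDToOffsetAndSize interface {
	Get(cid string) (offset, size uint64, err error)
}

// FetchBlock performs the two index lookups and then a single ranged read.
func FetchBlock(slots SlotToCID, offsets CIDToOffsetAndSize, car io.ReaderAt, slot uint64) ([]byte, error) {
	c, err := slots.Get(slot)
	if err != nil {
		return nil, fmt.Errorf("slot %d: %w", slot, err)
	}
	offset, size, err := offsets.Get(c)
	if err != nil {
		return nil, fmt.Errorf("cid %s: %w", c, err)
	}
	buf := make([]byte, size)
	if _, err := car.ReadAt(buf, int64(offset)); err != nil {
		return nil, fmt.Errorf("read %d bytes at offset %d: %w", size, offset, err)
	}
	return buf, nil // raw node bytes; callers still verify the CID and decode
}
```

The important property is that the CAR file is never scanned: the two index lookups yield an exact byte range, so a single ranged read suffices. This is also why remote CAR and index files must be hosted somewhere that supports HTTP Range requests.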
 ### Archive access
 
@@ -142,7 +248,7 @@ The data that you will need to be able to run a local RPC server is:
 
  1) the Epoch car file containing all the data for that epoch
  2) the slot-to-cid index for that epoch
  3) the tx-to-cid index for that epoch
- 4) the cid-to-offset index for that epoch car file
+ 4) the cid-to-offset-and-size index for that epoch car file
  5) the sig-exists index for that epoch (optional, but important to speed up multiepoch fetches)
  6) Optionally (if you want to support getSignaturesForAddress): the gsfa index
 
@@ -180,7 +286,7 @@ The data generation flow is illustrated below:
 
 Once you have downloaded rocksdb ledger archives you can run the Radiance tool to generate a car file for an epoch. Make sure you have all the slots available in the rocksdb ledger archive for the epoch. You may need to download multiple ledger snapshots in order to have a full set of slots available. Once you know you have a rocksdb that covers all the slots for the epoch, run the radiance tool as follows:
 
 ```
-radiance car create2 107 --db=46223992/rocksdb --out=/storage/car/epoch-107.car
+radiance car create 107 --db=46223992/rocksdb --out=/storage/car/epoch-107.car
 ```
 
 This will produce a car file called epoch-107.car containing all the blocks and transactions for that epoch.
 
@@ -189,36 +295,36 @@ This will produce a car file called epoch-107.car containing all the blocks and
 
 Once the radiance tooling has been used to prepare a car file (or if you have downloaded a car file externally) you can generate indexes from this car file by using the `faithful-cli`:
 
-```
+```bash
 NAME:
-   faithful index
+   faithful CLI index - Create various kinds of indexes for CAR files.
 
 USAGE:
-   faithful index command [command options] [arguments...]
+   faithful CLI index command [command options] [arguments...]
 
 DESCRIPTION:
    Create various kinds of indexes for CAR files.
 
 COMMANDS:
-   cid-to-offset
-   slot-to-cid
-   sig-to-cid
-   all
-   gsfa
-   sig-exists
+   cid-to-offset
+   slot-to-cid
+   sig-to-cid
+   all          Create all the necessary indexes for a Solana epoch.
+   gsfa
+   sig-exists
    help, h  Shows a list of commands or help for one command
 
 OPTIONS:
   --help, -h  show help
 ```
 
-For example, to generate the three indexes cid-to-offset, slot-to-cid, sig-to-cid, sig-exists you would run:
+For example, to generate the four indexes cid-to-offset-and-size, slot-to-cid, sig-to-cid, sig-exists, you would run:
 
-```
-faithful-cli index all epoch-107.car .
+```bash
+faithful-cli index all epoch-107.car /storage/indexes/epoch-107
 ```
 
-This would generate the indexes in the current dir for epoch-107.
+This would generate the indexes in `/storage/indexes/epoch-107` for epoch-107.
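
The generated indexes can also be consumed programmatically. Below is a minimal sketch, assuming the reader API shown earlier in this patch series (`Open_SlotToCid`, `Get`, `Prefetch`) is exported from the repo's `indexes` package; the import path, index filename and slot value are placeholders:

```go
package main

import (
	"fmt"

	// Assumed import path; the reader API used below (Open_SlotToCid,
	// Prefetch, Get) appears in the indexes diffs earlier in this series.
	"github.com/rpcpool/yellowstone-faithful/indexes"
)

func main() {
	// Placeholder filename: open a slot-to-cid index produced by
	// `faithful-cli index all ...`. Per the reader changes earlier in this
	// series, old-format files are detected via IsFileOldFormat and served
	// transparently by the deprecated compactindex36 reader.
	idx, err := indexes.Open_SlotToCid("/storage/indexes/epoch-107/epoch-107-slot-to-cid.index")
	if err != nil {
		panic(err)
	}
	idx.Prefetch(true) // enable prefetching on the underlying index

	c, err := idx.Get(46_224_000) // approx. first slot of epoch 107 (107 x 432,000)
	if err != nil {
		panic(err)
	}
	fmt.Println("block CID for slot 46224000:", c)
}
```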
 ## Contributing
 
diff --git a/cmd-rpc.go b/cmd-rpc.go
index 3231e00a..4add3088 100644
--- a/cmd-rpc.go
+++ b/cmd-rpc.go
@@ -33,6 +33,7 @@ func newCmd_rpc() *cli.Command {
 	var maxCacheSizeMB int
 	return &cli.Command{
 		Name:        "rpc",
+		Usage:       "Start a Solana JSON RPC server.",
 		Description: "Provide multiple epoch config files, and start a Solana JSON RPC that exposes getTransaction, getBlock, and (optionally) getSignaturesForAddress",
 		ArgsUsage:   "",
 		Before: func(c *cli.Context) error {
diff --git a/cmd-version.go b/cmd-version.go
index ae0edaf6..aa1f68ce 100644
--- a/cmd-version.go
+++ b/cmd-version.go
@@ -10,7 +10,8 @@ import (
 func newCmd_Version() *cli.Command {
 	return &cli.Command{
 		Name:        "version",
-		Description: "Print version information",
+		Usage:       "Print version information of this binary.",
+		Description: "Print version information of this binary.",
 		Before: func(c *cli.Context) error {
 			return nil
 		},
diff --git a/cmd-x-index-all.go b/cmd-x-index-all.go
index 1e6c0e59..5a21e9cf 100644
--- a/cmd-x-index-all.go
+++ b/cmd-x-index-all.go
@@ -28,6 +28,7 @@ func newCmd_Index_all() *cli.Command {
 	var network indexes.Network
 	return &cli.Command{
 		Name:        "all",
+		Usage:       "Create all the necessary indexes for a Solana epoch.",
 		Description: "Given a CAR file containing a Solana epoch, create all the necessary indexes and save them in the specified index dir.",
 		ArgsUsage:   "<car-path> <index-dir>",
 		Before: func(c *cli.Context) error {
diff --git a/cmd-x-index.go b/cmd-x-index.go
index 73664051..58e58fc1 100644
--- a/cmd-x-index.go
+++ b/cmd-x-index.go
@@ -7,6 +7,7 @@ import (
 func newCmd_Index() *cli.Command {
 	return &cli.Command{
 		Name:        "index",
+		Usage:       "Create various kinds of indexes for CAR files.",
 		Description: "Create various kinds of indexes for CAR files.",
 		Before: func(c *cli.Context) error {
 			return nil
diff --git a/site/filecoin-rpc.markdown b/site/filecoin-rpc.markdown
index 03ca5537..bb029171 100644
--- a/site/filecoin-rpc.markdown
+++ b/site/filecoin-rpc.markdown
@@ -12,15 +12,29 @@ The filecoin RPC server allows provide getBlock, getTransaction and getSignature
 You can run it in the following way:
 
 ```
-faithful-cli rpc-server-filecoin -config 455.yml
+faithful-cli rpc 455.yml
 ```
 
-The config file points faithful to the location of the required indexes (`455.yaml`):
-```
-indexes:
-  slot_to_cid: './epoch-455.car.bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y.slot-to-cid.index'
-  sig_to_cid: './epoch-455.car.bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y.sig-to-cid.index'
-  gsfa: './epoch-455.car.gsfa.index'
+The config file specifies that the data is sourced from filecoin and the indexes from local files. It looks like this:
+
+```yml
+epoch: 455 # epoch number (required)
+data: # data section (required)
+  filecoin:
+    # filecoin-mode section: source the data directly from filecoin.
+    enable: true
+indexes: # indexes section (required)
+  slot_to_cid:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-455/epoch-455-bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y-mainnet-slot-to-cid.index'
+  sig_to_cid:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-455/epoch-455-bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y-mainnet-sig-to-cid.index'
+  sig_exists:
+    # required (always); you can provide either a local filepath or a HTTP url:
+    uri: '/media/runner/solana/indexes/epoch-455/epoch-455-bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y-mainnet-sig-exists.index'
+  gsfa: # getSignaturesForAddress index
+    # optional; must be a local directory path.
+    uri: '/media/runner/solana/indexes/epoch-455/gsfa/epoch-455-bafyreibkequ55hyrhyk6f24ctsofzri6bjykh76jxl3zju4oazu3u3ru7y-gsfa.indexdir'
 ```
 
 Due to latency in fetching signatures, typically the getSignaturesForAddress index needs to be stored in a local directory, but the other indexes can be fetched via HTTP or via local file system access. If you provide a URL, you need to make sure that the url supports HTTP Range requests. S3 or similar APIs will support this.
 
@@ -29,10 +43,10 @@ You can enter URLs from old-faithful.net in these config files.
 
 There is a mode in which you can use a remote gSFA index, which limits it to only return signatures and not additional transaction metadata. To enable this mode, run faithful-cli in the following way:
 
-```
-faithful-cli rpc-server-filecoin -config 455.yml -gsfa-only-signatures=true
+```bash
+faithful-cli rpc -gsfa-only-signatures=true 455.yml
 ```
 
 ## Configuration files
 
-Configuration files for filecoin accesses are still under development.
\ No newline at end of file
+Configuration files for filecoin access are still under development.
diff --git a/site/indexes.markdown b/site/indexes.markdown
index 81d341c1..891d2180 100644
--- a/site/indexes.markdown
+++ b/site/indexes.markdown
@@ -12,31 +12,31 @@ Indexes will be needed to map Solana's block numbers, transaction signatures and
 
   - slot-to-cid: Lookup a CID based on a slot number
   - tx-to-cid: Lookup a CID based on a transaction signature
-  - cid-to-offset: Index for a specific CAR file, used by the local rpc server (see above) to find CIDs in a car file
+  - cid-to-offset-and-size: Index for a specific CAR file, used by the local rpc server (see above) to find CIDs in a car file
   - sig-exists: Index for quick checking whether a specific signature exists in an epoch or not.
 
 In addition to these Old Faithful supports an index called `gsfa` that maps Solana addresses to a list of transaction signatures.
 
 ## Index generation
 
-Once the radiance tooling has been used to prepare a car file (or if you have downloaded a car file externally) you can generate indexes from this car file by using the `faithful-cli`:
+Once the radiance tooling has been used to prepare a car file (or if you have downloaded a car file externally) you can generate indexes from this car file by using the `faithful-cli index` command. The command has the following usage:
 
-```
+```bash
 NAME:
-   faithful index
+   faithful CLI index - Create various kinds of indexes for CAR files.
 
 USAGE:
-   faithful index command [command options] [arguments...]
+   faithful CLI index command [command options] [arguments...]
 
 DESCRIPTION:
    Create various kinds of indexes for CAR files.
 COMMANDS:
-   cid-to-offset
-   slot-to-cid
-   sig-to-cid
-   all
-   gsfa
+   cid-to-offset
+   slot-to-cid
+   sig-to-cid
+   all          Create all the necessary indexes for a Solana epoch.
+   gsfa
    sig-exists
    help, h  Shows a list of commands or help for one command
 
@@ -46,14 +46,14 @@ OPTIONS:
 
 For example, to generate the four indexes cid-to-offset, slot-to-cid, sig-to-cid, sig-exists, you would run:
 
-```
-faithful-cli index all epoch-107.car .
+```bash
+faithful-cli index all epoch-107.car ./
 ```
 
-This would generate the indexes in the current dir for epoch-107.
+This would generate the **required** indexes in the current dir for epoch-107. The optional GSFA index would need to be run separately as follows:
 
-```
-faithful-cli index gsfa epoch-107.car .
+```bash
+faithful-cli index gsfa epoch-107.car ./
 ```
diff --git a/site/installation.markdown b/site/installation.markdown
index b59e579e..03059e59 100644
--- a/site/installation.markdown
+++ b/site/installation.markdown
@@ -11,4 +11,6 @@ The easiest way to install faithful-cli is to download the pre-built binaries fo
 
 ## Building from source
 
-You can also build from source by cloning [the github repo](https://github.com/rpcpool/yellowstone-faithful/releases) and then running `make compile-linux` / `make compile-windows` / `make compile-mac`.
\ No newline at end of file
+You can also build from source by cloning [the github repo](https://github.com/rpcpool/yellowstone-faithful/releases) and then running `make compile-linux` / `make compile-windows` / `make compile-mac`.
+
+To build with `jsonParsed` format support, you need to run `make jsonParsed-linux`.
diff --git a/site/old-faithful-net.markdown b/site/old-faithful-net.markdown
index 05815c27..d20b915d 100644
--- a/site/old-faithful-net.markdown
+++ b/site/old-faithful-net.markdown
@@ -42,13 +42,9 @@ $ ../tools/download-gsfa.sh 0 ./epoch0
 
 If you have a local copy of a CAR archive and the indexes, you can run an RPC server servicing data from them. For example:
 
 ```
-/usr/local/bin/faithful-cli rpc-server-car \
+/usr/local/bin/faithful-cli rpc \
     --listen $PORT \
-    epoch-455.car \
-    epoch-455.car.*.cid-to-offset.index \
-    epoch-455.car.*.slot-to-cid.index \
-    epoch-455.car.*.sig-to-cid.index \
-    epoch-455.car-*-gsfa-index
+    epoch-455.yaml
 ```
 
 You can download the CAR files either via Filecoin or via the bucket provided by Triton. There are helper scripts in the `tools` folder. To download the full epoch data:
diff --git a/site/rpc-server.markdown b/site/rpc-server.markdown
index a3ea1853..c3ccd33f 100644
--- a/site/rpc-server.markdown
+++ b/site/rpc-server.markdown
@@ -23,7 +23,7 @@ It can query data from multiple locations:
 
 ## Quickstart: RPC server from old-faithful.net
 
-The Quickstart RPC server is useful for testing and development. It allows you to spin up an RPC server that hosts a single epoch, either from Filecoin or from CAR files/indexes hosted locally on filesystem or on a separate HTTP endpoint. The quickstart server is available with the commands `faithful-cli rpc-server-car` and `faithful-cli rpc-server-filecoin`.
+The Quickstart RPC server is useful for testing and development. It allows you to spin up an RPC server that hosts a single epoch, either from Filecoin or from CAR files/indexes hosted locally on filesystem or on a separate HTTP endpoint. The quickstart server is available with the command `faithful-cli rpc`.
 
 We are hosting data on old-faithful.net for testing and cloning purposes. This allows you to run a sample test server without downloading any data.
There is a simple tool that you can run for this available from [https://github.com/rpcpool/yellowstone-faithful/tree/main/tools](https://github.com/rpcpool/yellowstone-faithful/tree/main/tools). You can run a fully remote server like this: diff --git a/tools/run-rpc-server-local-indexes.sh b/tools/run-rpc-server-local-indexes.sh index fea98734..dc82c34d 100755 --- a/tools/run-rpc-server-local-indexes.sh +++ b/tools/run-rpc-server-local-indexes.sh @@ -25,11 +25,11 @@ fi INDEX_DIR=${2:-.} -EPOCH_URL=https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.car +# TODO: fix with the correct URL for the epoch config file. +EPOCH_CONFIG_URL=https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.yml + +wget -q ${EPOCH_CONFIG_URL} -O epoch-${EPOCH}.yml set -x -faithful-cli rpc-server-car --listen ":7999" \ - ${EPOCH_URL} \ - ${INDEX_DIR}/epoch-${EPOCH}.car.*.cid-to-offset.index \ - ${INDEX_DIR}/epoch-${EPOCH}.car.*.slot-to-cid.index \ - ${INDEX_DIR}/epoch-${EPOCH}.car.*.sig-to-cid.index \ No newline at end of file +faithful-cli rpc --listen ":7999" \ + epoch-${EPOCH}.yml diff --git a/tools/run-rpc-server-local.sh b/tools/run-rpc-server-local.sh index 69bbb3c3..1256be30 100755 --- a/tools/run-rpc-server-local.sh +++ b/tools/run-rpc-server-local.sh @@ -26,8 +26,5 @@ fi EPOCH_DIR=${2:-.} set -x -faithful-cli rpc-server-car --listen ":7999" \ - ${EPOCH_DIR}/epoch-${EPOCH}.car \ - ${EPOCH_DIR}/epoch-${EPOCH}.car.*.cid-to-offset.index \ - ${EPOCH_DIR}/epoch-${EPOCH}.car.*.slot-to-cid.index \ - ${EPOCH_DIR}/epoch-${EPOCH}.car.*.sig-to-cid.index \ No newline at end of file +faithful-cli rpc --listen ":7999" \ + ${EPOCH_DIR}/epoch-${EPOCH}.yml diff --git a/tools/run-rpc-server-remote.sh b/tools/run-rpc-server-remote.sh index 9c5978fa..925db221 100755 --- a/tools/run-rpc-server-remote.sh +++ b/tools/run-rpc-server-remote.sh @@ -44,11 +44,10 @@ fi CID_URL=https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.cid EPOCH_CID=$($READ_COMMAND $CID_URL) -EPOCH_URL=https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.car +EPOCH_CONFIG_URL=https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.yml + +wget -q ${EPOCH_CONFIG_URL} -O epoch-${EPOCH}.yml set -x -faithful-cli rpc-server-car --listen ":7999" \ - ${EPOCH_URL} \ - https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.car.${EPOCH_CID}.cid-to-offset.index \ - https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.car.${EPOCH_CID}.slot-to-cid.index \ - https://files.old-faithful.net/${EPOCH}/epoch-${EPOCH}.car.${EPOCH_CID}.sig-to-cid.index +faithful-cli rpc --listen ":7999" \ + epoch-${EPOCH}.yml From 753f1973dda7f1d47088df06ba4aecec35a8c2d3 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 20:46:53 +0100 Subject: [PATCH 53/63] Improve address lookup support for jsonParsed format. 
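
Transactions that use address lookup tables (v0) reference accounts beyond the message's static keys; the resolved addresses travel in the transaction meta as raw bytes (`LoadedWritableAddresses` / `LoadedReadonlyAddresses`). This change threads the meta into `encodeTransactionResponseBasedOnWantedEncoding` so that jsonParsed responses can surface those loaded addresses (appended after the static keys, writable first, then readonly; the diff leaves a TODO to validate that ordering). Below is a minimal, self-contained sketch of the byte-to-PublicKey conversion used in the diff; the `toPublicKeys` helper and the sample input are illustrative, not part of the patch:

```go
package main

import (
	"fmt"

	"github.com/gagliardetto/solana-go"
)

// toPublicKeys turns raw 32-byte addresses (as carried in
// confirmed_block.TransactionStatusMeta.LoadedWritableAddresses and
// .LoadedReadonlyAddresses) into solana.PublicKey values, mirroring the
// inline closures added in request-response.go below.
func toPublicKeys(raw [][]byte) []solana.PublicKey {
	out := make([]solana.PublicKey, len(raw))
	for i, b := range raw {
		out[i] = solana.PublicKeyFromBytes(b)
	}
	return out
}

func main() {
	// A zeroed 32-byte address decodes to the System Program ID
	// (11111111111111111111111111111111).
	writable := [][]byte{make([]byte, 32)} // e.g. meta.LoadedWritableAddresses
	fmt.Println(toPublicKeys(writable))
}
```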
--- multiepoch-getBlock.go | 2 +- request-response.go | 46 +++++++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/multiepoch-getBlock.go b/multiepoch-getBlock.go index 6ee05b65..a959f0cb 100644 --- a/multiepoch-getBlock.go +++ b/multiepoch-getBlock.go @@ -378,7 +378,7 @@ func (multi *MultiEpoch) handleGetBlock(ctx context.Context, conn *requestContex } txResp.Meta = meta - encodedTx, err := encodeTransactionResponseBasedOnWantedEncoding(*params.Options.Encoding, tx) + encodedTx, err := encodeTransactionResponseBasedOnWantedEncoding(*params.Options.Encoding, tx, meta) if err != nil { return &jsonrpc2.Error{ Code: jsonrpc2.CodeInternalError, diff --git a/request-response.go b/request-response.go index 2983bb1d..3b5fec24 100644 --- a/request-response.go +++ b/request-response.go @@ -14,6 +14,7 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/mostynb/zstdpool-freelist" "github.com/mr-tron/base58" + "github.com/rpcpool/yellowstone-faithful/third_party/solana_proto/confirmed_block" "github.com/rpcpool/yellowstone-faithful/txstatus" "github.com/sourcegraph/jsonrpc2" "github.com/valyala/fasthttp" @@ -165,7 +166,7 @@ func (req *GetBlockRequest) Validate() error { solana.EncodingBase64, solana.EncodingBase64Zstd, solana.EncodingJSON, - solana.EncodingJSONParsed, // TODO: add support for this + solana.EncodingJSONParsed, ) { return fmt.Errorf("unsupported encoding") } @@ -378,6 +379,7 @@ var zstdEncoderPool = zstdpool.NewEncoderPool() func encodeTransactionResponseBasedOnWantedEncoding( encoding solana.EncodingType, tx solana.Transaction, + meta any, ) (any, error) { switch encoding { case solana.EncodingBase58, solana.EncodingBase64, solana.EncodingBase64Zstd: @@ -410,13 +412,30 @@ func encodeTransactionResponseBasedOnWantedEncoding( }, AccountKeys: txstatus.AccountKeys{ StaticKeys: tx.Message.AccountKeys, - // TODO: add support for dynamic keys? From meta? - // DynamicKeys: &LoadedAddresses{ - // Writable: []solana.PublicKey{}, - // Readonly: []solana.PublicKey{ - // solana.TokenLendingProgramID, - // }, - // }, + // TODO: test this: + DynamicKeys: func() *txstatus.LoadedAddresses { + switch v := meta.(type) { + case *confirmed_block.TransactionStatusMeta: + return &txstatus.LoadedAddresses{ + Writable: func() []solana.PublicKey { + out := make([]solana.PublicKey, len(v.LoadedWritableAddresses)) + for i, v := range v.LoadedWritableAddresses { + out[i] = solana.PublicKeyFromBytes(v) + } + return out + }(), + Readonly: func() []solana.PublicKey { + out := make([]solana.PublicKey, len(v.LoadedReadonlyAddresses)) + for i, v := range v.LoadedReadonlyAddresses { + out[i] = solana.PublicKeyFromBytes(v) + } + return out + }(), + } + default: + return nil + } + }(), }, StackHeight: nil, } @@ -427,12 +446,21 @@ func encodeTransactionResponseBasedOnWantedEncoding( "accounts": func() []string { out := make([]string, len(inst.Accounts)) for i, v := range inst.Accounts { - // TODO: add support for dynamic keys? From meta? 
if v >= uint16(len(tx.Message.AccountKeys)) { continue } out[i] = tx.Message.AccountKeys[v].String() } + // TODO: validate that the order is correct + switch v := meta.(type) { + case *confirmed_block.TransactionStatusMeta: + for _, wr := range v.LoadedWritableAddresses { + out = append(out, solana.PublicKeyFromBytes(wr).String()) + } + for _, ro := range v.LoadedReadonlyAddresses { + out = append(out, solana.PublicKeyFromBytes(ro).String()) + } + } return out }(), "data": base58.Encode(inst.Data), From 5d09877691738b8c2bf18ebdd4a17ec4f4cba0fb Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Thu, 18 Jan 2024 20:57:17 +0100 Subject: [PATCH 54/63] Fix encodeTransactionResponseBasedOnWantedEncoding --- multiepoch-getTransaction.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiepoch-getTransaction.go b/multiepoch-getTransaction.go index 4d1c8dc5..f061e9e9 100644 --- a/multiepoch-getTransaction.go +++ b/multiepoch-getTransaction.go @@ -216,7 +216,7 @@ func (multi *MultiEpoch) handleGetTransaction(ctx context.Context, conn *request } response.Meta = meta - encodedTx, err := encodeTransactionResponseBasedOnWantedEncoding(*params.Options.Encoding, tx) + encodedTx, err := encodeTransactionResponseBasedOnWantedEncoding(*params.Options.Encoding, tx, meta) if err != nil { return &jsonrpc2.Error{ Code: jsonrpc2.CodeInternalError, From bc6eeec49a196dc21d65a9a7a400f508c98ea91b Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 21 Jan 2024 19:45:24 +0100 Subject: [PATCH 55/63] Add index magic replacer --- tools/deprecated/replace-index-magic.go | 64 +++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 tools/deprecated/replace-index-magic.go diff --git a/tools/deprecated/replace-index-magic.go b/tools/deprecated/replace-index-magic.go new file mode 100644 index 00000000..2789a466 --- /dev/null +++ b/tools/deprecated/replace-index-magic.go @@ -0,0 +1,64 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "os" +) + +// for file in $(find /media/runner/solana-2/indexes -name "*.index" | grep mainnet); do +// echo $file +// go run . 
$file +// done + +var ( + oldMagic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'} + // compact index sized + newMagic = [8]byte{'c', 'o', 'm', 'p', 'i', 's', 'z', 'd'} +) + +func main() { + var dry bool + flag.BoolVar(&dry, "dry", false, "dry run") + flag.Parse() + file := flag.Arg(0) + if file == "" { + panic("need file arg") + } + fmt.Println() + fmt.Printf("File: %s\n", file) + // open read write + f, err := os.OpenFile(file, os.O_RDWR, 0) + if err != nil { + panic(err) + } + defer f.Close() + b := make([]byte, 8) + _, err = io.ReadFull(f, b) + if err != nil { + panic(err) + } + + fmt.Printf("First 8 bytes = %v , as string = %s\n", b, b) + target := oldMagic + if !bytes.Equal(b, target[:]) { + fmt.Printf("Doesn't match old magic, but has %v\n", b) + return + } + if dry { + fmt.Printf("⚪ Dry run, not replacing\n") + return + } + fmt.Printf("Found old magic; replacing with new magic\n") + _, err = f.Seek(0, 0) + if err != nil { + panic(err) + } + _, err = f.Write(newMagic[:]) + if err != nil { + panic(err) + } + fmt.Printf("✅ Replaced old magic with new magic.\n") +} From b838cea3531dbfd73614e4d68a53aca5a4237380 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 21 Jan 2024 20:57:24 +0100 Subject: [PATCH 56/63] Cleanup --- tools/deprecated/replace-index-magic.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/deprecated/replace-index-magic.go b/tools/deprecated/replace-index-magic.go index 2789a466..fcbcb8bf 100644 --- a/tools/deprecated/replace-index-magic.go +++ b/tools/deprecated/replace-index-magic.go @@ -44,11 +44,11 @@ func main() { fmt.Printf("First 8 bytes = %v , as string = %s\n", b, b) target := oldMagic if !bytes.Equal(b, target[:]) { - fmt.Printf("Doesn't match old magic, but has %v\n", b) + fmt.Printf("Doesn't match old magic; skipping\n") return } if dry { - fmt.Printf("⚪ Dry run, not replacing\n") + fmt.Printf("⚪ Dry run, not replacing and exiting\n") return } fmt.Printf("Found old magic; replacing with new magic\n") From 5267ce56cb4c4a23b4b8773fd5a4878df9fc273e Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 26 Jan 2024 15:38:21 +0100 Subject: [PATCH 57/63] check-deals: add whitelist of providers --- cmd-check-deals.go | 52 ++++++++++++++++++++++++++++++++++++++ split-car-fetcher/deals.go | 6 +++++ 2 files changed, 58 insertions(+) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index 6bda4f4d..873f4f62 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strings" "time" "github.com/anjor/carlet" @@ -16,9 +17,39 @@ import ( "k8s.io/klog/v2" ) +type commaSeparatedStringSliceFlag struct { + slice []string +} + +func (f *commaSeparatedStringSliceFlag) String() string { + return fmt.Sprintf("%v", f.slice) +} + +func (f *commaSeparatedStringSliceFlag) Set(value string) error { + // split by ",": + split := strings.Split(value, ",") + for _, item := range split { + // trim spaces: + item = strings.TrimSpace(item) + f.slice = append(f.slice, item) + } + return nil +} + +// Has +func (f *commaSeparatedStringSliceFlag) Has(value string) bool { + for _, item := range f.slice { + if item == value { + return true + } + } + return false +} + func newCmd_check_deals() *cli.Command { var includePatterns cli.StringSlice var excludePatterns cli.StringSlice + var providerWhitelist commaSeparatedStringSliceFlag return &cli.Command{ Name: "check-deals", Description: "Validate remote split car retrieval for the given config files", @@ -39,6 +70,12 @@ func newCmd_check_deals() *cli.Command { 
Value: cli.NewStringSlice(".git"), Destination: &excludePatterns, }, + // provider-whitelist + &cli.GenericFlag{ + Name: "provider-whitelist", + Usage: "Whitelist of providers to check", + Value: &providerWhitelist, + }, }, Action: func(c *cli.Context) error { src := c.Args().Slice() @@ -69,6 +106,13 @@ func newCmd_check_deals() *cli.Command { klog.Infof("Loaded %d epoch configs (NO VALIDATION)", len(configs)) klog.Info("Will check remote storage pieces for each epoch config") + // Check provider whitelist: + if len(providerWhitelist.slice) > 0 { + klog.Infof("Provider whitelist: %v", providerWhitelist.slice) + } else { + klog.Infof("Provider whitelist: ") + } + // Check deals: for _, config := range configs { epoch := *config.Epoch @@ -100,6 +144,7 @@ func newCmd_check_deals() *cli.Command { epoch, metadata, dealRegistry, + providerWhitelist, &dm, ) if err != nil { @@ -127,6 +172,7 @@ func checkAllPieces( epoch uint64, meta *splitcarfetcher.Metadata, dealRegistry *splitcarfetcher.DealRegistry, + providerWhitelist commaSeparatedStringSliceFlag, dm *splitcarfetcher.MinerInfoCache, ) error { errs := make([]error, 0) @@ -145,6 +191,12 @@ func checkAllPieces( piece.CommP, minerID, ) + if len(providerWhitelist.slice) > 0 { + if !providerWhitelist.Has(minerID.String()) { + klog.Infof("skipping piece %d/%d with CID %s, because miner %s is not in the whitelist", pieceIndex+1, numPieces, piece.CommP, minerID) + return nil + } + } minerInfo, err := dm.GetProviderInfo(ctx, minerID) if err != nil { return fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) diff --git a/split-car-fetcher/deals.go b/split-car-fetcher/deals.go index 42f442a6..f485fb20 100644 --- a/split-car-fetcher/deals.go +++ b/split-car-fetcher/deals.go @@ -9,6 +9,7 @@ import ( "github.com/filecoin-project/go-address" "github.com/ipfs/go-cid" + "k8s.io/klog/v2" ) // provider,deal_uuid,file_name,url,commp_piece_cid,file_size,padded_size,payload_cid @@ -99,6 +100,11 @@ func DealsFromCSV(path string) (*DealRegistry, error) { PayloadCID: record[7], } + // if the same piece CID is associated with multiple deals, the last one wins, but print a warning + if _, ok := registry.pieceToDeal[deal.CommpPieceCID]; ok { + klog.Warningf("WARNING: piece CID %s is associated with multiple deals, the last one wins\n", deal.CommpPieceCID) + } + registry.pieceToDeal[deal.CommpPieceCID] = deal } From d33ec4d95bcf08072005b9182432369df7c5c85e Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 26 Jan 2024 15:43:52 +0100 Subject: [PATCH 58/63] Improve docs --- cmd-check-deals.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index 873f4f62..352d6965 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -49,7 +49,7 @@ func (f *commaSeparatedStringSliceFlag) Has(value string) bool { func newCmd_check_deals() *cli.Command { var includePatterns cli.StringSlice var excludePatterns cli.StringSlice - var providerWhitelist commaSeparatedStringSliceFlag + var providerAllowlist commaSeparatedStringSliceFlag return &cli.Command{ Name: "check-deals", Description: "Validate remote split car retrieval for the given config files", @@ -70,11 +70,10 @@ func newCmd_check_deals() *cli.Command { Value: cli.NewStringSlice(".git"), Destination: &excludePatterns, }, - // provider-whitelist &cli.GenericFlag{ - Name: "provider-whitelist", - Usage: "Whitelist of providers to check", - Value: &providerWhitelist, + Name: "provider-allowlist", + 
Usage: "List of providers to allow checking (comma-separated, can be specified multiple times); will ignore all pieces that correspond to a provider not in the allowlist.", + Value: &providerAllowlist, }, }, Action: func(c *cli.Context) error { @@ -106,11 +105,11 @@ func newCmd_check_deals() *cli.Command { klog.Infof("Loaded %d epoch configs (NO VALIDATION)", len(configs)) klog.Info("Will check remote storage pieces for each epoch config") - // Check provider whitelist: - if len(providerWhitelist.slice) > 0 { - klog.Infof("Provider whitelist: %v", providerWhitelist.slice) + // Check provider allowlist: + if len(providerAllowlist.slice) > 0 { + klog.Infof("Provider allowlist: %v", providerAllowlist.slice) } else { - klog.Infof("Provider whitelist: ") + klog.Infof("Provider allowlist: ") } // Check deals: @@ -144,7 +143,7 @@ func newCmd_check_deals() *cli.Command { epoch, metadata, dealRegistry, - providerWhitelist, + providerAllowlist, &dm, ) if err != nil { @@ -172,7 +171,7 @@ func checkAllPieces( epoch uint64, meta *splitcarfetcher.Metadata, dealRegistry *splitcarfetcher.DealRegistry, - providerWhitelist commaSeparatedStringSliceFlag, + providerAllowlist commaSeparatedStringSliceFlag, dm *splitcarfetcher.MinerInfoCache, ) error { errs := make([]error, 0) @@ -191,9 +190,9 @@ func checkAllPieces( piece.CommP, minerID, ) - if len(providerWhitelist.slice) > 0 { - if !providerWhitelist.Has(minerID.String()) { - klog.Infof("skipping piece %d/%d with CID %s, because miner %s is not in the whitelist", pieceIndex+1, numPieces, piece.CommP, minerID) + if len(providerAllowlist.slice) > 0 { + if !providerAllowlist.Has(minerID.String()) { + klog.Infof("skipping piece %d/%d with CID %s, because miner %s is not in the allowlist", pieceIndex+1, numPieces, piece.CommP, minerID) return nil } } From 321e2bba0e40a696268a160e4f7aac1df3dfec25 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 26 Jan 2024 15:56:37 +0100 Subject: [PATCH 59/63] Cleanup --- cmd-check-deals.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index 352d6965..384ed128 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -46,6 +46,10 @@ func (f *commaSeparatedStringSliceFlag) Has(value string) bool { return false } +func (f *commaSeparatedStringSliceFlag) Len() int { + return len(f.slice) +} + func newCmd_check_deals() *cli.Command { var includePatterns cli.StringSlice var excludePatterns cli.StringSlice @@ -106,7 +110,7 @@ func newCmd_check_deals() *cli.Command { klog.Info("Will check remote storage pieces for each epoch config") // Check provider allowlist: - if len(providerAllowlist.slice) > 0 { + if providerAllowlist.Len() > 0 { klog.Infof("Provider allowlist: %v", providerAllowlist.slice) } else { klog.Infof("Provider allowlist: ") @@ -190,7 +194,7 @@ func checkAllPieces( piece.CommP, minerID, ) - if len(providerAllowlist.slice) > 0 { + if providerAllowlist.Len() > 0 { if !providerAllowlist.Has(minerID.String()) { klog.Infof("skipping piece %d/%d with CID %s, because miner %s is not in the allowlist", pieceIndex+1, numPieces, piece.CommP, minerID) return nil From 042ceb1ebb89197f99f51d5664aa09482250cb8b Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 26 Jan 2024 15:56:47 +0100 Subject: [PATCH 60/63] Use go 1.21.x --- .github/workflows/build-release.yml | 4 ++-- .github/workflows/tests.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 
bb48538d..49b02652 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -3,7 +3,7 @@ name: main on: push: tags: - - "v*.*.*" + - 'v*.*.*' jobs: build: @@ -17,7 +17,7 @@ jobs: - name: Setup go env uses: actions/setup-go@v3 with: - go-version: '1.20' + go-version: '1.21' check-latest: true - name: Build cli diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3e2f091c..d98eb839 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - go-version: [1.20.x] + go-version: [1.21.x] os: [ubuntu-latest] runs-on: ${{ matrix.os }} steps: From b30e35fb2863be84f8702588c06db6bb747d3777 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Fri, 26 Jan 2024 16:06:54 +0100 Subject: [PATCH 61/63] Fix tests --- compactindexsized/build48_test.go | 2 +- compactindexsized/query_test.go | 2 +- deprecated/bucketteer/example/main.go | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/compactindexsized/build48_test.go b/compactindexsized/build48_test.go index c5b6a042..1ea155fe 100644 --- a/compactindexsized/build48_test.go +++ b/compactindexsized/build48_test.go @@ -166,7 +166,7 @@ func TestBuilder48(t *testing.T) { expected := concatBytes( // --- File header // magic - []byte{0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78}, + []byte{'c', 'o', 'm', 'p', 'i', 's', 'z', 'd'}, // header size i32tob(31), // value size (36 bytes in this case) diff --git a/compactindexsized/query_test.go b/compactindexsized/query_test.go index b5efdf7b..b8d47997 100644 --- a/compactindexsized/query_test.go +++ b/compactindexsized/query_test.go @@ -37,7 +37,7 @@ func TestOpen_InvalidMagic(t *testing.T) { func TestOpen_HeaderOnly(t *testing.T) { buf := concatBytes( // Magic - []byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'}, + []byte{'c', 'o', 'm', 'p', 'i', 's', 'z', 'd'}, // header size i32tob(30), // FileSize diff --git a/deprecated/bucketteer/example/main.go b/deprecated/bucketteer/example/main.go index f6afffcf..387c3c50 100644 --- a/deprecated/bucketteer/example/main.go +++ b/deprecated/bucketteer/example/main.go @@ -10,6 +10,7 @@ import ( "github.com/davecgh/go-spew/spew" "github.com/dustin/go-humanize" "github.com/rpcpool/yellowstone-faithful/bucketteer" + "github.com/rpcpool/yellowstone-faithful/indexmeta" "golang.org/x/exp/mmap" ) @@ -60,7 +61,7 @@ func main() { fmt.Println("writing to file...") writeStartedAt := time.Now() - _, err = buWr.Seal(nil) + _, err = buWr.Seal(indexmeta.Meta{}) if err != nil { panic(err) } From 056fe16505cda9f2e8d5a834ed02e5fc77cd409c Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 28 Jan 2024 18:06:44 +0100 Subject: [PATCH 62/63] Add versioning to config, and support for data.car.from_pieces.piece_to_uri Closes #80, #79 --- cmd-check-deals.go | 2 +- config.go | 51 ++++++++++++++-- epoch.go | 144 ++++++++++++++++++++++++++------------------- 3 files changed, 129 insertions(+), 68 deletions(-) diff --git a/cmd-check-deals.go b/cmd-check-deals.go index 384ed128..3f9f06d6 100644 --- a/cmd-check-deals.go +++ b/cmd-check-deals.go @@ -121,7 +121,7 @@ func newCmd_check_deals() *cli.Command { epoch := *config.Epoch isLassieMode := config.IsFilecoinMode() isCarMode := !isLassieMode - if isCarMode && config.IsSplitCarMode() { + if isCarMode && config.IsCarFromPieces() { klog.Infof("Checking pieces for epoch %d from %q", epoch, config.ConfigFilepath()) metadata, err := splitcarfetcher.MetadataFromYaml(string(config.Data.Car.FromPieces.Metadata.URI)) diff --git 
a/config.go b/config.go index 68b1ec31..154b3d98 100644 --- a/config.go +++ b/config.go @@ -12,8 +12,15 @@ import ( "github.com/libp2p/go-libp2p/core/peer" ) +const ConfigVersion = 1 + type URI string +// String() returns the URI as a string. +func (u URI) String() string { + return string(u) +} + // IsZero returns true if the URI is empty. func (u URI) IsZero() bool { return u == "" @@ -93,10 +100,15 @@ func hashFileSha256(filePath string) (string, error) { return fmt.Sprintf("%x", h.Sum(nil)), nil } +type PieceURLInfo struct { + URI URI `json:"uri" yaml:"uri"` // URL to the piece. +} + type Config struct { originalFilepath string hashOfConfigFile string Epoch *uint64 `json:"epoch" yaml:"epoch"` + Version *uint64 `json:"version" yaml:"version"` Data struct { Car *struct { URI URI `json:"uri" yaml:"uri"` @@ -107,6 +119,7 @@ type Config struct { Deals struct { URI URI `json:"uri" yaml:"uri"` // Local path to the deals file. } `json:"deals" yaml:"deals"` + PieceToURI map[cid.Cid]PieceURLInfo `json:"piece_to_uri" yaml:"piece_to_uri"` // Map of piece CID to URL. } `json:"from_pieces" yaml:"from_pieces"` } `json:"car" yaml:"car"` Filecoin *struct { @@ -173,8 +186,12 @@ func (c *Config) IsFilecoinMode() bool { return c.Data.Filecoin != nil && c.Data.Filecoin.Enable } -func (c *Config) IsSplitCarMode() bool { - return c.Data.Car != nil && c.Data.Car.FromPieces != nil && !c.Data.Car.FromPieces.Metadata.URI.IsZero() && !c.Data.Car.FromPieces.Deals.URI.IsZero() +func (c *Config) IsCarFromPieces() bool { + if c.Data.Car == nil || c.Data.Car.FromPieces == nil { + return false + } + fromPieces := c.Data.Car.FromPieces + return !fromPieces.Metadata.URI.IsZero() && (!fromPieces.Deals.URI.IsZero() || len(fromPieces.PieceToURI) > 0) } type ConfigSlice []*Config @@ -223,6 +240,12 @@ func (c *Config) Validate() error { if c.Epoch == nil { return fmt.Errorf("epoch must be set") } + if c.Version == nil { + return fmt.Errorf("version must be set") + } + if *c.Version != ConfigVersion { + return fmt.Errorf("version must be %d", ConfigVersion) + } // Distinguish between CAR-mode and Filecoin-mode. // In CAR-mode, the data is fetched from a CAR file (local or remote). // In Filecoin-mode, the data is fetched from Filecoin directly (by CID via Lassie). 
@@ -254,12 +277,28 @@ func (c *Config) Validate() error { } } { - if c.Data.Car.FromPieces.Deals.URI.IsZero() { - return fmt.Errorf("data.car.from_pieces.deals.uri must be set") + if c.Data.Car.FromPieces.Deals.URI.IsZero() && len(c.Data.Car.FromPieces.PieceToURI) == 0 { + return fmt.Errorf("data.car.from_pieces.deals.uri or data.car.from_pieces.piece_to_uri must be set") } - if !c.Data.Car.FromPieces.Deals.URI.IsLocal() { + if !c.Data.Car.FromPieces.Deals.URI.IsZero() && len(c.Data.Car.FromPieces.PieceToURI) > 0 { + return fmt.Errorf("data.car.from_pieces.deals.uri and data.car.from_pieces.piece_to_uri cannot both be set") + } + if !c.Data.Car.FromPieces.Deals.URI.IsZero() && !c.Data.Car.FromPieces.Deals.URI.IsLocal() { return fmt.Errorf("data.car.from_pieces.deals.uri must be a local file") } + if len(c.Data.Car.FromPieces.PieceToURI) > 0 { + for pieceCID, uri := range c.Data.Car.FromPieces.PieceToURI { + if !pieceCID.Defined() { + return fmt.Errorf("data.car.from_pieces.piece_to_uri[%s] must be a valid CID", pieceCID) + } + if uri.URI.IsZero() { + return fmt.Errorf("data.car.from_pieces.piece_to_uri[%s].uri must be set", pieceCID) + } + if !uri.URI.IsRemoteWeb() { + return fmt.Errorf("data.car.from_pieces.piece_to_uri[%s].uri must be a remote web URI", pieceCID) + } + } + } } } // CidToOffsetAndSize and CidToOffset cannot be both set or both unset. @@ -340,7 +379,7 @@ func (c *Config) Validate() error { if !c.Data.Car.FromPieces.Metadata.URI.IsValid() { return fmt.Errorf("data.car.from_pieces.metadata.uri is invalid") } - if !c.Data.Car.FromPieces.Deals.URI.IsValid() { + if !c.Data.Car.FromPieces.Deals.URI.IsZero() && !c.Data.Car.FromPieces.Deals.URI.IsValid() { return fmt.Errorf("data.car.from_pieces.deals.uri is invalid") } } else { diff --git a/epoch.go b/epoch.go index cb45dd5e..7f7c4c3f 100644 --- a/epoch.go +++ b/epoch.go @@ -283,76 +283,95 @@ func NewEpochFromConfig( var localCarReader *carv2.Reader var remoteCarReader ReaderAtCloser var err error - if config.IsSplitCarMode() { + if config.IsCarFromPieces() { metadata, err := splitcarfetcher.MetadataFromYaml(string(config.Data.Car.FromPieces.Metadata.URI)) if err != nil { return nil, fmt.Errorf("failed to read pieces metadata: %w", err) } - dealRegistry, err := splitcarfetcher.DealsFromCSV(string(config.Data.Car.FromPieces.Deals.URI)) - if err != nil { - return nil, fmt.Errorf("failed to read deals: %w", err) - } + isFromDeals := !config.Data.Car.FromPieces.Deals.URI.IsZero() - lotusAPIAddress := "https://api.node.glif.io" - cl := jsonrpc.NewClient(lotusAPIAddress) - dm := splitcarfetcher.NewMinerInfo( - cl, - 5*time.Minute, - 5*time.Second, - ) + if isFromDeals { + dealRegistry, err := splitcarfetcher.DealsFromCSV(string(config.Data.Car.FromPieces.Deals.URI)) + if err != nil { + return nil, fmt.Errorf("failed to read deals: %w", err) + } - scr, err := splitcarfetcher.NewSplitCarReader(metadata.CarPieces, - func(piece carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { - minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) - if !ok { - return nil, fmt.Errorf("failed to find miner for piece CID %s", piece.CommP) - } - klog.Infof("piece CID %s is stored on miner %s", piece.CommP, minerID) - minerInfo, err := dm.GetProviderInfo(c.Context, minerID) - if err != nil { - return nil, fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) - } - if len(minerInfo.Multiaddrs) == 0 { - return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) - } - spew.Dump(minerInfo) - // 
extract the IP address from the multiaddr: - split := multiaddr.Split(minerInfo.Multiaddrs[0]) - if len(split) < 2 { - return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) - } - component0 := split[0].(*multiaddr.Component) - component1 := split[1].(*multiaddr.Component) - - var ip string - var port string - - if component0.Protocol().Code == multiaddr.P_IP4 { - ip = component0.Value() - port = component1.Value() - } else if component1.Protocol().Code == multiaddr.P_IP4 { - ip = component1.Value() - port = component0.Value() - } else { - return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) - } - // reset the port to 80: - // TODO: use the appropriate port (80, better if 443 with TLS) - port = "80" - minerIP := fmt.Sprintf("%s:%s", ip, port) - klog.Infof("piece CID %s is stored on miner %s (%s)", piece.CommP, minerID, minerIP) - formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) - return splitcarfetcher.NewRemoteFileSplitCarReader( - piece.CommP.String(), - formattedURL, - ) - }) - if err != nil { - return nil, fmt.Errorf("failed to open CAR file from pieces: %w", err) + lotusAPIAddress := "https://api.node.glif.io" + cl := jsonrpc.NewClient(lotusAPIAddress) + dm := splitcarfetcher.NewMinerInfo( + cl, + 5*time.Minute, + 5*time.Second, + ) + + scr, err := splitcarfetcher.NewSplitCarReader( + metadata.CarPieces, + func(piece carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { + minerID, ok := dealRegistry.GetMinerByPieceCID(piece.CommP) + if !ok { + return nil, fmt.Errorf("failed to find miner for piece CID %s", piece.CommP) + } + klog.Infof("piece CID %s is stored on miner %s", piece.CommP, minerID) + minerInfo, err := dm.GetProviderInfo(c.Context, minerID) + if err != nil { + return nil, fmt.Errorf("failed to get miner info for miner %s, for piece %s: %w", minerID, piece.CommP, err) + } + if len(minerInfo.Multiaddrs) == 0 { + return nil, fmt.Errorf("miner %s has no multiaddrs", minerID) + } + spew.Dump(minerInfo) + // extract the IP address from the multiaddr: + split := multiaddr.Split(minerInfo.Multiaddrs[0]) + if len(split) < 2 { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + component0 := split[0].(*multiaddr.Component) + component1 := split[1].(*multiaddr.Component) + + var ip string + // TODO: use the appropriate port (80, better if 443 with TLS) + port := "80" + + if component0.Protocol().Code == multiaddr.P_IP4 { + ip = component0.Value() + } else if component1.Protocol().Code == multiaddr.P_IP4 { + ip = component1.Value() + } else { + return nil, fmt.Errorf("invalid multiaddr: %s", minerInfo.Multiaddrs[0]) + } + minerIP := fmt.Sprintf("%s:%s", ip, port) + klog.Infof("piece CID %s is stored on miner %s (%s)", piece.CommP, minerID, minerIP) + formattedURL := fmt.Sprintf("http://%s/piece/%s", minerIP, piece.CommP.String()) + return splitcarfetcher.NewRemoteFileSplitCarReader( + piece.CommP.String(), + formattedURL, + ) + }) + if err != nil { + return nil, fmt.Errorf("failed to open CAR file from pieces: %w", err) + } + remoteCarReader = scr + } else { + // is from pieceToURL mapping: + scrFromURLs, err := splitcarfetcher.NewSplitCarReader( + metadata.CarPieces, + func(piece carlet.CarFile) (splitcarfetcher.ReaderAtCloserSize, error) { + pieceURL, ok := config.Data.Car.FromPieces.PieceToURI[piece.CommP] + if !ok { + return nil, fmt.Errorf("failed to find URL for piece CID %s", piece.CommP) + } + return splitcarfetcher.NewRemoteFileSplitCarReader( + piece.CommP.String(), + 
pieceURL.URI.String(), + ) + }) + if err != nil { + return nil, fmt.Errorf("failed to open CAR file from pieces: %w", err) + } + remoteCarReader = scrFromURLs } - remoteCarReader = scr } else { localCarReader, remoteCarReader, err = openCarStorage(c.Context, string(config.Data.Car.URI)) if err != nil { @@ -397,6 +416,9 @@ func NewEpochFromConfig( headerSize := uint64(buf.Len()) ep.carHeaderSize = headerSize } + if remoteCarReader == nil && localCarReader == nil { + return nil, fmt.Errorf("no CAR reader available") + } } { sigExistsFile, err := openIndexStorage( From ff81ac52b370e1330611c89cc75aff284958e0d0 Mon Sep 17 00:00:00 2001 From: gagliardetto Date: Sun, 28 Jan 2024 19:27:41 +0100 Subject: [PATCH 63/63] Update docs: add version --- README.md | 1 + site/production-server.markdown | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e44d9922..31d01885 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ To run a Faithful RPC server you need to specify configuration files for the epo ```yml epoch: 0 # epoch number (required) +version: 1 # version number (required) data: # data section (required) car: # Source the data from a CAR file (car-mode). diff --git a/site/production-server.markdown b/site/production-server.markdown index 480683ad..74387f65 100644 --- a/site/production-server.markdown +++ b/site/production-server.markdown @@ -16,13 +16,14 @@ The production server is available via the `faithful-cli rpc` command. To run a Faithful RPC server you need to specify configuration files for the epoch(s) you want to host. For multi-epoch support you need to generate epoch config files for the epochs that you want to host. An epoch config file looks like this: -``` +```yml data: car: uri: /faithful/493/epoch-493.car filecoin: enable: false epoch: 493 +version: 1 indexes: cid_to_offset: uri: /faithful/493/epoch-493.car.bafyreidlbcsg46dn5mqppioijyqb5cn6j23rkcoazl7skif74kpa3lihxa.cid-to-offset.index @@ -38,12 +39,13 @@ The `uri` parameter supports both HTTP URIs as well as file based ones. If you want you can also run the RPC server using some (or all) epochs via Filecoin: -``` +```yml data: filecoin: enable: true root_cid: bafyreigq7w4bwspbsf7j4ykov34fcf6skrn663n4ywfalgxlhp7o5nes5a epoch: 494 +version: 1 indexes: cid_to_offset: uri: /faithful/494/epoch-494.car.bafyreigq7w4bwspbsf7j4ykov34fcf6skrn663n4ywfalgxlhp7o5nes5a.cid-to-offset.index