Skip to content

Commit

Permalink
verify function
Browse files Browse the repository at this point in the history
  • Loading branch information
anjor committed Aug 19, 2024
1 parent 66c9ff2 commit c01e84a
Showing 1 changed file with 123 additions and 114 deletions.
237 changes: 123 additions & 114 deletions index-cid-to-subset-offset.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"bufio"
"context"
"errors"
"fmt"
Expand All @@ -9,9 +10,13 @@ import (
"path/filepath"
"time"

"github.com/davecgh/go-spew/spew"
"github.com/dustin/go-humanize"
"github.com/ipld/go-car/util"
carv2 "github.com/ipld/go-car/v2"
"github.com/rpcpool/yellowstone-faithful/carreader"
"github.com/rpcpool/yellowstone-faithful/indexes"
"github.com/rpcpool/yellowstone-faithful/iplddecoders"
"k8s.io/klog/v2"
)

Expand Down Expand Up @@ -119,117 +124,121 @@ func CreateIndex_cid2subsetOffset(
return indexFilePath, nil
}

// func VerifyIndex_cid2offset(ctx context.Context, carPath string, indexFilePath string) error {
// // Check if the CAR file exists:
// exists, err := fileExists(carPath)
// if err != nil {
// return fmt.Errorf("failed to check if CAR file exists: %w", err)
// }
// if !exists {
// return fmt.Errorf("CAR file %s does not exist", carPath)
// }

// // Check if the index file exists:
// exists, err = fileExists(indexFilePath)
// if err != nil {
// return fmt.Errorf("failed to check if index file exists: %w", err)
// }
// if !exists {
// return fmt.Errorf("index file %s does not exist", indexFilePath)
// }

// carFile, err := os.Open(carPath)
// if err != nil {
// return fmt.Errorf("failed to open car file: %w", err)
// }
// defer carFile.Close()

// rd, err := carreader.New(carFile)
// if err != nil {
// return fmt.Errorf("failed to create car reader: %w", err)
// }
// // check it has 1 root
// if len(rd.Header.Roots) != 1 {
// return fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.Header.Roots))
// }

// c2o, err := indexes.Open_CidToOffsetAndSize(indexFilePath)
// if err != nil {
// return fmt.Errorf("failed to open index: %w", err)
// }
// {
// // find root cid
// rootCID := rd.Header.Roots[0]
// offset, err := c2o.Get(rootCID)
// if err != nil {
// return fmt.Errorf("failed to get offset from index: %w", err)
// }
// cr, err := carv2.OpenReader(carPath)
// if err != nil {
// return fmt.Errorf("failed to open CAR file: %w", err)
// }
// defer cr.Close()

// dr, err := cr.DataReader()
// if err != nil {
// return fmt.Errorf("failed to open CAR data reader: %w", err)
// }
// dr.Seek(int64(offset.Offset), io.SeekStart)
// br := bufio.NewReader(dr)

// gotCid, data, err := util.ReadNode(br)
// if err != nil {
// return err
// }
// // verify that the CID we read matches the one we expected.
// if !gotCid.Equals(rootCID) {
// return fmt.Errorf("CID mismatch: expected %s, got %s", rootCID, gotCid)
// }
// // try parsing the data as an Epoch node.
// decoded, err := iplddecoders.DecodeEpoch(data)
// if err != nil {
// return fmt.Errorf("failed to decode root node: %w", err)
// }
// spew.Dump(decoded)
// }

// startedAt := time.Now()
// numItems := 0
// defer func() {
// klog.Infof("Finished in %s", time.Since(startedAt))
// klog.Infof("Read %d nodes", numItems)
// }()

// totalOffset := uint64(0)
// {
// if size, err := rd.HeaderSize(); err != nil {
// return err
// } else {
// totalOffset += size
// }
// }
// for {
// c, sectionLen, err := rd.NextInfo()
// if errors.Is(err, io.EOF) {
// klog.Infof("EOF")
// break
// }
// numItems++
// if numItems%100000 == 0 {
// printToStderr(".")
// }
// offset, err := c2o.Get(c)
// if err != nil {
// return fmt.Errorf("failed to lookup offset for %s: %w", c, err)
// }
// if offset.Offset != totalOffset {
// return fmt.Errorf("offset mismatch for %s: %d != %d", c, offset, totalOffset)
// }
// if offset.Size != sectionLen {
// return fmt.Errorf("length mismatch for %s: %d != %d", c, offset, sectionLen)
// }

// totalOffset += sectionLen
// }
// return nil
// }
// VerifyIndex_cid2subsetOffset checks that the CID-to-subset-offset index at
// indexFilePath agrees with the contents of every CAR file in carPaths.
//
// For each CAR file it:
//  1. requires the CAR header to declare exactly one root CID;
//  2. looks the root CID up in the index, reads the node found at the indexed
//     offset, and confirms that its CID matches and that it decodes as a
//     Subset node;
//  3. walks the CAR file section by section and confirms that the offset and
//     size recorded in the index for each CID equal the section's running
//     offset (starting at the header size) and its length.
//
// It returns nil when every check passes, the first error encountered
// otherwise, and ctx.Err() if ctx is cancelled while verification is running.
// Timing and the number of nodes read are logged when the function returns.
func VerifyIndex_cid2subsetOffset(ctx context.Context, carPaths []string, indexFilePath string) error {
	// Check if the index file exists:
	exists, err := fileExists(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to check if index file exists: %w", err)
	}
	if !exists {
		return fmt.Errorf("index file %s does not exist", indexFilePath)
	}

	c2so, err := indexes.Open_CidToSubsetOffsetAndSize(indexFilePath)
	if err != nil {
		return fmt.Errorf("failed to open index: %w", err)
	}
	// NOTE(review): the index reader is never released here; if it exposes a
	// Close method it should be deferred at this point — confirm against the
	// indexes package.

	startedAt := time.Now()
	numItems := 0
	defer func() {
		klog.Infof("Finished in %s", time.Since(startedAt))
		klog.Infof("Read %d nodes", numItems)
	}()

	// verifyCar runs all checks for a single CAR file. It is a closure so that
	// the deferred Close calls fire at the end of each file rather than piling
	// up until the whole function returns, and so that every early return
	// releases the file handles.
	verifyCar := func(carPath string) error {
		// Check if the CAR file exists:
		exists, err := fileExists(carPath)
		if err != nil {
			return fmt.Errorf("failed to check if CAR file exists: %w", err)
		}
		if !exists {
			return fmt.Errorf("CAR file %s does not exist", carPath)
		}

		carFile, err := os.Open(carPath)
		if err != nil {
			return fmt.Errorf("failed to open car file: %w", err)
		}
		defer carFile.Close()

		rd, err := carreader.New(carFile)
		if err != nil {
			return fmt.Errorf("failed to create car reader: %w", err)
		}
		// check it has 1 root
		if len(rd.Header.Roots) != 1 {
			return fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.Header.Roots))
		}
		rootCID := rd.Header.Roots[0]

		// verifyRoot confirms that the index entry for the root CID points at
		// a node with that CID which decodes as a Subset. It uses its own
		// reader on the CAR file so the sequential reader rd is not disturbed.
		verifyRoot := func() error {
			subsetAndOffset, err := c2so.Get(rootCID)
			if err != nil {
				return fmt.Errorf("failed to get subset and offset from index: %w", err)
			}
			cr, err := carv2.OpenReader(carPath)
			if err != nil {
				return fmt.Errorf("failed to open CAR file: %w", err)
			}
			defer cr.Close()

			dr, err := cr.DataReader()
			if err != nil {
				return fmt.Errorf("failed to open CAR data reader: %w", err)
			}
			if _, err := dr.Seek(int64(subsetAndOffset.Offset), io.SeekStart); err != nil {
				return fmt.Errorf("failed to seek to root node at offset %d: %w", subsetAndOffset.Offset, err)
			}
			br := bufio.NewReader(dr)

			gotCid, data, err := util.ReadNode(br)
			if err != nil {
				return fmt.Errorf("failed to read root node: %w", err)
			}
			// verify that the CID we read matches the one we expected.
			if !gotCid.Equals(rootCID) {
				return fmt.Errorf("CID mismatch: expected %s, got %s", rootCID, gotCid)
			}
			// try parsing the data as a Subset node.
			decoded, err := iplddecoders.DecodeSubset(data)
			if err != nil {
				return fmt.Errorf("failed to decode root node: %w", err)
			}
			spew.Dump(decoded)
			return nil
		}
		if err := verifyRoot(); err != nil {
			return err
		}

		// The first section starts right after the CAR header.
		totalOffset, err := rd.HeaderSize()
		if err != nil {
			return fmt.Errorf("failed to get CAR header size: %w", err)
		}
		for {
			c, sectionLen, err := rd.NextInfo()
			if errors.Is(err, io.EOF) {
				klog.Infof("EOF")
				break
			}
			if err != nil {
				// Any other read error means c and sectionLen are not
				// trustworthy; stop instead of comparing garbage.
				return fmt.Errorf("failed to read next CAR section: %w", err)
			}
			numItems++
			if numItems%100000 == 0 {
				printToStderr(".")
				// Piggyback the cancellation check on the progress tick so
				// the hot loop stays cheap.
				if err := ctx.Err(); err != nil {
					return err
				}
			}
			offset, err := c2so.Get(c)
			if err != nil {
				return fmt.Errorf("failed to lookup offset for %s: %w", c, err)
			}
			if offset.Offset != totalOffset {
				return fmt.Errorf("offset mismatch for %s: %d != %d", c, offset.Offset, totalOffset)
			}
			if offset.Size != sectionLen {
				return fmt.Errorf("length mismatch for %s: %d != %d", c, offset.Size, sectionLen)
			}

			totalOffset += sectionLen
		}
		return nil
	}

	for _, carPath := range carPaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := verifyCar(carPath); err != nil {
			return err
		}
	}
	return nil
}

0 comments on commit c01e84a

Please sign in to comment.