Skip to content

Commit

Permalink
wiring
Browse files Browse the repository at this point in the history
  • Loading branch information
anjor committed Aug 12, 2024
1 parent 5257e05 commit a4fedaa
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 0 deletions.
110 changes: 110 additions & 0 deletions cmd-x-index-cid2subsetoffset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package main

import (
"context"
"fmt"
"os"
"time"

"github.com/rpcpool/yellowstone-faithful/indexes"
"github.com/urfave/cli/v2"
"k8s.io/klog/v2"
)

func newCmd_Index_cid2subsetOffset() *cli.Command {
var verify bool
var epoch uint64
var network indexes.Network
var indexDir string
return &cli.Command{
Name: "cid-to-offset",
Description: "Given all split CAR files corresponding to a Solana epoch, create an index of the file that maps CIDs to offsets in the CAR file.",
ArgsUsage: "<car-paths> <index-dir>",
Before: func(c *cli.Context) error {
if network == "" {
network = indexes.NetworkMainnet
}
return nil
},
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "verify",
Usage: "verify the index after creating it",
Destination: &verify,
},
&cli.StringFlag{
Name: "tmp-dir",
Usage: "temporary directory to use for storing intermediate files",
Value: os.TempDir(),
},
&cli.Uint64Flag{
Name: "epoch",
Usage: "the epoch of the CAR files",
Destination: &epoch,
Required: true,
},
&cli.StringFlag{
Name: "network",
Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet",
Action: func(c *cli.Context, s string) error {
network = indexes.Network(s)
if !indexes.IsValidNetwork(network) {
return fmt.Errorf("invalid network: %q", network)
}
return nil
},
},
&cli.StringFlag{
Name: "index-dir",
Usage: "directory to store the index",
Destination: &indexDir,
Required: true,
},
},
Subcommands: []*cli.Command{},
Action: func(c *cli.Context) error {
carPaths := c.Args().Slice()

Check failure on line 66 in cmd-x-index-cid2subsetoffset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

carPaths declared and not used
tmpDir := c.String("tmp-dir")

if ok, err := isDirectory(indexDir); err != nil {
return err
} else if !ok {
return fmt.Errorf("index-dir is not a directory")
}

{
startedAt := time.Now()
defer func() {
klog.Infof("Finished in %s", time.Since(startedAt))
}()
klog.Infof("Creating CID-to-offset index for %s", carPath)

Check failure on line 80 in cmd-x-index-cid2subsetoffset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: carPath
indexFilepath, err := CreateIndex_cid2offset(
context.TODO(),
epoch,
network,
tmpDir,
carPath,

Check failure on line 86 in cmd-x-index-cid2subsetoffset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: carPath
indexDir,
)
if err != nil {
panic(err)
}
klog.Info("Index created")
if verify {
klog.Infof("Verifying index for %s located at %s", carPath, indexFilepath)

Check failure on line 94 in cmd-x-index-cid2subsetoffset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: carPath
startedAt := time.Now()
defer func() {
klog.Infof("Finished in %s", time.Since(startedAt))
}()
err := VerifyIndex_cid2offset(context.TODO(), carPath, indexFilepath)

Check failure on line 99 in cmd-x-index-cid2subsetoffset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: carPath
if err != nil {
return cli.Exit(err, 1)
}
klog.Info("Index verified")
return nil
}
}
return nil
},
}
}
84 changes: 84 additions & 0 deletions index-cid-to-subset-offset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package main

import (
"context"
"fmt"
"os"
"path/filepath"
"time"

"github.com/dustin/go-humanize"
"github.com/rpcpool/yellowstone-faithful/carreader"
"github.com/rpcpool/yellowstone-faithful/indexes"
"k8s.io/klog/v2"
)

// CreateIndex_cid2subsetOffset validates every split CAR file of an epoch and
// opens a CID-to-subset-offset index writer for each of them.
//
// For each path in carPaths it checks that the file exists, opens it, requires
// exactly one root in the CAR header, logs the file size and item count,
// creates a fresh working directory under tmpDir and opens a writer with
// indexes.NewWriter_CidToSubsetOffsetAndSize. The CAR file and the writer are
// closed before the next path is processed.
//
// It returns an error if carPaths is empty, if ctx is done, or if any of the
// steps above fails.
//
// NOTE(review): this function is work in progress. No entries are written and
// nothing is stored in indexDir, so after validating all CAR files it returns
// an explicit error instead of reporting a success with an empty index path.
func CreateIndex_cid2subsetOffset(
	ctx context.Context,
	epoch uint64,
	network indexes.Network,
	tmpDir string,
	carPaths []string,
	indexDir string,
) (string, error) {
	if len(carPaths) == 0 {
		return "", fmt.Errorf("no CAR files provided")
	}

	for _, carPath := range carPaths {
		// Stop early when the caller has cancelled the operation.
		if err := ctx.Err(); err != nil {
			return "", err
		}

		// Run each CAR file in its own function scope so the deferred
		// Close calls fire per file rather than piling up until return.
		err := func() error {
			// Check if the CAR file exists:
			exists, err := fileExists(carPath)
			if err != nil {
				return fmt.Errorf("failed to check if CAR file exists: %w", err)
			}
			if !exists {
				return fmt.Errorf("CAR file %q does not exist", carPath)
			}

			carFile, err := os.Open(carPath)
			if err != nil {
				return fmt.Errorf("failed to open car file: %w", err)
			}
			defer carFile.Close()

			rd, err := carreader.New(carFile)
			if err != nil {
				return fmt.Errorf("failed to create car reader: %w", err)
			}
			// check it has 1 root
			if len(rd.Header.Roots) != 1 {
				return fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.Header.Roots))
			}

			klog.Infof("Getting car file size")
			targetFileSize, err := getFileSize(carPath)
			if err != nil {
				return fmt.Errorf("failed to get car file size: %w", err)
			}

			klog.Infof("Counting items in car file...")
			numItems, err := carCountItems(carPath)
			if err != nil {
				return fmt.Errorf("failed to count items in car file: %w", err)
			}
			klog.Infof("Found %s items in car file", humanize.Comma(int64(numItems)))

			// Derive the working directory from the caller's tmpDir every
			// time; overwriting tmpDir would nest directories per CAR file.
			workDir := filepath.Join(tmpDir, "index-cid-to-subset-offset-"+time.Now().Format("20060102-150405.000000000"))
			if err := os.MkdirAll(workDir, 0o755); err != nil {
				return fmt.Errorf("failed to create tmp dir: %w", err)
			}

			rootCid := rd.Header.Roots[0]

			klog.Infof("Creating builder with %d items and target file size %d", numItems, targetFileSize)
			c2so, err := indexes.NewWriter_CidToSubsetOffsetAndSize(
				epoch,
				rootCid,
				network,
				workDir,
				numItems,
			)
			if err != nil {
				return fmt.Errorf("failed to open index store: %w", err)
			}
			defer c2so.Close()

			return nil
		}()
		if err != nil {
			return "", err
		}
	}

	// Nothing has been written or sealed at this point, so there is no index
	// file path to hand back; surface that instead of a false success.
	return "", fmt.Errorf("CID-to-subset-offset index creation is not implemented yet: nothing was written to %q", indexDir)
}

Check failure on line 84 in index-cid-to-subset-offset.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

missing return

0 comments on commit a4fedaa

Please sign in to comment.