diff --git a/cmd-x-index-cid2subsetoffset.go b/cmd-x-index-cid2subsetoffset.go new file mode 100644 index 00000000..216226f1 --- /dev/null +++ b/cmd-x-index-cid2subsetoffset.go @@ -0,0 +1,110 @@ +package main + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/rpcpool/yellowstone-faithful/indexes" + "github.com/urfave/cli/v2" + "k8s.io/klog/v2" +) + +func newCmd_Index_cid2subsetOffset() *cli.Command { + var verify bool + var epoch uint64 + var network indexes.Network + var indexDir string + return &cli.Command{ + Name: "cid-to-offset", + Description: "Given all split CAR files corresponding to a Solana epoch, create an index of the file that maps CIDs to offsets in the CAR file.", + ArgsUsage: " ", + Before: func(c *cli.Context) error { + if network == "" { + network = indexes.NetworkMainnet + } + return nil + }, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "verify", + Usage: "verify the index after creating it", + Destination: &verify, + }, + &cli.StringFlag{ + Name: "tmp-dir", + Usage: "temporary directory to use for storing intermediate files", + Value: os.TempDir(), + }, + &cli.Uint64Flag{ + Name: "epoch", + Usage: "the epoch of the CAR files", + Destination: &epoch, + Required: true, + }, + &cli.StringFlag{ + Name: "network", + Usage: "the cluster of the epoch; one of: mainnet, testnet, devnet", + Action: func(c *cli.Context, s string) error { + network = indexes.Network(s) + if !indexes.IsValidNetwork(network) { + return fmt.Errorf("invalid network: %q", network) + } + return nil + }, + }, + &cli.StringFlag{ + Name: "index-dir", + Usage: "directory to store the index", + Destination: &indexDir, + Required: true, + }, + }, + Subcommands: []*cli.Command{}, + Action: func(c *cli.Context) error { + carPaths := c.Args().Slice() + tmpDir := c.String("tmp-dir") + + if ok, err := isDirectory(indexDir); err != nil { + return err + } else if !ok { + return fmt.Errorf("index-dir is not a directory") + } + + { + startedAt := time.Now() + defer func() { + klog.Infof("Finished in %s", time.Since(startedAt)) + }() + klog.Infof("Creating CID-to-offset index for %s", carPath) + indexFilepath, err := CreateIndex_cid2offset( + context.TODO(), + epoch, + network, + tmpDir, + carPath, + indexDir, + ) + if err != nil { + panic(err) + } + klog.Info("Index created") + if verify { + klog.Infof("Verifying index for %s located at %s", carPath, indexFilepath) + startedAt := time.Now() + defer func() { + klog.Infof("Finished in %s", time.Since(startedAt)) + }() + err := VerifyIndex_cid2offset(context.TODO(), carPath, indexFilepath) + if err != nil { + return cli.Exit(err, 1) + } + klog.Info("Index verified") + return nil + } + } + return nil + }, + } +} diff --git a/index-cid-to-subset-offset.go b/index-cid-to-subset-offset.go new file mode 100644 index 00000000..55fafa90 --- /dev/null +++ b/index-cid-to-subset-offset.go @@ -0,0 +1,84 @@ +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/dustin/go-humanize" + "github.com/rpcpool/yellowstone-faithful/carreader" + "github.com/rpcpool/yellowstone-faithful/indexes" + "k8s.io/klog/v2" +) + +func CreateIndex_cid2subsetOffset( + ctx context.Context, + epoch uint64, + network indexes.Network, + tmpDir string, + carPaths []string, + indexDir string, +) (string, error) { + + for _, carPath := range carPaths { + // Check if the CAR file exists: + exists, err := fileExists(carPath) + if err != nil { + return "", fmt.Errorf("failed to check if CAR file exists: %w", err) + } + if !exists { + return "", fmt.Errorf("CAR file %q does not exist", carPath) + } + + carFile, err := os.Open(carPath) + if err != nil { + return "", fmt.Errorf("failed to open car file: %w", err) + } + defer carFile.Close() + + rd, err := carreader.New(carFile) + if err != nil { + return "", fmt.Errorf("failed to create car reader: %w", err) + } + // check it has 1 root + if len(rd.Header.Roots) != 1 { + return "", fmt.Errorf("car file must have exactly 1 root, but has %d", len(rd.Header.Roots)) + } + + klog.Infof("Getting car file size") + targetFileSize, err := getFileSize(carPath) + if err != nil { + return "", fmt.Errorf("failed to get car file size: %w", err) + } + + klog.Infof("Counting items in car file...") + numItems, err := carCountItems(carPath) + if err != nil { + return "", fmt.Errorf("failed to count items in car file: %w", err) + } + klog.Infof("Found %s items in car file", humanize.Comma(int64(numItems))) + + tmpDir = filepath.Join(tmpDir, "index-cid-to-subset-offset-"+time.Now().Format("20060102-150405.000000000")) + if err = os.MkdirAll(tmpDir, 0o755); err != nil { + return "", fmt.Errorf("failed to create tmp dir: %w", err) + } + + rootCid := rd.Header.Roots[0] + + klog.Infof("Creating builder with %d items and target file size %d", numItems, targetFileSize) + c2so, err := indexes.NewWriter_CidToSubsetOffsetAndSize( + epoch, + rootCid, + network, + tmpDir, + numItems, + ) + if err != nil { + return "", fmt.Errorf("failed to open index store: %w", err) + } + defer c2so.Close() + } + +}