From f60195fdd2d38f1a98d09edd9fefe8dec1f5f94e Mon Sep 17 00:00:00 2001
From: Adrien Aury <44274230+adrienaury@users.noreply.github.com>
Date: Thu, 28 Mar 2024 14:14:05 +0100
Subject: [PATCH] perf: fix performance issue with dump (#9)
* perf: fix issue with dump
* perf: fix issue with dump
---
CHANGELOG.md | 1 +
internal/app/cli/dump.go | 22 +++-
internal/infra/backend_full.go | 117 ++++++++++++++++++++
internal/infra/backend_iterate_once.go | 145 +++++++++++++++++++++++++
4 files changed, 280 insertions(+), 5 deletions(-)
create mode 100644 internal/infra/backend_full.go
create mode 100644 internal/infra/backend_iterate_once.go
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b6c5ca..a59bfc1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ Types of changes
## [0.3.0]
- `Added` cpu and memory profiling with `--profiling mem|cpu` flag
+- `Fixed` performance issues on dump, in exchange for higher RAM consumption; use the `--limited-ram` flag to fall back to the 0.2.0 dump behavior
## [0.2.0]
diff --git a/internal/app/cli/dump.go b/internal/app/cli/dump.go
index 228b304..45a1273 100644
--- a/internal/app/cli/dump.go
+++ b/internal/app/cli/dump.go
@@ -29,8 +29,9 @@ import (
func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.File) *cobra.Command {
var (
- include []string
- watch bool
+ include []string
+ watch bool
+ limitedRAM bool
)
cmd := &cobra.Command{ //nolint:exhaustruct
@@ -39,7 +40,7 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F
Example: " " + parent + " dump clients",
Args: cobra.ExactArgs(1),
Run: func(_ *cobra.Command, args []string) {
- if err := dump(args[0], include, watch); err != nil {
+ if err := dump(args[0], include, watch, limitedRAM); err != nil {
log.Fatal().Err(err).Int("return", 1).Msg("end SILO")
}
},
@@ -47,6 +48,7 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F
cmd.Flags().StringSliceVarP(&include, "include", "i", []string{}, "include only these columns, exclude all others")
cmd.Flags().BoolVarP(&watch, "watch", "w", false, "watch statistics about dumped entities in stderr")
+ cmd.Flags().BoolVar(&limitedRAM, "limited-ram", false, "limit RAM usage, slower but more efficient on RAM usage")
cmd.Flags().SortFlags = false
@@ -57,8 +59,18 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F
return cmd
}
-func dump(path string, include []string, watch bool) error {
- backend, err := infra.NewBackend(path)
+func dump(path string, include []string, watch bool, limitedRAM bool) error {
+ var (
+ backend silo.Backend
+ err error
+ )
+
+ if limitedRAM {
+ backend, err = infra.NewBackend(path)
+ } else {
+ backend, err = infra.NewBackendFull(path)
+ }
+
if err != nil {
return fmt.Errorf("%w", err)
}
diff --git a/internal/infra/backend_full.go b/internal/infra/backend_full.go
new file mode 100644
index 0000000..61c85b1
--- /dev/null
+++ b/internal/infra/backend_full.go
@@ -0,0 +1,117 @@
+// Copyright (C) 2024 CGI France
+//
+// This file is part of SILO.
+//
+// SILO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SILO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SILO. If not, see <https://www.gnu.org/licenses/>.
+
+package infra
+
+import (
+ "fmt"
+
+ "github.com/cgi-fr/silo/pkg/silo"
+ "github.com/cockroachdb/pebble"
+)
+
+// BackendFull is a Backend variant whose Snapshot method returns a
+// SnapshotFull instead of the snapshot provided by the embedded Backend.
+// All other behavior is inherited from the embedded Backend.
+type BackendFull struct {
+ Backend
+}
+
+// NewBackendFull opens the backend located at path through NewBackend and
+// wraps the result in a BackendFull. The error reported by NewBackend, if
+// any, is returned unchanged alongside the wrapped value.
+func NewBackendFull(path string) (BackendFull, error) {
+	inner, err := NewBackend(path)
+
+	// Success and failure both return the wrapped backend; only err differs.
+	return BackendFull{inner}, err
+}
+
+// Snapshot returns a new SnapshotFull bound to the backend's pebble database.
+func (b BackendFull) Snapshot() silo.Snapshot { //nolint:ireturn
+ return NewSnapshotFull(b.db)
+}
+
+// SnapshotFull is a snapshot that keeps every database entry in memory.
+type SnapshotFull struct {
+ // db is the pebble database the entries are read from.
+ db *pebble.DB
+ // nodes maps each raw database key (as a string) to its raw value.
+ nodes map[string][]byte
+ // loaded reports whether Load has completed and nodes is populated.
+ loaded bool
+}
+
+// DefaultFullMapCap is the initial capacity hint used when allocating the
+// in-memory node map of a SnapshotFull.
+const DefaultFullMapCap = 1024
+
+// NewSnapshotFull creates an empty, not-yet-loaded SnapshotFull over db.
+// The node map is pre-allocated with DefaultFullMapCap entries.
+func NewSnapshotFull(db *pebble.DB) silo.Snapshot { //nolint:ireturn
+	snapshot := new(SnapshotFull)
+	snapshot.db = db
+	snapshot.nodes = make(map[string][]byte, DefaultFullMapCap)
+	snapshot.loaded = false
+
+	return snapshot
+}
+
+// Load reads every key/value pair of the database into the in-memory node
+// map and marks the snapshot as loaded.
+//
+// It returns an error if the iterator cannot be created or if the iterator
+// reports an error when closed; in that case the snapshot is not marked as
+// loaded.
+func (s *SnapshotFull) Load() error {
+	iter, err := s.db.NewIter(&pebble.IterOptions{}) //nolint:exhaustruct
+	if err != nil {
+		return fmt.Errorf("%w", err)
+	}
+
+	for valid := iter.First(); valid; valid = iter.Next() {
+		// The slice returned by Value is only valid until the iterator is
+		// repositioned, so it must be copied before being retained.
+		value := iter.Value()
+		owned := make([]byte, len(value))
+		copy(owned, value)
+
+		s.nodes[string(iter.Key())] = owned
+	}
+
+	// Closing releases the iterator and reports any error accumulated while
+	// iterating, so a truncated scan is never mistaken for a complete load.
+	if err := iter.Close(); err != nil {
+		return fmt.Errorf("%w", err)
+	}
+
+	s.loaded = true
+
+	return nil
+}
+
+// Next returns one of the nodes still present in the in-memory map, loading
+// the map from the database on first use.
+//
+// The boolean result is false when no node remains or when an error occurred.
+// The returned node is not removed here; entries are removed by PullAll.
+// Which remaining node is returned is unspecified (map iteration order).
+func (s *SnapshotFull) Next() (silo.DataNode, bool, error) {
+	none := silo.DataNode{Key: "", Data: ""}
+
+	if !s.loaded {
+		if loadErr := s.Load(); loadErr != nil {
+			return none, false, loadErr
+		}
+	}
+
+	// Only the first key yielded by the map is examined: the loop body
+	// always returns.
+	for rawKey := range s.nodes {
+		decoded, decodeErr := decodeKey([]byte(rawKey))
+		if decodeErr == nil {
+			return decoded, true, nil
+		}
+
+		return none, false, fmt.Errorf("%w", decodeErr)
+	}
+
+	return none, false, nil
+}
+
+// PullAll returns the set of nodes stored under node and removes that entry
+// from the in-memory map.
+//
+// An empty, non-nil slice is returned when node has no entry. If the key
+// cannot be encoded or the stored value cannot be decoded, an error is
+// returned and the entry is left in place.
+func (s *SnapshotFull) PullAll(node silo.DataNode) ([]silo.DataNode, error) {
+	rawKey, err := node.Binary()
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+
+	mapKey := string(rawKey)
+
+	encoded, found := s.nodes[mapKey]
+	if !found {
+		return []silo.DataNode{}, nil
+	}
+
+	linked, err := decode(encoded)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+
+	// Drop the entry only once it has been decoded successfully.
+	delete(s.nodes, mapKey)
+
+	return linked, nil
+}
+
+// Close is a no-op for SnapshotFull and always returns nil.
+func (s *SnapshotFull) Close() error {
+ return nil
+}
diff --git a/internal/infra/backend_iterate_once.go b/internal/infra/backend_iterate_once.go
new file mode 100644
index 0000000..07f0e95
--- /dev/null
+++ b/internal/infra/backend_iterate_once.go
@@ -0,0 +1,145 @@
+// Copyright (C) 2024 CGI France
+//
+// This file is part of SILO.
+//
+// SILO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// SILO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with SILO. If not, see <https://www.gnu.org/licenses/>.
+
+package infra
+
+import (
+ "errors"
+ "fmt"
+
+ "github.com/cgi-fr/silo/pkg/silo"
+ "github.com/cockroachdb/pebble"
+)
+
+// BackendInterateOnce is a Backend variant whose Snapshot method returns a
+// SnapshotInterateOnce instead of the snapshot provided by the embedded
+// Backend. All other behavior is inherited from the embedded Backend.
+type BackendInterateOnce struct {
+ Backend
+}
+
+// NewBackendInterateOnce opens the backend located at path through
+// NewBackend and wraps the result in a BackendInterateOnce. The error
+// reported by NewBackend, if any, is returned unchanged alongside the
+// wrapped value.
+func NewBackendInterateOnce(path string) (BackendInterateOnce, error) {
+	inner, err := NewBackend(path)
+
+	// Success and failure both return the wrapped backend; only err differs.
+	return BackendInterateOnce{inner}, err
+}
+
+// Snapshot returns a new SnapshotInterateOnce bound to the backend's pebble
+// database.
+func (b BackendInterateOnce) Snapshot() silo.Snapshot { //nolint:ireturn
+ return NewSnapshotInterateOnce(b.db)
+}
+
+// SnapshotInterateOnce is a snapshot that walks the database keys with a
+// single pebble iterator and remembers which keys were already pulled.
+//
+// Its methods use pointer receivers so that the iterator opened by Next is
+// kept between calls and can be released by Close.
+type SnapshotInterateOnce struct {
+	// db is the pebble database the entries are read from.
+	db *pebble.DB
+	// iter is the iterator shared by successive Next calls; nil until the
+	// first call to Next and again after Close.
+	iter *pebble.Iterator
+	// pulled records the raw keys already handed out by PullAll.
+	pulled map[string]bool
+}
+
+// DefaultPulledMapCap is the initial capacity hint used when allocating the
+// pulled-keys map of a SnapshotInterateOnce.
+const DefaultPulledMapCap = 128
+
+// NewSnapshotInterateOnce creates a snapshot over db with no iterator opened
+// yet and an empty pulled-keys map.
+func NewSnapshotInterateOnce(db *pebble.DB) silo.Snapshot { //nolint:ireturn
+	// A pointer is returned so that state set by Next (the iterator)
+	// survives across calls made through the interface.
+	return &SnapshotInterateOnce{
+		db:     db,
+		iter:   nil,
+		pulled: make(map[string]bool, DefaultPulledMapCap),
+	}
+}
+
+// Next advances the shared iterator to the next key that has not been pulled
+// and returns the node decoded from that key.
+//
+// The iterator is opened on the first call. The boolean result is false when
+// the iterator is exhausted or when an error occurred.
+func (s *SnapshotInterateOnce) Next() (silo.DataNode, bool, error) {
+	var valid bool
+
+	if s.iter == nil {
+		iter, err := s.db.NewIter(&pebble.IterOptions{}) //nolint:exhaustruct
+		if err != nil {
+			return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
+		}
+
+		s.iter = iter
+		valid = s.iter.First()
+	} else {
+		valid = s.iter.Next()
+	}
+
+	for ; valid; valid = s.iter.Next() {
+		// Skip keys that were already consumed through PullAll.
+		if _, pulled := s.pulled[string(s.iter.Key())]; pulled {
+			continue
+		}
+
+		node, err := decodeKey(s.iter.Key())
+		if err != nil {
+			return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
+		}
+
+		return node, true, nil
+	}
+
+	// Distinguish a genuine end of data from an iteration failure.
+	if err := s.iter.Error(); err != nil {
+		return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
+	}
+
+	return silo.DataNode{Key: "", Data: ""}, false, nil
+}
+
+// PullAll returns the set of nodes stored under node and marks its key as
+// pulled so that neither Next nor a later PullAll yields it again.
+//
+// An empty, non-nil slice is returned when the key was already pulled or is
+// absent from the database. The key is marked as pulled before the database
+// is read, so it stays marked even when the read or the decoding fails.
+func (s *SnapshotInterateOnce) PullAll(node silo.DataNode) ([]silo.DataNode, error) {
+	key, err := node.Binary()
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+
+	if _, pulled := s.pulled[string(key)]; pulled {
+		return []silo.DataNode{}, nil
+	}
+
+	s.pulled[string(key)] = true
+
+	item, closer, err := s.db.Get(key)
+	if errors.Is(err, pebble.ErrNotFound) {
+		return []silo.DataNode{}, nil
+	} else if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+	// item is only usable until closer is closed, hence the deferred close
+	// after decoding.
+	defer closer.Close()
+
+	set, err := decode(item)
+	if err != nil {
+		return nil, fmt.Errorf("%w", err)
+	}
+
+	return set, nil
+}
+
+// Close releases the iterator opened by Next, if any. Calling Close again
+// afterwards is a no-op.
+func (s *SnapshotInterateOnce) Close() error {
+	if s.iter == nil {
+		return nil
+	}
+
+	iter := s.iter
+	// Forget the iterator first so it is never closed twice.
+	s.iter = nil
+
+	if err := iter.Close(); err != nil {
+		return fmt.Errorf("%w", err)
+	}
+
+	return nil
+}
+
+// decodeKey turns a raw database key into a silo.DataNode using
+// silo.DecodeDataNode. On failure it returns an empty node and the wrapped
+// decoding error.
+func decodeKey(rawKey []byte) (silo.DataNode, error) {
+	node, decodeErr := silo.DecodeDataNode(rawKey)
+	if decodeErr == nil {
+		return node, nil
+	}
+
+	return silo.DataNode{Key: "", Data: ""}, fmt.Errorf("%w", decodeErr)
+}