From f60195fdd2d38f1a98d09edd9fefe8dec1f5f94e Mon Sep 17 00:00:00 2001 From: Adrien Aury <44274230+adrienaury@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:14:05 +0100 Subject: [PATCH] perf: fix performance issue with dump (#9) * perf: fix issue with dump * perf: fix issue with dump --- CHANGELOG.md | 1 + internal/app/cli/dump.go | 22 +++- internal/infra/backend_full.go | 117 ++++++++++++++++++++ internal/infra/backend_iterate_once.go | 145 +++++++++++++++++++++++++ 4 files changed, 280 insertions(+), 5 deletions(-) create mode 100644 internal/infra/backend_full.go create mode 100644 internal/infra/backend_iterate_once.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b6c5ca..a59bfc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Types of changes ## [0.3.0] - `Added` cpu and memory profiling with `--profiling mem|cpu` flag +- `Fixed` performance issues on dump in exchange for higher RAM consumption, using `--limited-ram` flag will fall back to the 0.2.0 dump version ## [0.2.0] diff --git a/internal/app/cli/dump.go b/internal/app/cli/dump.go index 228b304..45a1273 100644 --- a/internal/app/cli/dump.go +++ b/internal/app/cli/dump.go @@ -29,8 +29,9 @@ import ( func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.File) *cobra.Command { var ( - include []string - watch bool + include []string + watch bool + limitedRAM bool ) cmd := &cobra.Command{ //nolint:exhaustruct @@ -39,7 +40,7 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F Example: " " + parent + " dump clients", Args: cobra.ExactArgs(1), Run: func(_ *cobra.Command, args []string) { - if err := dump(args[0], include, watch); err != nil { + if err := dump(args[0], include, watch, limitedRAM); err != nil { log.Fatal().Err(err).Int("return", 1).Msg("end SILO") } }, @@ -47,6 +48,7 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F cmd.Flags().StringSliceVarP(&include, "include", "i", []string{}, "include only these columns, exclude all others") cmd.Flags().BoolVarP(&watch, "watch", "w", false, "watch statistics about dumped entities in stderr") + cmd.Flags().BoolVar(&limitedRAM, "limited-ram", false, "limit RAM usage, slower but more efficient on RAM usage") cmd.Flags().SortFlags = false @@ -57,8 +59,18 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F return cmd } -func dump(path string, include []string, watch bool) error { - backend, err := infra.NewBackend(path) +func dump(path string, include []string, watch bool, limitedRAM bool) error { + var ( + backend silo.Backend + err error + ) + + if limitedRAM { + backend, err = infra.NewBackend(path) + } else { + backend, err = infra.NewBackendFull(path) + } + if err != nil { return fmt.Errorf("%w", err) } diff --git a/internal/infra/backend_full.go b/internal/infra/backend_full.go new file mode 100644 index 0000000..61c85b1 --- /dev/null +++ b/internal/infra/backend_full.go @@ -0,0 +1,117 @@ +// Copyright (C) 2024 CGI France +// +// This file is part of SILO. +// +// SILO is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// SILO is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with SILO. If not, see . + +package infra + +import ( + "fmt" + + "github.com/cgi-fr/silo/pkg/silo" + "github.com/cockroachdb/pebble" +) + +type BackendFull struct { + Backend +} + +func NewBackendFull(path string) (BackendFull, error) { + backend, err := NewBackend(path) + if err != nil { + return BackendFull{backend}, err + } + + return BackendFull{backend}, nil +} + +func (b BackendFull) Snapshot() silo.Snapshot { //nolint:ireturn + return NewSnapshotFull(b.db) +} + +type SnapshotFull struct { + db *pebble.DB + nodes map[string][]byte + loaded bool +} + +const DefaultFullMapCap = 1024 + +func NewSnapshotFull(db *pebble.DB) silo.Snapshot { //nolint:ireturn + return &SnapshotFull{ + db: db, + nodes: make(map[string][]byte, DefaultFullMapCap), + loaded: false, + } +} + +func (s *SnapshotFull) Load() error { + iter, err := s.db.NewIter(&pebble.IterOptions{}) //nolint:exhaustruct + if err != nil { + return fmt.Errorf("%w", err) + } + + for iter.First(); iter.Valid(); iter.Next() { + s.nodes[string(iter.Key())] = iter.Value() + } + + s.loaded = true + + return nil +} + +func (s *SnapshotFull) Next() (silo.DataNode, bool, error) { + if !s.loaded { + if err := s.Load(); err != nil { + return silo.DataNode{Key: "", Data: ""}, false, err + } + } + + for key := range s.nodes { + node, err := decodeKey([]byte(key)) + if err != nil { + return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err) + } + + return node, true, nil + } + + return silo.DataNode{Key: "", Data: ""}, false, nil +} + +func (s *SnapshotFull) PullAll(node silo.DataNode) ([]silo.DataNode, error) { + key, err := node.Binary() + if err != nil { + return nil, fmt.Errorf("%w", err) + } + + item, has := s.nodes[string(key)] + if !has { + return []silo.DataNode{}, nil + } + + set, err := decode(item) + if err != nil { + return nil, fmt.Errorf("%w", err) + } + + delete(s.nodes, string(key)) + + return set, nil +} + +func (s *SnapshotFull) Close() error { + return nil +} diff --git a/internal/infra/backend_iterate_once.go b/internal/infra/backend_iterate_once.go new file mode 100644 index 0000000..07f0e95 --- /dev/null +++ b/internal/infra/backend_iterate_once.go @@ -0,0 +1,145 @@ +// Copyright (C) 2024 CGI France +// +// This file is part of SILO. +// +// SILO is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// SILO is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with SILO. If not, see . + +package infra + +import ( + "errors" + "fmt" + + "github.com/cgi-fr/silo/pkg/silo" + "github.com/cockroachdb/pebble" +) + +type BackendInterateOnce struct { + Backend +} + +func NewBackendInterateOnce(path string) (BackendInterateOnce, error) { + backend, err := NewBackend(path) + if err != nil { + return BackendInterateOnce{backend}, err + } + + return BackendInterateOnce{backend}, nil +} + +func (b BackendInterateOnce) Snapshot() silo.Snapshot { //nolint:ireturn + return NewSnapshotInterateOnce(b.db) +} + +type SnapshotInterateOnce struct { + db *pebble.DB + iter *pebble.Iterator + pulled map[string]bool +} + +const DefaultPulledMapCap = 128 + +func NewSnapshotInterateOnce(db *pebble.DB) silo.Snapshot { //nolint:ireturn + return SnapshotInterateOnce{ + db: db, + iter: nil, + pulled: make(map[string]bool, DefaultPulledMapCap), + } +} + +func (s SnapshotInterateOnce) Next() (silo.DataNode, bool, error) { + if s.iter == nil { //nolint:nestif + var err error + if s.iter, err = s.db.NewIter(&pebble.IterOptions{}); err != nil { //nolint:exhaustruct + return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err) + } + + if !s.iter.First() { + return silo.DataNode{Key: "", Data: ""}, false, nil + } + + if _, pulled := s.pulled[string(s.iter.Key())]; !pulled { + node, err := decodeKey(s.iter.Key()) + if err != nil { + return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err) + } + + return node, true, nil + } + } + + for { + if !s.iter.Next() { + return silo.DataNode{Key: "", Data: ""}, false, nil + } + + if _, pulled := s.pulled[string(s.iter.Key())]; !pulled { + node, err := decodeKey(s.iter.Key()) + if err != nil { + return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err) + } + + return node, true, nil + } + } +} + +func (s SnapshotInterateOnce) PullAll(node silo.DataNode) ([]silo.DataNode, error) { + key, err := node.Binary() + if err != nil { + return nil, fmt.Errorf("%w", err) + } + + if _, pulled := s.pulled[string(key)]; pulled { + return []silo.DataNode{}, nil + } + + s.pulled[string(key)] = true + + item, closer, err := s.db.Get(key) + if errors.Is(err, pebble.ErrNotFound) { + return []silo.DataNode{}, nil + } else if err != nil { + return nil, fmt.Errorf("%w", err) + } + defer closer.Close() + + set, err := decode(item) + if err != nil { + return nil, fmt.Errorf("%w", err) + } + + return set, nil +} + +func (s SnapshotInterateOnce) Close() error { + if s.iter == nil { + return nil + } + + if err := s.iter.Close(); err != nil { + return fmt.Errorf("%w", err) + } + + return nil +} + +func decodeKey(rawKey []byte) (silo.DataNode, error) { + key, err := silo.DecodeDataNode(rawKey) + if err != nil { + return silo.DataNode{Key: "", Data: ""}, fmt.Errorf("%w", err) + } + + return key, nil +}