Skip to content

Commit

Permalink
perf: use fast json marshaller (#37)
Browse files Browse the repository at this point in the history
* perf: use fast json marshaller

* chore: test golint with timeout 10m

* feat: add new flags for output and profiling

* test: output flag

* feat: auto create folders for report

* feat: auto create folders for report
  • Loading branch information
adrienaury authored Sep 29, 2023
1 parent 403508a commit 505e0a4
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 11 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.7.0]

- `Added` improvements for global performance (2 to 3 times faster).
- `Added` new flag `--output` or `-o` to customize the filename and path of the HTML report.
- `Added` new flag `--profiling` to enable CPU profiling and generate a `cpu.pprof` dump file.

## [0.6.0]

- `Added` log unmasked values with the `watch` flag.
Expand Down
4 changes: 2 additions & 2 deletions build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ targets:
steps:
- if: len(linters) == 0
then:
- $: golangci-lint run --enable-all ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")}
- $: golangci-lint run --timeout 10m --enable-all ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")}
:: true
else:
- $: golangci-lint run ={replace(join(appendpath("--enable", linters), " "), "/", " ")} ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")}
- $: golangci-lint run --timeout 10m ={replace(join(appendpath("--enable", linters), " "), "/", " ")} ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")}
:: true

test:
Expand Down
45 changes: 41 additions & 4 deletions cmd/mimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package main
import (
"fmt"
"os"
"path"
"runtime"
"slices"
"sort"
Expand All @@ -28,11 +29,14 @@ import (
"github.com/cgi-fr/mimo/internal/infra"
"github.com/cgi-fr/mimo/pkg/mimo"
"github.com/mattn/go-isatty"
"github.com/pkg/profile"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
)

// defaultPerm is the mode passed to os.MkdirAll when the report's parent
// directories are created. A directory needs the execute (search) bit to be
// entered, so 0o600 would yield a folder in which the report file cannot be
// created by a non-root user.
const defaultPerm = 0o700 // user can read/write/enter, everyone else can't do anything

//nolint:gochecknoglobals
var (
name string // provisioned by ldflags
Expand All @@ -46,10 +50,12 @@ var (
debug bool
colormode string

profiling bool
configfile string
watchFields []string
diskStorage bool
persist string
reportPath string
)

func main() {
Expand Down Expand Up @@ -92,12 +98,13 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)")
rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format")
rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto")
rootCmd.PersistentFlags().StringVar(&configfile, "config", "", "name of the YAML configuration file to use")
rootCmd.PersistentFlags().StringVarP(&configfile, "config", "c", "", "name of the YAML configuration file to use")
rootCmd.PersistentFlags().StringSliceVarP(&watchFields, "watch", "w", []string{}, "watch specified fields")

rootCmd.PersistentFlags().BoolVar(&profiling, "profiling", false, "enable cpu profiling and generate a cpu.pprof file")
rootCmd.PersistentFlags().BoolVar(&diskStorage, "disk-storage", false, "enable data storage on disk")
rootCmd.PersistentFlags().StringVar(&persist, "persist", "",
"persist data in the specified directory (implies --disk-storage)")
rootCmd.PersistentFlags().StringVarP(&reportPath, "output", "o", "report.html", "output path for the HTML report")

if err := rootCmd.Execute(); err != nil {
log.Err(err).Msg("error when executing command")
Expand Down Expand Up @@ -137,7 +144,7 @@ func run(_ *cobra.Command, realJSONLineFileName string) error {

var report mimo.Report

if report, err = driver.Analyze(); err != nil {
if report, err = runAnalyse(driver, profiling); err != nil {
return fmt.Errorf("%w", err)
}

Expand All @@ -148,7 +155,16 @@ func run(_ *cobra.Command, realJSONLineFileName string) error {
haserror = appendColumnMetric(report, colname, haserror)
}

if err = infra.NewReportExporter().Export(report, "report.html"); err != nil {
reportPath = strings.TrimSpace(reportPath)
if strings.HasSuffix(reportPath, string(os.PathSeparator)) {
reportPath += "report.html"
}

if err := os.MkdirAll(path.Dir(reportPath), defaultPerm); err != nil {
return fmt.Errorf("%w", err)
}

if err = infra.NewReportExporter().Export(report, reportPath); err != nil {
return fmt.Errorf("%w", err)
}

Expand All @@ -159,6 +175,27 @@ func run(_ *cobra.Command, realJSONLineFileName string) error {
return nil
}

// runAnalyse runs driver.Analyze and returns its report.
//
// When profiling is true, a profiler from github.com/pkg/profile is started
// before the analysis with its output directory set to the current working
// directory (profile.ProfilePath(".")). The profiler is stopped through a
// defer so that it is flushed on every exit path, including when the analysis
// returns an error.
//
// Any error from driver.Analyze is wrapped and returned alongside whatever
// report value the driver produced.
func runAnalyse(driver mimo.Driver, profiling bool) (mimo.Report, error) {
	if profiling {
		// profile.Start runs immediately; only Stop is deferred.
		defer profile.Start(profile.ProfilePath(".")).Stop()
	}

	report, err := driver.Analyze()
	if err != nil {
		return report, fmt.Errorf("%w", err)
	}

	return report, nil
}

func selectMultimapFactory() mimo.MultimapFactory {
var multimapFactory mimo.MultimapFactory

Expand Down
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ go 1.21
require (
github.com/Masterminds/sprig/v3 v3.2.3
github.com/cockroachdb/pebble v0.0.0-20230819001538-1798fbf5956c
github.com/goccy/go-json v0.10.2
github.com/mattn/go-isatty v0.0.14
github.com/ohler55/ojg v1.19.2
github.com/pkg/profile v1.7.0
github.com/rs/zerolog v1.28.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.7.0
github.com/stretchr/testify v1.8.0
golang.org/x/text v0.4.0
gopkg.in/yaml.v3 v3.0.1
)
Expand All @@ -26,9 +28,11 @@ require (
github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect
github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
github.com/google/uuid v1.1.1 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.11 // indirect
Expand Down
16 changes: 14 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
Expand All @@ -131,6 +133,8 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
Expand Down Expand Up @@ -194,6 +198,8 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf
github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
Expand All @@ -210,6 +216,7 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/hydrogen18/memlistener v0.0.0-20141126152155-54553eb933fb/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA=
Expand Down Expand Up @@ -306,6 +313,8 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
Expand Down Expand Up @@ -362,12 +371,14 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
Expand Down Expand Up @@ -541,6 +552,7 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
5 changes: 3 additions & 2 deletions internal/infra/datarowreader_jsonline.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ package infra

import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
"os"

"github.com/goccy/go-json"

"github.com/cgi-fr/mimo/pkg/mimo"
)

Expand Down Expand Up @@ -55,7 +56,7 @@ func (drr *DataRowReaderJSONLine) ReadDataRow() (mimo.DataRow, error) {
}

data = mimo.DataRow{}
if err := json.Unmarshal(drr.input.Bytes(), &data); err != nil {
if err := json.UnmarshalNoEscape(drr.input.Bytes(), &data); err != nil {
return nil, fmt.Errorf("%w", err)
}
}
Expand Down
40 changes: 40 additions & 0 deletions test/reports/report.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>MIMO Report</title>
<meta name="viewport" content="width=device-width,initial-scale=1" />
<meta name="description" content="MIMO Report" />
</head>
<body>
<h1>MIMO Report</h1>
<table border="1" cellspacing="0" cellpadding="5">
<thead>
<th>Field</th>
<th>Nil</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
<th>Coherent Rate</th>
<th>Identifiable Rate</th>
<th>K</th>
</thead>
<tbody>

<tr>
<td>value</td>
<td>0</td>
<td>0</td>
<td>2</td>
<td>0</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">0.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">1</td>
</tr>

</tbody>
</table>
</body>
</html>
40 changes: 40 additions & 0 deletions test/reports/report_output_full.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>MIMO Report</title>
<meta name="viewport" content="width=device-width,initial-scale=1" />
<meta name="description" content="MIMO Report" />
</head>
<body>
<h1>MIMO Report</h1>
<table border="1" cellspacing="0" cellpadding="5">
<thead>
<th>Field</th>
<th>Nil</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
<th>Coherent Rate</th>
<th>Identifiable Rate</th>
<th>K</th>
</thead>
<tbody>

<tr>
<td>value</td>
<td>0</td>
<td>0</td>
<td>2</td>
<td>0</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">0.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">1</td>
</tr>

</tbody>
</table>
</body>
</html>
23 changes: 23 additions & 0 deletions test/suites/11-output-flag.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Test suite for the --output flag: each case builds a two-line real/masked
# JSONL pair, pipes the masked file into mimo, and checks that the report file
# exists at the expected location.
name: validate metrics
testcases:
# --output is given a complete file path: the report must be written to
# exactly that path.
- name: full name report
steps:
# Two identical real values ...
- script: echo '{"value":"A"}' > working/real.jsonl
- script: echo '{"value":"A"}' >> working/real.jsonl

# ... paired with two different masked values.
- script: echo '{"value":"X"}' > working/masked.jsonl
- script: echo '{"value":"Y"}' >> working/masked.jsonl

- script: cat working/masked.jsonl | mimo --output ../reports/report_output_full.html -v3 working/real.jsonl
- script: test -f ../reports/report_output_full.html

# --output is given a path ending with a separator (a directory): the report
# is expected under that directory with the file name report.html.
- name: only directory report
steps:
- script: echo '{"value":"A"}' > working/real.jsonl
- script: echo '{"value":"A"}' >> working/real.jsonl

- script: echo '{"value":"X"}' > working/masked.jsonl
- script: echo '{"value":"Y"}' >> working/masked.jsonl

- script: cat working/masked.jsonl | mimo --output ../reports/ -v3 working/real.jsonl
- script: test -f ../reports/report.html

0 comments on commit 505e0a4

Please sign in to comment.