diff --git a/CHANGELOG.md b/CHANGELOG.md index bc631f2..243356d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,12 @@ Types of changes - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. +## [0.7.0] + +- `Added` improvements for global performance (2 to 3 times faster). +- `Added` new flag `--output` or `-o` to customize the filename and path of the HTML report. +- `Added` new flag `--profiling` to create a CPU profiling dumpfile. + ## [0.6.0] - `Added` log unmasked values with the `watch` flag. diff --git a/build.yml b/build.yml index 2c7d7f2..8fc1034 100644 --- a/build.yml +++ b/build.yml @@ -221,10 +221,10 @@ targets: steps: - if: len(linters) == 0 then: - - $: golangci-lint run --enable-all ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")} + - $: golangci-lint run --timeout 10m --enable-all ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")} :: true else: - - $: golangci-lint run ={replace(join(appendpath("--enable", linters), " "), "/", " ")} ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")} + - $: golangci-lint run --timeout 10m ={replace(join(appendpath("--enable", linters), " "), "/", " ")} ={replace(join(appendpath("--disable", lintersno), " "), "/", " ")} :: true test: diff --git a/cmd/mimo/main.go b/cmd/mimo/main.go index 18af076..f61c34d 100644 --- a/cmd/mimo/main.go +++ b/cmd/mimo/main.go @@ -20,6 +20,7 @@ package main import ( "fmt" "os" + "path" "runtime" "slices" "sort" @@ -28,11 +29,14 @@ import ( "github.com/cgi-fr/mimo/internal/infra" "github.com/cgi-fr/mimo/pkg/mimo" "github.com/mattn/go-isatty" + "github.com/pkg/profile" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "github.com/spf13/cobra" ) +const defaultPerm = 0o700 // owner-only rwx: directories need the execute bit to be usable + //nolint:gochecknoglobals var ( name string // provisioned by ldflags @@ -46,10 +50,12 @@ var ( debug bool colormode string + profiling bool configfile string watchFields []string 
diskStorage bool persist string + reportPath string ) func main() { @@ -92,12 +98,13 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)") rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format") rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto") - rootCmd.PersistentFlags().StringVar(&configfile, "config", "", "name of the YAML configuration file to use") + rootCmd.PersistentFlags().StringVarP(&configfile, "config", "c", "", "name of the YAML configuration file to use") rootCmd.PersistentFlags().StringSliceVarP(&watchFields, "watch", "w", []string{}, "watch specified fields") - + rootCmd.PersistentFlags().BoolVar(&profiling, "profiling", false, "enable cpu profiling and generate a cpu.pprof file") rootCmd.PersistentFlags().BoolVar(&diskStorage, "disk-storage", false, "enable data storage on disk") rootCmd.PersistentFlags().StringVar(&persist, "persist", "", "persist data in the specified directory (implies --disk-storage)") + rootCmd.PersistentFlags().StringVarP(&reportPath, "output", "o", "report.html", "output path for the HTML report") if err := rootCmd.Execute(); err != nil { log.Err(err).Msg("error when executing command") @@ -137,7 +144,7 @@ func run(_ *cobra.Command, realJSONLineFileName string) error { var report mimo.Report - if report, err = driver.Analyze(); err != nil { + if report, err = runAnalyse(driver, profiling); err != nil { return fmt.Errorf("%w", err) } @@ -148,7 +155,16 @@ func run(_ *cobra.Command, realJSONLineFileName string) error { haserror = appendColumnMetric(report, colname, haserror) } - if err = infra.NewReportExporter().Export(report, "report.html"); err != nil { + reportPath = strings.TrimSpace(reportPath) + if strings.HasSuffix(reportPath, string(os.PathSeparator)) { + reportPath += "report.html" + } + + 
if err := os.MkdirAll(path.Dir(reportPath), defaultPerm); err != nil { + return fmt.Errorf("%w", err) + } + + if err = infra.NewReportExporter().Export(report, reportPath); err != nil { return fmt.Errorf("%w", err) } @@ -159,6 +175,22 @@ func run(_ *cobra.Command, realJSONLineFileName string) error { return nil } +// runAnalyse runs driver.Analyze and, when profiling is true, wraps the call in a profiling +// session started with profile.Start whose output path is the current directory. The profiler +// is stopped via defer so the dump is flushed even when the analysis returns an error. +func runAnalyse(driver mimo.Driver, profiling bool) (mimo.Report, error) { + if profiling { + defer profile.Start(profile.ProfilePath(".")).Stop() + } + + report, err := driver.Analyze() + if err != nil { + return report, fmt.Errorf("%w", err) + } + + return report, nil +} + func selectMultimapFactory() mimo.MultimapFactory { var multimapFactory mimo.MultimapFactory diff --git a/go.mod b/go.mod index 1b330bf..596816c 100644 --- a/go.mod +++ b/go.mod @@ -5,11 +5,13 @@ go 1.21 require ( github.com/Masterminds/sprig/v3 v3.2.3 github.com/cockroachdb/pebble v0.0.0-20230819001538-1798fbf5956c + github.com/goccy/go-json v0.10.2 github.com/mattn/go-isatty v0.0.14 github.com/ohler55/ojg v1.19.2 + github.com/pkg/profile v1.7.0 github.com/rs/zerolog v1.28.0 github.com/spf13/cobra v1.7.0 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.8.0 golang.org/x/text v0.4.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -26,9 +28,11 @@ require ( github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/felixge/fgprof v0.9.3 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect github.com/google/uuid v1.1.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.11 // indirect diff --git a/go.sum b/go.sum index 
0d665d9..d0756b3 100644 --- a/go.sum +++ b/go.sum @@ -108,6 +108,8 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7 github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= +github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= @@ -131,6 +133,8 @@ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/me github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -194,6 +198,8 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= 
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20211214055906-6f57359322fd h1:1FjCyPC+syAzJ5/2S8fqdZK1R22vvA0J7JZKcuOIQ7Y= +github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -210,6 +216,7 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4 github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/hydrogen18/memlistener v0.0.0-20141126152155-54553eb933fb/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= @@ -306,6 +313,8 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA= +github.com/pkg/profile v1.7.0/go.mod 
h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= @@ -362,12 +371,14 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= @@ -541,6 +552,7 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys 
v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/infra/datarowreader_jsonline.go b/internal/infra/datarowreader_jsonline.go index a9fb65e..89f8caa 100644 --- a/internal/infra/datarowreader_jsonline.go +++ b/internal/infra/datarowreader_jsonline.go @@ -19,12 +19,13 @@ package infra import ( "bufio" - "encoding/json" "errors" "fmt" "io" "os" + "github.com/goccy/go-json" + "github.com/cgi-fr/mimo/pkg/mimo" ) @@ -55,7 +56,7 @@ func (drr *DataRowReaderJSONLine) ReadDataRow() (mimo.DataRow, error) { } data = mimo.DataRow{} - if err := json.Unmarshal(drr.input.Bytes(), &data); err != nil { + if err := json.UnmarshalNoEscape(drr.input.Bytes(), &data); err != nil { return nil, fmt.Errorf("%w", err) } } diff --git a/test/reports/report.html b/test/reports/report.html new file mode 100644 index 0000000..d978702 --- /dev/null +++ b/test/reports/report.html @@ -0,0 +1,40 @@ + + + + + MIMO Report + + + + +

MIMO Report

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldNilIgnoredMaskedMissedMasking RateCoherent RateIdentifiable RateK
value0020100.00 %0.00 %100.00 %1
+ + diff --git a/test/reports/report_output_full.html b/test/reports/report_output_full.html new file mode 100644 index 0000000..d978702 --- /dev/null +++ b/test/reports/report_output_full.html @@ -0,0 +1,40 @@ + + + + + MIMO Report + + + + +

MIMO Report

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldNilIgnoredMaskedMissedMasking RateCoherent RateIdentifiable RateK
value0020100.00 %0.00 %100.00 %1
+ + diff --git a/test/suites/11-output-flag.yml b/test/suites/11-output-flag.yml new file mode 100644 index 0000000..06c7f38 --- /dev/null +++ b/test/suites/11-output-flag.yml @@ -0,0 +1,23 @@ +name: validate metrics +testcases: + - name: full name report + steps: + - script: echo '{"value":"A"}' > working/real.jsonl + - script: echo '{"value":"A"}' >> working/real.jsonl + + - script: echo '{"value":"X"}' > working/masked.jsonl + - script: echo '{"value":"Y"}' >> working/masked.jsonl + + - script: cat working/masked.jsonl | mimo --output ../reports/report_output_full.html -v3 working/real.jsonl + - script: test -f ../reports/report_output_full.html + + - name: only directory report + steps: + - script: echo '{"value":"A"}' > working/real.jsonl + - script: echo '{"value":"A"}' >> working/real.jsonl + + - script: echo '{"value":"X"}' > working/masked.jsonl + - script: echo '{"value":"Y"}' >> working/masked.jsonl + + - script: cat working/masked.jsonl | mimo --output ../reports/ -v3 working/real.jsonl + - script: test -f ../reports/report.html