Skip to content

Commit

Permalink
refactor: stateless (#33)
Browse files Browse the repository at this point in the history
* refactor: wip! counter and sampler

* refactor: wip! metrics string

* refactor: wip! string len freq

* refactor: wip! string sort by freq

* refactor: wip! modelv2

* refactor: build column

* refactor: driver

* refactor: infra file reader

* refactor: fix driver

* refactor: reader v2

* refactor: reader v2

* refactor: lint + sample-size flag

* refactor: numeric metric

* refactor: stable output

* refactor: bool metric

* refactor: cleanup

* refactor: update schema

* refactor: count distinct

* refactor: bool samples

* refactor: put benchmark back

* refactor: disable 100000 lines bench

* refactor: add logs

* refactor: bench set global level warn

* refactor: test int

* chore: add yq in ci

* docs: update readme
  • Loading branch information
adrienaury authored Dec 8, 2023
1 parent 3f38faa commit 58c3e27
Show file tree
Hide file tree
Showing 51 changed files with 1,374 additions and 1,970 deletions.
4 changes: 3 additions & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ RUN apk add --update --progress --no-cache make gomplate

ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
ARG VERSION_YQ=4.40.4
RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
&& wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/golicense /usr/bin/mlr
&& wget -nv -O /usr/bin/yq https://github.com/mikefarah/yq/releases/download/v${VERSION_YQ}/yq_linux_amd64 \
&& chmod +x /usr/bin/golicense /usr/bin/mlr /usr/bin/yq

COPY --from=pimo /usr/bin/pimo /usr/bin/pimo

Expand Down
4 changes: 3 additions & 1 deletion .devcontainer/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ RUN apk add --update --progress --no-cache make gomplate

ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
ARG VERSION_YQ=4.40.4
RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
&& wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/golicense /usr/bin/mlr
&& wget -nv -O /usr/bin/yq https://github.com/mikefarah/yq/releases/download/v${VERSION_YQ}/yq_linux_amd64 \
&& chmod +x /usr/bin/golicense /usr/bin/mlr /usr/bin/yq

COPY --from=pimo /usr/bin/pimo /usr/bin/pimo
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.3.0]

- `Added` `min` and `max` to the main metric (moved from their previous location).
- `Added` `countNulls` to the main metric.
- `Added` all main metrics to the lengths section in string metrics.
- `Removed` `leastFrequentLen` and `mostFrequentLen`; all lengths are now listed, with the most frequent length in first position.

## [0.2.0]

- `Added` new string metrics `minLen` and `maxLen`
Expand Down
6 changes: 6 additions & 0 deletions build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,12 @@ targets:
- ldflags = ldflags + " -s -w" # Omit the DWARF symbol table. Omit the symbol table and debug information.
- call: compile

test-int-debug:
doc: "Run all integration tests directly, without the refresh, lint, test, benchmark and release prerequisites"
depends: ["info"]
steps:
- $: venom run test/suites/*

test-int:
doc: "Run all integration tests"
depends: ["info", "refresh", "lint", "test", "benchmark", "release"]
Expand Down
128 changes: 64 additions & 64 deletions cmd/rimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,39 @@ import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/cgi-fr/rimo/internal/infra"
"github.com/cgi-fr/rimo/pkg/model"
"github.com/cgi-fr/rimo/pkg/rimo"
"github.com/mattn/go-isatty"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
)

// Provisioned by ldflags.
const DefaultSampleSize = uint(5)

//nolint:gochecknoglobals
var (
name string //nolint: gochecknoglobals
version string //nolint: gochecknoglobals
commit string //nolint: gochecknoglobals
buildDate string //nolint: gochecknoglobals
builtBy string //nolint: gochecknoglobals
name string // provisioned by ldflags
version string // provisioned by ldflags
commit string // provisioned by ldflags
buildDate string // provisioned by ldflags
builtBy string // provisioned by ldflags

verbosity string
jsonlog bool
debug bool
colormode string

sampleSize uint
distinct bool //nolint: gochecknoglobals
)

func main() { //nolint:funlen
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}) //nolint: exhaustruct

cobra.OnInitialize(initLog)
log.Info().Msgf("%v %v (commit=%v date=%v by=%v)", name, version, commit, buildDate, builtBy)

rootCmd := &cobra.Command{ //nolint:exhaustruct
Expand All @@ -54,6 +66,12 @@ func main() { //nolint:funlen
There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDate, builtBy),
}

rootCmd.PersistentFlags().StringVarP(&verbosity, "verbosity", "v", "warn",
"set level of log verbosity : none (0), error (1), warn (2), info (3), debug (4), trace (5)")
rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)")
rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format")
rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto")

rimoSchemaCmd := &cobra.Command{ //nolint:exhaustruct
Use: "jsonschema",
Short: "Return rimo jsonschema",
Expand All @@ -77,32 +95,21 @@ func main() { //nolint:funlen
outputDir := args[1]

// Reader

inputList, err := BuildFilepathList(inputDir, ".jsonl")
if err != nil {
log.Fatal().Msgf("error listing files: %v", err)
}

reader, err := infra.FilesReaderFactory(inputList)
reader, err := infra.NewJSONLFolderReader(inputDir)
if err != nil {
log.Fatal().Msgf("error creating reader: %v", err)
}

// Writer
// (could be relocated to infra.FilesReader)
baseName, _, err := infra.ExtractName(inputList[0])
if err != nil {
log.Fatal().Msgf("error extracting base name: %v", err)
}

outputPath := filepath.Join(outputDir, fmt.Sprintf("%s.yaml", baseName))
outputPath := filepath.Join(outputDir, fmt.Sprintf("%s.yaml", reader.BaseName()))

writer, err := infra.YAMLWriterFactory(outputPath)
if err != nil {
log.Fatal().Msgf("error creating writer: %v", err)
}

err = rimo.AnalyseBase(reader, writer)
driver := rimo.Driver{SampleSize: sampleSize, Distinct: distinct}

err = driver.AnalyseBase(reader, writer)
if err != nil {
log.Fatal().Msgf("error generating rimo.yaml: %v", err)
}
Expand All @@ -111,6 +118,9 @@ func main() { //nolint:funlen
},
}

rimoAnalyseCmd.Flags().UintVar(&sampleSize, "sample-size", DefaultSampleSize, "number of sample value to collect")
rimoAnalyseCmd.Flags().BoolVarP(&distinct, "distinct", "d", false, "count distinct values")

rootCmd.AddCommand(rimoAnalyseCmd)
rootCmd.AddCommand(rimoSchemaCmd)

Expand All @@ -120,54 +130,44 @@ func main() { //nolint:funlen
}
}

func FilesList(path string, extension string) ([]string, error) {
pattern := filepath.Join(path, "*"+extension)
func initLog() {
color := false

files, err := filepath.Glob(pattern)
if err != nil {
return nil, fmt.Errorf("error listing files: %w", err)
switch strings.ToLower(colormode) {
case "auto":
if isatty.IsTerminal(os.Stdout.Fd()) && runtime.GOOS != "windows" {
color = true
}
case "yes", "true", "1", "on", "enable":
color = true
}

return files, nil
}

var ErrNoFile = fmt.Errorf("no file found")

func BuildFilepathList(path string, extension string) ([]string, error) {
err := ValidateDirPath(path)
if err != nil {
return nil, fmt.Errorf("failed to validate input directory: %w", err)
}

pattern := filepath.Join(path, "*"+extension)

files, err := filepath.Glob(pattern)
if err != nil {
return nil, fmt.Errorf("error listing files: %w", err)
if jsonlog {
log.Logger = zerolog.New(os.Stderr)
} else {
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, NoColor: !color}) //nolint:exhaustruct
}

if len(files) == 0 {
return nil, fmt.Errorf("%w : no %s files found in %s", ErrNoFile, extension, path)
if debug {
log.Logger = log.Logger.With().Caller().Logger()
}

return files, nil
setVerbosity()
}

func ValidateDirPath(path string) error {
fileInfo, err := os.Stat(path)
if os.IsNotExist(err) {
return fmt.Errorf("%w: %s", infra.ErrDirDoesNotExist, path)
} else if err != nil {
return fmt.Errorf("failed to get directory info: %w", err)
func setVerbosity() {
switch verbosity {
case "trace", "5":
zerolog.SetGlobalLevel(zerolog.TraceLevel)
case "debug", "4":
zerolog.SetGlobalLevel(zerolog.DebugLevel)
case "info", "3":
zerolog.SetGlobalLevel(zerolog.InfoLevel)
case "warn", "2":
zerolog.SetGlobalLevel(zerolog.WarnLevel)
case "error", "1":
zerolog.SetGlobalLevel(zerolog.ErrorLevel)
default:
zerolog.SetGlobalLevel(zerolog.Disabled)
}

if !fileInfo.IsDir() {
return fmt.Errorf("%w: %s", infra.ErrPathIsNotDir, path)
}

if fileInfo.Mode().Perm()&infra.WriteDirPerm != infra.WriteDirPerm {
return fmt.Errorf("%w: %s", infra.ErrWriteDirPermission, path)
}

return nil
}
14 changes: 7 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,28 @@ module github.com/cgi-fr/rimo
go 1.20

require (
github.com/hexops/valast v1.4.4
github.com/goccy/go-json v0.10.2
github.com/rs/zerolog v1.30.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.4
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
gopkg.in/yaml.v3 v3.0.1
)

require gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
require (
github.com/kr/pretty v0.3.1 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/iancoleman/orderedmap v0.3.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/invopop/jsonschema v0.7.0 // direct
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-isatty v0.0.19
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/mod v0.13.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/tools v0.14.0 // indirect
mvdan.cc/gofumpt v0.5.0 // indirect
)
22 changes: 8 additions & 14 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/hexops/autogold v0.8.1 h1:wvyd/bAJ+Dy+DcE09BoLk6r4Fa5R5W+O+GUzmR985WM=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/valast v1.4.4 h1:rETyycw+/L2ZVJHHNxEBgh8KUn+87WugH9MxcEv9PGs=
github.com/hexops/valast v1.4.4/go.mod h1:Jcy1pNH7LNraVaAZDLyv21hHg2WBv9Nf9FL6fGxU7o4=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc=
github.com/iancoleman/orderedmap v0.3.0/go.mod h1:XuLcCUkdL5owUCQeF2Ue9uuw1EptkJDkXXS7VoV7XGE=
Expand All @@ -19,18 +15,23 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf
github.com/invopop/jsonschema v0.7.0 h1:2vgQcBz1n256N+FpX3Jq7Y17AjYt46Ig3zIWyy770So=
github.com/invopop/jsonschema v0.7.0/go.mod h1:O9uiLokuu0+MGFlyiaqtWxwqJm41/+8Nj0lD7A36YH0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.30.0 h1:SymVODrcRsaRaSInD9yQtKbtWqwsfoPcRff/oRXLj4c=
github.com/rs/zerolog v1.30.0/go.mod h1:/tk+P47gFdPXq4QYjvCmT5/Gsug2nagsFWBWhAiSi1w=
Expand All @@ -45,21 +46,14 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
mvdan.cc/gofumpt v0.5.0 h1:0EQ+Z56k8tXjj/6TQD25BFNKQXpCvT0rnansIc7Ug5E=
mvdan.cc/gofumpt v0.5.0/go.mod h1:HBeVDtMKRZpXyxFciAirzdKklDlGu8aAy1wEbH5Y9js=
Loading

0 comments on commit 58c3e27

Please sign in to comment.