From 9379b8ab5dbb953d439d3ee6ad973143877b7db9 Mon Sep 17 00:00:00 2001
From: Adrien Aury <44274230+adrienaury@users.noreply.github.com>
Date: Sun, 10 Sep 2023 21:40:29 +0200
Subject: [PATCH] feat: debug information when constraint fail (#29)

* style: fix typo

* feat: wip! debug info

* feat: wip! debug info

* feat: debug info

* test: debug info when constraint fail

* chore: add pimo in ci
---
 .devcontainer/Dockerfile.ci                   |  3 +
 CHANGELOG.md                                  |  1 +
 cmd/mimo/main.go                              | 34 +++++++++
 internal/infra/config_loader.go               |  2 +-
 internal/infra/pebble_multimap.go             | 66 +++++++++++++++++
 pkg/mimo/driven.go                            |  2 +
 pkg/mimo/in_memory_multimap.go                | 41 +++++++++++
 pkg/mimo/model.go                             | 73 ++++++++++++++++++-
 pkg/mimo/model_config.go                      |  2 +-
 test/configs/config_debug_constraint.yaml     |  8 ++
 test/reports/report_debug_constraints_no.html | 40 ++++++++++
 .../reports/report_debug_constraints_yes.html | 40 ++++++++++
 test/suites/08-debug-constraints.yml          | 29 ++++++++
 13 files changed, 338 insertions(+), 3 deletions(-)
 create mode 100644 test/configs/config_debug_constraint.yaml
 create mode 100644 test/reports/report_debug_constraints_no.html
 create mode 100644 test/reports/report_debug_constraints_yes.html
 create mode 100644 test/suites/08-debug-constraints.yml

diff --git a/.devcontainer/Dockerfile.ci b/.devcontainer/Dockerfile.ci
index a1d30e6..e59fb9b 100644
--- a/.devcontainer/Dockerfile.ci
+++ b/.devcontainer/Dockerfile.ci
@@ -7,3 +7,6 @@ RUN apk add --update --progress --no-cache make gomplate
 ARG VERSION_MILLER=6.2.0
 RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
   && chmod +x /usr/bin/mlr
+
+ARG VERSION_PIMO=1.19.0
+RUN wget -nv -O- https://github.com/CGI-FR/PIMO/releases/download/v${VERSION_PIMO}/pimo_${VERSION_PIMO}_linux_amd64.tar.gz | tar xz -C /usr/bin pimo
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55f9ba1..483184f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ Types of changes
 
 ## [0.4.1]
 
+- `Added` debug information in logs when a constraint fails.
 - `Fixed` error handling, fatal errors will not print mimo help.
 - `Fixed` all counters are now persisted (with persist option).
 
diff --git a/cmd/mimo/main.go b/cmd/mimo/main.go
index 0a70dce..6f2e2d1 100644
--- a/cmd/mimo/main.go
+++ b/cmd/mimo/main.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"os"
 	"runtime"
+	"slices"
 	"sort"
 	"strings"
 
@@ -236,11 +237,44 @@ func appendColumnMetric(report mimo.Report, colname string, haserror bool) bool
 			Float64("rate-identifiable", metrics.Identifiant.Rate()).
 			Msg("summmary for column " + colname)
 		haserror = true
+
+		logSamples("coherence", "real-value", "pseudonyms", metrics.GetInvalidSamplesForCoherentRate(10))      //nolint:gomnd
+		logSamples("identifiant", "pseudonym", "real-values", metrics.GetInvalidSamplesForIdentifiantRate(10)) //nolint:gomnd
 	}
 
 	return haserror
 }
 
+// logSamples logs one error entry per sample, listing the values that were
+// assigned to the sample's original value.
+//
+// target names the failed metric, labelForValue and labelForAssigned are the
+// log field names for the original value and for its assigned values. At most
+// maxLoggedValues assigned values are logged per sample, but the message
+// always reports the full count.
+func logSamples(target, labelForValue, labelForAssigned string, samples []mimo.Sample) {
+	const maxLoggedValues = 10
+
+	for _, sample := range samples {
+		total := fmt.Sprintf("%d", len(sample.AssignedValues))
+
+		// Sort a copy before truncating: the backends fill AssignedValues from a
+		// map, so truncating first would log a random subset, and sorting in
+		// place would reorder the caller's slice.
+		assigned := slices.Clone(sample.AssignedValues)
+		slices.Sort(assigned)
+
+		if len(assigned) > maxLoggedValues {
+			assigned = assigned[:maxLoggedValues]
+		}
+
+		log.Error().
+			Str(labelForValue, sample.OriginalValue).
+			Strs(labelForAssigned, assigned).
+			Msg("sample value that failed " + target + " because it was attributed " + total + " " + labelForAssigned)
+	}
+}
+
 func initLog() {
 	color := false
 
diff --git a/internal/infra/config_loader.go b/internal/infra/config_loader.go
index 709fe3a..b21d025 100644
--- a/internal/infra/config_loader.go
+++ b/internal/infra/config_loader.go
@@ -110,7 +110,7 @@ func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {
 				case "maskingRate":
 					constraint.Target = mimo.MaskingRate
 				case "coherentRate":
-					constraint.Target = mimo.CohenrentRate
+					constraint.Target = mimo.CoherentRate
 				case "identifiantRate":
 					constraint.Target = mimo.IdentifiantRate
 				default:
diff --git a/internal/infra/pebble_multimap.go b/internal/infra/pebble_multimap.go
index 3490515..69d0616 100644
--- a/internal/infra/pebble_multimap.go
+++ b/internal/infra/pebble_multimap.go
@@ -23,6 +23,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/cgi-fr/mimo/pkg/mimo"
 	"github.com/rs/zerolog/log"
@@ -153,6 +154,71 @@ func (b PebbleMultimapBackend) GetSize(key string) int {
 	return int(count)
 }
 
+// GetSamplesMulti returns at most maxlen samples taken from keys that are
+// associated with more than one value.
+func (b PebbleMultimapBackend) GetSamplesMulti(maxlen int) []mimo.Sample {
+	return b.collectSamples(maxlen, func(size int) bool { return size > 1 })
+}
+
+// GetSamplesMono returns at most maxlen samples taken from keys that are
+// associated with exactly one value.
+func (b PebbleMultimapBackend) GetSamplesMono(maxlen int) []mimo.Sample {
+	return b.collectSamples(maxlen, func(size int) bool { return size == 1 })
+}
+
+// collectSamples scans the entries stored under KeyPrefix and returns at most
+// maxlen of those whose number of assigned values satisfies keep. The result
+// is never nil; it is empty when maxlen is not positive or when the scan
+// cannot be started.
+func (b PebbleMultimapBackend) collectSamples(maxlen int, keep func(size int) bool) []mimo.Sample {
+	samples := []mimo.Sample{}
+
+	if maxlen <= 0 {
+		return samples
+	}
+
+	iter, err := b.db.NewIter(b.prefixIterOptions([]byte(KeyPrefix)))
+	if err != nil {
+		log.Warn().Err(err).Msg("unable to iterate over multimap entries to collect samples")
+
+		return samples
+	}
+
+	// Pebble iterators must be closed after use, do it on every exit path.
+	defer func() {
+		if closeErr := iter.Close(); closeErr != nil {
+			log.Warn().Err(closeErr).Msg("unable to close multimap iterator")
+		}
+	}()
+
+	for valid := iter.First(); valid && len(samples) < maxlen; valid = iter.Next() {
+		assigned := map[string]int{}
+
+		if decodeErr := json.Unmarshal(iter.Value(), &assigned); decodeErr != nil {
+			// Samples are debug information only, skip entries that cannot be decoded.
+			log.Warn().Err(decodeErr).Msg("unable to decode multimap entry while collecting samples")
+
+			continue
+		}
+
+		if !keep(len(assigned)) {
+			continue
+		}
+
+		assignedValues := make([]string, 0, len(assigned))
+		for assignedValue := range assigned {
+			assignedValues = append(assignedValues, assignedValue)
+		}
+
+		samples = append(samples, mimo.Sample{
+			OriginalValue:  strings.TrimPrefix(string(iter.Key()), KeyPrefix),
+			AssignedValues: assignedValues,
+		})
+	}
+
+	return samples
+}
+
 func (b PebbleMultimapBackend) NewSizeIterator() mimo.SizeIterator { //nolint: ireturn
 	iter, _ := b.db.NewIter(b.prefixIterOptions([]byte(CountPrefix)))
 
diff --git a/pkg/mimo/driven.go b/pkg/mimo/driven.go
index dfe70e3..5d81c6b 100644
--- a/pkg/mimo/driven.go
+++ b/pkg/mimo/driven.go
@@ -32,6 +32,8 @@ type MultimapBackend interface {
 	SetKey(key string, value map[string]int) error
 	GetSize(key string) int
 	NewSizeIterator() SizeIterator
+	GetSamplesMono(maxlen int) []Sample
+	GetSamplesMulti(maxlen int) []Sample
 }
 
 type SizeIterator interface {
diff --git a/pkg/mimo/in_memory_multimap.go b/pkg/mimo/in_memory_multimap.go
index 826cb9d..8cf4cb2 100644
--- a/pkg/mimo/in_memory_multimap.go
+++ b/pkg/mimo/in_memory_multimap.go
@@ -43,6 +43,47 @@ func (m InMemoryMultimapBackend) GetSize(key string) int {
 	return len(m[key])
 }
 
+// GetSamplesMulti returns at most maxlen samples taken from keys that are
+// associated with more than one value.
+func (m InMemoryMultimapBackend) GetSamplesMulti(maxlen int) []Sample {
+	return m.collectSamples(maxlen, func(size int) bool { return size > 1 })
+}
+
+// GetSamplesMono returns at most maxlen samples taken from keys that are
+// associated with exactly one value.
+func (m InMemoryMultimapBackend) GetSamplesMono(maxlen int) []Sample {
+	return m.collectSamples(maxlen, func(size int) bool { return size == 1 })
+}
+
+// collectSamples returns at most maxlen entries whose number of assigned
+// values satisfies keep. Map iteration order is random, so which entries are
+// returned may differ between calls. The result is never nil.
+func (m InMemoryMultimapBackend) collectSamples(maxlen int, keep func(size int) bool) []Sample {
+	samples := []Sample{}
+
+	for value, assigned := range m {
+		if len(samples) >= maxlen {
+			break
+		}
+
+		if !keep(len(assigned)) {
+			continue
+		}
+
+		assignedValues := make([]string, 0, len(assigned))
+		for assignedValue := range assigned {
+			assignedValues = append(assignedValues, assignedValue)
+		}
+
+		samples = append(samples, Sample{
+			OriginalValue:  value,
+			AssignedValues: assignedValues,
+		})
+	}
+
+	return samples
+}
+
 // CountMin returns the minimum count of values associated to a key across the map.
 func (m InMemoryMultimapBackend) NewSizeIterator() SizeIterator { //nolint: ireturn
 	sizes := []int{}
diff --git a/pkg/mimo/model.go b/pkg/mimo/model.go
index 5783352..62ae441 100644
--- a/pkg/mimo/model.go
+++ b/pkg/mimo/model.go
@@ -188,7 +188,7 @@ func (m Metrics) CoherenceRateValidate() int {
 	result := 0
 
 	for _, constraint := range m.Constraints {
-		if constraint.Target == CohenrentRate {
+		if constraint.Target == CoherentRate {
 			if !validate(constraint.Type, constraint.Value, m.Coherence.Rate()) {
 				return -1
 			}
@@ -247,6 +247,72 @@ func (m Metrics) Validate() int {
 	return 0
 }
 
+// GetInvalidSamplesForCoherentRate returns at most maxlen invalid samples if a constraint on coherent rate failed.
+func (m Metrics) GetInvalidSamplesForCoherentRate(maxlen int) []Sample {
+	constraint := m.findFailedCoherentConstraint()
+	samples := []Sample{}
+
+	if constraint != nil {
+		if (constraint.Type == ShouldEqual && constraint.Value > m.Coherence.Rate()) ||
+			constraint.Type == ShouldBeGreaterThan || constraint.Type == ShouldBeGreaterThanOrEqualTo {
+			samples = append(samples, m.Coherence.Backend.GetSamplesMulti(maxlen)...)
+		}
+
+		if (constraint.Type == ShouldEqual && constraint.Value < m.Coherence.Rate()) ||
+			constraint.Type == ShouldBeLessThanOrEqualTo || constraint.Type == ShouldBeLowerThan {
+			samples = append(samples, m.Coherence.Backend.GetSamplesMono(maxlen)...)
+		}
+	}
+
+	return samples
+}
+
+func (m Metrics) findFailedCoherentConstraint() *Constraint {
+	for _, c := range m.Constraints {
+		c := c
+		if c.Target == CoherentRate {
+			if !validate(c.Type, c.Value, m.Coherence.Rate()) {
+				return &c
+			}
+		}
+	}
+
+	return nil
+}
+
+// GetInvalidSamplesForIdentifiantRate returns at most maxlen invalid samples if a constraint on identifiant rate failed.
+func (m Metrics) GetInvalidSamplesForIdentifiantRate(maxlen int) []Sample {
+	constraint := m.findFailedIdentifiantConstraint()
+	samples := []Sample{}
+
+	if constraint != nil {
+		if (constraint.Type == ShouldEqual && constraint.Value > m.Identifiant.Rate()) ||
+			constraint.Type == ShouldBeGreaterThan || constraint.Type == ShouldBeGreaterThanOrEqualTo {
+			samples = append(samples, m.Identifiant.Backend.GetSamplesMulti(maxlen)...)
+		}
+
+		if (constraint.Type == ShouldEqual && constraint.Value < m.Identifiant.Rate()) ||
+			constraint.Type == ShouldBeLessThanOrEqualTo || constraint.Type == ShouldBeLowerThan {
+			samples = append(samples, m.Identifiant.Backend.GetSamplesMono(maxlen)...)
+		}
+	}
+
+	return samples
+}
+
+func (m Metrics) findFailedIdentifiantConstraint() *Constraint {
+	for _, c := range m.Constraints {
+		c := c
+		if c.Target == IdentifiantRate {
+			if !validate(c.Type, c.Value, m.Identifiant.Rate()) {
+				return &c
+			}
+		}
+	}
+
+	return nil
+}
+
 type Report struct {
 	Metrics map[string]Metrics
 	subs    Suscribers
@@ -451,3 +517,8 @@ func isExcluded(exclude []any, value any, valueStr string) bool {
 
 	return false
 }
+
+type Sample struct {
+	OriginalValue  string
+	AssignedValues []string
+}
diff --git a/pkg/mimo/model_config.go b/pkg/mimo/model_config.go
index 4024f73..4277c9c 100644
--- a/pkg/mimo/model_config.go
+++ b/pkg/mimo/model_config.go
@@ -46,7 +46,7 @@ type ConstraintTarget int
 
 const (
 	MaskingRate ConstraintTarget = iota
-	CohenrentRate
+	CoherentRate
 	IdentifiantRate
 )
 
diff --git a/test/configs/config_debug_constraint.yaml b/test/configs/config_debug_constraint.yaml
new file mode 100644
index 0000000..d888a33
--- /dev/null
+++ b/test/configs/config_debug_constraint.yaml
@@ -0,0 +1,8 @@
+version: "1"
+metrics:
+  - name: "name"
+    constraints:
+      coherentRate:
+        shouldBeGreaterThan: 0
+      identifiantRate:
+        shouldBeGreaterThan: 0
diff --git a/test/reports/report_debug_constraints_no.html b/test/reports/report_debug_constraints_no.html
new file mode 100644
index 0000000..a5a079a
--- /dev/null
+++ b/test/reports/report_debug_constraints_no.html
@@ -0,0 +1,40 @@
+ + + + MIMO Report + + + + +

MIMO Report

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldNilIgnoredMaskedMissedMasking RateCoherent RateIdentifiable RateK
name00564456.00 %0.00 %0.00 %2
+ + diff --git a/test/reports/report_debug_constraints_yes.html b/test/reports/report_debug_constraints_yes.html new file mode 100644 index 0000000..8a87944 --- /dev/null +++ b/test/reports/report_debug_constraints_yes.html @@ -0,0 +1,40 @@ + + + + + MIMO Report + + + + +

MIMO Report

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldNilIgnoredMaskedMissedMasking RateCoherent RateIdentifiable RateK
name00564456.00 %0.00 %0.00 %2
+ + diff --git a/test/suites/08-debug-constraints.yml b/test/suites/08-debug-constraints.yml new file mode 100644 index 0000000..d16ea8e --- /dev/null +++ b/test/suites/08-debug-constraints.yml @@ -0,0 +1,29 @@ +# Venom Test Suite definition +# Check Venom documentation for more information : https://github.com/ovh/venom +name: failed constraints should provide sample data in logs for debugging +testcases: + - name: debug info is not present in err log if no constraint + steps: + - script: pimo --empty-input --seed 1 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/real.jsonl + - script: pimo --empty-input --seed 2 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/masked.jsonl + + - script: cat working/masked.jsonl | mimo -verror working/real.jsonl + assertions: + - result.code ShouldEqual 0 + + - script: mv report.html ../reports/report_debug_constraints_no.html + + - name: debug info is present in err log constraint fail + steps: + - script: pimo --empty-input --seed 1 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/real.jsonl + - script: pimo --empty-input --seed 2 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/masked.jsonl + + - script: cat working/masked.jsonl | mimo --config ../configs/config_debug_constraint.yaml -verror working/real.jsonl + assertions: + - result.code ShouldEqual 1 + - result.systemerr ShouldContainSubstring ERR sample value that failed coherence because it was attributed 2 pseudonyms pseudonyms=["\"Jane\"","\"John\""] real-value="\"John\"_" + - result.systemerr ShouldContainSubstring ERR sample value that failed coherence because it was attributed 2 pseudonyms pseudonyms=["\"Jane\"","\"John\""] real-value="\"Jane\"_" + - result.systemerr ShouldContainSubstring ERR sample value that failed identifiant because it was attributed 2 real-values pseudonym="\"John\"" real-values=["\"Jane\"","\"John\""] + - result.systemerr 
ShouldContainSubstring ERR sample value that failed identifiant because it was attributed 2 real-values pseudonym="\"Jane\"" real-values=["\"Jane\"","\"John\""] + + - script: mv report.html ../reports/report_debug_constraints_yes.html