From 9379b8ab5dbb953d439d3ee6ad973143877b7db9 Mon Sep 17 00:00:00 2001
From: Adrien Aury <44274230+adrienaury@users.noreply.github.com>
Date: Sun, 10 Sep 2023 21:40:29 +0200
Subject: [PATCH] feat: debug information when constraint fail (#29)
* style: fix typo
* feat: wip! debug info
* feat: wip! debug info
* feat: debug info
* test: debug info when constraint fail
* chore: add pimo in ci
---
.devcontainer/Dockerfile.ci | 3 +
CHANGELOG.md | 1 +
cmd/mimo/main.go | 21 ++++++
internal/infra/config_loader.go | 2 +-
internal/infra/pebble_multimap.go | 65 +++++++++++++++++
pkg/mimo/driven.go | 2 +
pkg/mimo/in_memory_multimap.go | 50 +++++++++++++
pkg/mimo/model.go | 73 ++++++++++++++++++-
pkg/mimo/model_config.go | 2 +-
test/configs/config_debug_constraint.yaml | 8 ++
test/reports/report_debug_constraints_no.html | 40 ++++++++++
.../reports/report_debug_constraints_yes.html | 40 ++++++++++
test/suites/08-debug-constraints.yml | 29 ++++++++
13 files changed, 333 insertions(+), 3 deletions(-)
create mode 100644 test/configs/config_debug_constraint.yaml
create mode 100644 test/reports/report_debug_constraints_no.html
create mode 100644 test/reports/report_debug_constraints_yes.html
create mode 100644 test/suites/08-debug-constraints.yml
diff --git a/.devcontainer/Dockerfile.ci b/.devcontainer/Dockerfile.ci
index a1d30e6..e59fb9b 100644
--- a/.devcontainer/Dockerfile.ci
+++ b/.devcontainer/Dockerfile.ci
@@ -7,3 +7,6 @@ RUN apk add --update --progress --no-cache make gomplate
ARG VERSION_MILLER=6.2.0
RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/mlr
+
+ARG VERSION_PIMO=1.19.0
+RUN wget -O- https://github.com/CGI-FR/PIMO/releases/download/v${VERSION_PIMO}/pimo_${VERSION_PIMO}_linux_amd64.tar.gz | tar xz -C /usr/bin pimo
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55f9ba1..483184f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ Types of changes
## [0.4.1]
+- `Added` debug information in logs when a constraint fails.
- `Fixed` error handling, fatal errors will not print mimo help.
- `Fixed` all counters are now persisted (with persist option).
diff --git a/cmd/mimo/main.go b/cmd/mimo/main.go
index 0a70dce..6f2e2d1 100644
--- a/cmd/mimo/main.go
+++ b/cmd/mimo/main.go
@@ -21,6 +21,7 @@ import (
"fmt"
"os"
"runtime"
+ "slices"
"sort"
"strings"
@@ -236,11 +237,31 @@ func appendColumnMetric(report mimo.Report, colname string, haserror bool) bool
Float64("rate-identifiable", metrics.Identifiant.Rate()).
Msg("summmary for column " + colname)
haserror = true
+
+ logSamples("coherence", "real-value", "pseudonyms", metrics.GetInvalidSamplesForCoherentRate(10)) //nolint:gomnd
+ logSamples("identifiant", "pseudonym", "real-values", metrics.GetInvalidSamplesForIdentifiantRate(10)) //nolint:gomnd
}
return haserror
}
+// logSamples writes one error-level log entry per sample to explain why the constraint on
+// target failed. labelForValue names the log field holding sample.OriginalValue and
+// labelForAssigned names the log field holding the values assigned to it. At most 10
+// assigned values are printed, while the message reports how many there are in total.
+func logSamples(target, labelForValue, labelForAssigned string, samples []mimo.Sample) {
+	for _, sample := range samples {
+		lenMax := fmt.Sprintf("%d", len(sample.AssignedValues))
+
+		// Sort a copy before truncating: the caller's slice is left untouched and the
+		// values that get printed no longer depend on the order the backend produced them.
+		assigned := slices.Clone(sample.AssignedValues)
+		slices.Sort(assigned)
+
+		if len(assigned) > 10 { //nolint:gomnd
+			assigned = assigned[:10]
+		}
+
+		log.Error().
+			Str(labelForValue, sample.OriginalValue).
+			Strs(labelForAssigned, assigned).
+			Msg("sample value that failed " + target + " because it was attributed " + lenMax + " " + labelForAssigned)
+	}
+}
+
func initLog() {
color := false
diff --git a/internal/infra/config_loader.go b/internal/infra/config_loader.go
index 709fe3a..b21d025 100644
--- a/internal/infra/config_loader.go
+++ b/internal/infra/config_loader.go
@@ -110,7 +110,7 @@ func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {
case "maskingRate":
constraint.Target = mimo.MaskingRate
case "coherentRate":
- constraint.Target = mimo.CohenrentRate
+ constraint.Target = mimo.CoherentRate
case "identifiantRate":
constraint.Target = mimo.IdentifiantRate
default:
diff --git a/internal/infra/pebble_multimap.go b/internal/infra/pebble_multimap.go
index 3490515..69d0616 100644
--- a/internal/infra/pebble_multimap.go
+++ b/internal/infra/pebble_multimap.go
@@ -23,6 +23,7 @@ import (
"errors"
"fmt"
"os"
+ "strings"
"github.com/cgi-fr/mimo/pkg/mimo"
"github.com/rs/zerolog/log"
@@ -153,6 +154,70 @@ func (b PebbleMultimapBackend) GetSize(key string) int {
return int(count)
}
+// GetSamplesMulti returns at most maxlen samples taken among the stored keys that are
+// associated with more than one value. A maxlen of zero or less yields no sample.
+func (b PebbleMultimapBackend) GetSamplesMulti(maxlen int) []mimo.Sample {
+	return b.collectSamples(maxlen, func(size int) bool { return size > 1 })
+}
+
+// GetSamplesMono returns at most maxlen samples taken among the stored keys that are
+// associated with exactly one value. A maxlen of zero or less yields no sample.
+func (b PebbleMultimapBackend) GetSamplesMono(maxlen int) []mimo.Sample {
+	return b.collectSamples(maxlen, func(size int) bool { return size == 1 })
+}
+
+// collectSamples walks the entries stored under KeyPrefix and builds one sample per entry
+// whose number of distinct values is accepted by keep, stopping as soon as maxlen samples
+// are collected. Failures are logged instead of being returned: samples are only used as
+// debugging help, so a partial (possibly empty) result is preferred over aborting.
+func (b PebbleMultimapBackend) collectSamples(maxlen int, keep func(size int) bool) []mimo.Sample {
+	samples := []mimo.Sample{}
+
+	iter, err := b.db.NewIter(b.prefixIterOptions([]byte(KeyPrefix)))
+	if err != nil {
+		log.Error().Err(err).Msg("can't iterate over multimap entries to collect samples")
+
+		return samples
+	}
+
+	// The iterator has to be closed on every path, otherwise the resources it holds leak.
+	defer func() {
+		if closeErr := iter.Close(); closeErr != nil {
+			log.Error().Err(closeErr).Msg("can't close the iterator used to collect samples")
+		}
+	}()
+
+	for valid := iter.First(); valid && len(samples) < maxlen; valid = iter.Next() {
+		key := strings.TrimPrefix(string(iter.Key()), KeyPrefix)
+		values := map[string]int{}
+
+		// Each entry is decoded as a JSON object whose member names are the assigned values.
+		if decodeErr := json.Unmarshal(iter.Value(), &values); decodeErr != nil {
+			log.Error().Err(decodeErr).Str("key", key).Msg("skipping multimap entry that can't be decoded")
+
+			continue
+		}
+
+		if !keep(len(values)) {
+			continue
+		}
+
+		assignedValues := make([]string, 0, len(values))
+		for assignedValue := range values {
+			assignedValues = append(assignedValues, assignedValue)
+		}
+
+		samples = append(samples, mimo.Sample{
+			OriginalValue:  key,
+			AssignedValues: assignedValues,
+		})
+	}
+
+	return samples
+}
+
func (b PebbleMultimapBackend) NewSizeIterator() mimo.SizeIterator { //nolint: ireturn
iter, _ := b.db.NewIter(b.prefixIterOptions([]byte(CountPrefix)))
diff --git a/pkg/mimo/driven.go b/pkg/mimo/driven.go
index dfe70e3..5d81c6b 100644
--- a/pkg/mimo/driven.go
+++ b/pkg/mimo/driven.go
@@ -32,6 +32,8 @@ type MultimapBackend interface {
SetKey(key string, value map[string]int) error
GetSize(key string) int
NewSizeIterator() SizeIterator
+ GetSamplesMono(n int) []Sample
+ GetSamplesMulti(n int) []Sample
}
type SizeIterator interface {
diff --git a/pkg/mimo/in_memory_multimap.go b/pkg/mimo/in_memory_multimap.go
index 826cb9d..8cf4cb2 100644
--- a/pkg/mimo/in_memory_multimap.go
+++ b/pkg/mimo/in_memory_multimap.go
@@ -43,6 +43,56 @@ func (m InMemoryMultimapBackend) GetSize(key string) int {
return len(m[key])
}
+// GetSamplesMulti returns at most maxlen samples taken among the keys that are associated
+// with more than one value. A maxlen of zero or less yields no sample.
+func (m InMemoryMultimapBackend) GetSamplesMulti(maxlen int) []Sample {
+	return m.collectSamples(maxlen, func(size int) bool { return size > 1 })
+}
+
+// GetSamplesMono returns at most maxlen samples taken among the keys that are associated
+// with exactly one value. A maxlen of zero or less yields no sample.
+func (m InMemoryMultimapBackend) GetSamplesMono(maxlen int) []Sample {
+	return m.collectSamples(maxlen, func(size int) bool { return size == 1 })
+}
+
+// collectSamples builds one sample per key whose number of associated values is accepted
+// by keep, and stops as soon as maxlen samples are collected. The map is ranged over
+// directly, so which keys get selected (and the order of their assigned values) is not
+// guaranteed to be the same from one call to the next.
+func (m InMemoryMultimapBackend) collectSamples(maxlen int, keep func(size int) bool) []Sample {
+	samples := []Sample{}
+
+	for value, assigned := range m {
+		// Checked before collecting so that a maxlen of zero or less returns nothing.
+		if len(samples) >= maxlen {
+			break
+		}
+
+		if !keep(len(assigned)) {
+			continue
+		}
+
+		assignedValues := make([]string, 0, len(assigned))
+		for assignedValue := range assigned {
+			assignedValues = append(assignedValues, assignedValue)
+		}
+
+		samples = append(samples, Sample{
+			OriginalValue:  value,
+			AssignedValues: assignedValues,
+		})
+	}
+
+	return samples
+}
+
// CountMin returns the minimum count of values associated to a key across the map.
func (m InMemoryMultimapBackend) NewSizeIterator() SizeIterator { //nolint: ireturn
sizes := []int{}
diff --git a/pkg/mimo/model.go b/pkg/mimo/model.go
index 5783352..62ae441 100644
--- a/pkg/mimo/model.go
+++ b/pkg/mimo/model.go
@@ -188,7 +188,7 @@ func (m Metrics) CoherenceRateValidate() int {
result := 0
for _, constraint := range m.Constraints {
- if constraint.Target == CohenrentRate {
+ if constraint.Target == CoherentRate {
if !validate(constraint.Type, constraint.Value, m.Coherence.Rate()) {
return -1
}
@@ -247,6 +247,72 @@ func (m Metrics) Validate() int {
return 0
}
+// GetInvalidSamplesForCoherentRate asks the coherence backend for up to maxlen samples
+// explaining the first failed constraint found on the coherent rate. It returns an empty
+// slice when no constraint on the coherent rate failed.
+//
+// When the constraint expects a higher rate (ShouldBeGreaterThan, ShouldBeGreaterThanOrEqualTo,
+// or ShouldEqual with a value above the current rate) samples come from GetSamplesMulti; when
+// it expects a lower rate (ShouldBeLowerThan, ShouldBeLessThanOrEqualTo, or ShouldEqual with
+// a value below the current rate) they come from GetSamplesMono.
+func (m Metrics) GetInvalidSamplesForCoherentRate(maxlen int) []Sample {
+	failed := m.findFailedCoherentConstraint()
+	if failed == nil {
+		return []Sample{}
+	}
+
+	switch kind := failed.Type; {
+	case kind == ShouldBeGreaterThan, kind == ShouldBeGreaterThanOrEqualTo,
+		kind == ShouldEqual && failed.Value > m.Coherence.Rate():
+		return append([]Sample{}, m.Coherence.Backend.GetSamplesMulti(maxlen)...)
+	case kind == ShouldBeLowerThan, kind == ShouldBeLessThanOrEqualTo,
+		kind == ShouldEqual && failed.Value < m.Coherence.Rate():
+		return append([]Sample{}, m.Coherence.Backend.GetSamplesMono(maxlen)...)
+	}
+
+	return []Sample{}
+}
+
+// findFailedCoherentConstraint returns a pointer to a copy of the first constraint
+// encountered that targets the coherent rate and is not validated by the current
+// coherent rate, or nil when there is none.
+func (m Metrics) findFailedCoherentConstraint() *Constraint {
+	for _, candidate := range m.Constraints {
+		candidate := candidate // private copy, so the returned pointer does not alias the loop variable
+		if candidate.Target == CoherentRate && !validate(candidate.Type, candidate.Value, m.Coherence.Rate()) {
+			return &candidate
+		}
+	}
+
+	return nil
+}
+
+// GetInvalidSamplesForIdentifiantRate asks the identifiant backend for up to maxlen samples
+// explaining the first failed constraint found on the identifiant rate. It returns an empty
+// slice when no constraint on the identifiant rate failed.
+//
+// When the constraint expects a higher rate (ShouldBeGreaterThan, ShouldBeGreaterThanOrEqualTo,
+// or ShouldEqual with a value above the current rate) samples come from GetSamplesMulti; when
+// it expects a lower rate (ShouldBeLowerThan, ShouldBeLessThanOrEqualTo, or ShouldEqual with
+// a value below the current rate) they come from GetSamplesMono.
+func (m Metrics) GetInvalidSamplesForIdentifiantRate(maxlen int) []Sample {
+	failed := m.findFailedIdentifiantConstraint()
+	if failed == nil {
+		return []Sample{}
+	}
+
+	switch kind := failed.Type; {
+	case kind == ShouldBeGreaterThan, kind == ShouldBeGreaterThanOrEqualTo,
+		kind == ShouldEqual && failed.Value > m.Identifiant.Rate():
+		return append([]Sample{}, m.Identifiant.Backend.GetSamplesMulti(maxlen)...)
+	case kind == ShouldBeLowerThan, kind == ShouldBeLessThanOrEqualTo,
+		kind == ShouldEqual && failed.Value < m.Identifiant.Rate():
+		return append([]Sample{}, m.Identifiant.Backend.GetSamplesMono(maxlen)...)
+	}
+
+	return []Sample{}
+}
+
+// findFailedIdentifiantConstraint returns a pointer to a copy of the first constraint
+// encountered that targets the identifiant rate and is not validated by the current
+// identifiant rate, or nil when there is none.
+func (m Metrics) findFailedIdentifiantConstraint() *Constraint {
+	for _, candidate := range m.Constraints {
+		candidate := candidate // private copy, so the returned pointer does not alias the loop variable
+		if candidate.Target == IdentifiantRate && !validate(candidate.Type, candidate.Value, m.Identifiant.Rate()) {
+			return &candidate
+		}
+	}
+
+	return nil
+}
+
type Report struct {
Metrics map[string]Metrics
subs Suscribers
@@ -451,3 +517,8 @@ func isExcluded(exclude []any, value any, valueStr string) bool {
return false
}
+
+// Sample associates a value with the values that were found assigned to it in a
+// multimap backend. Which side is the real value and which side is the pseudonym
+// depends on the backend the sample comes from: the mimo command logs coherence
+// samples as a real value with its pseudonyms, and identifiant samples as a
+// pseudonym with its real values.
+type Sample struct {
+ // OriginalValue is the multimap key the sample was built from.
+ OriginalValue string
+ // AssignedValues lists the distinct values stored under that key, in no guaranteed order.
+ AssignedValues []string
+}
diff --git a/pkg/mimo/model_config.go b/pkg/mimo/model_config.go
index 4024f73..4277c9c 100644
--- a/pkg/mimo/model_config.go
+++ b/pkg/mimo/model_config.go
@@ -46,7 +46,7 @@ type ConstraintTarget int
const (
MaskingRate ConstraintTarget = iota
- CohenrentRate
+ CoherentRate
IdentifiantRate
)
diff --git a/test/configs/config_debug_constraint.yaml b/test/configs/config_debug_constraint.yaml
new file mode 100644
index 0000000..d888a33
--- /dev/null
+++ b/test/configs/config_debug_constraint.yaml
@@ -0,0 +1,8 @@
+version: "1"
+metrics:
+ - name: "name"
+ constraints:
+ coherentRate:
+ shouldBeGreaterThan: 0
+ identifiantRate:
+ shouldBeGreaterThan: 0
diff --git a/test/reports/report_debug_constraints_no.html b/test/reports/report_debug_constraints_no.html
new file mode 100644
index 0000000..a5a079a
--- /dev/null
+++ b/test/reports/report_debug_constraints_no.html
@@ -0,0 +1,40 @@
+
+
+
+
+ MIMO Report
+
+
+
+
+ MIMO Report
+
+
+ Field |
+ Nil |
+ Ignored |
+ Masked |
+ Missed |
+ Masking Rate |
+ Coherent Rate |
+ Identifiable Rate |
+ K |
+
+
+
+
+ name |
+ 0 |
+ 0 |
+ 56 |
+ 44 |
+ 56.00 % |
+ 0.00 % |
+ 0.00 % |
+ 2 |
+
+
+
+
+
+
diff --git a/test/reports/report_debug_constraints_yes.html b/test/reports/report_debug_constraints_yes.html
new file mode 100644
index 0000000..8a87944
--- /dev/null
+++ b/test/reports/report_debug_constraints_yes.html
@@ -0,0 +1,40 @@
+
+
+
+
+ MIMO Report
+
+
+
+
+ MIMO Report
+
+
+ Field |
+ Nil |
+ Ignored |
+ Masked |
+ Missed |
+ Masking Rate |
+ Coherent Rate |
+ Identifiable Rate |
+ K |
+
+
+
+
+ name |
+ 0 |
+ 0 |
+ 56 |
+ 44 |
+ 56.00 % |
+ 0.00 % |
+ 0.00 % |
+ 2 |
+
+
+
+
+
+
diff --git a/test/suites/08-debug-constraints.yml b/test/suites/08-debug-constraints.yml
new file mode 100644
index 0000000..d16ea8e
--- /dev/null
+++ b/test/suites/08-debug-constraints.yml
@@ -0,0 +1,29 @@
+# Venom Test Suite definition
+# Check Venom documentation for more information : https://github.com/ovh/venom
+name: failed constraints should provide sample data in logs for debugging
+testcases:
+ - name: debug info is not present in err log if no constraint
+ steps:
+ - script: pimo --empty-input --seed 1 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/real.jsonl
+ - script: pimo --empty-input --seed 2 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/masked.jsonl
+
+ - script: cat working/masked.jsonl | mimo -verror working/real.jsonl
+ assertions:
+ - result.code ShouldEqual 0
+
+ - script: mv report.html ../reports/report_debug_constraints_no.html
+
+ - name: debug info is present in err log constraint fail
+ steps:
+ - script: pimo --empty-input --seed 1 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/real.jsonl
+ - script: pimo --empty-input --seed 2 --repeat 100 --mask 'name=[{add:""},{randomChoice:["John","Jane"]}]' > working/masked.jsonl
+
+ - script: cat working/masked.jsonl | mimo --config ../configs/config_debug_constraint.yaml -verror working/real.jsonl
+ assertions:
+ - result.code ShouldEqual 1
+ - result.systemerr ShouldContainSubstring ERR sample value that failed coherence because it was attributed 2 pseudonyms pseudonyms=["\"Jane\"","\"John\""] real-value="\"John\"_"
+ - result.systemerr ShouldContainSubstring ERR sample value that failed coherence because it was attributed 2 pseudonyms pseudonyms=["\"Jane\"","\"John\""] real-value="\"Jane\"_"
+ - result.systemerr ShouldContainSubstring ERR sample value that failed identifiant because it was attributed 2 real-values pseudonym="\"John\"" real-values=["\"Jane\"","\"John\""]
+ - result.systemerr ShouldContainSubstring ERR sample value that failed identifiant because it was attributed 2 real-values pseudonym="\"Jane\"" real-values=["\"Jane\"","\"John\""]
+
+ - script: mv report.html ../reports/report_debug_constraints_yes.html