diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 9596204..dc933ba 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,13 +1,14 @@
-FROM adrienaury/go-devcontainer:v2.0
+FROM adrienaury/go-devcontainer:v3.1
USER root
RUN apk add --update --progress --no-cache make gomplate
-ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
-RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
- && wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
- && chmod +x /usr/bin/golicense /usr/bin/mlr
+RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
+ && chmod +x /usr/bin/mlr
+
+ARG VERSION_PIMO=1.19.0
+RUN wget -O- https://github.com/CGI-FR/PIMO/releases/download/v${VERSION_PIMO}/pimo_${VERSION_PIMO}_linux_amd64.tar.gz | tar xz -C /usr/bin pimo
USER vscode
diff --git a/.devcontainer/Dockerfile.ci b/.devcontainer/Dockerfile.ci
index af9cf4f..a1d30e6 100644
--- a/.devcontainer/Dockerfile.ci
+++ b/.devcontainer/Dockerfile.ci
@@ -1,11 +1,9 @@
-FROM adrienaury/go-devcontainer-ci:v2.0
+FROM adrienaury/go-devcontainer-ci:v3.1
USER root
RUN apk add --update --progress --no-cache make gomplate
-ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
-RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
- && wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
- && chmod +x /usr/bin/golicense /usr/bin/mlr
+RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
+ && chmod +x /usr/bin/mlr
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a924f1..333bab7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,10 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.
+## [0.2.0]
+
+- `Added` configuration file with `metrics[].exclude`, `metrics[].coherentWith` and `metrics[].constraints` parameters.
+
## [0.1.0]
-- `Added` First official version.
+- `Added` first official version.
diff --git a/README.md b/README.md
index af2fbdc..269a2c9 100644
--- a/README.md
+++ b/README.md
@@ -8,26 +8,66 @@
# MIMO : Masked Input Metrics Output
+Measure the quality of a pseudonymization transformation by masking.
+
+MIMO will compute the following indicators for each column :
+
+- masking rate : percentage of values actually masked, ignoring null or missing values in real data
+- coherent rate : percentage of real unique values that are masked coherently (the same single pseudonym is used for each distinct real value)
+- identifiant rate : percentage of unique pseudonyms that are attributed to a single real value
+
+The result is an HTML report that contains the computed indicators for each column.
+
+![MIMO Report](docs/MIMO-report.png)
+
## Usage
+### Real time usage
+
```console
> mkfifo real.jsonl # create a pipe file to store the real json stream before pseudonymization
> lino pull prod | tee real.jsonl | pimo | mimo real.jsonl | lino push dev
8:27AM WRN field is not completely masked fieldname=surname
-
- MIMO REPORT
-===========================================
-fieldname | masking rate | collision rate |
-----------|--------------|----------------|
-name | 100 % | 0 % |
-surname | 99 % | 0 % |
> rm real.jsonl # pipe file can be removed after
```
+Here is a single command that runs an example on synthesized data (requires PIMO) :
+
```bash
pimo --empty-input --repeat 1000 --mask 'name=[{add:""},{randomChoiceInUri:"pimo://nameFR"}]' | tee real.jsonl | pimo --mask 'name={randomChoiceInUri:"pimo://nameFR"}' | mimo real.jsonl
```
+### After process usage
+
+MIMO can also be used on an existing file on disk.
+
+```console
+> cat masked.jsonl | mimo real.jsonl
+```
+
+### Configuration
+
+Here is an example configuration file.
+
+```yaml
+version: "1"
+metrics:
+ - name: "name" # required : name of the column to configure
+ exclude: [""] # optional : these values will be ignored during computation of the masking rate
+ coherentWith: ["name"] # optional : which values to use for the computation of the coherent rate
+ constraints: # optional : list of constraints to validate at the end of the execution
+ maskingRate:
+ shouldEqual: 1
+ coherentRate:
+ shouldBeGreaterThan: 0.5
+```
+
+You need to inform MIMO of this configuration file with the `--config` flag :
+
+```console
+> cat masked.jsonl | mimo --config myconfig.yaml real.jsonl
+```
+
## Contributing
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
diff --git a/build.yml b/build.yml
index b6ab76d..6c422a0 100644
--- a/build.yml
+++ b/build.yml
@@ -51,6 +51,7 @@ properties:
"ifshort",
"nosnakecase",
"exhaustivestruct",
+ "depguard",
]
snapshot: false # If true, do not upload release when publish target is used
dockerfiles: # List of Dockerfiles to build, defined by a map of {key=Dockerfile name ; value=path to build context}, the image name will be determined by the extension of the Dockerfile
diff --git a/cmd/mimo/main.go b/cmd/mimo/main.go
index 2f5b9df..885d1e9 100644
--- a/cmd/mimo/main.go
+++ b/cmd/mimo/main.go
@@ -44,6 +44,8 @@ var (
jsonlog bool
debug bool
colormode string
+
+ configfile string
)
func main() {
@@ -84,6 +86,7 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)")
rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format")
rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto")
+ rootCmd.PersistentFlags().StringVar(&configfile, "config", "", "name of the YAML configuration file to use")
if err := rootCmd.Execute(); err != nil {
log.Err(err).Msg("error when executing command")
@@ -100,6 +103,17 @@ func run(_ *cobra.Command, realJSONLineFileName string) {
}
driver := mimo.NewDriver(realReader, maskedReader, infra.SubscriberLogger{})
+
+ if configfile != "" {
+ if config, err := infra.LoadConfig(configfile); err != nil {
+ log.Fatal().Err(err).Msg("end MIMO")
+ } else {
+ driver.Configure(config)
+ }
+ }
+
+ haserror := false
+
if report, err := driver.Analyze(); err != nil {
log.Error().Err(err).Msg("end of program")
} else {
@@ -107,19 +121,37 @@ func run(_ *cobra.Command, realJSONLineFileName string) {
sort.Strings(columns)
for _, colname := range columns {
metrics := report.ColumnMetric(colname)
- log.Info().
- Str("field", colname).
- Int64("count-nil", metrics.NilCount).
- Int64("count-empty", metrics.EmptyCount).
- Int64("count-masked", metrics.MaskedCount).
- Int64("count-missed", metrics.NonMaskedCount()).
- Float64("rate-masking", metrics.MaskedRate()).
- Float64("rate-coherence", metrics.Coherence.Rate()).
- Float64("rate-identifiable", metrics.Identifiant.Rate()).
- Msg("summmary for column " + colname)
+ if metrics.Validate() >= 0 {
+ log.Info().
+ Str("field", colname).
+ Int64("count-nil", metrics.NilCount).
+ Int64("count-empty", metrics.EmptyCount).
+ Int64("count-masked", metrics.MaskedCount).
+ Int64("count-missed", metrics.NonMaskedCount()).
+ Float64("rate-masking", metrics.MaskedRate()).
+ Float64("rate-coherence", metrics.Coherence.Rate()).
+ Float64("rate-identifiable", metrics.Identifiant.Rate()).
+ Msg("summmary for column " + colname)
+ } else {
+ log.Error().
+ Str("field", colname).
+ Int64("count-nil", metrics.NilCount).
+ Int64("count-empty", metrics.EmptyCount).
+ Int64("count-masked", metrics.MaskedCount).
+ Int64("count-missed", metrics.NonMaskedCount()).
+ Float64("rate-masking", metrics.MaskedRate()).
+ Float64("rate-coherence", metrics.Coherence.Rate()).
+ Float64("rate-identifiable", metrics.Identifiant.Rate()).
+ Msg("summmary for column " + colname)
+ haserror = true
+ }
}
_ = infra.NewReportExporter().Export(report, "report.html")
}
+
+ if haserror {
+ os.Exit(1)
+ }
}
func initLog() {
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..3576d6e
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,10 @@
+version: "1"
+metrics:
+ - name: "name"
+ exclude: [""]
+ coherentWith: ["name"]
+ constraints:
+ maskingRate:
+ shouldEqual: 1
+ coherentRate:
+ shouldBeGreaterThan: 0.5
diff --git a/docs/MIMO-report.png b/docs/MIMO-report.png
new file mode 100644
index 0000000..2c28a36
Binary files /dev/null and b/docs/MIMO-report.png differ
diff --git a/go.mod b/go.mod
index 7b9b9d1..c4781c3 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
module github.com/cgi-fr/mimo
-go 1.20
+go 1.21
require (
github.com/Masterminds/sprig/v3 v3.2.3
@@ -8,6 +8,7 @@ require (
github.com/rs/zerolog v1.28.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.5.1
+ gopkg.in/yaml.v3 v3.0.1
)
require (
diff --git a/go.sum b/go.sum
index 816e45a..61cdcc1 100644
--- a/go.sum
+++ b/go.sum
@@ -82,4 +82,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/internal/infra/config_loader.go b/internal/infra/config_loader.go
new file mode 100644
index 0000000..3b63122
--- /dev/null
+++ b/internal/infra/config_loader.go
@@ -0,0 +1,131 @@
+// Copyright (C) 2023 CGI France
+//
+// This file is part of MIMO.
+//
+// MIMO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// MIMO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with MIMO. If not, see <https://www.gnu.org/licenses/>.
+
+package infra
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/cgi-fr/mimo/pkg/mimo"
+ "github.com/rs/zerolog/log"
+ "gopkg.in/yaml.v3"
+)
+
+// Version is the only configuration schema version LoadConfig accepts; the structure is pre-filled with it before the file is unmarshalled.
+const Version string = "1"
+
+// YAMLStructure is the root of the YAML configuration file: a schema version and the per-column settings.
+type YAMLStructure struct {
+ Version string `yaml:"version"` // schema version; LoadConfig compares it with the Version constant
+ Columns []YAMLColumn `yaml:"metrics,omitempty"` // one entry per configured column, stored under the `metrics` key
+}
+
+// YAMLColumn is the YAML representation of the settings of one column; CreateConfig converts it into a mimo.ColumnConfig.
+type YAMLColumn struct {
+ Name string `yaml:"name"` // column name, used by CreateConfig as the key of mimo.Config.ColumnConfigs
+ Exclude []any `yaml:"exclude,omitempty"` // copied as is into mimo.ColumnConfig.Exclude
+ CoherentWith []string `yaml:"coherentWith,omitempty"` // copied as is into mimo.ColumnConfig.CoherentWith
+ Constraints map[string]YAMLConstraint `yaml:"constraints,omitempty"` // keyed by target name: "maskingRate", "coherentRate" or "identifiantRate"
+}
+
+type YAMLConstraint map[string]float64 // YAMLConstraint maps a constraint type name (e.g. "shouldEqual") to its reference value
+
+func LoadConfig(filename string) (mimo.Config, error) { // LoadConfig reads the YAML file at filename and converts it into a mimo.Config
+ config := &YAMLStructure{
+ Version: Version, // pre-filled before unmarshalling
+ Columns: []YAMLColumn{},
+ }
+
+ if _, err := os.Stat(filename); os.IsNotExist(err) { // any other Stat error is left for os.ReadFile below to report
+ return mimo.NewConfig(), fmt.Errorf("%w: %s", ErrConfigFileNotExists, filename)
+ }
+
+ log.Debug().Str("file", filename).Msg("loading config from file")
+
+ dat, err := os.ReadFile(filename)
+ if err != nil {
+ return mimo.NewConfig(), fmt.Errorf("%w: %s", err, filename) // every failure in this function returns an empty config alongside the error
+ }
+
+ err = yaml.Unmarshal(dat, config)
+ if err != nil {
+ return mimo.NewConfig(), fmt.Errorf("%w: %s", err, filename)
+ }
+
+ if config.Version != Version { // only the exact schema version "1" is accepted
+ return mimo.NewConfig(), fmt.Errorf("%w: %s", ErrConfigInvalidVersion, filename)
+ }
+
+ return CreateConfig(config) // translation of names into mimo enums (and its errors) is delegated to CreateConfig
+}
+
+//nolint:cyclop // the complexity comes from the two flat name-to-enum switches below
+func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) { // CreateConfig converts the parsed YAML structure into a mimo.Config
+ config := mimo.NewConfig()
+
+ for _, yamlcolumn := range yamlconfig.Columns {
+ column := mimo.ColumnConfig{
+ Exclude: yamlcolumn.Exclude,
+ CoherentWith: yamlcolumn.CoherentWith,
+ Constraints: []mimo.Constraint{},
+ }
+
+ for target, yamlconstraint := range yamlcolumn.Constraints { // map iteration: the order of the resulting constraints is unspecified
+ for constraintType, value := range yamlconstraint { // one mimo.Constraint is built per (target, type) pair
+ constraint := mimo.Constraint{
+ Target: 0, // placeholder, always overwritten by the first switch or rejected
+ Type: 0, // placeholder, always overwritten by the second switch or rejected
+ Value: value,
+ }
+
+ switch target { // NOTE(review): checked per constraint type, so a target with no entries is never validated
+ case "maskingRate":
+ constraint.Target = mimo.MaskingRate
+ case "coherentRate":
+ constraint.Target = mimo.CohenrentRate
+ case "identifiantRate":
+ constraint.Target = mimo.IdentifiantRate
+ default:
+ return config, fmt.Errorf("%w: %s", ErrConfigInvalidConstraintTarget, target) // config holds only the columns completed so far
+ }
+
+ switch constraintType { // NOTE(review): names mix "Lower" and "Less" (shouldBeLowerThan / shouldBeLessThanOrEqualTo)
+ case "shouldEqual":
+ constraint.Type = mimo.ShouldEqual
+ case "shouldBeGreaterThan":
+ constraint.Type = mimo.ShouldBeGreaterThan
+ case "shouldBeGreaterThanOrEqualTo":
+ constraint.Type = mimo.ShouldBeGreaterThanOrEqualTo
+ case "shouldBeLowerThan":
+ constraint.Type = mimo.ShouldBeLowerThan
+ case "shouldBeLessThanOrEqualTo":
+ constraint.Type = mimo.ShouldBeLessThanOrEqualTo
+ default:
+ return config, fmt.Errorf("%w: %s", ErrConfigInvalidConstraintType, constraintType) // config holds only the columns completed so far
+ }
+
+ column.Constraints = append(column.Constraints, constraint)
+ }
+ }
+
+ config.ColumnNames = append(config.ColumnNames, yamlcolumn.Name) // a name repeated in the file is appended again here...
+ config.ColumnConfigs[yamlcolumn.Name] = column // ...while its settings are overwritten by the last occurrence
+ }
+
+ return config, nil
+}
diff --git a/internal/infra/errors.go b/internal/infra/errors.go
new file mode 100644
index 0000000..8133f02
--- /dev/null
+++ b/internal/infra/errors.go
@@ -0,0 +1,34 @@
+// Copyright (C) 2023 CGI France
+//
+// This file is part of MIMO.
+//
+// MIMO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// MIMO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with MIMO. If not, see <https://www.gnu.org/licenses/>.
+
+package infra
+
+import "errors"
+
+var (
+ // ErrConfigFileNotExists is wrapped and returned by LoadConfig when the config file path does not exist.
+ ErrConfigFileNotExists = errors.New("error config file does not exist")
+
+ // ErrConfigInvalidVersion is wrapped and returned by LoadConfig when the version in the file differs from Version.
+ ErrConfigInvalidVersion = errors.New("invalid version in config file")
+
+ // ErrConfigInvalidConstraintType is wrapped and returned by CreateConfig when a constraint name is not a supported comparison (shouldEqual, shouldBeGreaterThan, ...).
+ ErrConfigInvalidConstraintType = errors.New("invalid constraint type in config file")
+
+ // ErrConfigInvalidConstraintTarget is wrapped and returned by CreateConfig when a constraint targets anything other than maskingRate, coherentRate or identifiantRate.
+ ErrConfigInvalidConstraintTarget = errors.New("invalid constraint target in config file")
+)
diff --git a/internal/infra/subscriber_log.go b/internal/infra/subscriber_log.go
index 729bb72..bc47cb8 100644
--- a/internal/infra/subscriber_log.go
+++ b/internal/infra/subscriber_log.go
@@ -25,6 +25,6 @@ func (sl SubscriberLogger) NewField(fieldname string) {
log.Info().Str("name", fieldname).Msg("new field")
}
-func (sl SubscriberLogger) FirstNonMaskedValue(fieldname string, value any) {
+func (sl SubscriberLogger) FirstNonMaskedValue(fieldname string, _ any) {
log.Info().Str("name", fieldname).Msg("unmasked value detected")
}
diff --git a/internal/infra/template/default.html b/internal/infra/template/default.html
index 8c65fe1..fce023f 100644
--- a/internal/infra/template/default.html
+++ b/internal/infra/template/default.html
@@ -28,9 +28,9 @@
{{ end }}
diff --git a/pkg/mimo/driver.go b/pkg/mimo/driver.go
index 9f94c2c..6d937fb 100644
--- a/pkg/mimo/driver.go
+++ b/pkg/mimo/driver.go
@@ -35,11 +35,15 @@ func NewDriver(realReader DataRowReader, maskedReader DataRowReader, subs ...Eve
realDataSource: realReader,
maskDataSource: maskedReader,
subscribers: subs,
- report: NewReport(subs),
+ report: NewReport(subs, NewConfig()),
}
}
-func (d Driver) Analyze() (Report, error) {
+func (d *Driver) Configure(c Config) { // Configure replaces the configuration used by the driver's report
+ d.report.config = c // overwrites the config set at construction (NewDriver installs an empty NewConfig()); presumably must be called before Analyze
+}
+
+func (d *Driver) Analyze() (Report, error) {
for {
realRow, err := d.realDataSource.ReadDataRow()
if err != nil {
diff --git a/pkg/mimo/model.go b/pkg/mimo/model.go
index fd6d854..f3b20ad 100644
--- a/pkg/mimo/model.go
+++ b/pkg/mimo/model.go
@@ -20,7 +20,9 @@ package mimo
import (
"encoding/json"
"fmt"
+ "slices"
"strconv"
+ "strings"
)
type DataRow map[string]any
@@ -40,15 +42,16 @@ func (subs Suscribers) PostFirstNonMaskedValue(fieldname string, value any) {
}
type Metrics struct {
- TotalCount int64 // TotalCount is the number of values analyzed
- NilCount int64 // NilCount is the number of null values in real data
- EmptyCount int64 // EmptyCount is the number of empty values in real data (empty string or numbers at 0 value)
- MaskedCount int64 // MaskedCount is the number of non-blank real values masked
- Coherence Multimap // Coherence is a multimap used to compute the coherence rate
- Identifiant Multimap // Identifiant is a multimap used to compute the identifiable rate
+ TotalCount int64 // TotalCount is the number of values analyzed
+ NilCount int64 // NilCount is the number of null values in real data
+ EmptyCount int64 // EmptyCount is the number of empty values in real data (empty string or numbers at 0 value)
+ MaskedCount int64 // MaskedCount is the number of non-blank real values masked
+ Coherence Multimap // Coherence is a multimap used to compute the coherence rate
+ Identifiant Multimap // Identifiant is a multimap used to compute the identifiable rate
+ Constraints []Constraint // Constraints is the set of rules to validate
}
-func NewMetrics() Metrics {
+func NewMetrics(constraints ...Constraint) Metrics {
return Metrics{
TotalCount: 0,
NilCount: 0,
@@ -56,10 +59,18 @@ func NewMetrics() Metrics {
MaskedCount: 0,
Coherence: Multimap{},
Identifiant: Multimap{},
+ Constraints: constraints,
}
}
-func (m *Metrics) Update(fieldname string, realValue any, maskedValue any, subs Suscribers) bool {
+func (m *Metrics) Update(
+ fieldname string,
+ realValue any,
+ maskedValue any,
+ coherenceValue []any,
+ subs Suscribers,
+ config ColumnConfig,
+) bool {
nonBlankCount := m.NonBlankCount()
realValueStr, realValueOk := toString(realValue)
@@ -71,7 +82,7 @@ func (m *Metrics) Update(fieldname string, realValue any, maskedValue any, subs
m.TotalCount++
- m.Coherence.Add(realValueStr, maskedValueStr)
+ m.Coherence.Add(toStringSlice(coherenceValue), maskedValueStr)
m.Identifiant.Add(maskedValueStr, realValueStr)
if realValue == nil {
@@ -80,6 +91,12 @@ func (m *Metrics) Update(fieldname string, realValue any, maskedValue any, subs
return true
}
+ if slices.Contains(config.Exclude, realValue) {
+ m.EmptyCount++
+
+ return true
+ }
+
if realValueOk && maskedValueOk {
if realValueStr != maskedValueStr {
m.MaskedCount++
@@ -119,25 +136,127 @@ func (m Metrics) MaskedRate() float64 {
return float64(m.MaskedCount) / float64(m.NonBlankCount())
}
+// MaskedRateValidate checks every constraint targeting MaskingRate against m.MaskedRate() and returns:
+//   - -1 if at least one of those constraints fails (evaluation stops at the first failure),
+//   - 0 if no constraint targets the masking rate,
+//   - 1 if all of those constraints succeed.
+func (m Metrics) MaskedRateValidate() int {
+ result := 0
+
+ for _, constraint := range m.Constraints {
+ if constraint.Target == MaskingRate { // constraints on other targets are ignored here
+ if !validate(constraint.Type, constraint.Value, m.MaskedRate()) {
+ return -1
+ }
+
+ result = 1 // at least one applicable constraint has passed so far
+ }
+ }
+
+ return result
+}
+
+// CoherenceRateValidate checks every constraint targeting CohenrentRate against m.Coherence.Rate() and returns:
+//   - -1 if at least one of those constraints fails (evaluation stops at the first failure),
+//   - 0 if no constraint targets the coherence rate,
+//   - 1 if all of those constraints succeed.
+func (m Metrics) CoherenceRateValidate() int {
+ result := 0
+
+ for _, constraint := range m.Constraints {
+ if constraint.Target == CohenrentRate { // constraints on other targets are ignored here
+ if !validate(constraint.Type, constraint.Value, m.Coherence.Rate()) {
+ return -1
+ }
+
+ result = 1 // at least one applicable constraint has passed so far
+ }
+ }
+
+ return result
+}
+
+// IdentifiantRateValidate checks every constraint targeting IdentifiantRate against m.Identifiant.Rate() and returns:
+//   - -1 if at least one of those constraints fails (evaluation stops at the first failure),
+//   - 0 if no constraint targets the identifiant rate,
+//   - 1 if all of those constraints succeed.
+func (m Metrics) IdentifiantRateValidate() int {
+ result := 0
+
+ for _, constraint := range m.Constraints {
+ if constraint.Target == IdentifiantRate { // constraints on other targets are ignored here
+ if !validate(constraint.Type, constraint.Value, m.Identifiant.Rate()) {
+ return -1
+ }
+
+ result = 1 // at least one applicable constraint has passed so far
+ }
+ }
+
+ return result
+}
+
+// Validate combines MaskedRateValidate, CoherenceRateValidate and IdentifiantRateValidate and returns:
+//   - -1 if at least one constraint fails,
+//   - 0 if no constraint exists,
+//   - 1 if at least one constraint exists and all constraints succeed.
+func (m Metrics) Validate() int {
+ resultMaskedRate := m.MaskedRateValidate()
+ if resultMaskedRate < 0 { // a failure on any rate short-circuits the remaining checks
+ return -1
+ }
+
+ resultCoherentRate := m.CoherenceRateValidate()
+ if resultCoherentRate < 0 {
+ return -1
+ }
+
+ resultIdentifiantRate := m.IdentifiantRateValidate()
+ if resultIdentifiantRate < 0 {
+ return -1
+ }
+
+ if resultMaskedRate > 0 || resultCoherentRate > 0 || resultIdentifiantRate > 0 { // nothing failed and at least one rate had a constraint
+ return 1
+ }
+
+ return 0
+}
+
type Report struct {
Metrics map[string]Metrics
subs Suscribers
+ config Config
}
-func NewReport(subs []EventSubscriber) Report {
- return Report{make(map[string]Metrics), subs}
+func NewReport(subs []EventSubscriber, config Config) Report {
+ return Report{make(map[string]Metrics), subs, config}
}
func (r Report) Update(realRow DataRow, maskedRow DataRow) {
for key, realValue := range realRow {
metrics, exists := r.Metrics[key]
if !exists {
- metrics = NewMetrics()
+ metrics = NewMetrics(r.config.ColumnConfigs[key].Constraints...)
r.subs.PostNewField(key)
}
- if metrics.Update(key, realValue, maskedRow[key], r.subs) {
+ config := NewDefaultColumnConfig(key)
+ if cfg, ok := r.config.ColumnConfigs[key]; ok {
+ config = cfg
+ }
+
+ coherenceValues := make([]any, len(config.CoherentWith))
+ for i, coherentColumn := range config.CoherentWith {
+ coherenceValues[i] = realRow[coherentColumn]
+ }
+
+ if len(coherenceValues) == 0 {
+ coherenceValues = []any{realValue}
+ }
+
+ if metrics.Update(key, realValue, maskedRow[key], coherenceValues, r.subs, config) {
r.Metrics[key] = metrics
}
}
@@ -173,3 +292,34 @@ func toString(value any) (string, bool) {
return str, true
}
+
+func toStringSlice(values []any) string { // toStringSlice concatenates the string form of each value, each one followed by "_"
+ result := &strings.Builder{}
+
+ for _, value := range values {
+ if str, ok := toString(value); ok { // values that toString cannot convert contribute only the separator
+ result.WriteString(str)
+ }
+
+ result.WriteString("_") // NOTE(review): not escaped; if toString returns strings verbatim, ["a_","b"] and ["a","_b"] yield the same key
+ }
+
+ return result.String()
+}
+
+func validate(constraint ConstraintType, reference float64, value float64) bool { // validate reports whether value satisfies the comparison against reference
+ switch constraint {
+ case ShouldEqual:
+ return value == reference // exact floating-point equality, no tolerance
+ case ShouldBeGreaterThan:
+ return value > reference
+ case ShouldBeGreaterThanOrEqualTo:
+ return value >= reference
+ case ShouldBeLowerThan:
+ return value < reference
+ case ShouldBeLessThanOrEqualTo:
+ return value <= reference
+ default:
+ return false // an unknown constraint type is treated as a failed constraint
+ }
+}
diff --git a/pkg/mimo/model_config.go b/pkg/mimo/model_config.go
new file mode 100644
index 0000000..a53c1e4
--- /dev/null
+++ b/pkg/mimo/model_config.go
@@ -0,0 +1,68 @@
+// Copyright (C) 2023 CGI France
+//
+// This file is part of MIMO.
+//
+// MIMO is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// MIMO is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with MIMO. If not, see <https://www.gnu.org/licenses/>.
+
+package mimo
+
+type Config struct { // Config holds the optional per-column settings used when computing and validating metrics
+ ColumnNames []string // names of the configured columns
+ ColumnConfigs map[string]ColumnConfig // settings of each configured column, keyed by column name
+}
+
+type ColumnConfig struct { // ColumnConfig holds the settings of a single column
+ Exclude []any // values excluded from the masking rate computation (default: exclude only nil values)
+ CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
+ Constraints []Constraint // list of constraints to validate
+}
+
+type Constraint struct { // Constraint is one rule of the form "<Target> <Type> <Value>", e.g. masking rate should equal 1
+ Target ConstraintTarget // which rate the rule applies to
+ Type ConstraintType // which comparison is performed
+ Value float64 // reference value the rate is compared with
+}
+
+type ConstraintTarget int // ConstraintTarget identifies the rate a Constraint applies to; the zero value is MaskingRate
+
+const (
+ MaskingRate ConstraintTarget = iota // validated against Metrics.MaskedRate()
+ CohenrentRate // validated against Metrics.Coherence.Rate(); NOTE(review): identifier is misspelled but exported, renaming would break callers
+ IdentifiantRate // validated against Metrics.Identifiant.Rate()
+)
+
+type ConstraintType int // ConstraintType identifies the comparison a Constraint performs; the zero value is ShouldEqual
+
+const (
+ ShouldEqual ConstraintType = iota // rate == value
+ ShouldBeGreaterThan // rate > value
+ ShouldBeGreaterThanOrEqualTo // rate >= value
+ ShouldBeLowerThan // rate < value
+ ShouldBeLessThanOrEqualTo // rate <= value
+)
+
+func NewConfig() Config { // NewConfig returns an empty configuration: no column has specific settings
+ return Config{
+ ColumnNames: []string{},
+ ColumnConfigs: map[string]ColumnConfig{}, // non-nil so that entries can be assigned directly
+ }
+}
+
+func NewDefaultColumnConfig(columnname string) ColumnConfig { // NewDefaultColumnConfig returns the settings used for a column that has no explicit configuration
+ return ColumnConfig{
+ Exclude: []any{}, // no extra value is excluded
+ CoherentWith: []string{columnname}, // coherence is computed from the column's own values
+ Constraints: []Constraint{}, // nothing to validate
+ }
+}
diff --git a/report-example.html b/report-example.html
deleted file mode 100644
index f70ecc6..0000000
--- a/report-example.html
+++ /dev/null
@@ -1,45 +0,0 @@
-
-
-
-
- MIMO Report
-
-
-
-
-
-