Skip to content

Commit

Permalink
feat(config): add config file loading (#7)
Browse files Browse the repository at this point in the history
* feat(config): add config file loading

* chore: bump devcontainer golang 1.21

* feat: exclude value + coherent source

* feat: exclude value + coherent source

* style: lint

* test: config exclude and coherentWith

* chore: reorg tests

* chore: test update ci container

* Update Dockerfile.ci

* Update Dockerfile

* feat: constraints validation

* test: constraints

* feat: exit code 1 if constraint fail

* docs: update readme and changelog
  • Loading branch information
adrienaury authored Aug 21, 2023
1 parent f403d2b commit 47c0489
Show file tree
Hide file tree
Showing 30 changed files with 855 additions and 98 deletions.
11 changes: 6 additions & 5 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
FROM adrienaury/go-devcontainer:v2.0
FROM adrienaury/go-devcontainer:v3.1

USER root

RUN apk add --update --progress --no-cache make gomplate

ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
&& wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/golicense /usr/bin/mlr
RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/mlr

ARG VERSION_PIMO=1.19.0
RUN wget -O- https://github.com/CGI-FR/PIMO/releases/download/v${VERSION_PIMO}/pimo_${VERSION_PIMO}_linux_amd64.tar.gz | tar xz -C /usr/bin pimo

USER vscode
8 changes: 3 additions & 5 deletions .devcontainer/Dockerfile.ci
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
FROM adrienaury/go-devcontainer-ci:v2.0
FROM adrienaury/go-devcontainer-ci:v3.1

USER root

RUN apk add --update --progress --no-cache make gomplate

ARG VERSION_GOLICENSE=0.2.0
ARG VERSION_MILLER=6.2.0
RUN wget -nv -O- https://github.com/mitchellh/golicense/releases/download/v${VERSION_GOLICENSE}/golicense_${VERSION_GOLICENSE}_linux_x86_64.tar.gz | tar xz -C /usr/bin golicense \
&& wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/golicense /usr/bin/mlr
RUN wget -nv -O- https://github.com/johnkerl/miller/releases/download/v${VERSION_MILLER}/miller-${VERSION_MILLER}-linux-amd64.tar.gz | tar xz --strip-components 1 -C /usr/bin miller-${VERSION_MILLER}-linux-amd64/mlr \
&& chmod +x /usr/bin/mlr
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.2.0]

- `Added` configuration file with `metrics[].exclude`, `metrics[].coherentWith` and `metrics[].constraints` parameters.

## [0.1.0]

- `Added` First official version.
- `Added` first official version.
54 changes: 47 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,66 @@

# MIMO : Masked Input Metrics Output

Measure the quality of a pseudonymization transformation by masking.

MIMO will compute the following indicators for each column:

- masking rate : percentage of values actually masked, ignoring null or nonexistent values in real data
- coherent rate : percentage of real unique values that are masked coherently (the same single pseudonym is used for each distinct real value)
- identifiant rate : percentage of unique pseudonyms that are attributed to a single real value

The result is an HTML report that contains the computed indicators for each column.

![MIMO Report](docs/MIMO-report.png)

## Usage

### Real time usage

```console
> mkfifo real.jsonl # create a pipe file to store the real json stream before pseudonymization
> lino pull prod | tee real.jsonl | pimo | mimo real.jsonl | lino push dev
8:27AM WRN field is not completely masked fieldname=surname

MIMO REPORT
===========================================
fieldname | masking rate | collision rate |
----------|--------------|----------------|
name | 100 % | 0 % |
surname | 99 % | 0 % |
> rm real.jsonl # pipe file can be removed after
```

Here is a single command that runs an example on synthesized data (requires PIMO):

```bash
pimo --empty-input --repeat 1000 --mask 'name=[{add:""},{randomChoiceInUri:"pimo://nameFR"}]' | tee real.jsonl | pimo --mask 'name={randomChoiceInUri:"pimo://nameFR"}' | mimo real.jsonl
```

### After process usage

MIMO can also be used on an existing file on disk.

```console
> cat masked.jsonl | mimo real.jsonl
```

### Configuration

Here is an example configuration file.

```yaml
version: "1"
metrics:
  - name: "name" # required : name of the column to configure
    exclude: [""] # optional : these values will be ignored during computation of the masking rate
    coherentWith: ["name"] # optional : which values to use for the computation of the coherent rate
    constraints: # optional : list of constraints to validate at the end of the execution
      maskingRate:
        shouldEqual: 1
      coherentRate:
        shouldBeGreaterThan: 0.5
```
You need to inform MIMO of this configuration file with the `--config` flag :

```console
> cat masked.jsonl | mimo --config myconfig.yaml real.jsonl
```

## Contributing

Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
Expand Down
1 change: 1 addition & 0 deletions build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ properties:
"ifshort",
"nosnakecase",
"exhaustivestruct",
"depguard",
]
snapshot: false # If true, do not upload release when publish target is used
dockerfiles: # List of Dockerfiles to build, defined by a map of {key=Dockerfile name ; value=path to build context}, the image name will be determined by the extension of the Dockerfile
Expand Down
52 changes: 42 additions & 10 deletions cmd/mimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ var (
jsonlog bool
debug bool
colormode string

configfile string
)

func main() {
Expand Down Expand Up @@ -84,6 +86,7 @@ There is NO WARRANTY, to the extent permitted by law.`, version, commit, buildDa
rootCmd.PersistentFlags().BoolVar(&debug, "debug", false, "add debug information to logs (very slow)")
rootCmd.PersistentFlags().BoolVar(&jsonlog, "log-json", false, "output logs in JSON format")
rootCmd.PersistentFlags().StringVar(&colormode, "color", "auto", "use colors in log outputs : yes, no or auto")
rootCmd.PersistentFlags().StringVar(&configfile, "config", "", "name of the YAML configuration file to use")

if err := rootCmd.Execute(); err != nil {
log.Err(err).Msg("error when executing command")
Expand All @@ -100,26 +103,55 @@ func run(_ *cobra.Command, realJSONLineFileName string) {
}

driver := mimo.NewDriver(realReader, maskedReader, infra.SubscriberLogger{})

if configfile != "" {
if config, err := infra.LoadConfig(configfile); err != nil {
log.Fatal().Err(err).Msg("end MIMO")
} else {
driver.Configure(config)
}
}

haserror := false

if report, err := driver.Analyze(); err != nil {
log.Error().Err(err).Msg("end of program")
} else {
columns := report.Columns()
sort.Strings(columns)
for _, colname := range columns {
metrics := report.ColumnMetric(colname)
log.Info().
Str("field", colname).
Int64("count-nil", metrics.NilCount).
Int64("count-empty", metrics.EmptyCount).
Int64("count-masked", metrics.MaskedCount).
Int64("count-missed", metrics.NonMaskedCount()).
Float64("rate-masking", metrics.MaskedRate()).
Float64("rate-coherence", metrics.Coherence.Rate()).
Float64("rate-identifiable", metrics.Identifiant.Rate()).
Msg("summmary for column " + colname)
if metrics.Validate() >= 0 {
log.Info().
Str("field", colname).
Int64("count-nil", metrics.NilCount).
Int64("count-empty", metrics.EmptyCount).
Int64("count-masked", metrics.MaskedCount).
Int64("count-missed", metrics.NonMaskedCount()).
Float64("rate-masking", metrics.MaskedRate()).
Float64("rate-coherence", metrics.Coherence.Rate()).
Float64("rate-identifiable", metrics.Identifiant.Rate()).
Msg("summmary for column " + colname)
} else {
log.Error().
Str("field", colname).
Int64("count-nil", metrics.NilCount).
Int64("count-empty", metrics.EmptyCount).
Int64("count-masked", metrics.MaskedCount).
Int64("count-missed", metrics.NonMaskedCount()).
Float64("rate-masking", metrics.MaskedRate()).
Float64("rate-coherence", metrics.Coherence.Rate()).
Float64("rate-identifiable", metrics.Identifiant.Rate()).
Msg("summmary for column " + colname)
haserror = true
}
}
_ = infra.NewReportExporter().Export(report, "report.html")
}

if haserror {
os.Exit(1)
}
}

func initLog() {
Expand Down
10 changes: 10 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: "1"
metrics:
  - name: "name"
    exclude: [""]
    coherentWith: ["name"]
    constraints:
      maskingRate:
        shouldEqual: 1
      coherentRate:
        shouldBeGreaterThan: 0.5
Binary file added docs/MIMO-report.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
module github.com/cgi-fr/mimo

go 1.20
go 1.21

require (
github.com/Masterminds/sprig/v3 v3.2.3
github.com/mattn/go-isatty v0.0.14
github.com/rs/zerolog v1.28.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.5.1
gopkg.in/yaml.v3 v3.0.1
)

require (
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
131 changes: 131 additions & 0 deletions internal/infra/config_loader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright (C) 2023 CGI France
//
// This file is part of MIMO.
//
// MIMO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MIMO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MIMO. If not, see <http://www.gnu.org/licenses/>.

package infra

import (
"fmt"
"os"

"github.com/cgi-fr/mimo/pkg/mimo"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)

// Version of the YAML structure.
const Version string = "1"

type (
	// YAMLStructure is the root document of a configuration file: a version
	// marker plus the per-column settings stored under the "metrics" key.
	YAMLStructure struct {
		Version string       `yaml:"version"`
		Columns []YAMLColumn `yaml:"metrics,omitempty"`
	}

	// YAMLColumn is the YAML representation of the settings of one column.
	YAMLColumn struct {
		Name         string                    `yaml:"name"`
		Exclude      []any                     `yaml:"exclude,omitempty"`
		CoherentWith []string                  `yaml:"coherentWith,omitempty"`
		Constraints  map[string]YAMLConstraint `yaml:"constraints,omitempty"`
	}

	// YAMLConstraint associates a constraint type key (for example
	// "shouldEqual") with the numeric value the constraint compares against.
	YAMLConstraint map[string]float64
)

// LoadConfig reads the YAML configuration file named filename and converts
// its content into a mimo.Config by delegating to CreateConfig.
//
// It fails when the file does not exist (ErrConfigFileNotExists), cannot be
// read, cannot be decoded as YAML, or declares a version different from
// Version (ErrConfigInvalidVersion). For every failure detected in this
// function the returned configuration is a fresh mimo.NewConfig().
func LoadConfig(filename string) (mimo.Config, error) {
	if _, statErr := os.Stat(filename); os.IsNotExist(statErr) {
		return mimo.NewConfig(), fmt.Errorf("%w: %s", ErrConfigFileNotExists, filename)
	}

	log.Debug().Str("file", filename).Msg("loading config from file")

	content, readErr := os.ReadFile(filename)
	if readErr != nil {
		return mimo.NewConfig(), fmt.Errorf("%w: %s", readErr, filename)
	}

	// The version is pre-filled before decoding; presumably a file that omits
	// the key keeps this default and is therefore accepted below.
	parsed := YAMLStructure{
		Version: Version,
		Columns: []YAMLColumn{},
	}

	if decodeErr := yaml.Unmarshal(content, &parsed); decodeErr != nil {
		return mimo.NewConfig(), fmt.Errorf("%w: %s", decodeErr, filename)
	}

	if parsed.Version != Version {
		return mimo.NewConfig(), fmt.Errorf("%w: %s", ErrConfigInvalidVersion, filename)
	}

	return CreateConfig(&parsed)
}

// CreateConfig converts a decoded YAML structure into a mimo.Config.
//
// Every entry of yamlconfig.Columns becomes one mimo.ColumnConfig stored in
// ColumnConfigs under the column name, and the name is appended to
// ColumnNames. Constraints are keyed first by target ("maskingRate",
// "coherentRate" or "identifiantRate") and then by constraint type
// ("shouldEqual", "shouldBeGreaterThan", "shouldBeGreaterThanOrEqualTo",
// "shouldBeLowerThan" or "shouldBeLessThanOrEqualTo").
//
// A nil yamlconfig is treated as a structure without columns. An unknown
// target yields an error wrapping ErrConfigInvalidConstraintTarget, even when
// no constraint is listed under it; an unknown constraint type yields an error
// wrapping ErrConfigInvalidConstraintType. On error the returned configuration
// is a fresh mimo.NewConfig(), never a half-built one.
func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {
	config := mimo.NewConfig()

	if yamlconfig == nil {
		return config, nil
	}

	for _, yamlcolumn := range yamlconfig.Columns {
		column := mimo.ColumnConfig{
			Exclude:      yamlcolumn.Exclude,
			CoherentWith: yamlcolumn.CoherentWith,
			Constraints:  []mimo.Constraint{},
		}

		for target, yamlconstraint := range yamlcolumn.Constraints {
			// The target is resolved once per target key, before looking at the
			// constraint types, so a misspelled target is reported even when
			// its constraint map is empty.
			base := mimo.Constraint{
				Target: 0,
				Type:   0,
				Value:  0,
			}

			if err := setConstraintTarget(&base, target); err != nil {
				return mimo.NewConfig(), err
			}

			for constraintType, value := range yamlconstraint {
				constraint := base
				constraint.Value = value

				if err := setConstraintType(&constraint, constraintType); err != nil {
					return mimo.NewConfig(), err
				}

				column.Constraints = append(column.Constraints, constraint)
			}
		}

		config.ColumnNames = append(config.ColumnNames, yamlcolumn.Name)
		config.ColumnConfigs[yamlcolumn.Name] = column
	}

	return config, nil
}

// setConstraintTarget stores in constraint the mimo target designated by the
// YAML key target, or returns an error wrapping
// ErrConfigInvalidConstraintTarget when the key is not recognized.
func setConstraintTarget(constraint *mimo.Constraint, target string) error {
	switch target {
	case "maskingRate":
		constraint.Target = mimo.MaskingRate
	case "coherentRate":
		constraint.Target = mimo.CohenrentRate
	case "identifiantRate":
		constraint.Target = mimo.IdentifiantRate
	default:
		return fmt.Errorf("%w: %s", ErrConfigInvalidConstraintTarget, target)
	}

	return nil
}

// setConstraintType stores in constraint the mimo constraint type designated
// by the YAML key constraintType, or returns an error wrapping
// ErrConfigInvalidConstraintType when the key is not recognized.
func setConstraintType(constraint *mimo.Constraint, constraintType string) error {
	switch constraintType {
	case "shouldEqual":
		constraint.Type = mimo.ShouldEqual
	case "shouldBeGreaterThan":
		constraint.Type = mimo.ShouldBeGreaterThan
	case "shouldBeGreaterThanOrEqualTo":
		constraint.Type = mimo.ShouldBeGreaterThanOrEqualTo
	case "shouldBeLowerThan":
		constraint.Type = mimo.ShouldBeLowerThan
	case "shouldBeLessThanOrEqualTo":
		constraint.Type = mimo.ShouldBeLessThanOrEqualTo
	default:
		return fmt.Errorf("%w: %s", ErrConfigInvalidConstraintType, constraintType)
	}

	return nil
}
Loading

0 comments on commit 47c0489

Please sign in to comment.