Skip to content

Commit

Permalink
feat: coherence source with templates (#16)
Browse files Browse the repository at this point in the history
* feat: wip! coherence source with templates

* feat: add sprig functions

* test: add venom test + changelog
  • Loading branch information
adrienaury authored Aug 28, 2023
1 parent a55ebb4 commit 0fcec45
Show file tree
Hide file tree
Showing 10 changed files with 190 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Types of changes
- `Added` computation on disk instead of memory with `--diskstorage` flag.
- `Added` analysis of deep nested structures (arrays and objects).
- `Added` validated constraints use a different shade of green in HTML report.
- `Added` the possibility to use a template string to generate the coherent source, with the `coherentSource` parameter.

## [0.2.1]

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/rs/zerolog v1.28.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.7.0
golang.org/x/text v0.4.0
gopkg.in/yaml.v3 v3.0.1
)

Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down
16 changes: 9 additions & 7 deletions internal/infra/config_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ type YAMLStructure struct {

// YAMLColumn defines how to store a column config in YAML format.
type YAMLColumn struct {
Name string `yaml:"name"`
Exclude []any `yaml:"exclude,omitempty"`
CoherentWith []string `yaml:"coherentWith,omitempty"`
Constraints map[string]YAMLConstraint `yaml:"constraints,omitempty"`
Name string `yaml:"name"`
Exclude []any `yaml:"exclude,omitempty"`
CoherentWith []string `yaml:"coherentWith,omitempty"`
CoherentSource string `yaml:"coherentSource,omitempty"`
Constraints map[string]YAMLConstraint `yaml:"constraints,omitempty"`
}

type YAMLConstraint map[string]float64
Expand Down Expand Up @@ -80,9 +81,10 @@ func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {

for _, yamlcolumn := range yamlconfig.Columns {
column := mimo.ColumnConfig{
Exclude: yamlcolumn.Exclude,
CoherentWith: yamlcolumn.CoherentWith,
Constraints: []mimo.Constraint{},
Exclude: yamlcolumn.Exclude,
CoherentWith: yamlcolumn.CoherentWith,
CoherentSource: yamlcolumn.CoherentSource,
Constraints: []mimo.Constraint{},
}

for target, yamlconstraint := range yamlcolumn.Constraints {
Expand Down
30 changes: 20 additions & 10 deletions pkg/mimo/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,25 +247,25 @@ func NewReport(subs []EventSubscriber, config Config, multiMapFactory MultimapFa
return Report{make(map[string]Metrics), subs, config, multiMapFactory}
}

func (r Report) UpdateDeep(root DataRow, realRow DataRow, maskedRow DataRow, path ...string) {
func (r Report) UpdateDeep(root DataRow, realRow DataRow, maskedRow DataRow, stack []any, path ...string) {
for key, realValue := range realRow {
newpath := append(path, key) //nolint:gocritic

switch typedRealValue := realValue.(type) {
case map[string]any:
if typedMaskedValue, ok := maskedRow[key].(map[string]any); ok {
r.UpdateDeep(root, typedRealValue, typedMaskedValue, newpath...)
r.UpdateDeep(root, typedRealValue, typedMaskedValue, append(stack, realValue), newpath...)
} else {
log.Warn().
Strs("path", newpath).
Msg("ignored path because structure is different between real and masked data")
}
case []any:
if typedMaskedValue, ok := maskedRow[key].([]any); ok {
r.UpdateArray(root, typedRealValue, typedMaskedValue, newpath...)
r.UpdateArray(root, typedRealValue, typedMaskedValue, append(stack, realValue), newpath...)
}
case nil, any:
r.UpdateValue(root, typedRealValue, maskedRow[key], newpath...)
r.UpdateValue(root, typedRealValue, maskedRow[key], append(stack, realValue), newpath...)
default:
log.Warn().
Strs("path", newpath).
Expand All @@ -274,14 +274,14 @@ func (r Report) UpdateDeep(root DataRow, realRow DataRow, maskedRow DataRow, pat
}
}

func (r Report) UpdateArray(root DataRow, realArray []any, maskedArray []any, path ...string) {
func (r Report) UpdateArray(root DataRow, realArray []any, maskedArray []any, stack []any, path ...string) {
for index := 0; index < len(realArray) && index < len(maskedArray); index++ {
newpath := append(path, "[]") //nolint:gocritic

switch typedRealItem := realArray[index].(type) {
case map[string]any:
if typedMaskedItem, ok := maskedArray[index].(map[string]any); ok {
r.UpdateDeep(root, typedRealItem, typedMaskedItem, newpath...)
r.UpdateDeep(root, typedRealItem, typedMaskedItem, append(stack, realArray[index]), newpath...)
} else {
log.Warn().
Strs("path", newpath).
Expand All @@ -290,10 +290,10 @@ func (r Report) UpdateArray(root DataRow, realArray []any, maskedArray []any, pa
}
case []any:
if typedMaskedItem, ok := maskedArray[index].([]any); ok {
r.UpdateArray(root, typedRealItem, typedMaskedItem, newpath...)
r.UpdateArray(root, typedRealItem, typedMaskedItem, append(stack, realArray[index]), newpath...)
}
case nil, any:
r.UpdateValue(root, typedRealItem, maskedArray[index], newpath...)
r.UpdateValue(root, typedRealItem, maskedArray[index], append(stack, realArray[index]), newpath...)
default:
log.Warn().
Strs("path", newpath).
Expand All @@ -303,7 +303,7 @@ func (r Report) UpdateArray(root DataRow, realArray []any, maskedArray []any, pa
}
}

func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, path ...string) {
func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, stack []any, path ...string) {
key := strings.Join(path, ".")

metrics, exists := r.Metrics[key]
Expand All @@ -327,6 +327,16 @@ func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, path .
}
}

if len(config.CoherentSource) > 0 {
source, err := generateCoherentSource(config.CoherentSource, root, stack)

log.Err(err).Str("result", source).Msg("generating coherence source from template")

if err == nil {
coherenceValues = append(coherenceValues, source) //nolint:makezero
}
}

if len(coherenceValues) == 0 {
coherenceValues = []any{realValue}
}
Expand All @@ -340,7 +350,7 @@ func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, path .
}

func (r Report) Update(realRow DataRow, maskedRow DataRow) {
r.UpdateDeep(realRow, realRow, maskedRow)
r.UpdateDeep(realRow, realRow, maskedRow, []any{})
}

func (r Report) Columns() []string {
Expand Down
14 changes: 8 additions & 6 deletions pkg/mimo/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ type Config struct {
}

type ColumnConfig struct {
Exclude []any // exclude values from the masking rate computation (default: exclude only nil values)
CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
Constraints []Constraint // list of constraints to validate
Exclude []any // exclude values from the masking rate computation (default: exclude only nil values)
CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
CoherentSource string // template to execute to create coherence source.
Constraints []Constraint // list of constraints to validate
}

type Constraint struct {
Expand Down Expand Up @@ -61,8 +62,9 @@ func NewConfig() Config {

func NewDefaultColumnConfig() ColumnConfig {
return ColumnConfig{
Exclude: []any{},
CoherentWith: []string{},
Constraints: []Constraint{},
Exclude: []any{},
CoherentWith: []string{},
CoherentSource: "",
Constraints: []Constraint{},
}
}
75 changes: 75 additions & 0 deletions pkg/mimo/template.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (C) 2023 CGI France
//
// This file is part of MIMO.
//
// MIMO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MIMO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MIMO. If not, see <http://www.gnu.org/licenses/>.

package mimo

import (
"fmt"
"strings"
"text/template"
"unicode"

"github.com/Masterminds/sprig/v3"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)

// generateCoherentSource renders the text template tmplstring with root as its data
// and returns the rendered text.
//
// The template has access to the sprig text functions, to the helpers returned by
// generateFuncMap, and to a "Stack" function built from stack by generateStackFunc.
//
// When the template cannot be parsed or executed, an empty string is returned together
// with an error describing which step failed, so callers never receive a partially
// rendered result.
func generateCoherentSource(tmplstring string, root DataRow, stack []any) (string, error) {
	funcmap := generateFuncMap()
	funcmap["Stack"] = generateStackFunc(stack)

	// Custom functions are registered after sprig so they take precedence on name clashes.
	tmpl, err := template.New("template").Funcs(sprig.TxtFuncMap()).Funcs(funcmap).Parse(tmplstring)
	if err != nil {
		return "", fmt.Errorf("parsing coherent source template %q: %w", tmplstring, err)
	}

	var result strings.Builder

	if err = tmpl.Execute(&result, root); err != nil {
		// Execute may have written part of the output before failing; discard it.
		return "", fmt.Errorf("executing coherent source template %q: %w", tmplstring, err)
	}

	return result.String(), nil
}

// generateFuncMap returns a new function map holding the string helpers exposed
// to templates: ToUpper, ToLower and NoAccent.
func generateFuncMap() template.FuncMap {
	return template.FuncMap{
		"ToUpper":  strings.ToUpper,
		"ToLower":  strings.ToLower,
		"NoAccent": rmAcc,
	}
}

// generateStackFunc returns an accessor over theStack.
//
// A strictly positive index is 1-based from the start of the slice (1 is the first
// element). A zero or negative index is relative to the end of the slice (0 is the
// last element, -1 the one before it, and so on).
//
// No bounds checking is performed: an index that resolves outside of theStack makes
// the slice access panic.
func generateStackFunc(theStack []any) func(index int) any {
	lookup := func(index int) any {
		position := index - 1

		// Non-positive indexes count backwards from the end of the stack.
		if index <= 0 {
			position += len(theStack)
		}

		return theStack[position]
	}

	return lookup
}

// rmAcc removes accents from string
// Function derived from: http://blog.golang.org/normalization
func rmAcc(s string) string {
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
result, _, _ := transform.String(t, s)

return result
}
4 changes: 4 additions & 0 deletions test/configs/config_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
version: "1"
metrics:
- name: "batchs.[].accounts.[].number"
coherentSource: "{{$batch := Stack -3}}{{Stack 0}} + {{$batch.id}}"
52 changes: 52 additions & 0 deletions test/reports/report_template_source_stack.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>MIMO Report</title>
<meta name="viewport" content="width=device-width,initial-scale=1" />
<meta name="description" content="MIMO Report" />
</head>
<body>
<h1>MIMO Report</h1>
<table border="1" cellspacing="0" cellpadding="5">
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
<th>Coherent Rate</th>
<th>Identifiable Rate</th>
<th>K</th>
</thead>
<tbody>

<tr>
<td>batchs.[].accounts.[].number</td>
<td>0</td>
<td>0</td>
<td>4</td>
<td>0</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">66.67 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">1</td>
</tr>

<tr>
<td>batchs.[].id</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>3</td>
<td style="background-color: orange">0.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">1</td>
</tr>

</tbody>
</table>
</body>
</html>
19 changes: 19 additions & 0 deletions test/suites/05-template-source.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Venom Test Suite definition
# Check Venom documentation for more information : https://github.com/ovh/venom
name: use templates to compute coherent source
testcases:
- name: template with use of Stack object
steps:
- script: echo '{"batchs":[{"id":1,"accounts":[{"number":"01234"},{"number":"56789"}]}]}' > working/real.jsonl
- script: echo '{"batchs":[{"id":1,"accounts":[{"number":"56789"}]},{"id":2,"accounts":[{"number":"01234"}]}]}' >> working/real.jsonl

- script: echo '{"batchs":[{"id":1,"accounts":[{"number":"A"},{"number":"B"}]}]}' > working/masked.jsonl
- script: echo '{"batchs":[{"id":1,"accounts":[{"number":"C"}]},{"id":2,"accounts":[{"number":"D"}]}]}' >> working/masked.jsonl

- script: cat working/masked.jsonl | mimo --config ../configs/config_template.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring batchs.[].id count-empty=0 count-masked=0 count-missed=3 count-nil=0 field=batchs.[].id rate-coherence=1 rate-identifiable=1 rate-masking=0
- result.systemerr ShouldContainSubstring batchs.[].accounts.[].number count-empty=0 count-masked=4 count-missed=0 count-nil=0 field=batchs.[].accounts.[].number rate-coherence=0.6666666666666666 rate-identifiable=1 rate-masking=1

- script: mv report.html ../reports/report_template_source_stack.html

0 comments on commit 0fcec45

Please sign in to comment.