Skip to content

Commit

Permalink
feat: exclude value with template (#36)
Browse files Browse the repository at this point in the history
* feat: exclude template

* test(venom): exclude template

* refactor: rename function to match usage
  • Loading branch information
adrienaury authored Sep 28, 2023
1 parent 248ec92 commit 095f70f
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Types of changes
## [0.6.0]

- `Added` log unmasked values with the `watch` flag.
- `Added` new config parameter `excludeTemplate` to exclude a value with a template expression.
- `Fixed` exclusion with coherent source specified.

## [0.5.0]
Expand Down
24 changes: 13 additions & 11 deletions internal/infra/config_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ type YAMLStructure struct {

// YAMLColumn defines how to store a column config in YAML format.
//
// Each field maps to the YAML key given in its struct tag; every key except
// "name" is optional (omitempty).
type YAMLColumn struct {
Name string `yaml:"name"`
Exclude []any `yaml:"exclude,omitempty"`
CoherentWith []string `yaml:"coherentWith,omitempty"`
CoherentSource string `yaml:"coherentSource,omitempty"`
Constraints map[string]YAMLConstraint `yaml:"constraints,omitempty"`
Alias string `yaml:"alias,omitempty"`
Name string `yaml:"name"`
Exclude []any `yaml:"exclude,omitempty"`
ExcludeTemplate string `yaml:"excludeTemplate,omitempty"` // template expression used to exclude a value, e.g. '{{.email | hasSuffix ".fr"}}'
CoherentWith []string `yaml:"coherentWith,omitempty"`
CoherentSource string `yaml:"coherentSource,omitempty"`
Constraints map[string]YAMLConstraint `yaml:"constraints,omitempty"`
Alias string `yaml:"alias,omitempty"`
}

type YAMLPreprocess struct {
Expand Down Expand Up @@ -91,11 +92,12 @@ func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {

for _, yamlcolumn := range yamlconfig.Columns {
column := mimo.ColumnConfig{
Exclude: yamlcolumn.Exclude,
CoherentWith: yamlcolumn.CoherentWith,
CoherentSource: yamlcolumn.CoherentSource,
Constraints: []mimo.Constraint{},
Alias: yamlcolumn.Alias,
Exclude: yamlcolumn.Exclude,
ExcludeTemplate: yamlcolumn.ExcludeTemplate,
CoherentWith: yamlcolumn.CoherentWith,
CoherentSource: yamlcolumn.CoherentSource,
Constraints: []mimo.Constraint{},
Alias: yamlcolumn.Alias,
}

for target, yamlconstraint := range yamlcolumn.Constraints {
Expand Down
4 changes: 2 additions & 2 deletions pkg/mimo/driver_preprocess.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ func preprocessValue(value any, paths []string, stack []any, templstr string, ro

if len(paths) == 1 {
if obj, ok := value.(map[string]any); ok {
obj[path], err = generateCoherentSource(templstr, root, append(stack, obj))
obj[path], err = applyTemplate(templstr, root, append(stack, obj))
}

if obj, ok := value.(DataRow); ok {
obj[path], err = generateCoherentSource(templstr, root, append(stack, obj))
obj[path], err = applyTemplate(templstr, root, append(stack, obj))
}

if err != nil {
Expand Down
31 changes: 22 additions & 9 deletions pkg/mimo/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,16 @@ func (m *Metrics) Update(
subs Suscribers,
config ColumnConfig,
) bool {
nonBlankCount := m.NonBlankCount()

realValueStr, realValueOk := toString(realValue)
maskedValueStr, maskedValueOk := toString(maskedValue)

if !realValueOk || !maskedValueOk {
return false // special case (arrays, objects) are not covered right now
return false
}

m.backend.IncreaseTotalCount()

excluded := isExcluded(config.Exclude, realValue, realValueStr)
excluded := config.excluded || isExcluded(config.Exclude, realValue, realValueStr)

if !excluded {
// coherence and identifiant rates are computed over all values by default (including nil values)
Expand Down Expand Up @@ -126,16 +124,21 @@ func (m *Metrics) Update(
if realValueStr != maskedValueStr {
m.backend.IncreaseMaskedCount()
} else {
subs.PostNonMaskedValue(fieldname, realValue)
if m.backend.GetMaskedCount() == nonBlankCount {
subs.PostFirstNonMaskedValue(fieldname, realValue)
}
m.postNonMaskedValue(subs, fieldname, realValue)
}
}

return true
}

// postNonMaskedValue notifies subs that realValue of fieldname was left
// unmasked.
//
// PostFirstNonMaskedValue is sent first, and only when the backend's masked
// count equals the non-blank count minus one; PostNonMaskedValue is then sent
// unconditionally.
// NOTE(review): presumably the current value is already part of the non-blank
// count when this runs, so the condition means every earlier non-blank value
// was masked — confirm against Metrics.Update.
func (m *Metrics) postNonMaskedValue(subs Suscribers, fieldname string, realValue any) {
	maskedSoFar := m.backend.GetMaskedCount()
	nonBlank := m.NonBlankCount()

	if isFirstMiss := maskedSoFar == nonBlank-1; isFirstMiss {
		subs.PostFirstNonMaskedValue(fieldname, realValue)
	}

	subs.PostNonMaskedValue(fieldname, realValue)
}

// NilCount reports the nil count currently held by the metrics backend.
func (m Metrics) NilCount() int64 {
	nilCount := m.backend.GetNilCount()

	return nilCount
}
Expand Down Expand Up @@ -422,6 +425,16 @@ func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, stack
coherenceValues = []any{realValue}
}

if len(config.ExcludeTemplate) > 0 {
result, err := applyTemplate(config.ExcludeTemplate, root, stack)

log.Err(err).Str("result", result).Msg("compute exclusion from template")

if exclude, err := strconv.ParseBool(result); exclude && err == nil {
config.excluded = true
}
}

if !metrics.Update(key, realValue, maskedValue, coherenceValues, r.subs, config) && !exists {
metrics.Coherence.Close()
metrics.Identifiant.Close()
Expand All @@ -442,7 +455,7 @@ func computeCoherenceValues(config ColumnConfig, root DataRow, stack []any) []an
}

if len(config.CoherentSource) > 0 {
source, err := generateCoherentSource(config.CoherentSource, root, stack)
source, err := applyTemplate(config.CoherentSource, root, stack)

log.Err(err).Str("result", source).Msg("generating coherence source from template")

Expand Down
25 changes: 15 additions & 10 deletions pkg/mimo/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ type Config struct {
}

// ColumnConfig holds the per-column settings used when computing metrics:
// exclusion rules, coherence sources, constraints and the persisted alias.
type ColumnConfig struct {
Exclude []any // exclude values from the masking rate computation (default: exclude only nil values)
CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
CoherentSource string // template to execute to create coherence source
Constraints []Constraint // list of constraints to validate
Alias string // alias to use in persisted data
Exclude []any // exclude values from the masking rate computation (default: exclude only nil values)
ExcludeTemplate string // exclude values if the template expression evaluates to true (default: false)
CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
CoherentSource string // template to execute to create coherence source
Constraints []Constraint // list of constraints to validate
Alias string // alias to use in persisted data

// excluded is set by Report.UpdateValue when the ExcludeTemplate result
// parses as a true boolean; Metrics.Update then treats the value as excluded.
excluded bool
}

type PreprocessConfig struct {
Expand Down Expand Up @@ -70,10 +73,12 @@ func NewConfig() Config {

// NewDefaultColumnConfig returns a ColumnConfig with empty (non-nil) Exclude,
// CoherentWith and Constraints slices, empty ExcludeTemplate, CoherentSource
// and Alias strings, and the internal excluded flag cleared.
func NewDefaultColumnConfig() ColumnConfig {
return ColumnConfig{
Exclude: []any{},
CoherentWith: []string{},
CoherentSource: "",
Constraints: []Constraint{},
Alias: "",
Exclude: []any{},
ExcludeTemplate: "",
CoherentWith: []string{},
CoherentSource: "",
Constraints: []Constraint{},
Alias: "",
excluded: false,
}
}
2 changes: 1 addition & 1 deletion pkg/mimo/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
"golang.org/x/text/unicode/norm"
)

func generateCoherentSource(tmplstring string, root DataRow, stack []any) (string, error) {
func applyTemplate(tmplstring string, root DataRow, stack []any) (string, error) {
funcmap := generateFuncMap()

funcmap["Stack"] = generateStackFunc(stack)
Expand Down
4 changes: 4 additions & 0 deletions test/configs/config_exclude_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
version: "1"
metrics:
- name: "value"
excludeTemplate: '{{.email | hasSuffix ".fr"}}'
52 changes: 52 additions & 0 deletions test/reports/report_exclude_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<title>MIMO Report</title>
<meta name="viewport" content="width=device-width,initial-scale=1" />
<meta name="description" content="MIMO Report" />
</head>
<body>
<h1>MIMO Report</h1>
<table border="1" cellspacing="0" cellpadding="5">
<thead>
<th>Field</th>
<th>Nil</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
<th>Coherent Rate</th>
<th>Identifiable Rate</th>
<th>K</th>
</thead>
<tbody>

<tr>
<td>email</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>10</td>
<td style="background-color: orange">0.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">1</td>
</tr>

<tr>
<td>value</td>
<td>0</td>
<td>2</td>
<td>8</td>
<td>0</td>
<td style="background-color: lightgreen">100.00 %</td>
<td style="background-color: orange">50.00 %</td>
<td style="background-color: orange">80.00 %</td>
<td style="background-color: orange">1</td>
</tr>

</tbody>
</table>
</body>
</html>
32 changes: 32 additions & 0 deletions test/suites/10-exclusion.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: value exclusion
testcases:
- name: exclude values by template
steps:
- script: echo '{"value":"A","email":"[email protected]"}' > working/real.jsonl
- script: echo '{"value":"A","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"B","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"B","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"D","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"D","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"E","email":"[email protected]"}' >> working/real.jsonl
- script: echo '{"value":"E","email":"[email protected]"}' >> working/real.jsonl

- script: echo '{"value":"X","email":"[email protected]"}' > working/masked.jsonl
- script: echo '{"value":"Y","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"W","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"W","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"V","email":"[email protected]"}' >> working/masked.jsonl
- script: echo '{"value":"C","email":"[email protected]"}' >> working/masked.jsonl

- script: cat working/masked.jsonl | mimo --config ../configs/config_exclude_template.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-ignored=2 count-masked=8 count-missed=0 count-nil=0 field=value rate-coherence=0.5 rate-identifiable=0.8 rate-masking=1

- script: mv report.html ../reports/report_exclude_template.html

0 comments on commit 095f70f

Please sign in to comment.