Skip to content

Commit

Permalink
fix: rename empty to ignored (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
adrienaury authored Sep 2, 2023
1 parent bd67514 commit 889c93c
Show file tree
Hide file tree
Showing 28 changed files with 71 additions and 70 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Types of changes
## [0.4.0]

- `Added` possibility to create temporary fields with the `preprocess` configuration.
- `Fixed` rename label `empty` to `ignored` in report.

## [0.3.0]

Expand Down
4 changes: 2 additions & 2 deletions cmd/mimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ func appendColumnMetric(report mimo.Report, colname string, haserror bool) bool
log.Info().
Str("field", colname).
Int64("count-nil", metrics.NilCount).
Int64("count-empty", metrics.EmptyCount).
Int64("count-ignored", metrics.IgnoredCount).
Int64("count-masked", metrics.MaskedCount).
Int64("count-missed", metrics.NonMaskedCount()).
Float64("rate-masking", metrics.MaskedRate()).
Expand All @@ -195,7 +195,7 @@ func appendColumnMetric(report mimo.Report, colname string, haserror bool) bool
log.Error().
Str("field", colname).
Int64("count-nil", metrics.NilCount).
Int64("count-empty", metrics.EmptyCount).
Int64("count-ignored", metrics.IgnoredCount).
Int64("count-masked", metrics.MaskedCount).
Int64("count-missed", metrics.NonMaskedCount()).
Float64("rate-masking", metrics.MaskedRate()).
Expand Down
4 changes: 2 additions & 2 deletions internal/infra/template/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand All @@ -25,7 +25,7 @@ <h1>MIMO Report</h1>
<tr>
<td>{{ $key }}</td>
<td>{{ $value.NilCount }}</td>
<td>{{ $value.EmptyCount }}</td>
<td>{{ $value.IgnoredCount }}</td>
<td>{{ $value.MaskedCount }}</td>
<td>{{ $value.NonMaskedCount }}</td>
<td style="background-color: {{ if $value.MaskedRateValidate | eq 1 }}green{{else if $value.MaskedRateValidate | eq -1}}red{{else if $value.MaskedRate | eq 1.00 }}lightgreen{{else}}orange{{end}}">{{ if ne (toString $value.MaskedRate) "NaN" }}{{ $value.MaskedRate | mulf 100.00 | printf "%0.02f" }} %{{ else }}NaN{{ end }}</td>
Expand Down
44 changes: 22 additions & 22 deletions pkg/mimo/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,28 +45,28 @@ func (subs Suscribers) PostFirstNonMaskedValue(fieldname string, value any) {
}

type Metrics struct {
Fieldname string // Fieldname is name of column analyzed
TotalCount int64 // TotalCount is the number of values analyzed
NilCount int64 // NilCount is the number of null values in real data
EmptyCount int64 // EmptyCount is the number of empty values in real data (empty string or numbers at 0 value)
MaskedCount int64 // MaskedCount is the number of non-blank real values masked
Coherence Multimap // Coherence is a multimap used to compute the coherence rate
Identifiant Multimap // Identifiant is a multimap used to compute the identifiable rate
Constraints []Constraint // Constraints is the set of rules to validate
Fieldname string // Fieldname is the name of the column analyzed
TotalCount int64 // TotalCount is the number of values analyzed
NilCount int64 // NilCount is the number of null values in real data
IgnoredCount int64 // IgnoredCount is the number of ignored values in real data
MaskedCount int64 // MaskedCount is the number of non-blank real values masked
Coherence Multimap // Coherence is a multimap used to compute the coherence rate
Identifiant Multimap // Identifiant is a multimap used to compute the identifiable rate
Constraints []Constraint // Constraints is the set of rules to validate
}

type MultimapFactory func(fieldname string) Multimap

func NewMetrics(fieldname string, multimapFactory MultimapFactory, constraints ...Constraint) Metrics {
return Metrics{
Fieldname: fieldname,
TotalCount: 0,
NilCount: 0,
EmptyCount: 0,
MaskedCount: 0,
Coherence: multimapFactory(fieldname + "-coherence"),
Identifiant: multimapFactory(fieldname + "-identifiant"),
Constraints: constraints,
Fieldname: fieldname,
TotalCount: 0,
NilCount: 0,
IgnoredCount: 0,
MaskedCount: 0,
Coherence: multimapFactory(fieldname + "-coherence"),
Identifiant: multimapFactory(fieldname + "-identifiant"),
Constraints: constraints,
}
}

Expand Down Expand Up @@ -105,7 +105,7 @@ func (m *Metrics) Update(
}

if isExcluded(config.Exclude, realValue, realValueStr) {
m.EmptyCount++
m.IgnoredCount++

return true
}
Expand All @@ -121,17 +121,17 @@ func (m *Metrics) Update(
return true
}

// BlankCount is the number of blank (null or empty) values in real data.
// BlankCount is the number of blank (null or ignored) values in real data.
func (m Metrics) BlankCount() int64 {
return m.NilCount + m.EmptyCount
return m.NilCount + m.IgnoredCount
}

// NonBlankCount is the number of non-blank (non-null and non-empty) values in real data.
// NonBlankCount is the number of non-blank (non-null and non-ignored) values in real data.
func (m Metrics) NonBlankCount() int64 {
return m.TotalCount - m.BlankCount()
}

// NonMaskedCount is the number of non-blank (non-null and non-empty) values in real data that were not masked.
// NonMaskedCount is the number of non-blank (non-null and non-ignored) values in real data that were not masked.
func (m Metrics) NonMaskedCount() int64 {
return m.NonBlankCount() - m.MaskedCount
}
Expand All @@ -144,7 +144,7 @@ func (m Metrics) K() int {
// MaskedRate is equal to
//
// Number of non-blank real values masked
// / (Number of values analyzed - Number of blank (null or empty) values in real data) ).
// / (Number of values analyzed - Number of blank (null or ignored) values in real data).
func (m Metrics) MaskedRate() float64 {
return float64(m.MaskedCount) / float64(m.NonBlankCount())
}
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_1.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_2.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_3.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_4.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_5.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_6.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_7.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_8.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_bugfix_1.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_bugfix_exclude_numeric.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_deep_array.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_deep_mixed.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_deep_object.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_preprocess_simple.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_reuse_previous.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_reuse_previous_alias.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
2 changes: 1 addition & 1 deletion test/reports/report_template_source_stack.html
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ <h1>MIMO Report</h1>
<thead>
<th>Field</th>
<th>Nil</th>
<th>Empty</th>
<th>Ignored</th>
<th>Masked</th>
<th>Missed</th>
<th>Masking Rate</th>
Expand Down
12 changes: 6 additions & 6 deletions test/suites/02-validate-metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8

- script: mv report.html ../reports/report_1.html

Expand Down Expand Up @@ -66,7 +66,7 @@ testcases:
- script: cat working/masked.jsonl | mimo --config ../configs/config_exclude.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=2 count-masked=8 count-missed=0 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=1
- result.systemerr ShouldContainSubstring value count-ignored=2 count-masked=8 count-missed=0 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=1

- script: mv report.html ../reports/report_2.html

Expand Down Expand Up @@ -97,14 +97,14 @@ testcases:
- script: cat working/masked.jsonl | mimo --config ../configs/config_coherent.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0 rate-identifiable=0.8 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0 rate-identifiable=0.8 rate-masking=0.8

- script: mv report.html ../reports/report_3.html

- script: cat working/masked.jsonl | mimo --config ../configs/config_coherent2.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=1 rate-identifiable=0.8 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=1 rate-identifiable=0.8 rate-masking=0.8

- script: mv report.html ../reports/report_4.html

Expand Down Expand Up @@ -135,13 +135,13 @@ testcases:
- script: cat working/masked.jsonl | mimo --config ../configs/config_constraints_1.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=0.8

- script: mv report.html ../reports/report_5.html

- script: cat working/masked.jsonl | mimo --config ../configs/config_constraints_2.yaml -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 1
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=0 field=value rate-coherence=0.6 rate-identifiable=0.8 rate-masking=0.8

- script: mv report.html ../reports/report_6.html
6 changes: 3 additions & 3 deletions test/suites/03-persist-storage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 --disk-storage working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8

- script: ls -l /tmp/ | grep mimo-pebble |wc -l
assertions:
Expand Down Expand Up @@ -80,7 +80,7 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 --disk-storage --persist working/data/ working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=8 count-missed=2 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8

- script: mv report.html ../reports/report_8.html

Expand Down Expand Up @@ -123,6 +123,6 @@ testcases:
- script: cat working/masked.jsonl | mimo -v5 --disk-storage --persist working/data/ working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring value count-empty=0 count-masked=4 count-missed=1 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8
- result.systemerr ShouldContainSubstring value count-ignored=0 count-masked=4 count-missed=1 count-nil=1 field=value rate-coherence=0.6666666666666666 rate-identifiable=0.6 rate-masking=0.8

- script: mv report.html ../reports/report_reuse_previous.html
12 changes: 6 additions & 6 deletions test/suites/04-dig-objects.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring person.age count-empty=0 count-masked=2 count-missed=0 count-nil=0 field=person.age rate-coherence=1 rate-identifiable=1 rate-masking=1
- result.systemerr ShouldContainSubstring person.address.street count-empty=0 count-masked=2 count-missed=0 count-nil=0 field=person.address.street rate-coherence=0 rate-identifiable=1 rate-masking=1
- result.systemerr ShouldContainSubstring person.age count-ignored=0 count-masked=2 count-missed=0 count-nil=0 field=person.age rate-coherence=1 rate-identifiable=1 rate-masking=1
- result.systemerr ShouldContainSubstring person.address.street count-ignored=0 count-masked=2 count-missed=0 count-nil=0 field=person.address.street rate-coherence=0 rate-identifiable=1 rate-masking=1

- script: mv report.html ../reports/report_deep_object.html

Expand All @@ -29,8 +29,8 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring groups.[] count-empty=0 count-masked=2 count-missed=2 count-nil=0 field=groups.[] rate-coherence=1 rate-identifiable=1 rate-masking=0.5
- result.systemerr ShouldContainSubstring groups.[].[] count-empty=0 count-masked=4 count-missed=0 count-nil=0 field=groups.[].[] rate-coherence=1 rate-identifiable=0 rate-masking=1
- result.systemerr ShouldContainSubstring groups.[] count-ignored=0 count-masked=2 count-missed=2 count-nil=0 field=groups.[] rate-coherence=1 rate-identifiable=1 rate-masking=0.5
- result.systemerr ShouldContainSubstring groups.[].[] count-ignored=0 count-masked=4 count-missed=0 count-nil=0 field=groups.[].[] rate-coherence=1 rate-identifiable=0 rate-masking=1

- script: mv report.html ../reports/report_deep_array.html

Expand All @@ -45,7 +45,7 @@ testcases:
- script: cat working/masked.jsonl | mimo -v3 working/real.jsonl
assertions:
- result.code ShouldEqual 0
- result.systemerr ShouldContainSubstring batchs.[].id count-empty=0 count-masked=0 count-missed=3 count-nil=0 field=batchs.[].id rate-coherence=1 rate-identifiable=1 rate-masking=0
- result.systemerr ShouldContainSubstring batchs.[].accounts.[].number count-empty=0 count-masked=4 count-missed=0 count-nil=0 field=batchs.[].accounts.[].number rate-coherence=1 rate-identifiable=1 rate-masking=1
- result.systemerr ShouldContainSubstring batchs.[].id count-ignored=0 count-masked=0 count-missed=3 count-nil=0 field=batchs.[].id rate-coherence=1 rate-identifiable=1 rate-masking=0
- result.systemerr ShouldContainSubstring batchs.[].accounts.[].number count-ignored=0 count-masked=4 count-missed=0 count-nil=0 field=batchs.[].accounts.[].number rate-coherence=1 rate-identifiable=1 rate-masking=1

- script: mv report.html ../reports/report_deep_mixed.html
Loading

0 comments on commit 889c93c

Please sign in to comment.