Skip to content

Commit

Permalink
So the header names are not necessarily unique...
Browse files Browse the repository at this point in the history
  • Loading branch information
wgnf committed Oct 23, 2021
1 parent 7937e06 commit 8f431c2
Show file tree
Hide file tree
Showing 15 changed files with 394 additions and 141 deletions.
4 changes: 4 additions & 0 deletions src/CsvProc9000/Csv/CsvColumn.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
namespace CsvProc9000.Csv
{
/// <summary>
/// Identifies a column of a CSV file by its position and its header name.
/// </summary>
/// <param name="Index">Position of the column; CsvReader passes the zero-based position of the header here.</param>
/// <param name="Name">Header name of the column. Header names are not necessarily unique, which is why the index is kept alongside.</param>
public record CsvColumn(int Index, string Name);
}
2 changes: 1 addition & 1 deletion src/CsvProc9000/Csv/CsvField.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace CsvProc9000.Csv
{
public record CsvField(string Name, string Value);
/// <summary>
/// A single value of a row together with the column it belongs to.
/// </summary>
/// <param name="Column">Column (index and header name) this value is stored under.</param>
/// <param name="Value">Textual content of the field.</param>
public record CsvField(CsvColumn Column, string Value);
}
43 changes: 0 additions & 43 deletions src/CsvProc9000/Csv/CsvFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,48 +27,5 @@ public void AddRow([NotNull] CsvRow row)

_rows.Add(row);
}

/// <summary>
/// Renders all rows as delimiter-separated text (header line first) and writes the result
/// to <paramref name="destinationFileName"/>.
/// </summary>
/// <param name="fileSystem">File system abstraction used for writing the file.</param>
/// <param name="destinationFileName">Path of the file to write.</param>
/// <param name="delimiter">Text placed between two neighbouring values of a line.</param>
public async Task SaveToAsync(
    IFileSystem fileSystem,
    string destinationFileName,
    string delimiter)
{
    // the header is made of every distinct field name found across all rows
    var columnNames = Rows
        .SelectMany(row => row.Fields)
        .Select(field => field.Name)
        .Distinct()
        .ToList();

    var output = new StringBuilder();
    output.AppendLine(string.Join(delimiter, columnNames));

    foreach (var row in Rows)
    {
        // a row that lacks one of the header fields contributes an empty value for that column
        var values = columnNames.Select(
            columnName => row.Fields.FirstOrDefault(f => f.Name == columnName)?.Value ?? string.Empty);

        output.AppendLine(string.Join(delimiter, values));
    }

    await fileSystem.File.WriteAllTextAsync(destinationFileName, output.ToString());
}
}
}
10 changes: 6 additions & 4 deletions src/CsvProc9000/Csv/CsvReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@ private static CsvRow ProcessCsvRow(IReaderRow csvReader, IEnumerable<string> he
{
var csvRow = new CsvRow();

foreach (var header in headers)
var headersList = headers.ToList();
for (var index = 0; index < headersList.Count; index++)
{
if (!csvReader.TryGetField<string>(header, out var fieldValue)) continue;

csvRow.AddOrUpdateField(header, fieldValue);
if (!csvReader.TryGetField<string>(index, out var fieldValue)) continue;

var column = new CsvColumn(index, headersList[index]);
csvRow.AddField(column, fieldValue);
}

return csvRow;
Expand Down
60 changes: 58 additions & 2 deletions src/CsvProc9000/Csv/CsvRow.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using JetBrains.Annotations;
using System.Linq;

namespace CsvProc9000.Csv
{
Expand All @@ -10,13 +11,68 @@ public class CsvRow

public IEnumerable<CsvField> Fields => _fields;

public void AddOrUpdateField([NotNull] string fieldName, [NotNull] string fieldValue)
/// <summary>
/// Appends a new field for the given column to the end of this row.
/// </summary>
/// <param name="column">Column the new field belongs to.</param>
/// <param name="fieldValue">Value of the new field.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="column"/> or <paramref name="fieldValue"/> is <c>null</c>.
/// </exception>
public void AddField([NotNull] CsvColumn column, [NotNull] string fieldValue)
{
    // arguments are evaluated left to right, so a null column is reported before a null value
    var field = new CsvField(
        column ?? throw new ArgumentNullException(nameof(column)),
        fieldValue ?? throw new ArgumentNullException(nameof(fieldValue)));

    _fields.Add(field);
}

/// <summary>
/// Appends a new field with the given name to the end of this row. The column index of the
/// new field is the index following the highest column index already present in the row.
/// </summary>
/// <param name="fieldName">Header name of the new field; must not be null or whitespace.</param>
/// <param name="fieldValue">Value of the new field.</param>
/// <exception cref="ArgumentNullException"><paramref name="fieldValue"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="fieldName"/> is null or whitespace.</exception>
public void AddField([NotNull] string fieldName, [NotNull] string fieldValue)
{
    if (fieldValue == null) throw new ArgumentNullException(nameof(fieldValue));
    if (string.IsNullOrWhiteSpace(fieldName))
        throw new ArgumentException("Value cannot be null or whitespace.", nameof(fieldName));

    // Column indices are zero-based (the reader uses the header position), so an empty row starts
    // at 0. Taking "highest index + 1" instead of "count + 1" neither skips an index nor collides
    // with an existing one when the row has gaps in its indices.
    var nextIndexForColumn = _fields
        .Select(field => field.Column.Index)
        .DefaultIfEmpty(-1)
        .Max() + 1;

    AddField(new CsvColumn(nextIndexForColumn, fieldName), fieldValue);
}

/// <summary>
/// Replaces the value of an existing field with the given name, or appends a new field when
/// no existing field qualifies.
/// </summary>
/// <param name="fieldName">Header name of the field; must not be null or whitespace.</param>
/// <param name="fieldValue">Value to store.</param>
/// <param name="fieldIndex">Column index used to select the field when several fields share the name.</param>
/// <exception cref="ArgumentNullException"><paramref name="fieldValue"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="fieldName"/> is null or whitespace, or several fields share the name and
/// <paramref name="fieldIndex"/> has no value.
/// </exception>
public void AddOrUpdateField([NotNull] string fieldName, [NotNull] string fieldValue, int? fieldIndex)
{
    if (fieldValue == null) throw new ArgumentNullException(nameof(fieldValue));
    if (string.IsNullOrWhiteSpace(fieldName))
        throw new ArgumentException("Value cannot be null or whitespace.", nameof(fieldName));

    if (TryGetCandidateToChange(fieldName, fieldIndex, out var existingField))
    {
        // records are immutable, so the updated copy takes the place of the old one at the same position
        var position = _fields.IndexOf(existingField);
        _fields[position] = existingField with { Value = fieldValue };
        return;
    }

    AddField(fieldName, fieldValue);
}

/// <summary>
/// Looks up the field of this row that an update for <paramref name="fieldName"/> should replace.
/// </summary>
/// <param name="fieldName">Name compared against the column name of every field.</param>
/// <param name="fieldIndex">Column index used to pick one field when more than one field has the name; not consulted otherwise.</param>
/// <param name="fieldToChange">The selected field, or <c>null</c> when no field was selected.</param>
/// <returns><c>true</c> when a field was selected; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentException">More than one field has the name and <paramref name="fieldIndex"/> has no value.</exception>
private bool TryGetCandidateToChange(string fieldName, int? fieldIndex, out CsvField fieldToChange)
{
// get possible fields with the given name
var fieldCandidates = _fields
.Where(field => field.Column.Name == fieldName)
.ToList();

fieldToChange = null;

// when we found more than one field with that name, we need to find a possible field with the given index
if (fieldCandidates.Count > 1)
{
if (!fieldIndex.HasValue)
throw new ArgumentException(
$"Found more than one candidate for field name {fieldName} but no field index was provided",
nameof(fieldIndex));

// may still yield null when none of the same-named fields has the requested column index
fieldToChange = fieldCandidates.FirstOrDefault(field => field.Column.Index == fieldIndex.Value);
}
else
// zero or one candidate: the name alone is sufficient, the index is not checked
fieldToChange = fieldCandidates.FirstOrDefault();

// NOTE(review): the next line looks like a removed line of the old AddOrUpdateField that the diff view
// left in place (it passes a string where CsvField now takes a CsvColumn) - confirm it is not part of this method
_fields.Add(new CsvField(fieldName, fieldValue));
return fieldToChange != null;
}
}
}
22 changes: 22 additions & 0 deletions src/CsvProc9000/Options/Change.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using JetBrains.Annotations;

namespace CsvProc9000.Options
{
/// <summary>
/// Describes one change that a rule applies to a row: which field to touch, how, and with which value.
/// </summary>
public class Change
{
/// <summary>
/// Name of the field to add or update. The processor skips changes whose field name is null or whitespace.
/// </summary>
[UsedImplicitly]
public string Field { get; set; }

/// <summary>
/// Column index of the field to update; needed when the field name is not unique within a row.
/// Only passed along for <see cref="ChangeMode.AddOrUpdate"/>.
/// </summary>
[UsedImplicitly]
public int? FieldIndex { get; set; }

/// <summary>
/// How the change is applied; defaults to <see cref="ChangeMode.AddOrUpdate"/>.
/// </summary>
[UsedImplicitly]
public ChangeMode Mode { get; set; } = ChangeMode.AddOrUpdate;

/// <summary>
/// Value written to the field.
/// </summary>
[UsedImplicitly]
public string Value { get; set; }
}
}
8 changes: 8 additions & 0 deletions src/CsvProc9000/Options/ChangeMode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace CsvProc9000.Options
{
/// <summary>
/// Selects how a <c>Change</c> is applied to a row.
/// </summary>
public enum ChangeMode
{
/// <summary>Always append a new field to the row (the processor calls <c>CsvRow.AddField</c>).</summary>
Add,
/// <summary>Update a matching existing field, or append a new one when none matches (the processor calls <c>CsvRow.AddOrUpdateField</c>).</summary>
AddOrUpdate
}
}
2 changes: 1 addition & 1 deletion src/CsvProc9000/Options/Rule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ public class Rule
public List<Condition> Conditions { get; set; }

[UsedImplicitly]
public Dictionary<string, string> Steps { get; set; }
public List<Change> Changes { get; set; }
}
}
145 changes: 145 additions & 0 deletions src/CsvProc9000/Processors/ApplyRulesToCsvFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
using System;
using System.Linq;
using CsvProc9000.Csv;
using CsvProc9000.Options;
using JetBrains.Annotations;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;

namespace CsvProc9000.Processors
{
/// <summary>
/// Applies the rules configured in <see cref="CsvProcessorOptions"/> to the rows of a <see cref="CsvFile"/>.
/// A rule is applied to a row when every one of its conditions is met by that row.
/// </summary>
public class ApplyRulesToCsvFile : IApplyRulesToCsvFile
{
    private readonly ILogger<ApplyRulesToCsvFile> _logger;
    private readonly CsvProcessorOptions _processorOptions;

    /// <summary>
    /// Creates the processor step.
    /// </summary>
    /// <param name="processorOptions">Options wrapper that provides the rules.</param>
    /// <param name="logger">Logger used for diagnostics.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="processorOptions"/> (or its value) or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public ApplyRulesToCsvFile(
        [NotNull] IOptions<CsvProcessorOptions> processorOptions,
        [NotNull] ILogger<ApplyRulesToCsvFile> logger)
    {
        // "?." makes a null wrapper surface as ArgumentNullException instead of NullReferenceException
        _processorOptions = processorOptions?.Value ?? throw new ArgumentNullException(nameof(processorOptions));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Applies every configured rule, in configuration order, to every row of <paramref name="csvFile"/>.
    /// Logs a warning and does nothing when no rules are configured.
    /// </summary>
    /// <param name="csvFile">File whose rows are modified in place.</param>
    /// <exception cref="ArgumentNullException"><paramref name="csvFile"/> is <c>null</c>.</exception>
    public void Apply(CsvFile csvFile)
    {
        if (csvFile == null) throw new ArgumentNullException(nameof(csvFile));

        _logger.LogDebug("Processor: Applying rules to file {File}...", csvFile.OriginalFileName);

        if (_processorOptions.Rules == null || !_processorOptions.Rules.Any())
        {
            _logger.LogWarning("Processor: Cannot process file {File} because there are no rules defined",
                csvFile.OriginalFileName);
            return;
        }

        // indices are only needed for log output; counting while iterating avoids repeated IndexOf lookups
        var ruleIndex = 0;
        foreach (var rule in _processorOptions.Rules)
        {
            ApplyRuleToFile(csvFile, rule, ruleIndex);
            ruleIndex++;
        }
    }

    // Applies one rule to all rows of the file; rules without conditions or without changes are skipped.
    private void ApplyRuleToFile(CsvFile csvFile, Rule rule, int ruleIndex)
    {
        if (rule.Conditions == null || !rule.Conditions.Any())
        {
            _logger.LogWarning("Processor: Skipping Rule at index {Index} because it has no conditions",
                ruleIndex);
            return;
        }

        // a rule configured without changes would otherwise fail with a NullReferenceException per row
        if (rule.Changes == null || rule.Changes.Count == 0)
        {
            _logger.LogWarning("Processor: Skipping Rule at index {Index} because it has no changes",
                ruleIndex);
            return;
        }

        var rowIndex = 0;
        foreach (var row in csvFile.Rows)
        {
            ApplyRuleToRow(row, rowIndex, rule, ruleIndex);
            rowIndex++;
        }
    }

    // Applies all changes of the rule to the row when the row meets the rule's conditions.
    private void ApplyRuleToRow(CsvRow row, int rowIndex, Rule rule, int ruleIndex)
    {
        if (!MeetsRowConditions(row, rule)) return;

        _logger.LogTrace("Processor: Row at index {RowIndex} meets rule at index {RuleIndex}. Applying change(s)...",
            rowIndex, ruleIndex);

        for (var changeIndex = 0; changeIndex < rule.Changes.Count; changeIndex++)
        {
            try
            {
                ApplyChangeToRow(row, rowIndex, rule.Changes[changeIndex], changeIndex, ruleIndex);
            }
            catch (Exception e)
            {
                // a failing change must not prevent the remaining changes from being applied
                _logger.LogError(e,
                    "Processor: Error occured while applying change at index {ChangeIndex} to row at index {RowIndex}",
                    changeIndex, rowIndex);
            }
        }
    }

    // Applies a single change to the row according to the change's mode.
    private void ApplyChangeToRow(CsvRow row, int rowIndex, Change change, int changeIndex, int ruleIndex)
    {
        if (string.IsNullOrWhiteSpace(change.Field))
        {
            _logger.LogWarning(
                "Processor: Not applying change at index {ChangeIndex} for rule at index {RuleIndex} because no field name given",
                changeIndex, ruleIndex);
            return;
        }

        _logger.LogTrace(
            "Processor: Row at index {RowIndex}: Applying change at index {ChangeIndex}: Field={Field}, Value={Value}, Mode={Mode}, Index={Index}",
            rowIndex, changeIndex, change.Field, change.Value, change.Mode, change.FieldIndex);

        switch (change.Mode)
        {
            case ChangeMode.Add:
                row.AddField(change.Field, change.Value);
                break;
            case ChangeMode.AddOrUpdate:
                row.AddOrUpdateField(change.Field, change.Value, change.FieldIndex);
                break;
            default:
#pragma warning disable CA2208
                throw new ArgumentOutOfRangeException(nameof(change.Mode),
                    $"Unknown value {change.Mode} for {nameof(ChangeMode)}");
#pragma warning restore CA2208
        }
    }

    // A row meets a rule when, for every condition (AND link), at least one field of the row
    // has the condition's field name and the condition's value.
    private static bool MeetsRowConditions(CsvRow row, Rule rule)
    {
        return rule.Conditions.All(condition =>
            row.Fields.Any(field =>
                field.Column.Name == condition.Field &&
                field.Value == condition.Value));
    }
}
}
Loading

0 comments on commit 8f431c2

Please sign in to comment.