Skip to content

Commit

Permalink
feat: add ability to set charset for export (#31)
Browse files Browse the repository at this point in the history
* fix: replace UDE.CSharp with Ude.NetStandard

because Ude.NetStandard is actually compatible with net5.0

* feat: add ability to set charset for export

because that can sometimes be an issue for certain providers

* test: fix failing test
  • Loading branch information
wgnf authored Nov 7, 2021
1 parent b769c99 commit bc17311
Show file tree
Hide file tree
Showing 14 changed files with 131 additions and 46 deletions.
29 changes: 18 additions & 11 deletions src/CsvProc9000.Tests/Csv/CsvExporterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,27 @@ public async Task Export_Does_Not_Accept_Invalid_Parameters()
var (context, _) = CreateContext();
var sut = context.Build();

await Assert.ThrowsAnyAsync<ArgumentNullException>(() => sut.ExportAsync(null!, "something", ","));

await Assert.ThrowsAnyAsync<ArgumentException>(() => sut.ExportAsync(new CsvFile("something"), null!, ","));
await Assert.ThrowsAnyAsync<ArgumentNullException>(() => sut.ExportAsync(null!, "something", ",", "charset"));
await Assert.ThrowsAnyAsync<ArgumentException>(() => sut.ExportAsync(new CsvFile("something"), null!, ",", "charset"));
await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), string.Empty, ","));
await Assert.ThrowsAnyAsync<ArgumentException>(() => sut.ExportAsync(new CsvFile("something"), " ", ","));
sut.ExportAsync(new CsvFile("something"), string.Empty, ",", "charset"));
await Assert.ThrowsAnyAsync<ArgumentException>(() => sut.ExportAsync(new CsvFile("something"), " ", ",", "charset"));


await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", null!));
sut.ExportAsync(new CsvFile("something"), "something", null!, "charset"));
await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", string.Empty, "charset"));
await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", " ", "charset"));

await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", ",", null!));
await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", string.Empty));
sut.ExportAsync(new CsvFile("something"), "something", ",", string.Empty));
await Assert.ThrowsAnyAsync<ArgumentException>(() =>
sut.ExportAsync(new CsvFile("something"), "something", " "));
sut.ExportAsync(new CsvFile("something"), "something", ",", " "));
}

[Fact]
Expand All @@ -71,7 +78,7 @@ public async Task Export_Makes_Sure_That_Destination_Directory_Exists()
.Returns(fileInfo.Object);

var file = new CsvFile("something");
await sut.ExportAsync(file, "some file", ",");
await sut.ExportAsync(file, "some file", ",", "charset");

directoryInfo.Verify(di => di.Create(), Times.Once);
}
Expand All @@ -98,7 +105,7 @@ public async Task Export_Writes_Data_To_File()
file.AddRow(row1);
file.AddRow(row2);

await sut.ExportAsync(file, "something", ",");
await sut.ExportAsync(file, "something", ",", "charset");

// 3 = 1 header, 2 rows
writer.Verify(w => w.NextRecordAsync(), Times.Exactly(3));
Expand All @@ -120,7 +127,7 @@ private static (ArrangeContext<CsvExporter>, Mock<IWriter>) CreateContext()
var writer = new Mock<IWriter>();
context
.For<ICsvWriterFactory>()
.Setup(wf => wf.Create(It.IsAny<string>(), It.IsAny<string>()))
.Setup(wf => wf.Create(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<string>()))
.Returns(writer.Object);

var directoryInfo = new Mock<IDirectoryInfo>();
Expand Down
15 changes: 12 additions & 3 deletions src/CsvProc9000.Tests/Jobs/CsvProcessJobWorkerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,13 @@ public async Task WorkOn_Processes_Correct_File(bool shouldDeleteFile)
context
.For<ICsvExporter>()
.Verify(
exporter => exporter.ExportAsync(It.IsAny<CsvFile>(), It.IsAny<string>(), It.IsAny<string>(),
It.IsAny<bool>()), Times.Once);
exporter => exporter.ExportAsync(
It.IsAny<CsvFile>(),
It.IsAny<string>(),
It.IsAny<string>(),
It.IsAny<string>(),
It.IsAny<bool>()),
Times.Once);


if (shouldDeleteFile)
Expand Down Expand Up @@ -156,7 +161,11 @@ public async Task WorkOn_Cleans_Up_When_Export_Fails()

context
.For<ICsvExporter>()
.Setup(exporter => exporter.ExportAsync(It.IsAny<CsvFile>(), It.IsAny<string>(), It.IsAny<string>(),
.Setup(exporter => exporter.ExportAsync(
It.IsAny<CsvFile>(),
It.IsAny<string>(),
It.IsAny<string>(),
It.IsAny<string>(),
It.IsAny<bool>()))
.Throws<Exception>();

Expand Down
1 change: 1 addition & 0 deletions src/CsvProc9000/Csv/Contracts/ICsvExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Task ExportAsync(
[NotNull] CsvFile file,
[NotNull] string destinationFileName,
[NotNull] string delimiter,
[NotNull] string charset,
bool wrapValuesInQuotes = false);
}
}
2 changes: 1 addition & 1 deletion src/CsvProc9000/Csv/Contracts/ICsvWriterFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ namespace CsvProc9000.Csv.Contracts
{
internal interface ICsvWriterFactory
{
IWriter Create(string file, string delimiter);
IWriter Create(string file, string delimiter, string charset);
}
}
5 changes: 4 additions & 1 deletion src/CsvProc9000/Csv/CsvExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@ public async Task ExportAsync(
CsvFile file,
string destinationFileName,
string delimiter,
string charset,
bool wrapValuesInQuotes = false)
{
if (file == null) throw new ArgumentNullException(nameof(file));
if (string.IsNullOrWhiteSpace(destinationFileName))
throw new ArgumentException("Value cannot be null or whitespace.", nameof(destinationFileName));
if (string.IsNullOrWhiteSpace(delimiter))
throw new ArgumentException("Value cannot be null or whitespace.", nameof(delimiter));
if (string.IsNullOrWhiteSpace(charset))
throw new ArgumentException("Value cannot be null or whitespace.", nameof(charset));

MakeSureDirectoryExists(destinationFileName);

await using var writer = _writerFactory.Create(destinationFileName, delimiter);
await using var writer = _writerFactory.Create(destinationFileName, delimiter, charset);

var columns = GetColumns(file).ToList();

Expand Down
1 change: 1 addition & 0 deletions src/CsvProc9000/Csv/CsvImporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public async Task<Result<CsvFile>> ImportAsync(
var file = await DoImportAsync(fileName, delimiter);
return new Result<CsvFile>(true, file);
}
// TODO: I have to rethink this thing... it's weird...
catch (IOException ex)
{
return new Result<CsvFile>(false, failureMessage: ex.Message);
Expand Down
30 changes: 5 additions & 25 deletions src/CsvProc9000/Csv/CsvReaderFactory.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
using CsvHelper;
using CsvHelper.Configuration;
using CsvProc9000.Csv.Contracts;
using Microsoft.Extensions.Logging;
using CsvProc9000.Utils.Contracts;
using System;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.IO.Abstractions;
using System.Text;
using Ude;

namespace CsvProc9000.Csv
{
[ExcludeFromCodeCoverage] // simple factory
internal sealed class CsvReaderFactory : ICsvReaderFactory
{
private readonly IFileSystem _fileSystem;
private readonly ILogger<CsvReaderFactory> _logger;
private readonly IFileHelper _fileHelper;

public CsvReaderFactory(
[NotNull] IFileSystem fileSystem,
[NotNull] ILogger<CsvReaderFactory> logger)
[NotNull] IFileHelper fileHelper)
{
_fileSystem = fileSystem ?? throw new ArgumentNullException(nameof(fileSystem));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_fileHelper = fileHelper ?? throw new ArgumentNullException(nameof(fileHelper));
}

public IReader Create(string file, string delimiter)
Expand All @@ -33,7 +31,7 @@ public IReader Create(string file, string delimiter)
* We're using FileShare.None here, so that no other process can modify the file while we're reading it
* and additionally we'll be 'notified' when another process is still writing to a file
*/
var encoding = GetEncoding(file);
var encoding = _fileHelper.DetectEncodingOfFile(file);
var fileStream = _fileSystem.FileStream.Create(file, FileMode.Open, FileAccess.Read, FileShare.None);
var streamReader = new StreamReader(fileStream, encoding);

Expand All @@ -47,23 +45,5 @@ public IReader Create(string file, string delimiter)

return reader;
}

private Encoding GetEncoding(string fileName)
{
using var fileStream = _fileSystem.FileStream.Create(fileName, FileMode.Open, FileAccess.Read);

_logger.LogTrace("Trying to detect charset of '{File}'...", fileName);

var detector = new CharsetDetector();
detector.Feed(fileStream);
detector.DataEnd();
var charset = detector.Charset;

_logger.LogTrace("Found charset '{Charset}' with a confidence of {Confidence}",
charset, detector.Confidence);

var encoding = CodePagesEncodingProvider.Instance.GetEncoding(charset);
return encoding;
}
}
}
17 changes: 13 additions & 4 deletions src/CsvProc9000/Csv/CsvWriterFactory.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
using CsvHelper;
using CsvHelper.Configuration;
using CsvProc9000.Csv.Contracts;
using CsvProc9000.Utils.Contracts;
using System;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Text;

namespace CsvProc9000.Csv
{
[ExcludeFromCodeCoverage] // simple factory
internal sealed class CsvWriterFactory : ICsvWriterFactory
{
public IWriter Create(string file, string delimiter)
private readonly IFileHelper _fileHelper;

public CsvWriterFactory([NotNull] IFileHelper fileHelper)
{
_fileHelper = fileHelper ?? throw new ArgumentNullException(nameof(fileHelper));
}

public IWriter Create(string file, string delimiter, string charset)
{
var encoding = _fileHelper.GetEncodingFromCharsetString(charset);
var csvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Delimiter = delimiter,
Encoding = Encoding.UTF8
Encoding = encoding
};

var streamWriter = new StreamWriter(file);
var streamWriter = new StreamWriter(file, false, encoding);
var writer = new CsvWriter(streamWriter, csvConfiguration);
return writer;
}
Expand Down
2 changes: 1 addition & 1 deletion src/CsvProc9000/CsvProc9000.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<PackageReference Include="Serilog.Settings.Configuration" Version="3.3.0" />
<PackageReference Include="System.IO.Abstractions" Version="13.2.47" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="5.0.0" />
<PackageReference Include="UDE.CSharp" Version="1.1.0" />
<PackageReference Include="Ude.NetStandard" Version="1.2.0" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 2 additions & 0 deletions src/CsvProc9000/Jobs/CsvProcessJobWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ private async Task<CsvFile> ImportFileAsync(CsvProcessJob job, Guid jobThreadId)
* because it's still being copied or written to, so we have to wait until that is finished
*/
CsvFile csvFile = null;
// TODO: I have to rethink this thing... it's weird...
while (csvFile == null)
{
var result =
Expand Down Expand Up @@ -119,6 +120,7 @@ private async Task<bool> ExportAsync(
await _csvExporter.ExportAsync(
file, destinationFileName,
_csvProcessorOptions.OutboxDelimiter,
_csvProcessorOptions.OutboxFileCharset,
_csvProcessorOptions.OutboxValuesInQuotes);

return true;
Expand Down
3 changes: 3 additions & 0 deletions src/CsvProc9000/Options/CsvProcessorOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public class CsvProcessorOptions
[UsedImplicitly]
public bool OutboxValuesInQuotes { get; set; }

[UsedImplicitly]
public string OutboxFileCharset { get; set; } = "UTF-8";

[UsedImplicitly]
public List<Rule> Rules { get; set; }
}
Expand Down
7 changes: 7 additions & 0 deletions src/CsvProc9000/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
using CsvProc9000.Jobs;
using CsvProc9000.Jobs.Contracts;
using CsvProc9000.Options;
using CsvProc9000.Utils;
using CsvProc9000.Utils.Contracts;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
Expand All @@ -12,6 +14,7 @@
using System;
using System.Diagnostics.CodeAnalysis;
using System.IO.Abstractions;
using System.Text;
using System.Threading.Tasks;
using ILogger = Serilog.ILogger;

Expand Down Expand Up @@ -88,9 +91,13 @@ private static void ConfigureServices(HostBuilderContext context, IServiceCollec

services.AddSingleton<ICsvProcessJobThreadFactory, CsvProcessJobThreadFactory>();

services.AddSingleton<IFileHelper, FileHelper>();

services.AddHostedService<CsvFileWatcherBackgroundService>();
services.AddHostedService<CsvExistingFileWatcherBackgroundService>();
services.AddHostedService<CsvProcessJobThreadSpawnerBackgroundService>();

Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
}
}
}
10 changes: 10 additions & 0 deletions src/CsvProc9000/Utils/Contracts/IFileHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using System.Text;

namespace CsvProc9000.Utils.Contracts
{
/// <summary>
/// Helper contract for resolving the <see cref="Encoding"/> to use when reading or writing files.
/// </summary>
internal interface IFileHelper
{
/// <summary>
/// Determines the <see cref="Encoding"/> of an existing file.
/// </summary>
/// <param name="fileName">Path of the file whose encoding should be determined.</param>
/// <returns>The <see cref="Encoding"/> that should be used to read the file.</returns>
Encoding DetectEncodingOfFile(string fileName);
/// <summary>
/// Resolves an <see cref="Encoding"/> from a charset name.
/// </summary>
/// <param name="charset">Name of the charset; the configured default elsewhere in this change is "UTF-8".</param>
/// <returns>The <see cref="Encoding"/> that corresponds to <paramref name="charset"/>.</returns>
Encoding GetEncodingFromCharsetString(string charset);
}
}
53 changes: 53 additions & 0 deletions src/CsvProc9000/Utils/FileHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using CsvProc9000.Utils.Contracts;
using Microsoft.Extensions.Logging;
using System;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.IO.Abstractions;
using System.Text;
using Ude;

namespace CsvProc9000.Utils
{
    /// <summary>
    /// Resolves <see cref="Encoding"/> instances, either by inspecting the content of a file
    /// or by looking up a charset name.
    /// </summary>
    [ExcludeFromCodeCoverage] // mostly untestable because of file-magic
    internal sealed class FileHelper : IFileHelper
    {
        private readonly IFileSystem _fileSystem;
        private readonly ILogger<FileHelper> _logger;

        /// <summary>
        /// Creates a new <see cref="FileHelper"/>.
        /// </summary>
        /// <param name="fileSystem">File system abstraction used to open the file to inspect.</param>
        /// <param name="logger">Logger used for trace/warning output during detection.</param>
        /// <exception cref="ArgumentNullException">When any of the dependencies is <c>null</c>.</exception>
        public FileHelper(
            [NotNull] IFileSystem fileSystem,
            [NotNull] ILogger<FileHelper> logger)
        {
            _fileSystem = fileSystem ?? throw new ArgumentNullException(nameof(fileSystem));
            _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        }

        /// <summary>
        /// Detects the charset of the given file and returns the matching <see cref="Encoding"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to inspect.</param>
        /// <returns>
        /// The encoding for the detected charset, or <see cref="Encoding.UTF8"/> when the detector
        /// could not determine a charset.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// When a charset was detected but no encoding is available for it.
        /// </exception>
        public Encoding DetectEncodingOfFile(string fileName)
        {
            using var fileStream = _fileSystem.FileStream.Create(fileName, FileMode.Open, FileAccess.Read);

            _logger.LogTrace("Trying to detect charset of '{File}'", fileName);

            var detector = new CharsetDetector();
            detector.Feed(fileStream);
            detector.DataEnd();
            var charset = detector.Charset;

            // The detector reports no charset when it cannot decide (e.g. for an empty file).
            // Passing that on to the encoding lookup would only yield an unrelated
            // ArgumentNullException, so fall back to UTF-8 instead.
            if (string.IsNullOrWhiteSpace(charset))
            {
                _logger.LogWarning("Could not detect charset of '{File}', falling back to '{Charset}'",
                    fileName, Encoding.UTF8.WebName);
                return Encoding.UTF8;
            }

            _logger.LogTrace("Found charset '{Charset}' with a confidence of {Confidence}",
                charset, detector.Confidence);

            return GetEncodingFromCharsetString(charset);
        }

        /// <summary>
        /// Resolves the <see cref="Encoding"/> for the given charset name.
        /// </summary>
        /// <param name="charset">Name of the charset, e.g. "UTF-8".</param>
        /// <returns>The encoding registered under <paramref name="charset"/>.</returns>
        /// <exception cref="ArgumentNullException">When <paramref name="charset"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">When no encoding is known for <paramref name="charset"/>.</exception>
        public Encoding GetEncodingFromCharsetString(string charset)
        {
            if (charset == null) throw new ArgumentNullException(nameof(charset));

            try
            {
                // Encoding.GetEncoding never returns null; it throws for unknown names,
                // so the failure has to be translated here rather than via a null check.
                return Encoding.GetEncoding(charset);
            }
            catch (ArgumentException ex)
            {
                throw new ArgumentException($"Could not find encoding for charset '{charset}'", nameof(charset), ex);
            }
        }
    }
}

0 comments on commit bc17311

Please sign in to comment.